neutralMesonTSSA/macro/LumiPolarAna.py

0001 import re
0002 from glob import glob
0003 import numpy as np
0004 import pandas as pd
0005 import matplotlib.pyplot as plt
0006 import sys
0007 # import seaborn as sns
0008
0009 # sns.set(style='whitegrid')
0010
class LumiPolarAna:
    """Harvest per-run polarization/luminosity numbers from job logs.

    The condor stdout logs under ``logdir_nm`` are scanned for
    comma-separated run summary lines, GL1p scaler lines, ``real XmY.YYYs``
    timing lines and ``error:`` lines.  The summary lines are turned into a
    pandas DataFrame (one row per run number) from which luminosity-weighted
    averages and diagnostic plots are produced.

    Class-level attributes are configuration shared by all instances; the
    collected data lives on the instance (see ``__init__``).
    """

    # Directories holding the job logs that are scanned (``*.out`` files).
    logdir_nm = 'condor/out/NM'
    logdir_tssa = 'condor/out/tssahists'
    # Logs whose job number (taken from ``job_<N>`` in the file name) is
    # larger than this are skipped by ScanFile.
    max_jobnum = 6892

    # Patterns are compiled once here instead of once per scanned line.
    _JOBNUM_RE = re.compile(r'job_(\d+)')
    _RUNINFO_RE = re.compile(r'^\d{5},.*$')  # runnum, pol, luminosity
    _SCALERS_RE = re.compile(r'MBDNS GL1p scalers: \[(\d.*)\]$')  # GL1p scalers
    _WALLTIME_RE = re.compile(r'real\s+(\d+)m(\d+\.\d+)s')  # job execution time
    _ERROR_RE = re.compile(r'^error:.*$')

    # Column order matches the field order of a run summary line.
    _BASE_COLUMNS = ('RunNum', 'BluePol', 'BlueLumiUp', 'BlueLumiDown',
                     'YellowPol', 'YellowLumiUp', 'YellowLumiDown')

    def __init__(self):
        """Create an analyzer with empty, instance-private collections."""
        # These used to be mutable class attributes, which made every
        # instance append to (and read from) the very same lists.
        self.matches = []    # raw run summary lines found in the logs
        self.scalers = []    # raw GL1p scaler strings (text between [ ])
        self.runtimes = []   # job run times in minutes
        self.errors = []     # '<jobnum>, error: ...' strings
        self.has_scalers = False
        self.runinfo = {}    # run number -> list of parsed row values
        self.df = pd.DataFrame()

    def ScanFile(self, filename):
        """Scan one log file and append whatever it contains to the instance.

        Files whose name contains ``job_<N>`` with N > ``max_jobnum`` are
        skipped without being opened.  A message is printed when the file
        holds no run summary line.

        Args:
            filename: path of the log file to read.
        """
        jobnum = ''
        m0 = self._JOBNUM_RE.search(filename)
        if m0:
            jobnum = m0.group(1)
            if int(jobnum) > self.max_jobnum:
                return

        hasmatch = False
        with open(filename, 'r') as f:
            for line in f:
                m = self._RUNINFO_RE.search(line)
                if m:
                    self.matches.append(m.group())
                    hasmatch = True
                m2 = self._SCALERS_RE.search(line)
                if m2:
                    self.scalers.append(m2.group(1))
                m3 = self._WALLTIME_RE.search(line)
                if m3:
                    # Stored in minutes (PlotRunTimes labels its axis in min).
                    self.runtimes.append(int(m3.group(1)) + float(m3.group(2)) / 60)
                m4 = self._ERROR_RE.search(line)
                if m4:
                    self.errors.append(jobnum + ', ' + m4.group(0))
        if not hasmatch:
            print(f'No matches found in log {filename}')

    def ScanAllFiles(self):
        """Scan every ``*.out`` log in ``logdir_nm`` and dump the errors.

        Files are processed in sorted path order.  All error lines collected
        on this instance so far are written to ``NMerrors.txt`` in the
        current directory, one per line.
        """
        files = sorted(glob(self.logdir_nm + '/*.out'))
        print(f'Reading {len(files)} log files')
        for i, file in enumerate(files):
            if i % 1000 == 0:
                print(f'Reading file {i}')
            self.ScanFile(file)
        with open('NMerrors.txt', 'w') as outfile:
            outfile.writelines(err + '\n' for err in self.errors)

    def ScanTSSALogs(self):
        """Return the run times, in seconds, found in the ``logdir_tssa`` logs.

        Returns:
            list of floats, one per ``real XmY.YYYs`` line found.
        """
        tssa_runtimes = []
        # Sorted so the returned order does not depend on directory order.
        for file in sorted(glob(self.logdir_tssa + '/*.out')):
            with open(file, 'r') as f:
                for line in f:
                    m = self._WALLTIME_RE.search(line)
                    if m:
                        tssa_runtimes.append(int(m.group(1)) * 60 + float(m.group(2)))
        return tssa_runtimes

    def MakeDataFrame(self):
        """Build ``self.df`` (one row per run) from the collected summary lines.

        Each line is split on commas into run number, blue polarization,
        blue up/down luminosity, yellow polarization and yellow up/down
        luminosity.  The first line seen for a run number defines its row;
        later lines for the same run are only checked for identical
        polarization values.  If they differ, a message is printed and the
        method returns early, leaving ``self.df`` untouched.

        A ``GL1P`` column holding the raw scaler string is added only when
        exactly one scaler string was collected per summary line.
        """
        print('Found', len(self.matches), 'matches,', len(self.scalers), 'scalers')
        # Loop-invariant: scalers can only be paired with summary lines by
        # position, so they are used only when the two lists line up.
        attach_scalers = bool(self.matches) and len(self.matches) == len(self.scalers)
        if attach_scalers:
            self.has_scalers = True

        for count, line in enumerate(self.matches):
            strvalues = line.split(',')
            runno = int(strvalues[0])
            values = [
                runno,
                float(strvalues[1]),
                # Luminosities may be printed in float notation; store as int.
                int(float(strvalues[2])),
                int(float(strvalues[3])),
                float(strvalues[4]),
                int(float(strvalues[5])),
                int(float(strvalues[6])),
            ]
            if attach_scalers:
                values.append(self.scalers[count])

            oldvalues = self.runinfo.get(runno)
            if oldvalues is None:
                self.runinfo[runno] = values
            elif (oldvalues[1] != values[1]) or (oldvalues[4] != values[4]):
                print('Run #', runno, ': Got different polarization values!', sep='')
                return

        columns = list(self._BASE_COLUMNS)
        if self.has_scalers:
            columns.append('GL1P')
        # One constructor call instead of growing the frame row by row; this
        # also lets pandas infer numeric dtypes for the numeric columns.
        self.df = pd.DataFrame(list(self.runinfo.values()), columns=columns)
        print(self.df)
        print(self.df.describe())

    def ReadRunTimes(self, filename):
        """Replace ``self.runtimes`` with the values stored in ``filename``.

        The file is expected to hold one number per line, as written by
        WriteRunTimes.  Blank lines are ignored.

        Args:
            filename: path of the run time file.
        """
        with open(filename, 'r') as f:
            # float() tolerates the trailing newline.  The previous
            # ``line[:-2]`` also chopped off the last digit of every value.
            self.runtimes = [float(line) for line in f if line.strip()]

    def GetDataFrame(self, filename):
        """Load ``self.df`` from a CSV file.

        Args:
            filename: path of the CSV file.

        Returns:
            True when the file was read (the frame is printed), False when
            it is missing, unreadable or cannot be parsed.
        """
        try:
            self.df = pd.read_csv(filename)
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; pandas' parser and
            # empty-data errors derive from ValueError.
            return False
        print(self.df)
        return True

    def WriteRunTimes(self, filename):
        """Write ``self.runtimes`` to ``filename``, one value per line."""
        with open(filename, 'w') as f:
            f.writelines(str(time) + '\n' for time in self.runtimes)

    def WriteDataFrame(self, filename):
        """Write ``self.df`` to ``filename`` as CSV (pandas defaults)."""
        self.df.to_csv(filename)

    @staticmethod
    def _lumi_weighted_mean(values, weights):
        """Return sum(weights * values) / sum(weights)."""
        return (weights * values).sum() / weights.sum()

    def GetRelLumi(self):
        """Print the up/down relative luminosity of both beams.

        Two numbers are printed per beam: the ratio of the summed up and
        down luminosities ("naive"), and the average of the run-by-run
        ratio weighted by each run's total luminosity ("correct").

        Returns:
            (blue, yellow) tuple of the luminosity-weighted values.
        """
        df = self.df
        blue_naive = df['BlueLumiUp'].sum() / df['BlueLumiDown'].sum()
        yellow_naive = df['YellowLumiUp'].sum() / df['YellowLumiDown'].sum()
        print(f'Naive method blue relative luminosity = {blue_naive}')
        print(f'Naive method yellow relative luminosity = {yellow_naive}')

        blue_rel = self._lumi_weighted_mean(
            df['BlueLumiUp'] / df['BlueLumiDown'],
            df['BlueLumiUp'] + df['BlueLumiDown'])
        yellow_rel = self._lumi_weighted_mean(
            df['YellowLumiUp'] / df['YellowLumiDown'],
            df['YellowLumiUp'] + df['YellowLumiDown'])
        print(f'Correct method blue relative luminosity = {blue_rel}')
        print(f'Correct method yellow relative luminosity = {yellow_rel}')
        return blue_rel, yellow_rel

    def GetPol(self):
        """Print the luminosity-weighted average polarization of both beams.

        Returns:
            (blue, yellow) tuple of the printed values.
        """
        df = self.df
        blue_pol = self._lumi_weighted_mean(
            df['BluePol'], df['BlueLumiUp'] + df['BlueLumiDown'])
        yellow_pol = self._lumi_weighted_mean(
            df['YellowPol'], df['YellowLumiUp'] + df['YellowLumiDown'])
        print(f'Correct method blue polarization = {blue_pol}')
        print(f'Correct method yellow polarization = {yellow_pol}')
        return blue_pol, yellow_pol

    def PlotRunTimes(self, outprefix='python_plots/'):
        """Histogram ``self.runtimes`` into ``<outprefix>runtimes.png``."""
        x = np.array(self.runtimes)
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18,12))
        ax.hist(x, 100)
        ax.set_title('neutralMesonTSSA Job Run Times')
        ax.set_xlabel('Run Time (min)')
        ax.set_ylabel('Counts')
        fig.savefig(outprefix + 'runtimes.png')
        # The figure is not needed once saved; release it.
        plt.close(fig)

    def PlotTSSATimes(self, outprefix='python_plots/'):
        """Histogram the ScanTSSALogs() times into ``<outprefix>tssa_runtimes.png``."""
        x = np.array(self.ScanTSSALogs())
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18,12))
        ax.hist(x, 20)
        ax.set_title('TSSAhistmaker Job Run Times')
        ax.set_xlabel('Run Time (sec)')
        ax.set_ylabel('Counts')
        fig.savefig(outprefix + 'tssa_runtimes.png')
        plt.close(fig)

    @staticmethod
    def _plot_blue_yellow(x, blue, yellow, quantity, combined_label, title_stem, outprefix):
        """Save the run-by-run scatter and the distribution plots of a quantity.

        Writes ``<outprefix>runbyrun.png`` (blue, yellow and overlaid scatter
        panels versus ``x``) and ``<outprefix>dist.png`` (one 20-bin
        histogram per beam).

        Args:
            x: values for the horizontal axis of the scatter panels.
            blue, yellow: per-run values for the blue and yellow beam.
            quantity: text appended to 'Blue Beam ' / 'Yellow Beam ' labels.
            combined_label: y label of the overlaid scatter panel.
            title_stem: start of both figure titles.
            outprefix: prefix of the two output file names.
        """
        fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(18,6))

        ax1.scatter(x, blue, color='tab:blue', s=2.0)
        ax1.set_xlabel('Run Number')
        ax1.tick_params(axis='x', labelrotation=60)
        ax1.set_ylabel('Blue Beam ' + quantity)

        ax2.scatter(x, yellow, color='orange', s=2.0)
        ax2.set_xlabel('Run Number')
        ax2.tick_params(axis='x', labelrotation=60)
        ax2.set_ylabel('Yellow Beam ' + quantity)

        # Only the overlaid panel carries labels, so the figure legend
        # lists each beam once.
        ax3.scatter(x, blue, color='tab:blue', s=2.0, label='Blue Beam')
        ax3.scatter(x, yellow, color='orange', s=2.0, label='Yellow Beam')
        ax3.set_xlabel('Run Number')
        ax3.tick_params(axis='x', labelrotation=60)
        ax3.set_ylabel(combined_label)

        fig.suptitle(title_stem + ' Across Runs')
        fig.legend()
        fig.savefig(outprefix + 'runbyrun.png')
        plt.close(fig)

        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12,6))

        ax1.hist(blue, bins=20, color='tab:blue', label='Blue Beam')
        ax1.set_xlabel('Blue Beam ' + quantity)
        ax1.set_ylabel('Counts')
        ax2.hist(yellow, bins=20, color='orange', label='Yellow Beam')
        ax2.set_xlabel('Yellow Beam ' + quantity)
        ax2.set_ylabel('Counts')
        fig.suptitle(title_stem + ' Distributions')
        fig.legend()
        fig.savefig(outprefix + 'dist.png')
        plt.close(fig)

    def PlotPol(self, outprefix='python_plots/pol_'):
        """Plot the beam polarizations of ``self.df`` versus run number.

        Writes ``<outprefix>runbyrun.png`` and ``<outprefix>dist.png``.
        """
        self._plot_blue_yellow(
            self.df['RunNum'], self.df['BluePol'], self.df['YellowPol'],
            'Polarization (%)', 'Beam Polarization (%)', 'Beam Polarization',
            outprefix)

    def PlotRel(self, outprefix='python_plots/rel_'):
        """Plot the run-by-run up/down luminosity ratios of ``self.df``.

        Writes ``<outprefix>runbyrun.png`` and ``<outprefix>dist.png``.
        """
        brel = self.df['BlueLumiUp']/self.df['BlueLumiDown']
        yrel = self.df['YellowLumiUp']/self.df['YellowLumiDown']
        self._plot_blue_yellow(
            self.df['RunNum'], brel, yrel,
            'Relative Luminosity', 'Relative Luminosity', 'Relative Luminosity',
            outprefix)

if __name__ == "__main__":
    # Passing '--rewrite' as the first argument forces the logs to be
    # rescanned even when the cached CSV can be loaded.
    force_rescan = sys.argv[1:2] == ['--rewrite']
    summary_csv = 'lumipol.csv'
    runtime_cache = 'runtimes.csv'

    analyzer = LumiPolarAna()
    cache_loaded = analyzer.GetDataFrame(summary_csv)

    if force_rescan or not cache_loaded:
        # Rebuild both cache files from the job logs.
        analyzer.ScanAllFiles()
        analyzer.WriteRunTimes(runtime_cache)
        analyzer.MakeDataFrame()
        analyzer.WriteDataFrame(summary_csv)
    if not force_rescan:
        # Without '--rewrite' the run times always come from the cache file.
        analyzer.ReadRunTimes(runtime_cache)

    analyzer.PlotRunTimes()
    analyzer.PlotTSSATimes()
    analyzer.GetRelLumi()
    analyzer.GetPol()
    # The polarization and relative-luminosity plots are currently disabled:
    # analyzer.PlotPol()
    # analyzer.PlotRel()