File indexing completed on 2025-08-06 08:21:02
0001
0002 import os
0003 import pyodbc
0004 import subprocess
0005
def get_unique_run_dataset_pairs(cursor):
    """Return the set of (runnumber, dataset) pairs with enough DST_CALO events.

    The query looks at rows of the ``datasets`` table whose filename starts
    with ``DST_CALO`` and whose run number is above 42635, groups them by run
    and dataset, and keeps only the groups whose summed ``events`` exceed
    500000.

    Args:
        cursor: Database cursor offering ``execute`` and ``fetchall``; rows
            must expose ``runnumber`` and ``dataset`` attributes.

    Returns:
        A set of ``(runnumber, dataset)`` tuples.
    """
    sql = """
    SELECT runnumber, dataset
    FROM datasets
    WHERE filename LIKE 'DST_CALO%' AND runnumber > 42635
    GROUP BY runnumber, dataset
    HAVING SUM(events) > 500000;
    """
    cursor.execute(sql)

    # A set collapses any duplicate pairs the query might return.
    pairs = set()
    for record in cursor.fetchall():
        pairs.add((record.runnumber, record.dataset))
    return pairs
0017
def get_total_events(cursor, run):
    """Sum DST_CALO events per dataset for segments that already have histograms.

    DST_CALO rows of the ``datasets`` table matching ``run`` are joined to the
    distinct (segment, dataset) combinations of the HIST_CALO rows for the
    same run, and their ``events`` are summed per dataset.

    Args:
        cursor: Database cursor offering ``execute`` and ``fetchall``.
        run: Run number, interpolated into the filename ``LIKE`` patterns.

    Returns:
        Whatever ``cursor.fetchall()`` yields: one row per dataset holding the
        dataset name and its total event count.
    """
    sql = f"""
    SELECT
    d.dataset,
    SUM(d.events) AS total_events
    FROM
    datasets d
    JOIN
    (SELECT DISTINCT segment, dataset
    FROM datasets
    WHERE filename LIKE 'HIST_CALO%{run}%') h
    ON
    d.segment = h.segment AND d.dataset = h.dataset
    WHERE
    d.filename LIKE 'DST_CALO%{run}%'
    GROUP BY
    d.dataset;
    """
    cursor.execute(sql)
    return cursor.fetchall()
0039
def get_file_paths(cursor, run, dataset):
    """List the full paths of the HIST_CALO ROOT files for one run and dataset.

    Args:
        cursor: Database cursor offering ``execute`` and ``fetchall``; rows
            must expose a ``full_file_path`` attribute.
        run: Run number; matched after a literal ``-000`` prefix in the
            logical file name.
        dataset: Dataset tag that must appear in the logical file name.

    Returns:
        A list with the ``full_file_path`` of every matching row of the
        ``files`` table, in the order the cursor returns them.
    """
    sql = f"""
    SELECT full_file_path
    FROM files
    WHERE lfn LIKE 'HIST_CALO%{dataset}%-000{run}%.root';
    """
    cursor.execute(sql)

    paths = []
    for record in cursor.fetchall():
        paths.append(record.full_file_path)
    return paths
0049
def main():
    """Merge per-run calorimeter QA histograms and build EMCal hot-tower maps.

    Workflow:
      1. Connect to the ``FileCatalog`` DSN and fetch every (run, dataset)
         pair returned by ``get_unique_run_dataset_pairs``.
      2. Skip pairs already listed as ``run,dataset`` in ``completedruns.txt``.
      3. For each remaining pair whose histogram-backed event total exceeds
         500000, merge the HIST_CALO files with ``hadd`` into
         ``mergedQA/HIST_CALO_<dataset>-<run>.root`` (unless that file already
         exists), run the ``doFindTowersEMCal.C`` ROOT macro to write
         ``hotMaps/EMCalHotMap_<dataset>-<run>.root``, and append the pair to
         ``completedruns.txt``.

    A pair is NOT recorded as completed when ``hadd`` exits with a non-zero
    status; the partial output is removed so the merge is retried next time.
    The database connection is closed even if an error interrupts the loop.
    """
    conn = pyodbc.connect("DSN=FileCatalog;UID=phnxrc;READONLY=True")
    try:
        cursor = conn.cursor()

        # Append mode creates the bookkeeping file when it is missing and
        # never truncates an existing one.
        with open('completedruns.txt', 'a'):
            pass

        os.makedirs('mergedQA', exist_ok=True)
        os.makedirs('hotMaps', exist_ok=True)

        unique_run_dataset_pairs = get_unique_run_dataset_pairs(cursor)

        with open('completedruns.txt') as f:
            completed_runs_datasets = {line.strip() for line in f}

        remaining_runs_datasets = [
            (run, dataset)
            for run, dataset in unique_run_dataset_pairs
            if f"{run},{dataset}" not in completed_runs_datasets
        ]

        for run, dataset in remaining_runs_datasets:
            results = get_total_events(cursor, run)
            print(f"looking at run {run}")

            if not results:
                print(f"No histograms for run {run}")
                continue

            for result_dataset, total_events in results:
                # get_total_events reports every dataset of the run; only the
                # one belonging to the current pair is processed here.
                if result_dataset != dataset:
                    continue
                print(f"run: {run} dataset: {result_dataset} total events in hists = {total_events}")

                if total_events <= 500000:
                    print("waiting on that run")
                    continue

                file_paths = get_file_paths(cursor, run, result_dataset)
                output_file = f"mergedQA/HIST_CALO_{result_dataset}-{run}.root"

                if not os.path.exists(output_file):
                    # files.txt records the inputs of the latest merge; hadd
                    # itself receives the paths on its command line.
                    with open('files.txt', 'w') as f:
                        f.writelines(f"{path}\n" for path in file_paths)

                    merge = subprocess.run(["hadd", "-ff", output_file] + file_paths)
                    if merge.returncode != 0:
                        print(f"hadd failed for {output_file} (exit code {merge.returncode}), not marking run as completed.")
                        # Drop any partial output so the existence check
                        # above does not skip the merge on the next pass.
                        if os.path.exists(output_file):
                            os.remove(output_file)
                        continue
                else:
                    print(f"{output_file} already exists, skipping hadd command.")

                # NOTE(review): the macro's exit status is not checked because
                # its return convention is not visible here -- confirm whether
                # a non-zero status should also block the completion record.
                subprocess.run(["root", "-b", "-q", f"doFindTowersEMCal.C(\"{output_file}\",\"hotMaps/EMCalHotMap_{result_dataset}-{run}.root\")"])

                with open('completedruns.txt', 'a') as f:
                    f.write(f"{run},{result_dataset}\n")
    finally:
        # Release the catalog connection on both the success and error paths.
        conn.close()

    print("All done")
0105
# Script entry point: run the merge/hot-map workflow only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
0108