emcal_noisy_tower_macro/hotMapProd/runProd.py

0001 #!/usr/bin/env python
0002 import os
0003 import pyodbc
0004 import subprocess
0005
def get_unique_run_dataset_pairs(cursor):
    """Return the distinct (runnumber, dataset) pairs that are candidates.

    Runs a fixed query against the ``datasets`` table: rows whose filename
    starts with ``DST_CALO`` and whose run number is above 42635 are grouped
    by run and dataset, and only groups whose summed ``events`` exceed
    500000 are kept.

    Args:
        cursor: Database cursor providing ``execute`` and ``fetchall``; the
            fetched rows must expose ``runnumber`` and ``dataset``
            attributes.

    Returns:
        A set of ``(runnumber, dataset)`` tuples.
    """
    sql = """
    SELECT runnumber, dataset
    FROM datasets
    WHERE filename LIKE 'DST_CALO%' AND runnumber > 42635
    GROUP BY runnumber, dataset
    HAVING SUM(events) > 500000;
    """
    cursor.execute(sql)

    # A set collapses any duplicate pairs the driver might hand back.
    pairs = set()
    for record in cursor.fetchall():
        pairs.add((record.runnumber, record.dataset))
    return pairs
0017
def get_total_events(cursor, run):
    """Sum DST_CALO events per dataset for segments that have histograms.

    For the given run, finds every (segment, dataset) that has a
    ``HIST_CALO`` entry in the ``datasets`` table and sums the ``events`` of
    the matching ``DST_CALO`` rows, grouped by dataset.

    The run number is passed as a bound parameter rather than being
    interpolated into the SQL text, so the statement itself is constant.

    NOTE(review): the LIKE patterns match the run digits anywhere in the
    filename (``%{run}%``), so a filename containing the same digits in a
    different position would also match -- confirm this is acceptable for
    the catalogue's naming scheme.

    Args:
        cursor: Database cursor providing ``execute`` and ``fetchall``.
        run: Run number to look up.

    Returns:
        The list returned by ``cursor.fetchall()``; each row holds
        ``(dataset, total_events)``.
    """
    query = """
    SELECT
        d.dataset,
        SUM(d.events) AS total_events
    FROM
        datasets d
    JOIN
        (SELECT DISTINCT segment, dataset
         FROM datasets
         WHERE filename LIKE ?) h
    ON
        d.segment = h.segment AND d.dataset = h.dataset
    WHERE
        d.filename LIKE ?
    GROUP BY
        d.dataset;
    """
    # Parameter order follows the order of the '?' markers in the statement:
    # the histogram sub-select first, then the outer DST filter.
    cursor.execute(query, (f"HIST_CALO%{run}%", f"DST_CALO%{run}%"))
    return cursor.fetchall()
0039
def get_file_paths(cursor, run, dataset):
    """Return the full paths of the HIST_CALO files for a run and dataset.

    Looks up the ``files`` table for logical file names matching
    ``HIST_CALO%<dataset>%-000<run>%.root``. The pattern is supplied as a
    bound parameter instead of being formatted into the SQL text.

    NOTE(review): the literal ``-000`` prefix presumably relies on the run
    number being zero-padded to a fixed width in the file name -- confirm
    against the catalogue's naming scheme.

    Args:
        cursor: Database cursor providing ``execute`` and ``fetchall``; the
            fetched rows must expose a ``full_file_path`` attribute.
        run: Run number.
        dataset: Dataset name.

    Returns:
        A list of ``full_file_path`` values, in the order returned by the
        database.
    """
    query = """
    SELECT full_file_path
    FROM files
    WHERE lfn LIKE ?;
    """
    cursor.execute(query, (f"HIST_CALO%{dataset}%-000{run}%.root",))
    return [row.full_file_path for row in cursor.fetchall()]
0049
def _merge_and_find_towers(run, dataset, file_paths):
    """Merge one run/dataset's QA histograms and run the hot-tower macro.

    Args:
        run: Run number.
        dataset: Dataset name.
        file_paths: Paths of the histogram files to merge.

    Returns:
        True when the merged file is available and the macro was launched,
        False when there was nothing to merge or ``hadd`` failed.
    """
    output_file = f"mergedQA/HIST_CALO_{dataset}-{run}.root"

    if not os.path.exists(output_file):
        if not file_paths:
            # hadd with no inputs can only fail; leave the run for later.
            print(f"No histogram files found for run {run} dataset {dataset}")
            return False

        # Record the inputs of the most recent merge.
        with open('files.txt', 'w') as f:
            f.writelines(f"{path}\n" for path in file_paths)

        merged = subprocess.run(["hadd", "-ff", output_file] + file_paths)
        if merged.returncode != 0:
            print(f"hadd failed for {output_file} (exit code {merged.returncode})")
            # Remove a partial output so a later pass does not mistake it
            # for a finished merge and skip hadd.
            if os.path.exists(output_file):
                os.remove(output_file)
            return False
    else:
        print(f"{output_file} already exists, skipping hadd command.")

    # NOTE(review): the exit status of root is deliberately not checked; it
    # may reflect the macro's own return value rather than success/failure.
    subprocess.run(["root", "-b", "-q", f"doFindTowersEMCal.C(\"{output_file}\",\"hotMaps/EMCalHotMap_{dataset}-{run}.root\")"])
    return True


def main():
    """Produce merged QA files and hot maps for every run not yet completed.

    Connects to the file catalogue, selects the (run, dataset) pairs that
    are not listed in ``completedruns.txt``, and for each pair whose
    histograms cover more than 500000 events merges the histogram files
    into ``mergedQA/``, runs ``doFindTowersEMCal.C`` to write into
    ``hotMaps/``, and appends the pair to ``completedruns.txt``.

    A pair is recorded as completed only when the merge step succeeded. The
    database connection is closed even if processing raises.
    """
    min_events = 500000  # threshold on events covered by the histograms

    conn = pyodbc.connect("DSN=FileCatalog;UID=phnxrc;READONLY=True")
    try:
        cursor = conn.cursor()

        # Append mode creates the bookkeeping file if missing without
        # truncating an existing one.
        open('completedruns.txt', 'a').close()
        os.makedirs('mergedQA', exist_ok=True)
        os.makedirs('hotMaps', exist_ok=True)

        unique_run_dataset_pairs = get_unique_run_dataset_pairs(cursor)

        with open('completedruns.txt') as f:
            completed_runs_datasets = {line.strip() for line in f}

        remaining_runs_datasets = [
            (run, dataset)
            for run, dataset in unique_run_dataset_pairs
            if f"{run},{dataset}" not in completed_runs_datasets
        ]

        for run, dataset in remaining_runs_datasets:
            results = get_total_events(cursor, run)
            print(f"looking at run {run}")

            if not results:
                print(f"No histograms for run {run}")
                continue

            for result_dataset, total_events in results:
                if result_dataset != dataset:
                    continue  # Skip if the dataset does not match
                print(f"run: {run}  dataset: {result_dataset}  total events in hists = {total_events}")

                # SUM() yields NULL/None when there is nothing to add up;
                # treat that the same as "not enough events yet".
                if total_events is None or total_events <= min_events:
                    print("waiting on that run")
                    continue

                file_paths = get_file_paths(cursor, run, result_dataset)
                if not _merge_and_find_towers(run, result_dataset, file_paths):
                    continue

                with open('completedruns.txt', 'a') as f:
                    f.write(f"{run},{result_dataset}\n")
    finally:
        conn.close()

    print("All done")
0105
# Run the production loop only when executed as a script, not on import.
if __name__ == "__main__":
    main()
0108