sEPDChannelAnalysis/macro/GoldenRunList_ConductorFile_old.sh

0001 #!/bin/bash
0002
0003 ########################################
0004 # Steps:
0005 # 1. Extract initial run numbers from databases.
0006 # 2. Apply Calorimeter QA filters.
0007 # 3. Apply run duration and livetime cuts.
0008 # 4. Identify runs with missing bad tower maps (without removing them).
0009 # 5. Generate DST lists for the selected runs.
0010 # 6. Summarize the statistics at each stage.
0011 ########################################
0012
0013 echo "========================================"
0014 echo "Starting the Golden Run List Generation"
0015 echo "========================================"
0016
0017 # Create necessary directories
0018 echo "Creating necessary directories..."
0019 mkdir -p FileLists/ dst_list/ list/
0020 echo "Directories created."
0021 echo "----------------------------------------"
0022
0023 # Set the working directory
0024 workplace=$(pwd)
0025 echo "Working directory is set to $workplace"
0026 echo "----------------------------------------"
0027
0028 # Step 1: Run the Python script to get the initial run numbers
0029 echo "Step 1: Running Python script to generate initial run lists..."
0030
0031 python_output=$(python3 << EOF
0032 import pyodbc
0033
0034 def get_all_run_numbers(cursor):
0035     # Query to get run numbers with at least 1 million events and runnumber >= 47289
0036     query = """
0037     SELECT runnumber
0038     FROM datasets
0039     WHERE dsttype='DST_JETCALO_run2pp' and dataset = 'ana446_2024p007'
0040     GROUP BY runnumber
0041     HAVING SUM(events) >= 1000000 AND runnumber >= 47289;
0042     """
0043     cursor.execute(query)
0044     run_numbers = [row.runnumber for row in cursor.fetchall()]
0045     return run_numbers
0046
0047 def get_golden_run_numbers(detector, file_catalog_run_numbers, production_cursor):
0048     query = f"""
0049     SELECT runnumber
0050     FROM goodruns
0051     WHERE ({detector}_auto).runclass = 'GOLDEN'
0052     """
0053     production_cursor.execute(query)
0054     golden_run_numbers = {row.runnumber for row in production_cursor.fetchall()}
0055     return golden_run_numbers.intersection(set(file_catalog_run_numbers))
0056
0057 def main():
0058     # Connect to the FileCatalog database
0059     file_catalog_conn = pyodbc.connect("DSN=FileCatalog;UID=phnxrc;READONLY=True")
0060     file_catalog_cursor = file_catalog_conn.cursor()
0061
0062     # Get unique run numbers with at least 1 million total events
0063     file_catalog_run_numbers = get_all_run_numbers(file_catalog_cursor)
0064     file_catalog_run_numbers.sort()
0065     with open('list/list_runnumber_all.txt', 'w') as f:
0066         for run_number in file_catalog_run_numbers:
0067             f.write(f"{run_number}\n")
0068     print(f"TOTAL_RUNS:{len(file_catalog_run_numbers)}")
0069
0070     # Close the FileCatalog database connection
0071     file_catalog_conn.close()
0072
0073     # Connect to the Production database
0074     production_conn = pyodbc.connect("DSN=Production_write")
0075     production_cursor = production_conn.cursor()
0076
0077     # Filter 'GOLDEN' run numbers for each calorimeter
0078     detectors = ['emcal', 'ihcal', 'ohcal']
0079     golden_run_numbers = set(file_catalog_run_numbers)
0080     for detector in detectors:
0081         detector_golden_runs = get_golden_run_numbers(detector, file_catalog_run_numbers, production_cursor)
0082         golden_run_numbers = golden_run_numbers.intersection(detector_golden_runs)
0083
0084     golden_run_numbers = sorted(golden_run_numbers)
0085     with open('list/Full_ppGoldenRunList.txt', 'w') as f:
0086         for run_number in golden_run_numbers:
0087             f.write(f"{run_number}\n")
0088     print(f"COMBINED_GOLDEN_RUNS:{len(golden_run_numbers)}")
0089
0090     # Close the Production database connection
0091     production_conn.close()
0092
0093 if __name__ == "__main__":
0094     main()
0095 EOF
0096 )
0097
0098 echo "Python script execution completed."
0099 echo "----------------------------------------"
0100
0101 # Parse the output to get the counts
0102 total_runs=$(echo "$python_output" | grep 'TOTAL_RUNS' | cut -d':' -f2)
0103 combined_golden_runs=$(echo "$python_output" | grep 'COMBINED_GOLDEN_RUNS' | cut -d':' -f2)
0104
0105 echo "Summary of initial counts:"
0106 echo "Total runs after firmware fix and >1M events: $total_runs"
0107 echo "Number of runs passing Calo QA (all three GOLDEN statuses): $combined_golden_runs"
0108 echo "----------------------------------------"
0109
0110 # Step 2: Check that the initial golden run list exists
0111 echo "Step 2: Checking for the initial golden run list..."
0112
0113 golden_run_list="list/Full_ppGoldenRunList.txt"
0114 if [[ ! -f "$golden_run_list" ]]; then
0115     echo "[ERROR] The file $golden_run_list does not exist. Please check the path and try again."
0116     exit 1
0117 fi
0118
0119 echo "Initial golden run list found: $golden_run_list"
0120 echo "----------------------------------------"
0121
0122 # Function to get total GL1 raw events for a list of runs
0123 get_total_events() {
0124     input_file=$1
0125     total_events=0
0126     batch_size=100
0127     run_numbers=()
0128     while IFS= read -r runnumber; do
0129         if [[ -n "$runnumber" ]]; then
0130             run_numbers+=("$runnumber")
0131             if [[ ${#run_numbers[@]} -ge $batch_size ]]; then
0132                 run_list=$(printf ",%s" "${run_numbers[@]}")
0133                 run_list=${run_list:1}
0134                 query="SELECT SUM(raw) FROM gl1_scalers WHERE runnumber IN (${run_list});"
0135                 result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
0136                 events=$(echo "$result" | xargs)
0137                 if [[ "$events" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
0138                     total_events=$(echo "$total_events + $events" | bc)
0139                 fi
0140                 run_numbers=()
0141             fi
0142         fi
0143     done < "$input_file"
0144     # Process remaining runs
0145     if [[ ${#run_numbers[@]} -gt 0 ]]; then
0146         run_list=$(printf ",%s" "${run_numbers[@]}")
0147         run_list=${run_list:1}
0148         query="SELECT SUM(raw) FROM gl1_scalers WHERE runnumber IN (${run_list});"
0149         result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
0150         events=$(echo "$result" | xargs)
0151         if [[ "$events" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
0152             total_events=$(echo "$total_events + $events" | bc)
0153         fi
0154     fi
0155     echo "$total_events"
0156 }
0157
0158 #################################
0159 # Version 1: Calo QA + run time > 5 mins + livetime > 80%
0160 #################################
0161
0162 echo "----------------------------------------"
0163 echo "Processing Version 1: Calo QA + run time > 5 mins + livetime > 80%"
0164 echo "----------------------------------------"
0165
0166 # Step 3a: Apply run duration filter (>300 seconds)
0167 input_file="list/Full_ppGoldenRunList.txt"  # Runs after Calo QA
0168 output_file_duration_v1="list/list_runnumber_runtime_v1.txt"
0169 > "$output_file_duration_v1"
0170
0171 total_runs_duration_v1=0
0172 runs_dropped_runtime_v1=0
0173
0174 while IFS= read -r runnumber; do
0175     if [[ -z "$runnumber" ]]; then
0176         continue
0177     fi
0178
0179     # Query to get the run duration in seconds
0180     query="SELECT runnumber, EXTRACT(EPOCH FROM (ertimestamp - brtimestamp)) AS duration FROM run WHERE runnumber = ${runnumber};"
0181
0182     result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query" | tr -d '[:space:]')
0183
0184     duration=$(echo "$result" | awk -F '|' '{print $2}')
0185
0186     if [[ $duration =~ ^[0-9]+([.][0-9]+)?$ ]]; then
0187         if (( $(echo "$duration > 300" | bc -l) )); then
0188             echo "$runnumber" >> "$output_file_duration_v1"
0189             total_runs_duration_v1=$((total_runs_duration_v1+1))
0190         else
0191             runs_dropped_runtime_v1=$((runs_dropped_runtime_v1+1))
0192         fi
0193     else
0194         runs_dropped_runtime_v1=$((runs_dropped_runtime_v1+1))
0195     fi
0196
0197 done < "$input_file"
0198
0199 echo "Total runs after run duration cut (>5 mins): $total_runs_duration_v1"
0200 echo "Number of runs dropped due to run duration cut: $runs_dropped_runtime_v1"
0201 echo "----------------------------------------"
0202
0203 # Step 4a: Apply live/raw ratio filter for trigger index 10 (>80%)
0204 input_file="$output_file_duration_v1"
0205 output_file_livetime_v1="list/list_runnumber_livetime_v1.txt"
0206 bad_file_livetime_v1="list/list_runnumber_bad_livetime_v1.txt"
0207 > "$output_file_livetime_v1"
0208 > "$bad_file_livetime_v1"
0209
0210 total_runs_livetime_v1=0
0211 runs_dropped_livetime_v1=0
0212
0213 while IFS= read -r runnumber; do
0214     if [[ -z "$runnumber" ]]; then
0215         continue
0216     fi
0217
0218     index_to_check=10  # Trigger index to check for livetime
0219
0220     # Query to get raw and live counts for the specified trigger index
0221     query="SELECT index, raw, live FROM gl1_scalers WHERE runnumber = ${runnumber} AND index = ${index_to_check};"
0222
0223     result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
0224
0225     index_pass=false
0226
0227     while IFS='|' read -r index raw live; do
0228         index=$(echo "$index" | xargs)
0229         raw=$(echo "$raw" | xargs)
0230         live=$(echo "$live" | xargs)
0231
0232         if [[ "$raw" =~ ^[0-9]+$ ]] && [[ "$live" =~ ^[0-9]+$ ]] && [ "$raw" -ne 0 ]; then
0233             ratio=$(echo "scale=2; $live / $raw * 100" | bc -l)
0234             if [ "$index" -eq "$index_to_check" ]; then
0235                 if (( $(echo "$ratio >= 80" | bc -l) )); then
0236                     index_pass=true
0237                 fi
0238             fi
0239         fi
0240     done <<< "$result"
0241
0242     if [[ "$index_pass" == true ]]; then
0243         echo "$runnumber" >> "$output_file_livetime_v1"
0244         total_runs_livetime_v1=$((total_runs_livetime_v1+1))
0245     else
0246         echo "$runnumber" >> "$bad_file_livetime_v1"
0247         runs_dropped_livetime_v1=$((runs_dropped_livetime_v1+1))
0248     fi
0249
0250 done < "$input_file"
0251
0252 echo "Total runs after livetime cut (>80% for trigger index 10): $total_runs_livetime_v1"
0253 echo "Number of runs dropped due to livetime cut: $runs_dropped_livetime_v1"
0254 echo "----------------------------------------"
0255
0256 # Step 5a: Identify runs with missing bad tower maps (without removing them)
0257 echo "Identifying runs with missing bad tower maps..."
0258
0259 input_file="$output_file_livetime_v1"
0260 output_file_final_v1="FileLists/Full_ppGoldenRunList_Version1.txt"
0261 bad_tower_runs_file="list/list_runs_missing_bad_tower_maps.txt"
0262
0263 # Copy the input file to the final output file (we are not removing runs)
0264 cp "$input_file" "$output_file_final_v1"
0265
0266 # Find runs with available bad tower maps
0267 available_bad_tower_runs=$(find /cvmfs/sphenix.sdcc.bnl.gov/calibrations/sphnxpro/cdb/CEMC_BadTowerMap -name "*p0*" | cut -d '-' -f2 | cut -d c -f1 | sort | uniq)
0268 echo "$available_bad_tower_runs" > list/available_bad_tower_runs.txt
0269
0270 # Identify runs missing bad tower maps
0271 grep -Fxvf list/available_bad_tower_runs.txt "$input_file" > "$bad_tower_runs_file"
0272
0273 # Count the number of runs with and without bad tower maps
0274 total_runs_with_bad_tower=$(grep -Fxf list/available_bad_tower_runs.txt "$input_file" | wc -l)
0275 total_runs_missing_bad_tower=$(wc -l < "$bad_tower_runs_file")
0276
0277 echo "Total runs with bad tower maps: $total_runs_with_bad_tower"
0278 echo "Total runs missing bad tower maps: $total_runs_missing_bad_tower"
0279 echo "List of runs missing bad tower maps saved to $bad_tower_runs_file"
0280 echo "----------------------------------------"
0281
0282 # Clean up temporary file
0283 rm list/available_bad_tower_runs.txt
0284
0285 # Copy the final run numbers to dst_list folder
0286 cp "$output_file_final_v1" dst_list/Final_RunNumbers_After_All_Cuts.txt
0287 echo "Final run numbers after all cuts have been copied to dst_list/Final_RunNumbers_After_All_Cuts.txt"
0288 echo "----------------------------------------"
0289
0290 # Step 6: Create .list file for CreateDstList.pl command
0291 echo "Creating .list file for CreateDstList.pl..."
0292 cp "$output_file_final_v1" Full_ppGoldenRunList_Version1.list
0293 echo ".list file created."
0294 echo "----------------------------------------"
0295
0296 # Step 7: Remove existing list files in dst_list directory
0297 echo "Removing existing list files in dst_list directory..."
0298 rm -f dst_list/*.list
0299 echo "Existing list files removed."
0300 echo "----------------------------------------"
0301
0302 # Step 8: Create DST lists
0303 echo "Changing to dst_list directory to generate DST lists..."
0304 cd dst_list
0305
0306 echo "Running CreateDstList.pl to generate the DST list..."
0307 CreateDstList.pl --build ana437 --cdb 2024p007 DST_JETCALO_run2pp --list ../Full_ppGoldenRunList_Version1.list
0308 echo "DST list generated and saved to dst_list."
0309 echo "----------------------------------------"
0310
0311 cd ..
0312
0313 # Calculate total events at each stage
0314 total_events_initial=$(get_total_events 'list/list_runnumber_all.txt')
0315 total_events_calo_qa=$(get_total_events 'list/Full_ppGoldenRunList.txt')
0316 total_events_after_runtime=$(get_total_events 'list/list_runnumber_runtime_v1.txt')
0317 total_events_after_livetime=$(get_total_events 'list/list_runnumber_livetime_v1.txt')
0318 total_events_after_all_cuts=$(get_total_events "$output_file_final_v1")
0319
0320 # Adjust the counts for runs with and without bad tower maps
0321 total_runs_after_all_cuts_v1=$(wc -l < "$output_file_final_v1")
0322
0323 # Compute percentages relative to initial totals
0324 percent_runs_calo_qa=$(echo "scale=2; $combined_golden_runs / $total_runs * 100" | bc)
0325 percent_runs_after_runtime=$(echo "scale=2; $total_runs_duration_v1 / $total_runs * 100" | bc)
0326 percent_runs_after_livetime=$(echo "scale=2; $total_runs_livetime_v1 / $total_runs * 100" | bc)
0327 percent_runs_after_badtower=$(echo "scale=2; $total_runs_after_all_cuts_v1 / $total_runs * 100" | bc)
0328
0329 percent_events_calo_qa=$(echo "scale=2; $total_events_calo_qa / $total_events_initial * 100" | bc)
0330 percent_events_after_runtime=$(echo "scale=2; $total_events_after_runtime / $total_events_initial * 100" | bc)
0331 percent_events_after_livetime=$(echo "scale=2; $total_events_after_livetime / $total_events_initial * 100" | bc)
0332 percent_events_after_all_cuts=$(echo "scale=2; $total_events_after_all_cuts / $total_events_initial * 100" | bc)
0333
0334 # Calculate percentages lost at each step
0335 percent_runs_lost_calo_qa=$(echo "scale=2; 100 - $percent_runs_calo_qa" | bc)
0336 percent_runs_lost_after_runtime=$(echo "scale=2; $percent_runs_calo_qa - $percent_runs_after_runtime" | bc)
0337 percent_runs_lost_after_livetime=$(echo "scale=2; $percent_runs_after_runtime - $percent_runs_after_livetime" | bc)
0338 percent_runs_lost_after_badtower=0
0339
0340 percent_events_lost_calo_qa=$(echo "scale=2; 100 - $percent_events_calo_qa" | bc)
0341 percent_events_lost_after_runtime=$(echo "scale=2; $percent_events_calo_qa - $percent_events_after_runtime" | bc)
0342 percent_events_lost_after_livetime=$(echo "scale=2; $percent_events_after_runtime - $percent_events_after_livetime" | bc)
0343 percent_events_lost_after_badtower=0
0344
0345 # Final Summary
0346 echo "========================================"
0347 echo "Final Summary: Version 1 (Calo QA → Runtime → Livetime)"
0348
0349 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" "Stage" "% Initial Events" "Total Events" "Runs" "% Initial Runs"
0350 echo "--------------------------------------------------|-----------------|-----------------|-----------|-----------------"
0351 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0352 "1) After firmware fix and >1M events" "100%" "$total_events_initial" "$total_runs" "100%"
0353 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0354 "2) & pass Calo QA" "${percent_events_calo_qa}%" "$total_events_calo_qa" "$combined_golden_runs" "${percent_runs_calo_qa}%"
0355 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0356 "3) & > 5 mins" "${percent_events_after_runtime}%" "$total_events_after_runtime" "$total_runs_duration_v1" "${percent_runs_after_runtime}%"
0357 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0358 "4) & livetime > 80% of MB trigger" "${percent_events_after_livetime}%" "$total_events_after_livetime" "$total_runs_livetime_v1" "${percent_runs_after_livetime}%"
0359 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0360 "5) Final run list (no runs removed for bad tower maps)" "${percent_events_after_all_cuts}%" "$total_events_after_all_cuts" "$total_runs_after_all_cuts_v1" "${percent_runs_after_badtower}%"
0361
0362 echo "================================================="
0363
0364 echo ""
0365 echo "Percentage of runs lost at each step:"
0366 echo "After Calo QA: ${percent_runs_lost_calo_qa}% of runs lost"
0367 echo "After run time > 5 mins: ${percent_runs_lost_after_runtime}% of runs lost"
0368 echo "After livetime >80%: ${percent_runs_lost_after_livetime}% of runs lost"
0369 echo "After identifying runs missing bad tower maps: 0% of runs lost (runs are retained)"
0370 echo ""
0371 echo "Percentage of events lost at each step:"
0372 echo "After Calo QA: ${percent_events_lost_calo_qa}% of events lost"
0373 echo "After run time > 5 mins: ${percent_events_lost_after_runtime}% of events lost"
0374 echo "After livetime >80%: ${percent_events_lost_after_livetime}% of events lost"
0375 echo "After identifying runs missing bad tower maps: 0% of events lost (events are retained)"
0376 echo "========================================"
0377
0378 # Copy the missing bad tower maps file to the requested path
0379 cp "$bad_tower_runs_file" /sphenix/user/patsfan753/tutorials/tutorials/CaloDataAnaRun24pp/list_runs_missing_bad_tower_maps.txt
0380
0381 # Copy the runs that fail livetime cut to the requested path
0382 cp "$bad_file_livetime_v1" /sphenix/user/patsfan753/tutorials/tutorials/CaloDataAnaRun24pp/list_runnumber_bad_livetime_v1.txt
0383
0384 echo "Files for missing bad tower maps and livetime failures have been copied to:"
0385 echo "/sphenix/user/patsfan753/tutorials/tutorials/CaloDataAnaRun24pp/"
0386 echo "Done."