File indexing completed on 2025-08-05 08:14:56
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 echo "========================================"
0014 echo "Starting the Golden Run List Generation"
0015 echo "========================================"
0016
0017
0018 echo "Creating necessary directories..."
0019 mkdir -p FileLists/ dst_list/ list/
0020 echo "Directories created."
0021 echo "----------------------------------------"
0022
0023
0024 workplace=$(pwd)
0025 echo "Working directory is set to $workplace"
0026 echo "----------------------------------------"
0027
0028
0029 echo "Step 1: Running Python script to generate initial run lists..."
0030
0031 python_output=$(python3 << EOF
0032 import pyodbc
0033
0034 def get_all_run_numbers(cursor):
0035
0036 query = """
0037 SELECT runnumber
0038 FROM datasets
0039 WHERE dsttype='DST_JETCALO_run2pp' and dataset = 'ana446_2024p007'
0040 GROUP BY runnumber
0041 HAVING SUM(events) >= 1000000 AND runnumber >= 47289;
0042 """
0043 cursor.execute(query)
0044 run_numbers = [row.runnumber for row in cursor.fetchall()]
0045 return run_numbers
0046
0047 def get_golden_run_numbers(detector, file_catalog_run_numbers, production_cursor):
0048 query = f"""
0049 SELECT runnumber
0050 FROM goodruns
0051 WHERE ({detector}_auto).runclass = 'GOLDEN'
0052 """
0053 production_cursor.execute(query)
0054 golden_run_numbers = {row.runnumber for row in production_cursor.fetchall()}
0055 return golden_run_numbers.intersection(set(file_catalog_run_numbers))
0056
0057 def main():
0058
0059 file_catalog_conn = pyodbc.connect("DSN=FileCatalog;UID=phnxrc;READONLY=True")
0060 file_catalog_cursor = file_catalog_conn.cursor()
0061
0062
0063 file_catalog_run_numbers = get_all_run_numbers(file_catalog_cursor)
0064 file_catalog_run_numbers.sort()
0065 with open('list/list_runnumber_all.txt', 'w') as f:
0066 for run_number in file_catalog_run_numbers:
0067 f.write(f"{run_number}\n")
0068 print(f"TOTAL_RUNS:{len(file_catalog_run_numbers)}")
0069
0070
0071 file_catalog_conn.close()
0072
0073
0074 production_conn = pyodbc.connect("DSN=Production_write")
0075 production_cursor = production_conn.cursor()
0076
0077
0078 detectors = ['emcal', 'ihcal', 'ohcal']
0079 golden_run_numbers = set(file_catalog_run_numbers)
0080 for detector in detectors:
0081 detector_golden_runs = get_golden_run_numbers(detector, file_catalog_run_numbers, production_cursor)
0082 golden_run_numbers = golden_run_numbers.intersection(detector_golden_runs)
0083
0084 golden_run_numbers = sorted(golden_run_numbers)
0085 with open('list/Full_ppGoldenRunList.txt', 'w') as f:
0086 for run_number in golden_run_numbers:
0087 f.write(f"{run_number}\n")
0088 print(f"COMBINED_GOLDEN_RUNS:{len(golden_run_numbers)}")
0089
0090
0091 production_conn.close()
0092
0093 if __name__ == "__main__":
0094 main()
0095 EOF
0096 )
0097
0098 echo "Python script execution completed."
0099 echo "----------------------------------------"
0100
0101
0102 total_runs=$(echo "$python_output" | grep 'TOTAL_RUNS' | cut -d':' -f2)
0103 combined_golden_runs=$(echo "$python_output" | grep 'COMBINED_GOLDEN_RUNS' | cut -d':' -f2)
0104
0105 echo "Summary of initial counts:"
0106 echo "Total runs after firmware fix and >1M events: $total_runs"
0107 echo "Number of runs passing Calo QA (all three GOLDEN statuses): $combined_golden_runs"
0108 echo "----------------------------------------"
0109
0110
0111 echo "Step 2: Checking for the initial golden run list..."
0112
0113 golden_run_list="list/Full_ppGoldenRunList.txt"
0114 if [[ ! -f "$golden_run_list" ]]; then
0115 echo "[ERROR] The file $golden_run_list does not exist. Please check the path and try again."
0116 exit 1
0117 fi
0118
0119 echo "Initial golden run list found: $golden_run_list"
0120 echo "----------------------------------------"
0121
0122
0123 get_total_events() {
0124 input_file=$1
0125 total_events=0
0126 batch_size=100
0127 run_numbers=()
0128 while IFS= read -r runnumber; do
0129 if [[ -n "$runnumber" ]]; then
0130 run_numbers+=("$runnumber")
0131 if [[ ${
0132 run_list=$(printf ",%s" "${run_numbers[@]}")
0133 run_list=${run_list:1}
0134 query="SELECT SUM(raw) FROM gl1_scalers WHERE runnumber IN (${run_list});"
0135 result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
0136 events=$(echo "$result" | xargs)
0137 if [[ "$events" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
0138 total_events=$(echo "$total_events + $events" | bc)
0139 fi
0140 run_numbers=()
0141 fi
0142 fi
0143 done < "$input_file"
0144
0145 if [[ ${
0146 run_list=$(printf ",%s" "${run_numbers[@]}")
0147 run_list=${run_list:1}
0148 query="SELECT SUM(raw) FROM gl1_scalers WHERE runnumber IN (${run_list});"
0149 result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
0150 events=$(echo "$result" | xargs)
0151 if [[ "$events" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
0152 total_events=$(echo "$total_events + $events" | bc)
0153 fi
0154 fi
0155 echo "$total_events"
0156 }
0157
0158
0159
0160
0161
0162 echo "----------------------------------------"
0163 echo "Processing Version 1: Calo QA + run time > 5 mins + livetime > 80%"
0164 echo "----------------------------------------"
0165
0166
0167 input_file="list/Full_ppGoldenRunList.txt"
0168 output_file_duration_v1="list/list_runnumber_runtime_v1.txt"
0169 > "$output_file_duration_v1"
0170
0171 total_runs_duration_v1=0
0172 runs_dropped_runtime_v1=0
0173
0174 while IFS= read -r runnumber; do
0175 if [[ -z "$runnumber" ]]; then
0176 continue
0177 fi
0178
0179
0180 query="SELECT runnumber, EXTRACT(EPOCH FROM (ertimestamp - brtimestamp)) AS duration FROM run WHERE runnumber = ${runnumber};"
0181
0182 result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query" | tr -d '[:space:]')
0183
0184 duration=$(echo "$result" | awk -F '|' '{print $2}')
0185
0186 if [[ $duration =~ ^[0-9]+([.][0-9]+)?$ ]]; then
0187 if (( $(echo "$duration > 300" | bc -l) )); then
0188 echo "$runnumber" >> "$output_file_duration_v1"
0189 total_runs_duration_v1=$((total_runs_duration_v1+1))
0190 else
0191 runs_dropped_runtime_v1=$((runs_dropped_runtime_v1+1))
0192 fi
0193 else
0194 runs_dropped_runtime_v1=$((runs_dropped_runtime_v1+1))
0195 fi
0196
0197 done < "$input_file"
0198
0199 echo "Total runs after run duration cut (>5 mins): $total_runs_duration_v1"
0200 echo "Number of runs dropped due to run duration cut: $runs_dropped_runtime_v1"
0201 echo "----------------------------------------"
0202
0203
0204 input_file="$output_file_duration_v1"
0205 output_file_livetime_v1="list/list_runnumber_livetime_v1.txt"
0206 bad_file_livetime_v1="list/list_runnumber_bad_livetime_v1.txt"
0207 > "$output_file_livetime_v1"
0208 > "$bad_file_livetime_v1"
0209
0210 total_runs_livetime_v1=0
0211 runs_dropped_livetime_v1=0
0212
0213 while IFS= read -r runnumber; do
0214 if [[ -z "$runnumber" ]]; then
0215 continue
0216 fi
0217
0218 index_to_check=10
0219
0220
0221 query="SELECT index, raw, live FROM gl1_scalers WHERE runnumber = ${runnumber} AND index = ${index_to_check};"
0222
0223 result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
0224
0225 index_pass=false
0226
0227 while IFS='|' read -r index raw live; do
0228 index=$(echo "$index" | xargs)
0229 raw=$(echo "$raw" | xargs)
0230 live=$(echo "$live" | xargs)
0231
0232 if [[ "$raw" =~ ^[0-9]+$ ]] && [[ "$live" =~ ^[0-9]+$ ]] && [ "$raw" -ne 0 ]; then
0233 ratio=$(echo "scale=2; $live / $raw * 100" | bc -l)
0234 if [ "$index" -eq "$index_to_check" ]; then
0235 if (( $(echo "$ratio >= 80" | bc -l) )); then
0236 index_pass=true
0237 fi
0238 fi
0239 fi
0240 done <<< "$result"
0241
0242 if [[ "$index_pass" == true ]]; then
0243 echo "$runnumber" >> "$output_file_livetime_v1"
0244 total_runs_livetime_v1=$((total_runs_livetime_v1+1))
0245 else
0246 echo "$runnumber" >> "$bad_file_livetime_v1"
0247 runs_dropped_livetime_v1=$((runs_dropped_livetime_v1+1))
0248 fi
0249
0250 done < "$input_file"
0251
0252 echo "Total runs after livetime cut (>80% for trigger index 10): $total_runs_livetime_v1"
0253 echo "Number of runs dropped due to livetime cut: $runs_dropped_livetime_v1"
0254 echo "----------------------------------------"
0255
0256
0257 echo "Identifying runs with missing bad tower maps..."
0258
0259 input_file="$output_file_livetime_v1"
0260 output_file_final_v1="FileLists/Full_ppGoldenRunList_Version1.txt"
0261 bad_tower_runs_file="list/list_runs_missing_bad_tower_maps.txt"
0262
0263
0264 cp "$input_file" "$output_file_final_v1"
0265
0266
0267 available_bad_tower_runs=$(find /cvmfs/sphenix.sdcc.bnl.gov/calibrations/sphnxpro/cdb/CEMC_BadTowerMap -name "*p0*" | cut -d '-' -f2 | cut -d c -f1 | sort | uniq)
0268 echo "$available_bad_tower_runs" > list/available_bad_tower_runs.txt
0269
0270
0271 grep -Fxvf list/available_bad_tower_runs.txt "$input_file" > "$bad_tower_runs_file"
0272
0273
0274 total_runs_with_bad_tower=$(grep -Fxf list/available_bad_tower_runs.txt "$input_file" | wc -l)
0275 total_runs_missing_bad_tower=$(wc -l < "$bad_tower_runs_file")
0276
0277 echo "Total runs with bad tower maps: $total_runs_with_bad_tower"
0278 echo "Total runs missing bad tower maps: $total_runs_missing_bad_tower"
0279 echo "List of runs missing bad tower maps saved to $bad_tower_runs_file"
0280 echo "----------------------------------------"
0281
0282
0283 rm list/available_bad_tower_runs.txt
0284
0285
0286 cp "$output_file_final_v1" dst_list/Final_RunNumbers_After_All_Cuts.txt
0287 echo "Final run numbers after all cuts have been copied to dst_list/Final_RunNumbers_After_All_Cuts.txt"
0288 echo "----------------------------------------"
0289
0290
0291 echo "Creating .list file for CreateDstList.pl..."
0292 cp "$output_file_final_v1" Full_ppGoldenRunList_Version1.list
0293 echo ".list file created."
0294 echo "----------------------------------------"
0295
0296
0297 echo "Removing existing list files in dst_list directory..."
0298 rm -f dst_list/*.list
0299 echo "Existing list files removed."
0300 echo "----------------------------------------"
0301
0302
0303 echo "Changing to dst_list directory to generate DST lists..."
0304 cd dst_list
0305
0306 echo "Running CreateDstList.pl to generate the DST list..."
0307 CreateDstList.pl --build ana437 --cdb 2024p007 DST_JETCALO_run2pp --list ../Full_ppGoldenRunList_Version1.list
0308 echo "DST list generated and saved to dst_list."
0309 echo "----------------------------------------"
0310
0311 cd ..
0312
0313
0314 total_events_initial=$(get_total_events 'list/list_runnumber_all.txt')
0315 total_events_calo_qa=$(get_total_events 'list/Full_ppGoldenRunList.txt')
0316 total_events_after_runtime=$(get_total_events 'list/list_runnumber_runtime_v1.txt')
0317 total_events_after_livetime=$(get_total_events 'list/list_runnumber_livetime_v1.txt')
0318 total_events_after_all_cuts=$(get_total_events "$output_file_final_v1")
0319
0320
0321 total_runs_after_all_cuts_v1=$(wc -l < "$output_file_final_v1")
0322
0323
0324 percent_runs_calo_qa=$(echo "scale=2; $combined_golden_runs / $total_runs * 100" | bc)
0325 percent_runs_after_runtime=$(echo "scale=2; $total_runs_duration_v1 / $total_runs * 100" | bc)
0326 percent_runs_after_livetime=$(echo "scale=2; $total_runs_livetime_v1 / $total_runs * 100" | bc)
0327 percent_runs_after_badtower=$(echo "scale=2; $total_runs_after_all_cuts_v1 / $total_runs * 100" | bc)
0328
0329 percent_events_calo_qa=$(echo "scale=2; $total_events_calo_qa / $total_events_initial * 100" | bc)
0330 percent_events_after_runtime=$(echo "scale=2; $total_events_after_runtime / $total_events_initial * 100" | bc)
0331 percent_events_after_livetime=$(echo "scale=2; $total_events_after_livetime / $total_events_initial * 100" | bc)
0332 percent_events_after_all_cuts=$(echo "scale=2; $total_events_after_all_cuts / $total_events_initial * 100" | bc)
0333
0334
0335 percent_runs_lost_calo_qa=$(echo "scale=2; 100 - $percent_runs_calo_qa" | bc)
0336 percent_runs_lost_after_runtime=$(echo "scale=2; $percent_runs_calo_qa - $percent_runs_after_runtime" | bc)
0337 percent_runs_lost_after_livetime=$(echo "scale=2; $percent_runs_after_runtime - $percent_runs_after_livetime" | bc)
0338 percent_runs_lost_after_badtower=0
0339
0340 percent_events_lost_calo_qa=$(echo "scale=2; 100 - $percent_events_calo_qa" | bc)
0341 percent_events_lost_after_runtime=$(echo "scale=2; $percent_events_calo_qa - $percent_events_after_runtime" | bc)
0342 percent_events_lost_after_livetime=$(echo "scale=2; $percent_events_after_runtime - $percent_events_after_livetime" | bc)
0343 percent_events_lost_after_badtower=0
0344
0345
0346 echo "========================================"
0347 echo "Final Summary: Version 1 (Calo QA → Runtime → Livetime)"
0348
0349 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" "Stage" "% Initial Events" "Total Events" "Runs" "% Initial Runs"
0350 echo "--------------------------------------------------|-----------------|-----------------|-----------|-----------------"
0351 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0352 "1) After firmware fix and >1M events" "100%" "$total_events_initial" "$total_runs" "100%"
0353 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0354 "2) & pass Calo QA" "${percent_events_calo_qa}%" "$total_events_calo_qa" "$combined_golden_runs" "${percent_runs_calo_qa}%"
0355 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0356 "3) & > 5 mins" "${percent_events_after_runtime}%" "$total_events_after_runtime" "$total_runs_duration_v1" "${percent_runs_after_runtime}%"
0357 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0358 "4) & livetime > 80% of MB trigger" "${percent_events_after_livetime}%" "$total_events_after_livetime" "$total_runs_livetime_v1" "${percent_runs_after_livetime}%"
0359 printf "%-50s | %-15s | %-15s | %-9s | %-15s\n" \
0360 "5) Final run list (no runs removed for bad tower maps)" "${percent_events_after_all_cuts}%" "$total_events_after_all_cuts" "$total_runs_after_all_cuts_v1" "${percent_runs_after_badtower}%"
0361
0362 echo "================================================="
0363
0364 echo ""
0365 echo "Percentage of runs lost at each step:"
0366 echo "After Calo QA: ${percent_runs_lost_calo_qa}% of runs lost"
0367 echo "After run time > 5 mins: ${percent_runs_lost_after_runtime}% of runs lost"
0368 echo "After livetime >80%: ${percent_runs_lost_after_livetime}% of runs lost"
0369 echo "After identifying runs missing bad tower maps: 0% of runs lost (runs are retained)"
0370 echo ""
0371 echo "Percentage of events lost at each step:"
0372 echo "After Calo QA: ${percent_events_lost_calo_qa}% of events lost"
0373 echo "After run time > 5 mins: ${percent_events_lost_after_runtime}% of events lost"
0374 echo "After livetime >80%: ${percent_events_lost_after_livetime}% of events lost"
0375 echo "After identifying runs missing bad tower maps: 0% of events lost (events are retained)"
0376 echo "========================================"
0377
0378
0379 cp "$bad_tower_runs_file" /sphenix/user/patsfan753/tutorials/tutorials/CaloDataAnaRun24pp/list_runs_missing_bad_tower_maps.txt
0380
0381
0382 cp "$bad_file_livetime_v1" /sphenix/user/patsfan753/tutorials/tutorials/CaloDataAnaRun24pp/list_runnumber_bad_livetime_v1.txt
0383
0384 echo "Files for missing bad tower maps and livetime failures have been copied to:"
0385 echo "/sphenix/user/patsfan753/tutorials/tutorials/CaloDataAnaRun24pp/"
0386 echo "Done."