runListGeneration/Run2024pp/GoldenRunList_ConductorFile.sh

0001 #!/bin/bash
0002
0003 ##############################################################################################################################################################################
0004 # Golden Run List Generation Script
0005 #
0006 # Purpose:
0007 #   This script compiles a final list of "Golden Runs" for sPHENIX analysis. The selection criteria are:
0008 #     • Run contains ≥1M events (and runnumber ≥47289 unless 'noRunNumberLimit' is specified).
0009 #     • Calorimeter QA designates EMCal, IHCal, and OHCal as "Golden."
0010 #     • Runtime exceeds 5 minutes.
#     • MBD (minimum bias) livetime is at least 80%.
#     • MBD trigger was enabled (runs whose gl1_scaledown 'scaledown10' value is -1 are dropped).
0012 #     • (Optional) Runs missing bad tower maps can be removed if 'removeRunsWithMissingMaps' is given.
0013 #     • (Default) Runs without a magnet_on='t' entry in 'magnet_info' are excluded, **unless** you specify 'addNoMagnet'.
0014 #   After constructing this list, the script may also create DST .list files and examine which runs succeeded or failed in generating those lists.
0015 #
0016 # Main Steps:
0017 #   1) FileCatalog extraction (≥1M events; run≥47289 unless 'noRunNumberLimit' is set).
0018 #   2) Calo QA filtering for EMCal/IHCal/OHCal "Golden" classification.
0019 #   3) Runtime >5 minutes.
0020 #   4) MBD livetime >80%.
0021 #   5) Optionally remove runs missing bad tower maps.
0022 #   6) Remove magnet-off runs unless 'addNoMagnet' is specified.
0023 #   7) Produce final run list, optionally create DST .list files, remove any problem segments if requested, then record success/fail.
0024 #   8) If 'noRunNumberLimit' is given, summarize no-limit vs. run≥47289 scenarios.
0025 #
0026 # Usage:
0027 #   ./GoldenRunList_ConductorFile.sh [removeRunsWithMissingMaps] [dontGenerateFileLists] [noRunNumberLimit] [addNoMagnet] [removeBadSegments]
0028 #     - removeRunsWithMissingMaps : Exclude runs missing bad tower maps.
0029 #     - dontGenerateFileLists     : Omit creation of DST .list files entirely.
0030 #     - noRunNumberLimit          : Omit the runnumber≥47289 cutoff.
0031 #     - addNoMagnet               : **Include** runs lacking magnet_on='t' in magnet_info (i.e., skip magnet-off removal).
0032 #     - removeBadSegments         : After DST lists are created, remove specific "bad" segments from each run’s .list file.
0033 ##############################################################################################################################################################################
0034
0035 ########################################
0036 # GLOBAL STYLES
0037 ########################################
# ANSI escape sequences for bold/colored terminal output.
# The values are stored as literal backslash sequences (not raw escape bytes),
# so they are only rendered when printed through `echo -e`.
RESET='\e[0m'
BOLD='\e[1m'
GREEN='\e[32m'
YELLOW='\e[33m'
MAGENTA='\e[35m'
CYAN='\e[36m'
0044
0045 ########################################
0046 # FUNCTIONS
0047 ########################################
0048
# Prints an error message and terminates the script with exit status 1.
# If 'dontGenerateFileLists' was requested, an extra note clarifies that no
# DST lists would have been generated anyway.
#
# Globals:   BOLD, YELLOW, RESET (read), DONT_GENERATE_FILELISTS (read)
# Arguments: $1 - error message to display
# Outputs:   the message (and optional note) on stdout
# Returns:   never returns; exits the shell with status 1
error_exit() {
    echo -e "${BOLD}${YELLOW}[ERROR]:${RESET} $1"
    # Compare the flag as a string instead of executing it as a command:
    # an unset/empty flag (error raised before parse_arguments ran) would
    # otherwise expand to an empty command, which succeeds and prints the note.
    if [[ "${DONT_GENERATE_FILELISTS:-false}" == true ]]; then
        echo "Note: 'dontGenerateFileLists' was provided, so no DST lists would have been generated."
    fi
    exit 1
}
0058
# ------------------------------------------------------------------------------
# PARSE ARGUMENTS
# Translates the positional keywords given to the script into boolean flags.
# Defaults (no keyword given):
#   - Runs with missing maps are kept (unless removeRunsWithMissingMaps).
#   - DST file lists are created (unless dontGenerateFileLists).
#   - Run≥47289 enforced (unless noRunNumberLimit).
#   - Magnet-off runs are removed (unless addNoMagnet).
#   - Problematic segments are *not* removed (unless removeBadSegments).
#
# Globals written: REMOVE_MISSING_MAPS, DONT_GENERATE_FILELISTS,
#                  NO_RUNNUMBER_LIMIT, ADD_NO_MAGNET, REMOVE_BAD_SEGMENTS
#                  (each holds the string "true" or "false").
# Arguments:       the script's command-line keywords, in any order.
# Outputs:         one confirmation line per recognised keyword, a warning for
#                  every unrecognised one, then a summary of the defaults.
# ------------------------------------------------------------------------------
parse_arguments() {
    REMOVE_MISSING_MAPS=false
    DONT_GENERATE_FILELISTS=false
    NO_RUNNUMBER_LIMIT=false
    ADD_NO_MAGNET=false          # If false => magnet-off runs are removed.
    REMOVE_BAD_SEGMENTS=false    # If true => remove problematic segments from run-specific DST .list

    local arg
    for arg in "$@"; do
        case "$arg" in
            removeRunsWithMissingMaps)
                REMOVE_MISSING_MAPS=true
                echo -e "${BOLD}${CYAN}Argument detected:${RESET} Removing runs missing bad tower maps."
                ;;
            dontGenerateFileLists)
                DONT_GENERATE_FILELISTS=true
                echo -e "${BOLD}${CYAN}Argument detected:${RESET} Will not generate DST lists."
                ;;
            noRunNumberLimit)
                NO_RUNNUMBER_LIMIT=true
                echo -e "${BOLD}${CYAN}Argument detected:${RESET} No run number lower limit will be applied."
                ;;
            addNoMagnet)
                ADD_NO_MAGNET=true
                echo -e "${BOLD}${CYAN}Argument detected:${RESET} Including runs with magnet_off (or missing)."
                ;;
            removeBadSegments)
                REMOVE_BAD_SEGMENTS=true
                echo -e "${BOLD}${CYAN}Argument detected:${RESET} Will remove problematic DST segments from each run’s .list files."
                ;;
            *)
                # A misspelled keyword would otherwise be dropped silently and
                # the run would proceed with defaults the user did not intend.
                echo -e "${BOLD}${YELLOW}[WARNING]:${RESET} Unknown argument '${arg}' ignored."
                ;;
        esac
    done

    if ! $REMOVE_MISSING_MAPS; then
        echo "No removal option detected: Missing-map runs will be kept."
    fi
    if $ADD_NO_MAGNET; then
        echo "Runs with magnet_off or missing will be INCLUDED (via addNoMagnet)."
    else
        echo "Runs with magnet_off or missing will be EXCLUDED by default."
    fi
    if ! $REMOVE_BAD_SEGMENTS; then
        echo "No bad-segment removal requested: All segments remain."
    fi

    echo "----------------------------------------"
}
0114
# Removes the products of a previous invocation so the run starts clean:
# everything under list/ and FileLists/ (relative to the current directory),
# all *.list files and the two Final_RunNumbers_After_All_Cuts*.txt files in
# ${workplace}/../dst_list, and the ${workplace}/../dst_list_scratch directory.
#
# Globals: workplace (read; must be non-empty), BOLD, GREEN, RESET (read)
# Returns: exits the shell with an error if 'workplace' is unset or empty.
clean_previous_data() {
    # Refuse to delete anything when 'workplace' is empty: the paths below
    # would otherwise resolve to /../dst_list and /../dst_list_scratch.
    : "${workplace:?clean_previous_data: 'workplace' is not set (call set_workplace first)}"
    local dst_dir="${workplace}/../dst_list"

    echo -e "${BOLD}${GREEN}Cleaning old data (dst_list contents, intermediate files)...${RESET}"

    rm -rf -- list/*
    rm -rf -- FileLists/*
    rm -f -- "${dst_dir}/"*.list

    rm -f -- "${dst_dir}/Final_RunNumbers_After_All_Cuts.txt"
    rm -f -- "${dst_dir}/Final_RunNumbers_After_All_Cuts_ge47289.txt"
    rm -rf -- "${workplace}/../dst_list_scratch"

    echo "All old data removed. Starting fresh."
    echo "----------------------------------------"
}
0130
# Ensures the working directories exist: FileLists/ and list/ in the current
# directory, plus dst_list/ one level above ${workplace}.
# Globals: workplace (read), base_path (written), BOLD, GREEN, RESET (read)
setup_directories() {
    echo -e "${BOLD}${GREEN}Setting up directories...${RESET}"

    base_path="${workplace}/.."

    local needed_dir
    for needed_dir in FileLists/ "${base_path}/dst_list" list/; do
        mkdir -p "$needed_dir"
    done

    printf '%s\n' \
        "Directories ready under ${base_path}/" \
        "----------------------------------------"
}
0142
# Records the directory the script is running from in the global 'workplace'
# and reports it.
# Globals: workplace (written), BOLD, RESET (read)
set_workplace() {
    workplace="$(pwd)"
    echo -e "${BOLD}Working directory:${RESET} ${workplace}"
    printf '%s\n' "----------------------------------------"
}
0149
# (Step 1) Builds the initial run lists by running an embedded Python/pyodbc
# program:
#   1. FileCatalog: runs whose DST_CALO_run2pp / ana462_2024p010_v001 datasets
#      sum to >=1M events (and runnumber >= 47289 unless NO_RUNNUMBER_LIMIT is
#      true) are written to list/list_runnumber_all.txt.
#   2. Production_write: those runs are intersected with the runs flagged
#      GOLDEN for emcal, ihcal and ohcal; the result is written to
#      list/Full_ppGoldenRunList.txt.
#
# Globals read:    NO_RUNNUMBER_LIMIT, BOLD, MAGENTA, RESET
# Globals written: python_script, python_output, total_runs,
#                  combined_golden_runs
# Returns:         calls error_exit (exit 1) if the Python program fails
#                  (e.g. a database connection error) or if no run was found.
extract_initial_runs() {
    echo -e "${BOLD}${MAGENTA}Step 1:${RESET} Extracting initial runs from databases..."

    # The heredoc delimiter is quoted so the Python source is taken literally
    # (no shell expansion of '$', backticks or backslashes inside it).
    python_script=$(cat <<'EOF'
import sys

import pyodbc

no_limit = "NO_LIMIT" in sys.argv

try:
    fc_conn = pyodbc.connect("DSN=FileCatalog;UID=phnxrc;READONLY=True")
except pyodbc.Error as exc:
    print(f"FileCatalog connection failed: {exc}", file=sys.stderr)
    print("TOTAL_RUNS:0")
    sys.exit(1)

fc_cursor = fc_conn.cursor()

# Both scenarios share the same query; only the HAVING clause differs.
having = "SUM(events) >= 1000000"
if not no_limit:
    having += " AND runnumber >= 47289"

query = f"""
    SELECT runnumber
    FROM datasets
    WHERE dsttype='DST_CALO_run2pp' AND dataset='ana462_2024p010_v001'
    GROUP BY runnumber
    HAVING {having};
    """

fc_cursor.execute(query)
all_runs = sorted(row.runnumber for row in fc_cursor.fetchall())

with open('list/list_runnumber_all.txt', 'w') as f:
    for r in all_runs:
        f.write(f"{r}\n")
print(f"TOTAL_RUNS:{len(all_runs)}")

fc_conn.close()

try:
    prod_conn = pyodbc.connect("DSN=Production_write")
except pyodbc.Error as exc:
    print(f"Production_write connection failed: {exc}", file=sys.stderr)
    print("COMBINED_GOLDEN_RUNS:0")
    sys.exit(1)

prod_cursor = prod_conn.cursor()
detectors = ['emcal', 'ihcal', 'ohcal']
golden_runs = set(all_runs)

# A run survives only if every calorimeter classifies it as GOLDEN.
for d in detectors:
    prod_cursor.execute(f"SELECT runnumber FROM goodruns WHERE ({d}_auto).runclass='GOLDEN'")
    detector_golden = {row.runnumber for row in prod_cursor.fetchall()}
    golden_runs = golden_runs.intersection(detector_golden)

golden_runs = sorted(golden_runs)

with open('list/Full_ppGoldenRunList.txt', 'w') as f:
    for r in golden_runs:
        f.write(f"{r}\n")
print(f"COMBINED_GOLDEN_RUNS:{len(golden_runs)}")
prod_conn.close()
EOF
    )

    # If noRunNumberLimit => pass NO_LIMIT so we skip run≥47289
    local py_status
    if $NO_RUNNUMBER_LIMIT; then
        python_output=$(python3 <(echo "$python_script") NO_LIMIT)
    else
        python_output=$(python3 <(echo "$python_script"))
    fi
    # Status of the 'if' compound is that of the assignment, i.e. of python3.
    py_status=$?

    total_runs=$(echo "$python_output" | grep '^TOTAL_RUNS:' | cut -d':' -f2)
    combined_golden_runs=$(echo "$python_output" | grep '^COMBINED_GOLDEN_RUNS:' | cut -d':' -f2)

    echo "Summary after initial extraction:"
    echo "Total initial runs: ${total_runs:-0}"
    echo "Runs after Calo QA: ${combined_golden_runs:-0}"
    echo "----------------------------------------"

    # A failed connection or query must not be mistaken for "no matching
    # runs", nor be allowed to continue with stale or missing list files.
    if (( py_status != 0 )); then
        error_exit "Database extraction failed (python3 exit status ${py_status}). Check the FileCatalog / Production_write connections and the messages above."
    fi

    if [[ "${total_runs:-0}" -eq 0 ]]; then
        error_exit "No runs found after initial extraction. No data matches your criteria."
    fi
}
0242
# (Step 2) Validates that 'list/Full_ppGoldenRunList.txt' exists and is
# non-empty before any further cut is applied.
# Globals: BOLD, MAGENTA, RESET (read)
# Returns: calls error_exit (exit 1) if the file is missing or has no content.
validate_golden_list() {
    local golden_list="list/Full_ppGoldenRunList.txt"

    echo -e "${BOLD}${MAGENTA}Step 2:${RESET} Validating golden run list..."
    if [[ ! -f "$golden_list" ]]; then
        error_exit "list/Full_ppGoldenRunList.txt not found. Possibly no runs qualify."
    fi
    # An existing but empty list means no run survived the Calo QA selection;
    # the later cuts would only produce empty outputs from it.
    if [[ ! -s "$golden_list" ]]; then
        error_exit "list/Full_ppGoldenRunList.txt is empty. No runs passed the Calo QA selection."
    fi
    echo "Golden run list found."
    echo "----------------------------------------"
}
0252
# Private helper: queries the DAQ 'filelist' table for the given run numbers
# and prints SUM(lastevent - firstevent + 1) over their GL1 physics .evt files.
# Prints nothing when psql does not return a plain non-negative integer
# (e.g. NULL because no file matched, or a failed query).
# Arguments: $@ - run numbers to include in the IN (...) list
_sum_gl1_events_for_runs() {
    local run_list query result events
    run_list=$(IFS=,; echo "$*")
    query="SELECT SUM(lastevent - firstevent + 1)
           FROM filelist
           WHERE runnumber IN ($run_list)
             AND filename LIKE '%GL1_physics_gl1daq%.evt';"
    result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
    events=$(echo "$result" | xargs)
    if [[ "$events" =~ ^[0-9]+$ ]]; then
        echo "$events"
    fi
}

# Sums the number of events recorded in the GL1 physics .evt files of every
# run listed in a file (one run number per line; blank lines are skipped).
# Runs are queried in batches of 100 to limit the number of psql calls.
#
# All working variables are local so the function does not overwrite the
# globals (input_file, query, result, ...) used by the cut functions.
#
# Arguments: $1 - path to the run-list file
# Outputs:   the total event count on stdout ("0" if nothing could be summed)
# Returns:   1 if the run-list file is not readable, 0 otherwise
get_actual_events_from_evt() {
    local input_file=$1
    local -r batch_size=100
    local total_events=0
    local runnumber events
    local -a run_numbers=()

    if [[ ! -r "$input_file" ]]; then
        echo "[WARNING] get_actual_events_from_evt: cannot read '$input_file'" >&2
        echo "$total_events"
        return 1
    fi

    # '|| [[ -n ... ]]' keeps the final run when the file lacks a trailing newline.
    while IFS= read -r runnumber || [[ -n "$runnumber" ]]; do
        [[ -z "$runnumber" ]] && continue
        run_numbers+=("$runnumber")
        if (( ${#run_numbers[@]} >= batch_size )); then
            events=$(_sum_gl1_events_for_runs "${run_numbers[@]}")
            run_numbers=()
            # 10# forces base 10 so a zero-padded value is never read as octal.
            [[ -n "$events" ]] && total_events=$(( total_events + 10#$events ))
        fi
    done < "$input_file"

    # Leftover runs (last, partially filled batch)
    if (( ${#run_numbers[@]} > 0 )); then
        events=$(_sum_gl1_events_for_runs "${run_numbers[@]}")
        [[ -n "$events" ]] && total_events=$(( total_events + 10#$events ))
    fi

    echo "$total_events"
}
0295
# Prints the banner that introduces the incremental cuts applied next.
# Globals: BOLD, MAGENTA, RESET (read)
apply_incremental_cuts_header() {
    local rule="----------------------------------------"

    echo "$rule"
    echo -e "${BOLD}${MAGENTA}Applying incremental cuts:${RESET} runtime, livetime, and missing bad tower maps"
    echo "$rule"
}
0302
# Splits the golden run list by the 'scaledown10' value stored in the DAQ
# table gl1_scaledown: a run whose value is exactly -1 is reported as
# "MBD trigger OFF" and written to the bad list; every other run (including
# one for which the query returns nothing) is accepted.
#
# Input:   list/Full_ppGoldenRunList.txt
# Outputs: list/list_runnumber_mbdscaledown_v1.txt      (accepted runs)
#          list/list_runnumber_bad_mbdscaledown_v1.txt  (dropped runs)
# Globals written: input_file, output_file_mbdscaledown_v1,
#          bad_file_mbdscaledown_v1, total_runs_mbdscaledown_v1,
#          runs_dropped_mbdscaledown_v1, total_input_runs, processed,
#          runnumber, query, result, scaled_val
mbd_scaledown_cut() {
    input_file="list/Full_ppGoldenRunList.txt"
    output_file_mbdscaledown_v1="list/list_runnumber_mbdscaledown_v1.txt"
    bad_file_mbdscaledown_v1="list/list_runnumber_bad_mbdscaledown_v1.txt"

    # Start from empty output files.
    : > "$output_file_mbdscaledown_v1"
    : > "$bad_file_mbdscaledown_v1"

    processed=0
    total_runs_mbdscaledown_v1=0
    runs_dropped_mbdscaledown_v1=0
    total_input_runs=$(wc -l < "$input_file")

    local banner="========================================"

    printf '%s\n' \
        "$banner" \
        "[START] Processing MBD Scaledown Cut for $total_input_runs runs from $input_file" \
        "$banner"

    while IFS= read -r runnumber; do
        processed=$(( processed + 1 ))
        # Blank lines count towards 'processed' but are otherwise ignored.
        [[ -n "$runnumber" ]] || continue

        echo "[INFO] Processing run: $runnumber (Run $processed of $total_input_runs)"

        # Scaledown factor of trigger index 10 for this run.
        query=$(printf "SELECT scaledown10 FROM gl1_scaledown WHERE runnumber = %d;" "$runnumber")
        echo "[DEBUG] Executing query for run $runnumber:"
        echo "$query"

        result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")
        echo "[DEBUG] Query result for run $runnumber: $result"

        # xargs collapses the padding psql puts around the value.
        scaled_val=$(printf '%s\n' "$result" | xargs)
        echo "[DEBUG] Run $runnumber: scaledown10 value = '$scaled_val'"

        case "$scaled_val" in
            -1)
                echo "[INFO] Run $runnumber has scaledown factor -1: MBD trigger OFF. Skipping run."
                echo "$runnumber" >> "$bad_file_mbdscaledown_v1"
                runs_dropped_mbdscaledown_v1=$(( runs_dropped_mbdscaledown_v1 + 1 ))
                ;;
            *)
                echo "[INFO] Run $runnumber accepted (scaledown factor = $scaled_val)."
                echo "$runnumber" >> "$output_file_mbdscaledown_v1"
                total_runs_mbdscaledown_v1=$(( total_runs_mbdscaledown_v1 + 1 ))
                ;;
        esac

        # Progress update every 100 input lines.
        if (( processed % 100 == 0 )); then
            echo "[PROGRESS] Processed $processed out of $total_input_runs runs so far..."
        fi
    done < "$input_file"

    printf '%s\n' \
        "$banner" \
        "[SUMMARY] MBD Scaledown Cut Results:" \
        "          Accepted runs: $total_runs_mbdscaledown_v1" \
        "          Dropped runs:  $runs_dropped_mbdscaledown_v1" \
        "          Dropped run numbers:"
    cat "$bad_file_mbdscaledown_v1"
    echo "$banner"
    # The accepted runs in $output_file_mbdscaledown_v1 feed the next stage.
}
0372
0373
# (Step 3) Keeps only runs that lasted longer than 5 minutes (300 s), where
# the duration is (ertimestamp - brtimestamp) from the DAQ 'run' table.
# A run whose duration is missing or not a plain non-negative number is
# dropped. Progress is reported every 100 input lines.
#
# Input:   list/list_runnumber_mbdscaledown_v1.txt
# Output:  list/list_runnumber_runtime_v1.txt
# Globals written: input_file, output_file_duration_v1, total_runs_duration_v1,
#          runs_dropped_runtime_v1, total_input_runs, processed, runnumber,
#          query, result, duration
runtime_cut() {
    input_file="list/list_runnumber_mbdscaledown_v1.txt"
    output_file_duration_v1="list/list_runnumber_runtime_v1.txt"
    : > "$output_file_duration_v1"

    processed=0
    total_runs_duration_v1=0
    runs_dropped_runtime_v1=0
    total_input_runs=$(wc -l < "$input_file")

    local numeric_re='^[0-9]+(\.[0-9]+)?$'
    local long_enough

    while IFS= read -r runnumber; do
        processed=$(( processed + 1 ))
        [[ -n "$runnumber" ]] || continue

        query="SELECT EXTRACT(EPOCH FROM (ertimestamp - brtimestamp))
               FROM run
               WHERE runnumber = ${runnumber};"
        result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query" | tr -d '[:space:]')
        duration="$result"

        # bc prints 1 when the (possibly fractional) duration exceeds 300 s.
        long_enough=0
        if [[ "$duration" =~ $numeric_re ]]; then
            long_enough=$(echo "$duration > 300" | bc -l)
        fi

        if (( long_enough )); then
            echo "$runnumber" >> "$output_file_duration_v1"
            total_runs_duration_v1=$(( total_runs_duration_v1 + 1 ))
        else
            runs_dropped_runtime_v1=$(( runs_dropped_runtime_v1 + 1 ))
        fi

        # Print progress every 100 runs
        if (( processed % 100 == 0 )); then
            echo "  [Runtime Cut] Processed $processed / $total_input_runs runs so far..."
        fi
    done < "$input_file"

    printf '%s\n' \
        "After runtime cut (>5 mins): $total_runs_duration_v1 runs remain." \
        "Dropped due to runtime: $runs_dropped_runtime_v1" \
        "----------------------------------------"
}
0414
# (Step 4) Keeps runs whose trigger-index-10 livetime, live / raw * 100 from
# the DAQ table gl1_scalers, is at least 80. A run passes if any returned row
# satisfies the condition; runs with no usable row (non-numeric values or
# raw == 0) are dropped. Progress is reported every 100 input lines.
#
# Input:   list/list_runnumber_runtime_v1.txt
# Outputs: list/list_runnumber_livetime_v1.txt      (kept runs)
#          list/list_runnumber_bad_livetime_v1.txt  (dropped runs)
# Globals written: input_file, output_file_livetime_v1, bad_file_livetime_v1,
#          total_runs_livetime_v1, runs_dropped_livetime_v1, total_input_runs,
#          processed, runnumber, index_to_check, query, result, index_pass,
#          raw, live, ratio
livetime_cut() {
    input_file="list/list_runnumber_runtime_v1.txt"
    output_file_livetime_v1="list/list_runnumber_livetime_v1.txt"
    bad_file_livetime_v1="list/list_runnumber_bad_livetime_v1.txt"
    : > "$output_file_livetime_v1"
    : > "$bad_file_livetime_v1"

    processed=0
    total_runs_livetime_v1=0
    runs_dropped_livetime_v1=0
    total_input_runs=$(wc -l < "$input_file")

    while IFS= read -r runnumber; do
        processed=$(( processed + 1 ))
        [[ -n "$runnumber" ]] || continue

        index_to_check=10
        query="SELECT raw, live
               FROM gl1_scalers
               WHERE runnumber = ${runnumber}
                 AND index = ${index_to_check};"
        result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query")

        # psql -t prints one "raw | live" row per line.
        index_pass=false
        while IFS='|' read -r raw live; do
            raw=$(printf '%s\n' "$raw" | xargs)
            live=$(printf '%s\n' "$live" | xargs)

            # Ignore rows that are not two integers or would divide by zero.
            [[ "$raw" =~ ^[0-9]+$ && "$live" =~ ^[0-9]+$ && "$raw" -ne 0 ]] || continue

            ratio=$(echo "scale=2; $live / $raw * 100" | bc -l)
            if (( $(echo "$ratio >= 80" | bc -l) )); then
                index_pass=true
            fi
        done <<< "$result"

        if [[ "$index_pass" == true ]]; then
            echo "$runnumber" >> "$output_file_livetime_v1"
            total_runs_livetime_v1=$(( total_runs_livetime_v1 + 1 ))
        else
            echo "$runnumber" >> "$bad_file_livetime_v1"
            runs_dropped_livetime_v1=$(( runs_dropped_livetime_v1 + 1 ))
        fi

        if (( processed % 100 == 0 )); then
            echo "  [Livetime Cut] Processed $processed / $total_input_runs runs so far..."
        fi
    done < "$input_file"

    printf '%s\n' \
        "After livetime cut (>80%): $total_runs_livetime_v1 runs remain." \
        "Dropped due to livetime: $runs_dropped_livetime_v1" \
        "----------------------------------------"
}
0470
# (Step 5) Determines which runs surviving the livetime cut have no EMCal bad
# tower map and, if REMOVE_MISSING_MAPS is true, removes them. The surviving
# runs are written to the pre-magnet list and copied to the dst_list folder.
# Prints progress every 100 input lines.
#
# The set of runs that have a map is derived from the names of the "*p0*"
# files found under the map directory (text between the first and second '-'
# of the path, cut at the first 'c'). The directory defaults to the CVMFS
# CEMC_BadTowerMap area and may be overridden with BAD_TOWER_MAP_DIR.
#
# Input:   list/list_runnumber_livetime_v1.txt
# Outputs: FileLists/Full_ppGoldenRunList_Version1_preMagnet.txt
#          list/list_runs_missing_bad_tower_maps.txt
#          ${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt
#          (NO_RUNNUMBER_LIMIT only) FileLists/Full_ppGoldenRunList_ge47289_Version1.txt
#          and ${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts_ge47289.txt
# Globals read:    REMOVE_MISSING_MAPS, NO_RUNNUMBER_LIMIT, workplace,
#                  BAD_TOWER_MAP_DIR (optional)
# Globals written: input_file, pre_magnet_file, bad_tower_runs_file,
#                  available_bad_tower_runs, total_input_runs, processed,
#                  runnumber, total_runs_with_bad_tower,
#                  total_runs_missing_bad_tower (exported),
#                  missing_maps_ge47289 (exported when both flags are set)
# Returns: 1 if filtering the run list fails, otherwise 0.
missing_bad_tower_maps_step() {
    input_file="list/list_runnumber_livetime_v1.txt"
    pre_magnet_file="FileLists/Full_ppGoldenRunList_Version1_preMagnet.txt"
    bad_tower_runs_file="list/list_runs_missing_bad_tower_maps.txt"
    local map_dir="${BAD_TOWER_MAP_DIR:-/cvmfs/sphenix.sdcc.bnl.gov/calibrations/sphnxpro/cdb/CEMC_BadTowerMap}"

    cp "$input_file" "$pre_magnet_file"

    available_bad_tower_runs=$(find "$map_dir" -name "*p0*" | cut -d '-' -f2 | cut -dc -f1 | sort -u)

    # Lookup table of runs that have a map. Empty entries are skipped: an
    # empty 'find' result would otherwise produce an invalid "" subscript.
    local -A avail_map=()
    local map_run
    while IFS= read -r map_run; do
        [[ -n "$map_run" ]] && avail_map["$map_run"]=1
    done <<< "$available_bad_tower_runs"

    total_input_runs=$(wc -l < "$input_file")
    processed=0
    total_runs_with_bad_tower=0
    total_runs_missing_bad_tower=0
    : > "$bad_tower_runs_file"

    # Identify runs missing maps
    while IFS= read -r runnumber; do
        processed=$(( processed + 1 ))
        [[ -z "$runnumber" ]] && continue

        if [[ -n "${avail_map[$runnumber]:-}" ]]; then
            total_runs_with_bad_tower=$(( total_runs_with_bad_tower + 1 ))
        else
            echo "$runnumber" >> "$bad_tower_runs_file"
            total_runs_missing_bad_tower=$(( total_runs_missing_bad_tower + 1 ))
        fi

        if (( processed % 100 == 0 )); then
            echo "  [Bad Tower Step] Processed $processed / $total_input_runs runs so far..."
        fi
    done < "$input_file"

    echo "Runs with bad tower maps: $total_runs_with_bad_tower"
    echo "Runs missing bad tower maps: $total_runs_missing_bad_tower"
    echo "List of missing map runs: $bad_tower_runs_file"
    echo "----------------------------------------"

    export total_runs_missing_bad_tower

    if $REMOVE_MISSING_MAPS; then
        echo "Removing runs missing bad tower maps..."
        if (( total_runs_missing_bad_tower > 0 )); then
            local filtered="${pre_magnet_file}.tmp"
            local grep_status=0
            grep -Fxvf "$bad_tower_runs_file" "$pre_magnet_file" > "$filtered" || grep_status=$?
            # grep exits 1 when it selects no line, i.e. every run lacked a
            # map. That is a valid (empty) result and must still replace the
            # list; only a status above 1 is a real failure.
            if (( grep_status > 1 )); then
                rm -f -- "$filtered"
                echo "[ERROR] Could not filter $pre_magnet_file (grep exit status $grep_status)."
                return 1
            fi
            mv "$filtered" "$pre_magnet_file"
        fi
        echo "Removal complete."
        echo "----------------------------------------"
    fi

    cp "$pre_magnet_file" "${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt"
    echo "Final run list (pre-magnet step) stored in ${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt"
    echo "----------------------------------------"

    # If noRunNumberLimit => also create ge47289 final
    if $NO_RUNNUMBER_LIMIT; then
        awk '$1 >= 47289' "$pre_magnet_file" > FileLists/Full_ppGoldenRunList_ge47289_Version1.txt
        cp FileLists/Full_ppGoldenRunList_ge47289_Version1.txt \
           "${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts_ge47289.txt"

        if $REMOVE_MISSING_MAPS; then
            # Count the removed runs that fall in the run>=47289 range. They
            # are counted from the missing-map list itself: the ge47289 list
            # above no longer contains them, so intersecting with it would
            # always give 0.
            missing_maps_ge47289=$(awk '$1 >= 47289 { n++ } END { print n + 0 }' "$bad_tower_runs_file")
            export missing_maps_ge47289
        fi
    fi
}
0545
# (Step 6) Produces FileLists/Full_ppGoldenRunList_Version1.txt from the
# pre-magnet list.
#   - ADD_NO_MAGNET == true: the pre-magnet list is simply renamed, so
#     magnet-off runs are kept.
#   - otherwise: each run is looked up in the DAQ table magnet_info and kept
#     only if magnet_on is 't'; all other runs (off or no entry) go to
#     list/list_runs_no_magnet.txt. The kept list also overwrites
#     ${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt.
# Progress is printed every 100 input lines.
#
# Globals read:    ADD_NO_MAGNET, workplace
# Globals written: pre_magnet_file, magnet_off_file, final_list_magnet,
#                  total_runs_magnet_ok, runs_dropped_magnet, total_input_runs,
#                  processed, runnumber, query, result,
#                  total_runs_magnet_off (exported)
magnet_check_step() {
    case "$ADD_NO_MAGNET" in
        true)
            echo "addNoMagnet argument was provided: skipping magnet check => keeping magnet-off runs..."
            mv FileLists/Full_ppGoldenRunList_Version1_preMagnet.txt FileLists/Full_ppGoldenRunList_Version1.txt
            return
            ;;
    esac

    echo "Step 6: Removing runs where magnet_on != 't'..."

    pre_magnet_file="FileLists/Full_ppGoldenRunList_Version1_preMagnet.txt"
    if [[ ! -f "$pre_magnet_file" ]]; then
        echo "[ERROR] No 'preMagnet' file found: $pre_magnet_file"
        return
    fi

    magnet_off_file="list/list_runs_no_magnet.txt"
    final_list_magnet="FileLists/Full_ppGoldenRunList_Version1.txt"
    : > "$magnet_off_file"
    : > "$final_list_magnet"

    processed=0
    total_runs_magnet_ok=0
    runs_dropped_magnet=0
    total_input_runs=$(wc -l < "$pre_magnet_file")

    while IFS= read -r runnumber; do
        processed=$(( processed + 1 ))
        [[ -n "$runnumber" ]] || continue

        query="SELECT magnet_on
               FROM magnet_info
               WHERE runnumber=${runnumber};"
        result=$(psql -h sphnxdaqdbreplica -d daq -t -c "$query" | tr -d '[:space:]')

        # Anything other than a single 't' (off, no entry, query failure)
        # sends the run to the dropped list.
        case "$result" in
            t)
                echo "$runnumber" >> "$final_list_magnet"
                total_runs_magnet_ok=$(( total_runs_magnet_ok + 1 ))
                ;;
            *)
                echo "$runnumber" >> "$magnet_off_file"
                runs_dropped_magnet=$(( runs_dropped_magnet + 1 ))
                ;;
        esac

        if (( processed % 100 == 0 )); then
            echo "  [Magnet Check] Processed $processed / $total_input_runs runs so far..."
        fi
    done < "$pre_magnet_file"

    printf '%s\n' \
        "Magnet On check: $total_runs_magnet_ok runs kept (magnet_on='t')." \
        "Dropped (magnet_off or missing): $runs_dropped_magnet" \
        "List of dropped runs: $magnet_off_file" \
        "----------------------------------------"

    # Overwrite final list with magnet-checked version
    cp "$final_list_magnet" "${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt"

    export total_runs_magnet_off=$runs_dropped_magnet
}
0608
# Copies the final run list in FileLists/ to Full_ppGoldenRunList_Version1.list
# in the current directory. If only the pre-magnet list exists it is first
# renamed to the final name; if neither exists a warning is printed.
create_list_file() {
    local final_txt="FileLists/Full_ppGoldenRunList_Version1.txt"
    local pre_magnet_txt="FileLists/Full_ppGoldenRunList_Version1_preMagnet.txt"

    echo "Creating final .list file from the final run list..."

    # Fall back to the pre-magnet list when the magnet step produced nothing.
    if [[ ! -f "$final_txt" && -f "$pre_magnet_txt" ]]; then
        mv "$pre_magnet_txt" "$final_txt"
    fi

    if [[ ! -f "$final_txt" ]]; then
        echo "[WARNING] Could not find 'FileLists/Full_ppGoldenRunList_Version1.txt' to create .list!"
    else
        cp "$final_txt" Full_ppGoldenRunList_Version1.list
        echo ".list file created: Full_ppGoldenRunList_Version1.list"
    fi
    echo "----------------------------------------"
}
0628
# Removes every *.list file from ${workplace}/../dst_list so that lists from
# an earlier invocation are not mixed with the ones about to be generated.
# Globals: workplace (read; must be non-empty)
# Returns: exits the shell with an error if 'workplace' is unset or empty.
clean_old_dst_lists() {
    # With an empty 'workplace' the glob below would point at /../dst_list.
    : "${workplace:?clean_old_dst_lists: 'workplace' is not set (call set_workplace first)}"

    echo "Removing any old DST lists from the parent 'dst_list' directory..."
    rm -f -- "${workplace}/../dst_list/"*.list
    echo "Done removing old .list files in dst_list/."
    echo "----------------------------------------"
}
0636
# Builds the run-specific DST .list files by running CreateDstList.pl inside
# ${workplace}/../dst_list on ${workplace}/Full_ppGoldenRunList_Version1.list.
# Nothing is done when 'dontGenerateFileLists' was given; a warning is printed
# when the .list file is missing.
#
# Globals read:    DONT_GENERATE_FILELISTS, NO_RUNNUMBER_LIMIT, workplace
# Globals written: list_path
# Returns: 1 if the dst_list directory (or, afterwards, the working
#          directory) cannot be entered; 0 otherwise.
generate_dst_lists() {
    if [[ "${DONT_GENERATE_FILELISTS:-false}" == true ]]; then
        echo "[INFO] Skipping DST list generation due to 'dontGenerateFileLists'."
        return
    fi

    echo "Generating DST lists for the main scenario..."

    # CreateDstList.pl must only run from inside dst_list; if the directory
    # cannot be entered, stop here rather than run it in the current directory.
    if ! cd "${workplace}/../dst_list"; then
        echo "[ERROR] Cannot enter ${workplace}/../dst_list. No DST lists will be created."
        echo "----------------------------------------"
        return 1
    fi

    list_path="${workplace}/Full_ppGoldenRunList_Version1.list"
    if [[ ! -f "$list_path" ]]; then
        echo "[WARNING] Could not find final .list file at: $list_path"
        echo "No DST lists will be created."
    elif CreateDstList.pl --build ana462 --cdb 2024p010_v001 DST_JET_run2pp --list "$list_path"; then
        echo "DST lists generated under ${workplace}/../dst_list"
    else
        # Do not claim success when the tool itself reported a failure.
        echo "[WARNING] CreateDstList.pl exited with a non-zero status; DST lists under ${workplace}/../dst_list may be missing or incomplete."
    fi

    if $NO_RUNNUMBER_LIMIT; then
        echo "No separate scratch folder is used for the ≥47289 subset."
    fi

    echo "----------------------------------------"
    cd "$workplace" || return 1
}
0664
0665 remove_problematic_segments() {
0666     if ! $REMOVE_BAD_SEGMENTS; then
0667         echo "[INFO] Skipping problematic-segment removal (removeBadSegments not set)."
0668         return
0669     fi
0670
0671     # Internal Boolean: if true, generate the bad segment file using the embedded Python script.
0672     # If false (the default), use the pre-generated file.
0673     generateBadSegmentFile=false
0674
0675     if $generateBadSegmentFile; then
0676         # Define where the problematic segments file will be generated.
0677         segments_file="list/list_problematic_segments.txt"
0678
0679         echo "========================================"
0680         echo "[INFO] Generating problematic segments file using embedded Python script... this may take some time..."
0681         echo "[DEBUG] Starting Python script execution..."
0682         python3 <<'EOF' > "$segments_file"
0683 import os
0684 import sys
0685 import subprocess
0686 import argparse
0687
0688 # Define a debug print function that flushes immediately.
0689 def dprint(*args, **kwargs):
0690     print(*args, **kwargs)
0691     sys.stdout.flush()
0692
0693 # Create the "lists" folder for the primary output file.
0694 lists_dir = os.path.join(os.getcwd(), "lists")
0695 os.makedirs(lists_dir, exist_ok=True)
0696 dprint("Created/verified 'lists' directory:", lists_dir)
0697
0698 # Create the "list" folder for the segments output file.
0699 list_dir = os.path.join(os.getcwd(), "list")
0700 os.makedirs(list_dir, exist_ok=True)
0701 dprint("Created/verified 'list' directory:", list_dir)
0702
0703 parser = argparse.ArgumentParser()
0704 parser.add_argument(
0705     '-i', '--log-dir',
0706     type=str,
0707     default='/sphenix/data/data02/sphnxpro/run2pp/calologs/ana446_2024p007',
0708     help='Location of the log directory.'
0709 )
0710 # Output file for logs (stays in the lists folder)
0711 parser.add_argument(
0712     '-o', '--output',
0713     type=str,
0714     default=os.path.join(lists_dir, 'bad-calologs-ana446-2024p007.list'),
0715     help='Output list file of all files that have an error.'
0716 )
0717 # Output file for segments (placed in the list folder)
0718 parser.add_argument(
0719     '-o2', '--output-segments',
0720     type=str,
0721     default=os.path.join(list_dir, 'list_problematic_segments.txt'),
0722     help='Output list file of all segments that have an error.'
0723 )
0724
0725 args = parser.parse_args()
0726
0727 if __name__ == '__main__':
0728     log_dir = os.path.realpath(args.log_dir)
0729     output = os.path.realpath(args.output)
0730     output_segments = os.path.realpath(args.output_segments)
0731
0732     dprint(f'Log Dir: {log_dir}')
0733     dprint(f'Output: {output}')
0734     dprint(f'Output Segments: {output_segments}')
0735
0736     # Remove the output file if it already exists.
0737     if os.path.exists(output):
0738         os.remove(output)
0739         dprint(f'File {output} deleted successfully.')
0740
0741     # Generate the list of log files that contain "Error".
0742     command = f'''while read d; do
0743         echo "Processing directory: $d"
0744         /direct/sphenix+u/anarde/.cargo/bin/rg -l "Error" $d >> {output}
0745         echo "Current log count in {output}: $(wc -l {output})"
0746     done < <(readlink -f {log_dir}/*)'''
0747     dprint("Running command to generate error log file...")
0748     subprocess.run(['bash', '-c', command], check=True)
0749     dprint("Completed generating error log file.")
0750
0751     # Extract only the run-segments from the list of log files.
0752     command = f"awk -F'/' '{{print $NF}}' {output} | cut -d'-' -f2,3 | cut -d'.' -f1 | sort > {output_segments}"
0753     dprint("Running command to extract run-segments...")
0754     subprocess.run(['bash', '-c', command], check=True)
0755     dprint("Extracted run-segments into", output_segments)
0756 EOF
0757     else
0758         segments_file="/sphenix/user/patsfan753/tutorials/tutorials/CaloDataAnaRun24pp/runListGeneration/bad-ana446-2024p007-segments.list"
0759         echo "========================================"
0760         echo "[INFO] Using pre-generated bad segments file: $segments_file"
0761     fi
0762
0763     echo "========================================"
0764     echo "[INFO] Problematic segments file is: $segments_file"
0765     if [[ ! -f "$segments_file" ]]; then
0766         echo "[WARNING] Could not find segments file at $segments_file. No segments removed."
0767         return
0768     fi
0769
0770     echo "[INFO] Starting removal of problematic segments from DST lists..."
0771     echo "========================================"
0772
0773     while IFS= read -r line; do
0774         [[ -z "$line" ]] && continue
0775
0776         echo "[DEBUG] Processing segment line: '$line'"
0777         # Each line is expected to be of the form "00046010-00003".
0778         run_part="${line%-*}"   # e.g. 00046010
0779         seg_part="${line#*-}"    # e.g. 00003
0780         echo "[DEBUG] Extracted run number: '$run_part'"
0781         echo "[DEBUG] Extracted segment number: '$seg_part'"
0782
0783         # Determine the corresponding DST .list file to modify.
0784         listfile="${workplace}/../dst_list/dst_jet_run2pp-${run_part}.list"
0785         echo "[DEBUG] Looking for DST list file: $listfile"
0786         if [[ -f "$listfile" ]]; then
0787             # The DST list lines are expected to contain something like:
0788             # DST_JET_run2pp_ana462_2024p010_v001-00046010-00003.root
0789             pattern="${run_part}-${seg_part}.root"
0790             echo "[DEBUG] Searching for pattern '$pattern' in $listfile"
0791             lines_to_remove=$(grep "$pattern" "$listfile")
0792             if [[ -n "$lines_to_remove" ]]; then
0793                 echo "[INFO] Found the following lines in $listfile matching pattern '$pattern':"
0794                 echo "$lines_to_remove"
0795                 grep -v "$pattern" "$listfile" > "${listfile}.tmp" && mv "${listfile}.tmp" "$listfile"
0796                 echo "[INFO] Removed segment '$pattern' from $listfile."
0797             else
0798                 echo "[INFO] No matching lines found for segment '$pattern' in $listfile. Skipping removal."
0799             fi
0800         else
0801             echo "[WARNING] DST list file $listfile does not exist. Skipping segment '$seg_part' for run '$run_part'."
0802         fi
0803         echo "----------------------------------------"
0804     done < "$segments_file"
0805
0806     echo "[INFO] Done removing problematic segments."
0807     echo "========================================"
0808 }
0809
0810
# After DST .list files are generated, check which runs successfully produced
# run-specific .list files (dst_jet_run2pp-<run>.list) and summarize the
# success/failure split in text files.

#######################################
# Helper for apply_createDstList_cut: split one run list into runs that have
# a dst_jet_run2pp-<run>.list file and runs that do not.
# Arguments:
#   $1 - run list file (one run number per line)
#   $2 - directory that holds the dst_jet_run2pp-<run>.list files
#   $3 - file receiving runs WITH a .list file (truncated first)
#   $4 - file receiving runs WITHOUT a .list file (truncated first)
# Outputs:
#   Writes "<n_with_list> <n_without_list>" to stdout.
#######################################
_createDst_split_runs() {
    local run_list_file=$1 dst_dir=$2 ok_file=$3 bad_file=$4
    local list_path stem run
    local n_ok=0 n_bad=0
    local -A has_list=()

    : > "$ok_file"
    : > "$bad_file"

    # Index every produced .list file by its run number with leading zeros
    # stripped (e.g. dst_jet_run2pp-00046010.list -> 46010).
    for list_path in "${dst_dir}/dst_jet_run2pp-"*.list; do
        [[ -e "$list_path" ]] || continue   # glob matched nothing
        stem=${list_path##*/}
        stem=${stem%.list}
        stem=${stem#dst_jet_run2pp-}
        if [[ "$stem" =~ ^0*([0-9]+)$ ]]; then
            has_list["${BASH_REMATCH[1]}"]=1
        fi
    done

    # '|| [[ -n $run ]]' keeps a final line that lacks a trailing newline.
    while IFS= read -r run || [[ -n "$run" ]]; do
        # Blank lines must be skipped: an empty associative-array key is a
        # "bad array subscript" error and would be miscounted as a failure.
        run=${run//[[:space:]]/}
        [[ -n "$run" ]] || continue

        if [[ -n "${has_list[$run]:-}" ]]; then
            printf '%s\n' "$run" >> "$ok_file"
            n_ok=$(( n_ok + 1 ))   # not (( n_ok++ )): that returns 1 when n_ok was 0
        else
            printf '%s\n' "$run" >> "$bad_file"
            n_bad=$(( n_bad + 1 ))
        fi
    done < "$run_list_file"

    printf '%s %s\n' "$n_ok" "$n_bad"
}

#######################################
# Record, for the final golden run list, which runs have a DST .list file.
# Globals read:
#   DONT_GENERATE_FILELISTS, NO_RUNNUMBER_LIMIT (each holds 'true' or 'false'
#   and is executed as a command), workplace
# Globals written and exported:
#   total_runs_createDst_success, runs_dropped_createDst,
#   actual_events_after_createDst, total_runs_after_createDst
#   and, when NO_RUNNUMBER_LIMIT is true, the same four with a _ge47289 suffix.
# Files written (relative to the current directory):
#   list/list_runnumber_createDstSuccess[_ge47289].txt
#   list/list_runnumber_createDstFailure[_ge47289].txt
#   FileLists/Full_ppGoldenRunList[_ge47289]_Version1_DSTsuccess.txt
# Calls:
#   get_actual_events_from_evt (defined elsewhere in this file)
#######################################
apply_createDstList_cut() {
    local counts

    echo "Collecting CreateDST File List success/failure for the main scenario..."

    if $DONT_GENERATE_FILELISTS; then
        echo "[INFO]: 'dontGenerateFileLists' was provided, so no DST creation was done for main scenario."
        export total_runs_createDst_success=0
        export runs_dropped_createDst=0
        export actual_events_after_createDst=0
        export total_runs_after_createDst=0
        return
    fi

    final_stage4_file="FileLists/Full_ppGoldenRunList_Version1.txt"
    if [[ ! -f "$final_stage4_file" ]]; then
        echo "[ERROR]: Cannot find final stage file: $final_stage4_file"
        export total_runs_createDst_success=0
        export runs_dropped_createDst=0
        export actual_events_after_createDst=0
        export total_runs_after_createDst=0
        return
    fi

    success_file="list/list_runnumber_createDstSuccess.txt"
    failure_file="list/list_runnumber_createDstFailure.txt"
    base_path="${workplace}/../dst_list"

    counts=$(_createDst_split_runs "$final_stage4_file" "$base_path" "$success_file" "$failure_file")
    total_runs_createDst_success=${counts% *}
    runs_dropped_createDst=${counts#* }

    echo "Runs with successful .list creation: $total_runs_createDst_success"
    echo "Runs with no .list file: $runs_dropped_createDst"
    echo "List of runs that failed:  $failure_file"
    echo "List of runs that succeeded: $success_file"
    echo "----------------------------------------"

    # Summation of events for runs that succeeded
    actual_events_after_createDst=$(get_actual_events_from_evt "$success_file")
    total_runs_after_createDst=$total_runs_createDst_success

    cp "$success_file" "FileLists/Full_ppGoldenRunList_Version1_DSTsuccess.txt"

    export total_runs_createDst_success
    export runs_dropped_createDst
    export actual_events_after_createDst
    export total_runs_after_createDst

    # If noRunNumberLimit => do the same for the run≥47289 scenario
    if $NO_RUNNUMBER_LIMIT; then
        echo "Collecting CreateDST File List success/failure for the '≥47289' scenario..."
        final_stage4_file_ge47289="FileLists/Full_ppGoldenRunList_ge47289_Version1.txt"
        if [[ ! -f "$final_stage4_file_ge47289" ]]; then
            echo "[ERROR]: Cannot find the ge47289 final list: $final_stage4_file_ge47289"
            export total_runs_createDst_success_ge47289=0
            export runs_dropped_createDst_ge47289=0
            export actual_events_after_createDst_ge47289=0
            export total_runs_after_createDst_ge47289=0
        else
            success_file_ge47289="list/list_runnumber_createDstSuccess_ge47289.txt"
            failure_file_ge47289="list/list_runnumber_createDstFailure_ge47289.txt"

            counts=$(_createDst_split_runs "$final_stage4_file_ge47289" "$base_path" \
                "$success_file_ge47289" "$failure_file_ge47289")
            total_runs_createDst_success_ge47289=${counts% *}
            runs_dropped_createDst_ge47289=${counts#* }

            echo "≥47289 scenario: runs with successful .list creation: $total_runs_createDst_success_ge47289"
            echo "≥47289 scenario: runs with no .list file: $runs_dropped_createDst_ge47289"
            echo "≥47289 scenario: List of runs that failed:  $failure_file_ge47289"
            echo "≥47289 scenario: List of runs that succeeded: $success_file_ge47289"
            echo "----------------------------------------"

            actual_events_after_createDst_ge47289=$(get_actual_events_from_evt "$success_file_ge47289")
            total_runs_after_createDst_ge47289=$total_runs_createDst_success_ge47289

            cp "$success_file_ge47289" "FileLists/Full_ppGoldenRunList_ge47289_Version1_DSTsuccess.txt"

            export total_runs_createDst_success_ge47289
            export runs_dropped_createDst_ge47289
            export actual_events_after_createDst_ge47289
            export total_runs_after_createDst_ge47289
        fi
    fi
}
0956
#######################################
# Helper for compute_event_counts: tally one stage's run list.
# A missing list file yields 0 events / 0 runs instead of empty values, so
# the later bc expressions never receive a blank operand.
# Arguments:
#   $1 - run list file
#   $2 - name of the global variable that receives the event total
#   $3 - name of the global variable that receives the line (run) count
#######################################
_cec_tally_stage() {
    local list_file=$1 events_var=$2 runs_var=$3
    local n_events=0 n_runs=0

    if [[ -f "$list_file" ]]; then
        n_events=$(get_actual_events_from_evt "$list_file")
        # $(( )) also strips any padding some wc implementations emit.
        n_runs=$(( $(wc -l < "$list_file") ))
    fi

    printf -v "$events_var" '%s' "${n_events:-0}"
    printf -v "$runs_var" '%s' "${n_runs:-0}"
}

#######################################
# Helper for compute_event_counts: print 100*$1/$2 using bc with scale=2.
#######################################
_cec_percent() {
    echo "scale=2; 100.0*$1/$2" | bc
}

#######################################
# Gather per-stage run and event counts and their percentages relative to
# Stage 1 (initial extraction), then export them for final_summary.
# Run counts are the line counts of the stage list files; event counts come
# from get_actual_events_from_evt (defined elsewhere in this file).
# Globals written and exported:
#   STAGE1_RUNS, runs_after_*, actual_events_*, percent_runs_*,
#   percent_actual_events_*, percent_events_after_createDst
# Globals written but not exported:
#   dropped_runs_mbd, actual_events_mbd_dropped
#######################################
compute_event_counts() {
    local events_baseline

    pre_magnet_file="FileLists/Full_ppGoldenRunList_Version1_preMagnet.txt"
    final_file="FileLists/Full_ppGoldenRunList_Version1.txt"

    # Stage 1: Initial Extraction
    _cec_tally_stage 'list/list_runnumber_all.txt' actual_events_before_cuts total_runs_before_cuts
    actual_events_initial=$actual_events_before_cuts

    # Stage 2: Calo QA (Golden Run List)
    _cec_tally_stage 'list/Full_ppGoldenRunList.txt' actual_events_calo_qa runs_after_calo_qa

    # Stage 3: MBD Scaledown Cut (MBD Trigger On Check), kept and dropped runs
    _cec_tally_stage 'list/list_runnumber_mbdscaledown_v1.txt' actual_events_mbd runs_after_mbd
    _cec_tally_stage 'list/list_runnumber_bad_mbdscaledown_v1.txt' actual_events_mbd_dropped dropped_runs_mbd

    # Stage 4: Runtime Cut (>5 minutes)
    _cec_tally_stage 'list/list_runnumber_runtime_v1.txt' actual_events_after_runtime runs_after_runtime

    # Stage 5: Livetime Cut (>80%)
    _cec_tally_stage 'list/list_runnumber_livetime_v1.txt' actual_events_after_livetime runs_after_livetime

    # Stage 6: Pre-Magnet (Bad Tower Removal)
    _cec_tally_stage "$pre_magnet_file" actual_events_after_badtower runs_after_badtower

    # Stage 7: Final (Magnet On Check)
    _cec_tally_stage "$final_file" actual_events_after_magnet runs_after_magnet

    # Stage 8: DST Creation Success (Final Output)
    _cec_tally_stage "FileLists/Full_ppGoldenRunList_Version1_DSTsuccess.txt" \
        actual_events_after_createDst runs_after_createDst

    # Use Stage 1 as the baseline; substitute 1 for a zero denominator so bc
    # never divides by zero.
    STAGE1_RUNS=$total_runs_before_cuts
    if [[ "$STAGE1_RUNS" -eq 0 ]]; then
        STAGE1_RUNS=1
    fi
    # The event denominator is kept local so the exported
    # actual_events_before_cuts still reports the real total.
    events_baseline=$actual_events_before_cuts
    if [[ "$events_baseline" -eq 0 ]]; then
        events_baseline=1
    fi

    # Run-based percentages
    percent_runs_calo_qa=$(_cec_percent "$runs_after_calo_qa" "$STAGE1_RUNS")
    percent_runs_mbd=$(_cec_percent "$runs_after_mbd" "$STAGE1_RUNS")
    percent_runs_runtime=$(_cec_percent "$runs_after_runtime" "$STAGE1_RUNS")
    percent_runs_livetime=$(_cec_percent "$runs_after_livetime" "$STAGE1_RUNS")
    percent_runs_badtower=$(_cec_percent "$runs_after_badtower" "$STAGE1_RUNS")
    percent_runs_magnet=$(_cec_percent "$runs_after_magnet" "$STAGE1_RUNS")
    percent_runs_createDst=$(_cec_percent "$runs_after_createDst" "$STAGE1_RUNS")

    # Event-based percentages
    percent_actual_events_calo_qa=$(_cec_percent "$actual_events_calo_qa" "$events_baseline")
    percent_actual_events_mbd=$(_cec_percent "$actual_events_mbd" "$events_baseline")
    percent_actual_events_after_runtime=$(_cec_percent "$actual_events_after_runtime" "$events_baseline")
    percent_actual_events_after_livetime=$(_cec_percent "$actual_events_after_livetime" "$events_baseline")
    percent_actual_events_after_badtower=$(_cec_percent "$actual_events_after_badtower" "$events_baseline")
    percent_actual_events_after_magnet=$(_cec_percent "$actual_events_after_magnet" "$events_baseline")
    percent_events_after_createDst=$(_cec_percent "$actual_events_after_createDst" "$events_baseline")

    # Export variables for use in the summary
    export STAGE1_RUNS
    export runs_after_calo_qa runs_after_mbd runs_after_runtime runs_after_livetime runs_after_badtower runs_after_magnet runs_after_createDst
    export actual_events_before_cuts total_runs_before_cuts actual_events_initial
    export actual_events_calo_qa actual_events_mbd actual_events_after_runtime actual_events_after_livetime actual_events_after_badtower actual_events_after_magnet actual_events_after_createDst
    export percent_runs_calo_qa percent_runs_mbd percent_runs_runtime percent_runs_livetime percent_runs_badtower percent_runs_magnet percent_runs_createDst
    export percent_actual_events_calo_qa percent_actual_events_mbd percent_actual_events_after_runtime percent_actual_events_after_livetime percent_actual_events_after_badtower percent_actual_events_after_magnet percent_events_after_createDst
}
#######################################
# Print the summed run duration and summed MBD live counts over every run in
# ${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt.
# For each run, two psql queries are issued against host sphnxdaqdbreplica,
# database daq: the run duration in seconds from table 'run', and 'live'
# from 'gl1_scalers' at index = 10. A result that is not a plain number
# contributes 0 to the corresponding total.
# Globals read:    workplace
# Globals written: final_list, total_runtime, total_live
# Returns:         1 if the final run list does not exist.
#######################################
print_final_totals() {
    local run runtime live

    # Path to the final golden run list
    final_list="${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt"

    if [[ ! -f "$final_list" ]]; then
        echo "[ERROR] Final golden run list not found: $final_list"
        return 1
    fi

    total_runtime=0
    total_live=0

    echo "========================================"
    echo "[INFO] Calculating final totals for all runs in the final golden run list..."
    echo "========================================"

    # '|| [[ -n $run ]]' keeps a final line that lacks a trailing newline.
    while IFS= read -r run || [[ -n "$run" ]]; do
        # Drop stray whitespace (including CR) and skip empty lines.
        run=${run//[[:space:]]/}
        [[ -n "$run" ]] || continue

        # The value is interpolated into SQL below, so accept digits only.
        if ! [[ "$run" =~ ^[0-9]+$ ]]; then
            echo "[WARNING] Skipping non-numeric entry in $final_list: '$run'" >&2
            continue
        fi

        # Run duration in seconds. stdin is redirected so psql can never
        # consume lines that belong to this loop.
        runtime=$(psql -h sphnxdaqdbreplica -d daq -t -c "SELECT EXTRACT(EPOCH FROM (ertimestamp - brtimestamp)) FROM run WHERE runnumber = $run;" < /dev/null | xargs)
        if ! [[ "$runtime" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
            runtime=0
        fi

        # MBD live count for this run at index=10.
        live=$(psql -h sphnxdaqdbreplica -d daq -t -c "SELECT live FROM gl1_scalers WHERE runnumber = $run AND index = 10;" < /dev/null | xargs)
        if ! [[ "$live" =~ ^[0-9]+$ ]]; then
            live=0
        fi

        # bc rather than shell arithmetic: runtime may be fractional.
        total_runtime=$(echo "$total_runtime + $runtime" | bc)
        total_live=$(echo "$total_live + $live" | bc)
    done < "$final_list"

    echo "========================================"
    echo "FINAL TOTALS:"
    echo "Total running time for all runs (in seconds): $total_runtime"
    echo "Total MBD N&S>=1 trigger live counts:       $total_live"
    echo "========================================"
}
1103
1104
#######################################
# Helper for final_summary: print one three-column row of the stage table.
# Arguments: $1 - stage label, $2 - events column, $3 - runs column
#######################################
_summary_stage_row() {
    printf "%-50s | %-35s | %-25s\n" "$1" "$2" "$3"
}

#######################################
# Helper for final_summary: print one row of the no-limit vs. ≥47289 table,
# with the difference and the percentage of the no-limit value it represents.
# Arguments: $1 - metric label, $2 - no-limit value, $3 - ≥47289 value
#######################################
_summary_comparison_row() {
    local label=$1 all=${2:-0} ge=${3:-0}
    local delta pct_lost=0

    delta=$(echo "$all - $ge" | bc)
    if (( all > 0 )); then   # leave the percentage at 0 rather than divide by zero
        pct_lost=$(echo "scale=2; 100.0*$delta/$all" | bc)
    fi

    printf "%-40s | %-20s | %-20s | %-20s | %-20s\n" \
        "$label" "$all" "$ge" "$delta" "${pct_lost}%"
}

#######################################
# Print the final per-stage summary table and, when NO_RUNNUMBER_LIMIT is
# true, an additional no-limit vs. ≥47289 comparison.
# Globals read:
#   NO_RUNNUMBER_LIMIT, REMOVE_MISSING_MAPS, ADD_NO_MAGNET, workplace,
#   BOLD/MAGENTA/RESET, the counters exported by compute_event_counts, and
#   total_runs_missing_bad_tower, total_runs_magnet_off, missing_maps_ge47289
#   plus the *_ge47289 counters.
# Outputs: the tables on stdout.
#######################################
final_summary() {
    local stage1_label step_label map_note
    local runs_ge_createDst

    echo -e "${BOLD}${MAGENTA}========================================${RESET}"
    echo -e "${BOLD}${MAGENTA}Final Summary (Version 1)${RESET}"
    echo -e "${MAGENTA}----------------------------------------${RESET}"
    _summary_stage_row "Stage" ".evt File Events" "Runs"
    echo "--------------------------------------------------|-------------------------------------|-------------------------"

    if $NO_RUNNUMBER_LIMIT; then
        stage1_label="≥1M events"
    else
        stage1_label="≥47289 & ≥1M events"
    fi

    # Stage 1: Initial Extraction
    _summary_stage_row "1) $stage1_label" \
        "${actual_events_initial} (100%)" \
        "${STAGE1_RUNS} (100%)"

    # Stage 2: Golden EMCal/HCal
    _summary_stage_row "2) Golden EMCal/HCal" \
        "${actual_events_calo_qa} (${percent_actual_events_calo_qa}%)" \
        "${runs_after_calo_qa} (${percent_runs_calo_qa}%)"

    # Stage 3: MBD Trigger (Scaledown Cut), followed by the dropped runs
    _summary_stage_row "3) MBD Trigger (Scaledown Cut)" \
        "${actual_events_mbd} (${percent_actual_events_mbd}%)" \
        "${runs_after_mbd} (${percent_runs_mbd}%)"
    _summary_stage_row "    Dropped (MBD Trigger Off)" \
        "${actual_events_mbd_dropped}" \
        "${dropped_runs_mbd}"

    # Stage 4: Runtime Cut (>5 minutes)
    _summary_stage_row "4) > 5 minutes runtime" \
        "${actual_events_after_runtime} (${percent_actual_events_after_runtime}%)" \
        "${runs_after_runtime} (${percent_runs_runtime}%)"

    # Stage 5: MB Livetime >80%
    _summary_stage_row "5) MB livetime > 80%" \
        "${actual_events_after_livetime} (${percent_actual_events_after_livetime}%)" \
        "${runs_after_livetime} (${percent_runs_livetime}%)"

    # Stage 6: Pre-Magnet (Bad Tower Map Availability)
    if $REMOVE_MISSING_MAPS; then
        step_label="6) Bad Tower Map Available (Removed)"
    else
        step_label="6) Bad Tower Map Available (Not Removed)"
    fi
    _summary_stage_row "$step_label" \
        "${actual_events_after_badtower} (${percent_actual_events_after_badtower}%)" \
        "${runs_after_badtower} (${percent_runs_badtower}%)"

    # Stage 7: Final (Magnet On Check); omitted when magnet-off runs are kept
    if [[ "$ADD_NO_MAGNET" == false ]]; then
        _summary_stage_row "7) Magnet On" \
            "${actual_events_after_magnet} (${percent_actual_events_after_magnet}%)" \
            "${runs_after_magnet} (${percent_runs_magnet}%)"
    fi

    # Stage 8: DST Creation Success
    _summary_stage_row "8) CreateDST File List Success" \
        "${actual_events_after_createDst} (${percent_events_after_createDst}%)" \
        "${runs_after_createDst} (${percent_runs_createDst}%)"

    # Additional notes (e.g. missing maps, magnet-off runs)
    if $REMOVE_MISSING_MAPS; then
        map_note="Removed from final DST"
    else
        map_note="Kept in final DST"
    fi
    _summary_stage_row "Missing Map Runs" "-" "${total_runs_missing_bad_tower} (${map_note})"

    if [[ "$ADD_NO_MAGNET" == false ]]; then
        _summary_stage_row "No Magnet-On Runs" "-" "${total_runs_magnet_off:-0} (Removed)"
    fi

    echo "================================================="
    echo -e "${BOLD}${MAGENTA}========================================${RESET}"
    echo ""
    if $REMOVE_MISSING_MAPS; then
        echo "Final golden run list (missing maps removed):"
    else
        echo "Final golden run list (with runs missing maps included):"
    fi
    echo " --> ${workplace}/../dst_list/Final_RunNumbers_After_All_Cuts.txt"
    echo "Done."

    # Additional comparison table, only when NO_RUNNUMBER_LIMIT is true.
    if $NO_RUNNUMBER_LIMIT; then
        # apply_createDstList_cut exports the ≥47289 DST success count as
        # total_runs_after_createDst_ge47289; fall back to it when
        # runs_after_createDst_ge47289 is unset, so this column is not
        # reported as 0.
        runs_ge_createDst=${runs_after_createDst_ge47289:-${total_runs_after_createDst_ge47289:-0}}

        echo ""
        echo "----------------------------------------"
        echo -e "${BOLD}${MAGENTA}Additional Comparison (Before & After Cuts):${RESET} No-limit vs. ≥47289 scenario"
        echo ""
        cat <<EOCOMPARISON

Metric                                  | NoLimit(All)                | ≥47289                    | ΔValue           | %Lost
--------------------------------------------------------------------------------------------------------------
EOCOMPARISON

        _summary_comparison_row "Events Before All Cuts" \
            "${actual_events_before_cuts:-0}" "${actual_events_before_cuts_ge47289:-0}"
        _summary_comparison_row "Runs Before All Cuts" \
            "${total_runs_before_cuts:-0}" "${total_runs_before_cuts_ge47289:-0}"

        echo ""
        # After All Cuts => DST
        _summary_comparison_row "Events After All Cuts" \
            "${actual_events_after_createDst:-0}" "${actual_events_after_createDst_ge47289:-0}"
        _summary_comparison_row "Runs After All Cuts" \
            "${runs_after_createDst:-0}" "$runs_ge_createDst"

        echo ""
        echo "≥47289: DST successes = ${runs_ge_createDst}, events = ${actual_events_after_createDst_ge47289:-0}"

        if $REMOVE_MISSING_MAPS; then
            echo ""
            echo "Differences in Missing Bad Tower Map Runs:"
            _summary_comparison_row "Missing Map Runs" \
                "${total_runs_missing_bad_tower:-0}" "${missing_maps_ge47289:-0}"
        fi
    fi
}
1296
########################################
# MAIN EXECUTION FLOW
########################################

# Opening banner (one line per argument; %b expands the colour escapes).
printf '%b\n' \
    "${BOLD}${GREEN}========================================${RESET}" \
    "${BOLD}${GREEN}Starting the Golden Run List Generation${RESET}" \
    "${BOLD}${GREEN}========================================${RESET}"

# --- Setup: command-line flags, working area, fresh output directories ---
parse_arguments "$@"
set_workplace
setup_directories
clean_previous_data

# --- Initial run extraction and golden-list validation ---
extract_initial_runs
validate_golden_list

# --- Incremental cuts, applied in this order ---
apply_incremental_cuts_header
mbd_scaledown_cut
runtime_cut
livetime_cut
missing_bad_tower_maps_step

magnet_check_step

# --- Final run list and removal of stale DST lists ---
create_list_file
clean_old_dst_lists

# --- DST .list generation, skipped when 'dontGenerateFileLists' was given ---
if $DONT_GENERATE_FILELISTS; then
    :
else
    generate_dst_lists
    # remove_problematic_segments must run AFTER the .list files have been
    # generated; before that there is nothing for it to edit.
    remove_problematic_segments
fi

# --- Bookkeeping and reporting ---
apply_createDstList_cut
compute_event_counts
final_summary
print_final_totals