File indexing completed on 2025-08-05 08:13:08
0001
0002 import subprocess
0003 import argparse
0004 import os
0005
# Command-line options, one row per option:
#   (short flag, long flag, default value, help text).
# Every option is parsed as a string; rows are registered in the order
# listed so the generated --help output keeps the same ordering.
_OPTION_TABLE = (
    ('-i', '--log-dir',
     '/sphenix/data/data02/sphnxpro/run2pp/calologs/ana446_2024p007',
     'Location of the log directory.'),
    ('-d', '--dataset',
     'ana446_2024p007',
     'Production Dataset.'),
    ('-d2', '--dsttype',
     'DST_TRIGGERED_EVENT_run2pp',
     'Production Dst Type.'),
    ('-o', '--output',
     'bad-calologs-ana446-2024p007.list',
     'Output list file of all files that have an error.'),
    ('-o2', '--output-segments',
     'bad-ana446-2024p007-segments.list',
     'Output list file of all segments that have an error.'),
    ('-o3', '--output-dir',
     'bad-segments',
     'Output directory to save all files.'),
)

parser = argparse.ArgumentParser()
for _short, _long, _default, _help in _OPTION_TABLE:
    parser.add_argument(_short, _long, type=str, default=_default, help=_help)

# Parsed at module level; the script body below reads its settings from
# ``args``.
args = parser.parse_args()
0016
if __name__ == '__main__':
    # Pipeline overview (every shell step runs with cwd = output directory):
    #   1. scan each entry of the log directory with ripgrep and collect the
    #      paths of all log files containing "Error";
    #   2. sort that list and derive the segment identifiers from the names;
    #   3. query the FileCatalog database for the segments of the requested
    #      dataset / dsttype;
    #   4. intersect both segment lists and keep only the bad log files that
    #      belong to a segment present in the catalogue.

    # Local import: only needed here, to quote values spliced into shell
    # command strings.
    import shlex

    log_dir = os.path.realpath(args.log_dir)
    dataset = args.dataset
    dsttype = args.dsttype
    output = args.output
    output_segments = args.output_segments
    output_dir = os.path.realpath(args.output_dir)

    print(f'Log Dir: {log_dir}')
    print(f'Dataset: {dataset}')
    print(f'DST Type: {dsttype}')
    print(f'Output: {output}')
    print(f'Output Segments: {output_segments}')
    print(f'Output Directory: {output_dir}')

    os.makedirs(output_dir, exist_ok=True)

    def run_shell(command):
        """Run *command* through bash from inside the output directory."""
        return subprocess.run(['bash', '-c', command], cwd=output_dir)

    # ripgrep binary used for both the scan and the final filtering step.
    rg = '/direct/sphenix+u/anarde/.cargo/bin/rg'

    # Shell-safe spellings of every user-supplied value that ends up inside
    # a command string, so spaces or shell metacharacters cannot break (or
    # alter) the commands.
    q_log_dir = shlex.quote(log_dir)
    q_output = shlex.quote(output)
    q_segments = shlex.quote(output_segments)

    # The scan loop appends to the output list, so a list left over from a
    # previous run must be removed first.  os.path.join resolves the path
    # the same way the shell (running in output_dir) does, including when
    # --output is an absolute path.
    stale_output = os.path.join(output_dir, output)
    if os.path.exists(stale_output):
        os.remove(stale_output)
        print(f'File {output} deleted successfully.')

    # Step 1: collect every log file that contains "Error".
    # The counter is initialised explicitly (it used to be unset, so the
    # first progress line printed an empty value), and the directory name is
    # read raw and quoted so unusual names are passed through intact.
    command = f"""i=0
    while IFS= read -r d; do
        echo "Processing Dir: $d, $i"
        {rg} -l "Error" "$d" >> {q_output}
        echo "logs: $(wc -l {q_output})"
        i=$((i+1))
    done < <(readlink -f {q_log_dir}/*)"""
    run_shell(command)

    # Step 2a: sort the list of bad log files in place.
    command = f'sort {q_output} -o {q_output}'
    run_shell(command)

    # Step 2b: reduce each path to its file name, keep dash-separated fields
    # 2 and 3, and drop everything from the first dot onward; this is the
    # segment key used for the comparison below.
    command = (
        f"awk -F'/' '{{print $NF}}' {q_output}"
        " | cut -d'-' -f2,3 | cut -d'.' -f1"
        f" | sort > {q_segments}"
    )
    run_shell(command)

    # Step 3: segment keys of all catalogued files for this dataset/dsttype.
    # Single quotes are doubled so the values stay inside their SQL string
    # literals, and the whole query is shell-quoted so bash does not expand
    # anything inside it.
    catalog_list = f'{dsttype}_{dataset}.list'
    q_catalog = shlex.quote(catalog_list)
    sql_dataset = dataset.replace("'", "''")
    sql_dsttype = dsttype.replace("'", "''")
    query = (
        f"select filename from datasets where dataset = '{sql_dataset}'"
        f" and dsttype='{sql_dsttype}';"
    )
    # pipefail makes a psql failure visible; without it the pipeline reports
    # the status of the final `sort`, and a failed query silently produces
    # an empty catalogue list (and therefore empty "produced" outputs).
    command = (
        'set -o pipefail; '
        f'psql FileCatalog -c {shlex.quote(query)} -At'
        " | cut -d'-' -f2,3 | cut -d'.' -f1"
        f' | sort > {q_catalog}'
    )
    if run_shell(command).returncode != 0:
        raise SystemExit(
            f'FileCatalog query for dataset {dataset!r} / dsttype {dsttype!r} '
            'failed; not writing the produced-segment lists.'
        )

    # Step 4a: segments that are both catalogued and have a bad log
    # (comm -12 prints only lines common to the two sorted inputs).
    produced_segments = f'{os.path.splitext(output_segments)[0]}-produced.list'
    q_produced_segments = shlex.quote(produced_segments)
    command = f'comm -12 {q_catalog} {q_segments} > {q_produced_segments}'
    run_shell(command)

    # Step 4b: bad log files belonging to those segments.  The exit status
    # is intentionally not checked: ripgrep exits non-zero when nothing
    # matches, which is a valid outcome here.
    produced_logs = f'{os.path.splitext(output)[0]}-produced.list'
    q_produced_logs = shlex.quote(produced_logs)
    command = f'{rg} -Ff {q_produced_segments} {q_output} > {q_produced_logs}'
    run_shell(command)