File indexing completed on 2025-08-06 08:12:39
/// Read a text file into a vector holding one entry per line.
///
/// @param filename  Path of the file to read.
/// @return The lines in file order, each without its trailing newline. The
///         result is empty when the file cannot be opened or has no lines.
///
/// As a side effect the number of lines read is printed to stdout in the form
/// "list.size(): <n>".
std::vector<std::string> ReadList(std::string filename)
{
    std::ifstream input(filename);
    std::vector<std::string> lines;

    // std::getline fails at end of file (and immediately on a stream that
    // could not be opened), which terminates the loop.
    for (std::string current; std::getline(input, current);)
    {
        lines.push_back(current);
    }

    std::cout << "list.size(): " << lines.size() << std::endl;

    return lines;
}
0015
0016
/// Optionally shuffle a list of strings and cut it into two consecutive parts.
///
/// Despite the name, only the first two groups ever receive elements: the
/// first `segment_index` entries go to group 0 and all remaining entries go to
/// group 1. Any further groups (when N > 2) are returned empty.
///
/// @param list           Strings to split (taken by value so it can be shuffled).
/// @param N              Number of groups in the result.
///                       - N <= 0 yields an empty result.
///                       - N == 1 yields a single group holding every element,
///                         since there is no second group to receive the tail.
/// @param IsShuffle      When true the list is shuffled with a generator seeded
///                       from std::random_device before it is cut.
/// @param segment_index  Number of leading elements placed in group 0. Values
///                       below 0 are treated as 0 and values above the list
///                       size are treated as the list size.
/// @return A vector of exactly N groups (empty when N <= 0).
std::vector<std::vector<std::string>> RandomSplit(std::vector<std::string> list, int N, bool IsShuffle = true, int segment_index = 0)
{
    // No groups requested: there is nowhere to put the elements.
    if (N <= 0)
    {
        return {};
    }

    // Only pay for the random device and engine when a shuffle is requested.
    if (IsShuffle)
    {
        std::random_device rd;
        std::mt19937 g(rd());
        std::shuffle(list.begin(), list.end(), g);
    }

    std::vector<std::vector<std::string>> output(N);

    // With a single group everything lands in it; indexing output[1] here
    // would be out of bounds.
    if (N == 1)
    {
        output[0] = list;
        return output;
    }

    // Clamp the cut position into [0, list.size()] so the iterator arithmetic
    // below is always valid.
    std::size_t head_count = 0;
    if (segment_index > 0)
    {
        head_count = std::min(static_cast<std::size_t>(segment_index), list.size());
    }

    const auto cut = list.begin() + head_count;
    output[0].assign(list.begin(), cut);
    output[1].assign(cut, list.end());

    return output;
}
0041
/// Build the filename suffix that identifies one merged output file.
///
/// @param file_index  Zero-based index of the merged file; the suffix shows
///                    the one-based number `file_index + 1`.
/// @param total_file  Total number of merged files being produced.
/// @return An empty string when there is only one merged file; otherwise "_"
///         followed by the one-based number, left-padded with '0' to at least
///         three characters (e.g. "_001", "_042", "_1000").
///
/// A non-positive `total_file` prints a message to stdout and terminates the
/// process with exit code 1.
std::string get_merged_suffix(int file_index, int total_file)
{
    if (total_file <= 0){
        std::cout<<"wtf"<<std::endl;
        exit(1);
    }

    // A single merged file needs no distinguishing number.
    if (total_file == 1){
        return "";
    }

    const std::string::size_type min_width = 3;
    std::string job_index = std::to_string( file_index + 1 );

    // Pad only when the number is shorter than the minimum width. The
    // subtraction is unsigned, so doing it unconditionally would wrap around
    // for numbers with more than three digits and make insert() throw.
    if (job_index.size() < min_width){
        job_index.insert(0, min_width - job_index.size(), '0');
    }

    return "_" + job_index;
}
0062
0063 int RandomMerge_region(
0064 string input_directory,
0065 string input_filename,
0066 int segment_index
0067 )
0068 {
0069 bool IsShuffle = 0;
0070 int N_merged_files = 2;
0071
0072 if ( IsShuffle == 1 || N_merged_files != 2){
0073 std::cout<<"IsShuffle == 1"<<std::endl;
0074 std::cout<<"N_merged_files != 2"<<std::endl;
0075 exit(1);
0076 }
0077
0078 std::cout<<std::endl;
0079
0080
0081
0082
0083
0084
0085
0086 string input_filename_no_number = input_filename.substr(0, input_filename.find_last_of("_"));
0087
0088 std::cout<<"input_directory: "<<input_directory<<std::endl;
0089 std::cout<<"input_filename: "<<input_filename<<std::endl;
0090 std::cout<<"input_filename_no_number: "<<input_filename_no_number<<std::endl;
0091 std::cout<<std::endl;
0092
0093 system(Form("ls %s/%s_0*.root > %s/file_list.txt", input_directory.c_str(), input_filename_no_number.c_str(), input_directory.c_str()));
0094
0095
0096 std::vector<std::string> list = ReadList(Form("%s/file_list.txt", input_directory.c_str()));
0097 for (int i = 0; i < list.size(); i++)
0098 {
0099 string filename = list[i];
0100
0101
0102 if (filename.find(Form("%s_merged",input_filename_no_number.c_str())) != std::string::npos)
0103 {
0104 std::cout<<"removing : "<<filename<<std::endl;
0105 list.erase(std::remove(list.begin(), list.end(), filename), list.end());
0106 system(Form("rm %s", filename.c_str()));
0107
0108 i -= 1;
0109 }
0110 }
0111 std::cout<<std::endl;
0112 std::cout<<"N files post removing: "<<list.size()<<std::endl;
0113
0114
0115
0116
0117
0118 std::vector<std::vector<std::string>> list_splitted = RandomSplit(list, N_merged_files, IsShuffle, segment_index);
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143 for (int i = 0; i < N_merged_files; i++)
0144 {
0145 std::cout<<std::endl;
0146 std::cout<<"list_splitted["<<i<<"].size(): "<<list_splitted[i].size()<<std::endl;
0147
0148
0149
0150
0151
0152
0153
0154
0155
0156 system(Form("mkdir -p %s/merged_files_%s_%s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0157
0158 for (auto filename : list_splitted[i])
0159 {
0160 std::cout<<"filename: "<<filename<<std::endl;
0161 system(Form("mv %s %s/merged_files_%s_%s", filename.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0162 }
0163
0164 std::string multi_thread_str = (list_splitted[i].size() > 16) ? "-j 8" : "";
0165
0166 std::cout<<std::endl;
0167 system(Form("time hadd %s %s/%s_merged%s.root %s/merged_files_%s_%s/*.root", multi_thread_str.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0168 }
0169
0170 system(Form("rm %s/file_list.txt", input_directory.c_str()));
0171
0172 return 3;
0173 }