File indexing completed on 2025-08-06 08:12:39
// Read a text file into a vector holding one entry per line.
//
// Lines are stored exactly as std::getline yields them: nothing is trimmed
// and blank lines are kept. The number of entries read is printed to stdout.
//
// filename : path of the text file to read.
// returns  : the lines in file order. If the file cannot be opened, a warning
//            is written to stderr and the returned list is empty.
std::vector<std::string> ReadList(std::string filename)
{
    std::vector<std::string> list;

    std::ifstream file(filename);
    if (!file.is_open())
    {
        // Without this message an unreadable path looks identical to an empty file.
        std::cerr << "ReadList: cannot open '" << filename << "'" << std::endl;
    }

    for (std::string line; std::getline(file, line); )
    {
        list.push_back(line);
    }

    std::cout << "list.size(): " << list.size() << std::endl;

    return list;
}
0015
0016
// Distribute the entries of `list` over `N` groups, optionally in random order.
//
// The (optionally shuffled) list is cut into consecutive runs of
// floor(list.size() / N) entries, one run per group; whatever does not divide
// evenly is appended to the last group. When there are fewer entries than
// groups, each entry gets its own group and the trailing groups stay empty.
//
// list      : entries to distribute (taken by value because it is shuffled locally).
// N         : number of groups to produce.
// IsShuffle : shuffle `list` with a std::random_device-seeded std::mt19937 first.
// returns   : `N` groups in order, or an empty vector when `N` is not positive.
std::vector<std::vector<std::string>> RandomSplit(std::vector<std::string> list, int N, bool IsShuffle = true)
{
    // A non-positive group count cannot size the output and would divide by zero below.
    if (N <= 0)
    {
        return {};
    }

    const std::size_t n_groups = static_cast<std::size_t>(N);
    std::vector<std::vector<std::string>> output(n_groups);

    if (IsShuffle)
    {
        std::random_device rd;
        std::mt19937 g(rd());
        std::shuffle(list.begin(), list.end(), g);
    }

    // Clamp to 1 so that a list shorter than N does not yield a zero divisor.
    const std::size_t n_each = std::max<std::size_t>(1, list.size() / n_groups);

    for (std::size_t i = 0; i < list.size(); ++i)
    {
        // Entries past the last full run (the remainder) fall into the final group.
        const std::size_t group = std::min(i / n_each, n_groups - 1);
        output[group].push_back(std::move(list[i]));
    }

    return output;
}
0036
// Build the filename suffix that identifies one merged output file.
//
// file_index : zero-based index of the merged file; the suffix shows it one-based.
// total_file : total number of merged files being produced.
// returns    : "" when there is a single merged file, otherwise "_" followed by
//              (file_index + 1) zero-padded to at least three digits
//              (e.g. "_001", "_042", "_1000").
//
// Prints a message and terminates the process with exit status 1 when
// total_file is not positive.
std::string get_merged_suffix(int file_index, int total_file)
{
    if (total_file <= 0)
    {
        std::cout << "wtf" << std::endl;
        exit(1);
    }

    if (total_file == 1)
    {
        return "";
    }

    const std::size_t min_width = 3;
    std::string job_index = std::to_string(file_index + 1);

    // Pad only when shorter than the minimum width: the subtraction is unsigned,
    // so for four or more digits it would wrap around to a huge pad count.
    if (job_index.size() < min_width)
    {
        job_index.insert(0, min_width - job_index.size(), '0');
    }

    return "_" + job_index;
}
0057
0058 int RandomMerge(
0059 bool IsShuffle,
0060 int N_merged_files,
0061 string input_directory,
0062 string input_filename,
0063 bool mega_merge = false
0064 )
0065 {
0066 std::cout<<std::endl;
0067 std::cout<<"!!! Be careful, the code can currently only handle the maximal number of files is 10000, [00000 - 09999] !!!"<<std::endl;
0068
0069
0070
0071
0072
0073
0074 string input_filename_no_number = input_filename.substr(0, input_filename.find_last_of("_"));
0075
0076 std::cout<<"input_directory: "<<input_directory<<std::endl;
0077 std::cout<<"input_filename: "<<input_filename<<std::endl;
0078 std::cout<<"input_filename_no_number: "<<input_filename_no_number<<std::endl;
0079 std::cout<<std::endl;
0080
0081 system(Form("ls %s/%s_0*.root > %s/file_list.txt", input_directory.c_str(), input_filename_no_number.c_str(), input_directory.c_str()));
0082
0083 std::vector<std::string> list = ReadList(Form("%s/file_list.txt", input_directory.c_str()));
0084 for (int i = 0; i < list.size(); i++)
0085 {
0086 string filename = list[i];
0087
0088
0089 if (filename.find(Form("%s_merged",input_filename_no_number.c_str())) != std::string::npos)
0090 {
0091 std::cout<<"removing : "<<filename<<std::endl;
0092 list.erase(std::remove(list.begin(), list.end(), filename), list.end());
0093 system(Form("rm %s", filename.c_str()));
0094
0095 i -= 1;
0096 }
0097 }
0098 std::cout<<std::endl;
0099 std::cout<<"N files post removing: "<<list.size()<<std::endl;
0100
0101
0102
0103
0104
0105 std::vector<std::vector<std::string>> list_splitted = RandomSplit(list, N_merged_files, IsShuffle);
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130 for (int i = 0; i < N_merged_files; i++)
0131 {
0132 std::cout<<std::endl;
0133 std::cout<<"list_splitted["<<i<<"].size(): "<<list_splitted[i].size()<<std::endl;
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143 system(Form("mkdir -p %s/merged_files_%s_%s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0144
0145 for (auto filename : list_splitted[i])
0146 {
0147 std::cout<<"filename: "<<filename<<std::endl;
0148 system(Form("mv %s %s/merged_files_%s_%s", filename.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0149 }
0150
0151 std::string multi_thread_str = (list_splitted[i].size() > 16) ? "-j 8" : "";
0152
0153 std::cout<<std::endl;
0154 system(Form("time hadd %s %s/%s_merged%s.root %s/merged_files_%s_%s/*.root", multi_thread_str.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0155 }
0156
0157 if (mega_merge && N_merged_files > 1)
0158 {
0159 system(Form("hadd %s/%s_merged.root %s/%s_merged_*.root", input_directory.c_str(), input_filename_no_number.c_str(), input_directory.c_str(), input_filename_no_number.c_str()));
0160 }
0161
0162 system(Form("rm %s/file_list.txt", input_directory.c_str()));
0163
0164 return 3;
0165 }