Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-06 08:12:39

// Read a text file line by line.
//
// filename : path of the text file to read
//
// Returns the lines in file order (without the trailing newline). The number of
// lines read is also printed to stdout. If the file cannot be opened the first
// getline fails and an empty vector is returned.
std::vector<std::string> ReadList(std::string filename)
{
    std::ifstream input(filename);
    std::vector<std::string> lines;

    for (std::string line; std::getline(input, line); )
    {
        lines.push_back(line);
    }

    std::cout << "list.size(): " << lines.size() << std::endl;

    return lines;
}
0015 
// note : to split the input vector (optionally shuffled) into N vectors
//
// list      : entries to distribute (taken by value, the caller's vector is untouched)
// N         : number of groups requested
// IsShuffle : when true, the entries are shuffled with a std::mt19937 seeded from
//             std::random_device before being split
//
// Returns a vector of N groups. Each group receives list.size() / N consecutive
// entries, and the entries left over by the integer division are all appended to
// the last group. When there are fewer entries than groups, the first list.size()
// groups receive one entry each and the remaining groups stay empty.
// A non-positive N returns an empty vector.
std::vector<std::vector<std::string>> RandomSplit(std::vector<std::string> list, int N, bool IsShuffle = true)
{
    if (N <= 0)
    {
        return {};
    }

    std::vector<std::vector<std::string>> output(N);

    if (IsShuffle)
    {
        std::random_device rd;
        std::mt19937 g(rd());
        std::shuffle(list.begin(), list.end(), g);
    }

    const std::size_t n_groups = static_cast<std::size_t>(N);

    // note : keep the chunk size at least 1, list.size() / N is 0 when there are fewer
    // note : entries than groups and the division below would be a division by zero
    const std::size_t n_each = std::max<std::size_t>(1, list.size() / n_groups);

    for (std::size_t i = 0; i < list.size(); i++)
    {
        // note : the leftover entries of the integer division all go to the last group
        const std::size_t group_index = std::min(i / n_each, n_groups - 1);
        output[group_index].push_back(list[i]);
    }

    return output;
}
0036 
// Build the suffix appended to the name of a merged output.
//
// file_index : zero-based index of the merged file
// total_file : total number of merged files being produced
//
// Returns "" when there is a single merged file. Otherwise returns "_" followed by
// the one-based index, left-padded with zeros to at least 3 characters
// ("_001", "_012", ...). An index that already needs more than 3 characters is
// returned unpadded ("_1000").
// A non-positive total_file prints a message to stdout and terminates the program
// with exit(1).
std::string get_merged_suffix(int file_index, int total_file)
{
    if (total_file <= 0)
    {
        std::cout<<"wtf"<<std::endl;
        exit(1);
    }

    if (total_file == 1)
    {
        return "";
    }

    const std::size_t job_index_len = 3;
    std::string job_index = std::to_string(file_index + 1);

    // note : pad only when the index is shorter than the target width, an unguarded
    // note : subtraction wraps around in unsigned arithmetic and insert() then throws
    if (job_index.size() < job_index_len)
    {
        job_index.insert(0, job_index_len - job_index.size(), '0');
    }

    return "_" + job_index;
}
0057 
0058 int RandomMerge(
0059     bool IsShuffle,
0060     int N_merged_files,
0061     string input_directory,
0062     string input_filename,
0063     bool mega_merge = false
0064 )
0065 {
0066     std::cout<<std::endl;
0067     std::cout<<"!!! Be careful, the code can currently only handle the maximal number of files is 10000, [00000 - 09999] !!!"<<std::endl;
0068 
0069     // bool IsShuffle = false;
0070     // int N_merged_files = 1;
0071     // string input_directory = "/sphenix/user/ChengWei/sPH_dNdeta/Run24AuAuMC/Sim_Ntuple_HIJING_ana443_20241102/Run3/EvtVtxZ/completed/RestDist/completed";
0072     // string input_filename = "MC_RestDist_vtxZQA_VtxZReWeighting_vtxZRangeM10p0to10p0_ClusQAAdc35PhiSize500_00000.root"; // note : xxxxx_00001.root
0073 
0074     string input_filename_no_number = input_filename.substr(0, input_filename.find_last_of("_"));
0075 
0076     std::cout<<"input_directory: "<<input_directory<<std::endl;
0077     std::cout<<"input_filename: "<<input_filename<<std::endl;
0078     std::cout<<"input_filename_no_number: "<<input_filename_no_number<<std::endl;
0079     std::cout<<std::endl;
0080 
0081     system(Form("ls %s/%s_0*.root > %s/file_list.txt", input_directory.c_str(), input_filename_no_number.c_str(), input_directory.c_str())); // todo: the maximal number of files is 10000
0082 
0083     std::vector<std::string> list = ReadList(Form("%s/file_list.txt", input_directory.c_str()));
0084     for (int i = 0; i < list.size(); i++)
0085     {
0086         string filename = list[i];
0087 
0088         // std::cout<<"filename: "<<filename<<std::endl;
0089         if (filename.find(Form("%s_merged",input_filename_no_number.c_str())) != std::string::npos)
0090         {
0091             std::cout<<"removing : "<<filename<<std::endl;
0092             list.erase(std::remove(list.begin(), list.end(), filename), list.end());
0093             system(Form("rm %s", filename.c_str()));
0094 
0095             i -= 1;
0096         }
0097     }
0098     std::cout<<std::endl;
0099     std::cout<<"N files post removing: "<<list.size()<<std::endl;
0100 
0101     // for (auto filename : list){
0102     //     std::cout<<"filename: "<<filename<<std::endl;
0103     // }
0104 
0105     std::vector<std::vector<std::string>> list_splitted = RandomSplit(list, N_merged_files, IsShuffle);
0106 
0107     // for (int i = 0; i < N_merged_files; i++)
0108     // {
0109     //     std::cout<<"list_splitted["<<i<<"].size(): "<<list_splitted[i].size()<<std::endl;
0110 
0111     //     std::string all_in_one = "";
0112 
0113     //     for (auto filename : list_splitted[i])
0114     //     {
0115     //         std::cout<<"filename: "<<filename<<std::endl;
0116     //         all_in_one += filename + " ";
0117     //     }
0118 
0119     //     std::cout<<std::endl;
0120     //     system(Form("time hadd %s/%s_merged%s.root %s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str(), all_in_one.c_str()));
0121 
0122     //     system(Form("mkdir -p %s/merged_files_%s_%s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0123 
0124     //     for (auto filename : list_splitted[i])
0125     //     {
0126     //         system(Form("mv %s %s/merged_files_%s_%s", filename.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0127     //     }
0128     // }
0129 
0130     for (int i = 0; i < N_merged_files; i++)
0131     {
0132         std::cout<<std::endl;
0133         std::cout<<"list_splitted["<<i<<"].size(): "<<list_splitted[i].size()<<std::endl;
0134 
0135         // std::string all_in_one = "";
0136 
0137         // for (auto filename : list_splitted[i])
0138         // {
0139         //     std::cout<<"filename: "<<filename<<std::endl;
0140         //     all_in_one += filename + " ";
0141         // }
0142 
0143         system(Form("mkdir -p %s/merged_files_%s_%s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0144 
0145         for (auto filename : list_splitted[i])
0146         {
0147             std::cout<<"filename: "<<filename<<std::endl;
0148             system(Form("mv %s %s/merged_files_%s_%s", filename.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0149         }
0150 
0151         std::string multi_thread_str = (list_splitted[i].size() > 16) ? "-j 8" : "";
0152 
0153         std::cout<<std::endl;
0154         system(Form("time hadd %s %s/%s_merged%s.root %s/merged_files_%s_%s/*.root", multi_thread_str.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0155     }
0156 
0157     if (mega_merge && N_merged_files > 1)
0158     {
0159         system(Form("hadd %s/%s_merged.root %s/%s_merged_*.root", input_directory.c_str(), input_filename_no_number.c_str(), input_directory.c_str(), input_filename_no_number.c_str()));
0160     }
0161 
0162     system(Form("rm %s/file_list.txt", input_directory.c_str()));
0163 
0164     return 3;
0165 }