Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-06 08:12:39

// note : read a text file into a list of strings, one entry per line.
//        - blank lines are skipped, so a stray empty line never becomes an empty entry
//        - if the file cannot be opened, a message is written to std::cerr and the
//          returned list is empty
//        - the number of entries read is always reported on std::cout
// filename : path of the text file to read
// return   : the non-empty lines of the file, in file order
std::vector<std::string> ReadList(std::string filename)
{
    std::vector<std::string> list;

    std::ifstream file(filename);
    if (!file.is_open())
    {
        // note : make the failure visible; the getline loop below simply does not run
        std::cerr<<"ReadList: cannot open file: "<<filename<<std::endl;
    }

    std::string line;
    while (std::getline(file, line))
    {
        if (line.empty()) {continue;}
        list.push_back(line);
    }

    std::cout<<"list.size(): "<<list.size()<<std::endl;

    return list;
}
0015 
// note : split the input vector into two contiguous regions, stored in a vector of N buckets.
//        Despite the name, the split is NOT an even random N-way split:
//        - entries [0, segment_index) go to output[0]
//        - entries [segment_index, end) go to output[1]
//        - buckets with index >= 2 (when N > 2) are left empty
//        - when N == 1 there is no second bucket, so every entry goes to output[0]
//        - when N <= 0 an empty result is returned
// list          : entries to split (taken by value, so the caller's vector is never reordered)
// N             : number of buckets in the returned vector
// IsShuffle     : if true, the entries are randomly shuffled before the cut is applied
// segment_index : number of leading entries assigned to output[0]; values <= 0 put
//                 everything into the second region, values >= list.size() put everything
//                 into output[0]
// return        : vector of N buckets (empty if N <= 0)
std::vector<std::vector<std::string>> RandomSplit(std::vector<std::string> list, int N, bool IsShuffle = true, int segment_index = 0)
{
    if (N <= 0) {return {};}

    std::vector<std::vector<std::string>> output(N);

    if (IsShuffle)
    {
        std::random_device rd;
        std::mt19937 g(rd());
        std::shuffle(list.begin(), list.end(), g);
    }

    // note : clamp the cut position into [0, list.size()]
    const std::size_t n_first = (segment_index > 0) ? std::min(static_cast<std::size_t>(segment_index), list.size()) : 0;
    const auto cut = list.begin() + static_cast<std::vector<std::string>::difference_type>(n_first);

    // note : with a single bucket the second region is appended to bucket 0 instead of
    //        indexing a bucket that does not exist
    const std::size_t second_bucket = (N > 1) ? 1 : 0;

    output[0].assign(list.begin(), cut);
    output[second_bucket].insert(output[second_bucket].end(), cut, list.end());

    return output;
}
0041 
// note : build the suffix appended to the name of a merged output.
//        - total_file <= 0 : invalid, an error is printed and the program exits with code 1
//        - total_file == 1 : no suffix is needed, "" is returned
//        - total_file  > 1 : "_" followed by the 1-based index (file_index + 1),
//                            zero-padded to at least 3 characters, e.g. 0 -> "_001"
//        Indices that already need more than 3 characters are returned unpadded,
//        e.g. 999 -> "_1000".
// file_index : 0-based index of the merged output
// total_file : total number of merged outputs
// return     : the suffix string ("" when there is only one output)
std::string get_merged_suffix(int file_index, int total_file)
{
    if (total_file <= 0){
        std::cout<<"wtf"<<std::endl;
        std::cerr<<"get_merged_suffix: total_file must be positive, got "<<total_file<<std::endl;
        exit(1);
    }

    if (total_file == 1){
        return "";
    }

    std::string job_index = std::to_string( file_index + 1 );
    const std::size_t job_index_len = 3;

    // note : only pad when shorter than the target width; an unguarded
    //        "job_index_len - job_index.size()" wraps around for longer indices
    if (job_index.size() < job_index_len){
        job_index.insert(0, job_index_len - job_index.size(), '0');
    }

    return "_" + job_index;
}
0062 
0063 int RandomMerge_region(
0064     string input_directory,
0065     string input_filename,
0066     int segment_index
0067 )
0068 {
0069     bool IsShuffle = 0;
0070     int N_merged_files = 2;
0071 
0072     if ( IsShuffle == 1 || N_merged_files != 2){
0073         std::cout<<"IsShuffle == 1"<<std::endl;
0074         std::cout<<"N_merged_files != 2"<<std::endl;
0075         exit(1);
0076     }
0077 
0078     std::cout<<std::endl;
0079     // std::cout<<"!!! Be careful, the code can currently only handle the maximal number of files is 10000, [00000 - 09999] !!!"<<std::endl;
0080 
0081     // bool IsShuffle = false;
0082     // int N_merged_files = 1;
0083     // string input_directory = "/sphenix/user/ChengWei/sPH_dNdeta/Run24AuAuMC/Sim_Ntuple_HIJING_ana443_20241102/Run3/EvtVtxZ/completed/RestDist/completed";
0084     // string input_filename = "MC_RestDist_vtxZQA_VtxZReWeighting_vtxZRangeM10p0to10p0_ClusQAAdc35PhiSize500_00000.root"; // note : xxxxx_00001.root
0085 
0086     string input_filename_no_number = input_filename.substr(0, input_filename.find_last_of("_"));
0087 
0088     std::cout<<"input_directory: "<<input_directory<<std::endl;
0089     std::cout<<"input_filename: "<<input_filename<<std::endl;
0090     std::cout<<"input_filename_no_number: "<<input_filename_no_number<<std::endl;
0091     std::cout<<std::endl;
0092 
0093     system(Form("ls %s/%s_0*.root > %s/file_list.txt", input_directory.c_str(), input_filename_no_number.c_str(), input_directory.c_str())); // todo: the maximal number of files is 10000
0094     // system(Form("ls %s/%s > %s/file_list.txt", input_directory.c_str(), input_filename.c_str(), input_directory.c_str())); // todo: the maximal number of files is 10000
0095 
0096     std::vector<std::string> list = ReadList(Form("%s/file_list.txt", input_directory.c_str()));
0097     for (int i = 0; i < list.size(); i++)
0098     {
0099         string filename = list[i];
0100 
0101         // std::cout<<"filename: "<<filename<<std::endl;
0102         if (filename.find(Form("%s_merged",input_filename_no_number.c_str())) != std::string::npos)
0103         {
0104             std::cout<<"removing : "<<filename<<std::endl;
0105             list.erase(std::remove(list.begin(), list.end(), filename), list.end());
0106             system(Form("rm %s", filename.c_str()));
0107 
0108             i -= 1;
0109         }
0110     }
0111     std::cout<<std::endl;
0112     std::cout<<"N files post removing: "<<list.size()<<std::endl;
0113 
0114     // for (auto filename : list){
0115     //     std::cout<<"filename: "<<filename<<std::endl;
0116     // }
0117 
0118     std::vector<std::vector<std::string>> list_splitted = RandomSplit(list, N_merged_files, IsShuffle, segment_index);
0119 
0120     // for (int i = 0; i < N_merged_files; i++)
0121     // {
0122     //     std::cout<<"list_splitted["<<i<<"].size(): "<<list_splitted[i].size()<<std::endl;
0123 
0124     //     std::string all_in_one = "";
0125 
0126     //     for (auto filename : list_splitted[i])
0127     //     {
0128     //         std::cout<<"filename: "<<filename<<std::endl;
0129     //         all_in_one += filename + " ";
0130     //     }
0131 
0132     //     std::cout<<std::endl;
0133     //     system(Form("time hadd %s/%s_merged%s.root %s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str(), all_in_one.c_str()));
0134 
0135     //     system(Form("mkdir -p %s/merged_files_%s_%s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0136 
0137     //     for (auto filename : list_splitted[i])
0138     //     {
0139     //         system(Form("mv %s %s/merged_files_%s_%s", filename.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0140     //     }
0141     // }
0142 
0143     for (int i = 0; i < N_merged_files; i++)
0144     {
0145         std::cout<<std::endl;
0146         std::cout<<"list_splitted["<<i<<"].size(): "<<list_splitted[i].size()<<std::endl;
0147 
0148         // std::string all_in_one = "";
0149 
0150         // for (auto filename : list_splitted[i])
0151         // {
0152         //     std::cout<<"filename: "<<filename<<std::endl;
0153         //     all_in_one += filename + " ";
0154         // }
0155 
0156         system(Form("mkdir -p %s/merged_files_%s_%s", input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0157 
0158         for (auto filename : list_splitted[i])
0159         {
0160             std::cout<<"filename: "<<filename<<std::endl;
0161             system(Form("mv %s %s/merged_files_%s_%s", filename.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0162         }
0163 
0164         std::string multi_thread_str = (list_splitted[i].size() > 16) ? "-j 8" : "";
0165 
0166         std::cout<<std::endl;
0167         system(Form("time hadd %s %s/%s_merged%s.root %s/merged_files_%s_%s/*.root", multi_thread_str.c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str(), input_directory.c_str(), input_filename_no_number.c_str(), get_merged_suffix(i, N_merged_files).c_str()));
0168     }
0169 
0170     system(Form("rm %s/file_list.txt", input_directory.c_str()));
0171 
0172     return 3;
0173 }