Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-06 08:17:36

0001 /**
0002  * Compression algorithm for use in the CMSSW and sPHENIX projects.
0003  * Using 16-bit short integers to store 32-bit floating-point values.
0004  * Author: fishyu@iii.org.tw
0005  * May 22, 2021
0006  */
0007 #include <TTree.h>
0008 
0009 #include <fstream>
0010 #include <map>
0011 #include <set>
0012 #include <vector>
0013 
0014 //-----------------------------------------------------------------------------
0015 /**
0016  * approx() compresses data held in t and returns the standard deviation of the differences between the actual and approximated data.
0017  */
0018 Float_t approx(
0019   std::vector<UShort_t>* order, 
0020   std::vector<Float_t>* dict, 
0021   std::vector<size_t>* cnt, 
0022   Int_t n_entries, 
0023   TTree* t, 
0024   Float_t* gen_, 
0025   size_t maxNumClusters
0026 );
0027 //-----------------------------------------------------------------------------
0028 Int_t newLoc(std::vector<Int_t>* loc_vec, std::vector<std::set<Int_t>>* loc_set_vec);
0029 void removeDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map);
0030 void addDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map);
0031 //-----------------------------------------------------------------------------
0032 Float_t approx(std::vector<UShort_t>* order, std::vector<Float_t>* dict, std::vector<size_t>* cnt, Int_t n_entries, TTree* t, Float_t* gen_, size_t maxNumClusters)
0033 {
0034   Float_t maxAbsErrorDoubled = (Float_t) 0;
0035 
0036   std::map<Float_t, std::pair<Float_t, Int_t>> min_max_loc_map;
0037   std::vector<std::set<Int_t>> loc_set_vec;
0038   std::vector<Int_t> loc_vec;
0039   std::map<Float_t, std::set<Float_t>> distance_min_set_map;
0040 
0041   for (Int_t j = 0 ; j < n_entries; j++){
0042     t->GetEntry(j);
0043     
0044     std::map<Float_t, std::pair<Float_t, Int_t>>::iterator mmlm = min_max_loc_map.find(*gen_);
0045 
0046     if (mmlm != min_max_loc_map.end())
0047       loc_set_vec[mmlm->second.second].insert(j);
0048     else {
0049       Int_t loc = newLoc(&loc_vec, &loc_set_vec);
0050 
0051       loc_set_vec[loc].insert(j);
0052 
0053       min_max_loc_map[*gen_] = std::pair<Float_t, Int_t>(*gen_, loc);
0054 
0055       mmlm = min_max_loc_map.find(*gen_);
0056       if (mmlm != min_max_loc_map.begin() && *gen_ <= prev(mmlm)->second.first) {
0057         loc_set_vec[prev(mmlm)->second.second].insert(j);
0058         loc_set_vec[mmlm->second.second].clear();
0059         loc_vec.push_back(mmlm->second.second);
0060 
0061         min_max_loc_map.erase(mmlm);
0062 
0063       } else if (min_max_loc_map.size() >= 2) {
0064         if (mmlm != min_max_loc_map.begin() && mmlm != prev(min_max_loc_map.end())) {
0065 
0066           removeDiff(next(mmlm)->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
0067         }
0068 
0069         if (mmlm != min_max_loc_map.begin())
0070           addDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
0071 
0072         if (mmlm != prev(min_max_loc_map.end()))
0073           addDiff(next(mmlm)->second.first - mmlm->first, mmlm->first, &distance_min_set_map);
0074       }
0075     }
0076 
0077     if (min_max_loc_map.size() <= maxNumClusters)
0078       continue;
0079    
0080     std::map<Float_t, std::set<Float_t>>::iterator dmsm = distance_min_set_map.begin();
0081     Float_t min = *(dmsm->second.begin());
0082     
0083     dmsm->second.erase(min);
0084     if (dmsm->second.empty())
0085       distance_min_set_map.erase(dmsm);
0086     
0087     mmlm = min_max_loc_map.find(min);
0088     if (mmlm != min_max_loc_map.begin())
0089       removeDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
0090 
0091     if (next(mmlm) != prev(min_max_loc_map.end()))
0092       removeDiff(next(next(mmlm))->second.first - next(mmlm)->first, next(mmlm)->first, &distance_min_set_map);
0093 
0094     std::set<Int_t>* s = &(loc_set_vec[next(mmlm)->second.second]);
0095     loc_set_vec[mmlm->second.second].insert(s->begin(), s->end());
0096     mmlm->second.first = next(mmlm)->second.first;
0097     min_max_loc_map.erase(next(mmlm));
0098     mmlm = min_max_loc_map.find(min);
0099     maxAbsErrorDoubled = std::max(maxAbsErrorDoubled, mmlm->second.first - mmlm->first);
0100     if (mmlm != min_max_loc_map.begin())
0101       addDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
0102 
0103     if (mmlm != prev(min_max_loc_map.end()))
0104       addDiff(next(mmlm)->second.first - mmlm->first, mmlm->first, &distance_min_set_map);
0105 
0106   }
0107   
0108   Double_t squaredSum = 0;
0109   Double_t sum = 0;
0110   
0111   order->resize(n_entries);
0112   for (const auto &mmlm : min_max_loc_map) {
0113     Double_t estimate = (Double_t) (mmlm.first + mmlm.second.first) / (Double_t) 2;
0114       
0115     // cppcheck-suppress containerOutOfBounds
0116     for (const auto &index : loc_set_vec[mmlm.second.second]) {
0117       (*order)[index] = dict->size();
0118       
0119       t->GetEntry(index);
0120       Double_t delta = std::fabs(*gen_ - estimate);
0121       squaredSum += (delta * delta);
0122       sum += delta;
0123     }
0124 
0125     dict->push_back(estimate);
0126     // cppcheck-suppress containerOutOfBounds
0127     cnt->push_back(loc_set_vec[mmlm.second.second].size());
0128   }
0129   
0130   Double_t avg = sum / (Double_t) n_entries;
0131   return sqrt((squaredSum / (Double_t) n_entries) - avg * avg);
0132 }
0133 
0134 Int_t newLoc(std::vector<Int_t>* loc_vec, std::vector<std::set<Int_t>>* loc_set_vec)
0135 {
0136   if (!loc_vec->empty()) {
0137     Int_t loc = loc_vec->back();
0138     loc_vec->pop_back();
0139     return loc;
0140   }
0141  
0142   Int_t loc = loc_set_vec->size();
0143   loc_set_vec->push_back({});
0144   return loc;
0145 }
0146 
0147 
0148 void removeDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map) 
0149 {
0150   std::map<Float_t, std::set<Float_t>>::iterator dmsm = distance_min_set_map->find(distance);
0151   dmsm->second.erase(min);
0152 
0153   if (dmsm->second.empty())
0154     distance_min_set_map->erase(dmsm);
0155 }
0156 
0157 void addDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map) 
0158 {
0159   std::map<Float_t, std::set<Float_t>>::iterator dmsm = distance_min_set_map->find(distance);
0160   if (dmsm == distance_min_set_map->end()) {
0161     (*distance_min_set_map)[distance] = {min};
0162   } else {
0163     dmsm->second.insert(min);
0164   }
0165 }