Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-06 08:19:06

0001 /**
0002  * Compression algorithm for use in the CMSSW and sPHENIX projects.
0003  * Using 16-bit short integers to store 32-bit floating-point values.
0004  * Author: fishyu@iii.org.tw
0005  * June 8, 2021
0006  */
0007 //-----------------------------------------------------------------------------
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <map>
#include <random>
#include <set>
#include <utility>
#include <vector>

#include "RtypesCore.h"
0015 //-----------------------------------------------------------------------------
0016 
/**
 * Maps a raw value to the index of a neighbouring dictionary entry.
 *
 * The dictionary is scanned from the front for the first entry that is
 * greater than or equal to @p raw; that entry and the one before it are the
 * two candidates, and the closer one wins (a tie goes to the lower index).
 * NOTE(review): this is only a nearest-entry search if the dictionary is
 * sorted in ascending order - confirm against callers.
 *
 * @param raw   value to look up
 * @param dict  dictionary values; must not be null
 * @return index of the selected entry; 0 if @p raw is not above the first
 *         entry, the last index if @p raw is above every entry, and 0 if the
 *         dictionary is empty (there is no valid index in that case)
 */
UShort_t residesIn(Float_t raw, std::vector<Float_t>* dict)
{
  const size_t n_values = dict->size();
  if (n_values == 0)
  {
    // Without this guard "size() - 1" wraps around and yields 65535.
    return 0;
  }

  for (size_t i = 0; i < n_values; ++i)
  {
    const Float_t upper = (*dict)[i];
    if (raw <= upper)
    {
      if (i == 0)
      {
        return 0;
      }

      const Float_t lower = (*dict)[i - 1];
      const bool closer_to_upper = (upper - raw) < (raw - lower);
      return static_cast<UShort_t>(closer_to_upper ? i : i - 1);
    }
  }

  // raw lies above every entry
  return static_cast<UShort_t>(n_values - 1);
}
0033 //-----------------------------------------------------------------------------
// Helpers used by approx(); they are defined further down in this file.
Int_t newLoc(std::vector<Int_t>* loc_vec, std::vector<std::vector<Int_t>>* loc_vec_vec);
void removeDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map);
void addDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map);
//-----------------------------------------------------------------------------
/**
 * approx() draws n_entries samples from @p distribution and groups them, as
 * they arrive, into value intervals ("clusters"), then appends one dictionary
 * entry per cluster.
 *
 * A sample that lies inside an existing interval joins it; any other sample
 * starts a new single-value cluster. Whenever the number of clusters exceeds
 * the limit, the two neighbouring clusters whose union spans the smallest
 * range are merged into one.
 *
 * @param order           resized to n_entries; order[j] receives the position
 *                        in @p dict of the entry written for sample j
 * @param dict            one value is appended per cluster, in ascending
 *                        order of the cluster minimum: the midpoint between
 *                        the cluster's smallest and largest value
 * @param cnt             the number of samples of each cluster is appended,
 *                        in the same order as @p dict
 * @param n_entries       number of samples to draw; values <= 0 draw nothing
 * @param generator       random engine passed to @p distribution
 * @param distribution    source of the samples
 * @param maxNumClusters  maximum number of clusters kept; 0 is treated as 1
 *                        because a merge needs at least two clusters
 * @return half of the widest range any merged cluster reached, which bounds
 *         the absolute difference between a sample and its dictionary value
 *
 * Positions are stored as UShort_t, so they are truncated if @p dict grows
 * beyond what 16 bits can index.
 */
Float_t approx(std::vector<UShort_t>* order, std::vector<Float_t>* dict, std::vector<size_t>* cnt, Int_t n_entries, std::default_random_engine& generator, std::normal_distribution<double>& distribution, size_t maxNumClusters)
{
  Float_t maxAbsErrorDoubled = 0;

  // cluster minimum -> (cluster maximum, slot in loc_vec_vec with the sample indices)
  std::map<Float_t, std::pair<Float_t, Int_t>> min_max_loc_map;
  std::vector<std::vector<Int_t>> loc_vec_vec;
  // slots of loc_vec_vec that are empty and may be handed out again by newLoc()
  std::vector<Int_t> loc_vec;
  // (maximum of the right neighbour - minimum of a cluster) -> minima of the clusters with that span
  std::map<Float_t, std::set<Float_t>> distance_min_set_map;

  // With a limit of 0 the merge step would run with no pair to merge.
  const size_t clusterLimit = std::max<size_t>(maxNumClusters, 1);

  for (Int_t j = 0; j < n_entries; ++j)
  {
    const Float_t number = static_cast<Float_t>(distribution(generator));

    // First cluster that starts above the sample; its predecessor (if any) is
    // the only cluster whose interval can contain the sample.
    const auto after = min_max_loc_map.upper_bound(number);
    if (after != min_max_loc_map.begin() && number <= std::prev(after)->second.first)
    {
      loc_vec_vec[std::prev(after)->second.second].push_back(j);
    }
    else
    {
      const Int_t loc = newLoc(&loc_vec, &loc_vec_vec);
      loc_vec_vec[loc].push_back(j);

      const auto mmlm = min_max_loc_map.emplace_hint(after, number, std::make_pair(number, loc));
      const bool hasLeft = (mmlm != min_max_loc_map.begin());
      const bool hasRight = (std::next(mmlm) != min_max_loc_map.end());

      // The new cluster separates two former neighbours: their span is obsolete.
      if (hasLeft && hasRight)
      {
        removeDiff(std::next(mmlm)->second.first - std::prev(mmlm)->first, std::prev(mmlm)->first, &distance_min_set_map);
      }

      if (hasLeft)
      {
        addDiff(mmlm->second.first - std::prev(mmlm)->first, std::prev(mmlm)->first, &distance_min_set_map);
      }

      if (hasRight)
      {
        addDiff(std::next(mmlm)->second.first - mmlm->first, mmlm->first, &distance_min_set_map);
      }
    }

    if (min_max_loc_map.size() <= clusterLimit)
    {
      continue;
    }

    // Too many clusters: take the neighbouring pair with the smallest span.
    const auto dmsm = distance_min_set_map.begin();
    const Float_t min = *(dmsm->second.begin());

    dmsm->second.erase(dmsm->second.begin());
    if (dmsm->second.empty())
    {
      distance_min_set_map.erase(dmsm);
    }

    const auto left = min_max_loc_map.find(min);
    const auto right = std::next(left);

    // Spans that involve either half of the pair change with the merge.
    if (left != min_max_loc_map.begin())
    {
      removeDiff(left->second.first - std::prev(left)->first, std::prev(left)->first, &distance_min_set_map);
    }

    if (std::next(right) != min_max_loc_map.end())
    {
      removeDiff(std::next(right)->second.first - right->first, right->first, &distance_min_set_map);
    }

    // Move the samples of the right cluster into the left one and give the
    // emptied slot back so that loc_vec_vec does not keep orphaned copies.
    std::vector<Int_t>& kept = loc_vec_vec[left->second.second];
    std::vector<Int_t>& absorbed = loc_vec_vec[right->second.second];
    kept.insert(kept.end(), absorbed.begin(), absorbed.end());
    absorbed.clear();
    loc_vec.push_back(right->second.second);

    left->second.first = right->second.first;
    min_max_loc_map.erase(right);

    maxAbsErrorDoubled = std::max<Float_t>(maxAbsErrorDoubled, left->second.first - left->first);

    if (left != min_max_loc_map.begin())
    {
      addDiff(left->second.first - std::prev(left)->first, std::prev(left)->first, &distance_min_set_map);
    }

    if (std::next(left) != min_max_loc_map.end())
    {
      addDiff(std::next(left)->second.first - left->first, left->first, &distance_min_set_map);
    }
  }

  // A negative count would otherwise be converted to a huge unsigned size.
  order->resize(n_entries > 0 ? static_cast<size_t>(n_entries) : 0);
  for (const auto& cluster : min_max_loc_map)
  {
    const Double_t estimate = static_cast<Double_t>(cluster.first + cluster.second.first) / 2.0;
    // cppcheck-suppress containerOutOfBounds
    const std::vector<Int_t>& members = loc_vec_vec[cluster.second.second];

    const UShort_t position = static_cast<UShort_t>(dict->size());
    for (const Int_t index : members)
    {
      (*order)[index] = position;
    }

    dict->push_back(static_cast<Float_t>(estimate));
    cnt->push_back(members.size());
  }

  return maxAbsErrorDoubled / 2;
}
0147 
/**
 * Hands out a slot index of the bucket table.
 *
 * @param loc_vec      stack of slot indices that are free for reuse
 * @param loc_vec_vec  bucket table; grows by one empty bucket when no free
 *                     slot is available
 * @return the most recently freed slot if there is one (it is removed from
 *         @p loc_vec), otherwise the index of the newly appended bucket
 */
Int_t newLoc(std::vector<Int_t>* loc_vec, std::vector<std::vector<Int_t>>* loc_vec_vec)
{
  if (loc_vec->empty())
  {
    // Nothing to recycle: the next index is the current table size.
    const Int_t fresh = loc_vec_vec->size();
    loc_vec_vec->emplace_back();
    return fresh;
  }

  const Int_t recycled = loc_vec->back();
  loc_vec->pop_back();
  return recycled;
}
0161 
/**
 * Removes @p min from the set stored under @p distance and drops the map
 * entry once its set is empty.
 *
 * @param distance              key of the set to modify
 * @param min                   value to remove from that set
 * @param distance_min_set_map  map to modify; must not be null
 *
 * Nothing happens if no set is stored under @p distance. The key is a
 * floating-point value and is matched exactly.
 */
void removeDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map)
{
  const auto dmsm = distance_min_set_map->find(distance);
  if (dmsm == distance_min_set_map->end())
  {
    // Dereferencing end() would be undefined behaviour.
    return;
  }

  dmsm->second.erase(min);
  if (dmsm->second.empty())
  {
    distance_min_set_map->erase(dmsm);
  }
}
0170 
/**
 * Records @p min in the set stored under @p distance.
 *
 * @param distance              key of the set to extend; a new set is created
 *                              if the key is not present yet
 * @param min                   value to insert (no effect if already present)
 * @param distance_min_set_map  map to modify; must not be null
 */
void addDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map)
{
  // operator[] creates an empty set for a key that is not present yet.
  std::set<Float_t>& mins = (*distance_min_set_map)[distance];
  mins.insert(min);
}