Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-05 08:10:11

0001 // This file is part of the Acts project.
0002 //
0003 // Copyright (C) 2017 CERN for the benefit of the Acts project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
0008 
0009 #pragma once
0010 
0011 #include <cmath>
0012 #include <exception>
0013 #include <functional>
0014 #include <sstream>
0015 #include <vector>
0016 
0017 #include "TDictionary.h"
0018 #include "TTreeReaderValue.h"
0019 
0020 // Pairs of elements of the same type
0021 template <typename T>
0022 using HomogeneousPair = std::pair<T, T>;
0023 
0024 // === TYPE ERASURE FOR CONCRETE DATA ===
0025 
0026 // Minimal type-erasure wrapper for std::vector<T>. This will be used as a
0027 // workaround to compensate for the absence of C++17's std::any in Cling.
0028 class AnyVector {
0029  public:
0030   // Create a type-erased vector<T>, using proposed constructor arguments.
0031   // Returns a pair containing the type-erased vector and a pointer to the
0032   // underlying concrete vector.
0033   template <typename T, typename... Args>
0034   static std::pair<AnyVector, std::vector<T>*> create(Args&&... args) {
0035     std::vector<T>* vector = new std::vector<T>(std::forward<Args>(args)...);
0036     std::function<void()> deleter = [vector] { delete vector; };
0037     return std::make_pair(
0038         AnyVector{static_cast<void*>(vector), std::move(deleter)}, vector);
0039   }
0040 
0041   // Default-construct a null type-erased vector
0042   AnyVector() = default;
0043 
0044   // Move-construct a type-erased vector
0045   AnyVector(AnyVector&& other)
0046       : m_vector{other.m_vector}, m_deleter{std::move(other.m_deleter)} {
0047     other.m_vector = nullptr;
0048   }
0049 
0050   // Move-assign a type-erased vector
0051   AnyVector& operator=(AnyVector&& other) {
0052     if (&other != this) {
0053       m_vector = other.m_vector;
0054       m_deleter = std::move(other.m_deleter);
0055       other.m_vector = nullptr;
0056     }
0057     return *this;
0058   }
0059 
0060   // Forbid copies of type-erased vectors
0061   AnyVector(const AnyVector&) = delete;
0062   AnyVector& operator=(const AnyVector&) = delete;
0063 
0064   // Delete a type-erased vector
0065   ~AnyVector() {
0066     if (m_vector != nullptr) {
0067       m_deleter();
0068     }
0069   }
0070 
0071  private:
0072   // Construct a type-erased vector from a concrete vector
0073   AnyVector(void* vector, std::function<void()>&& deleter)
0074       : m_vector{vector}, m_deleter{std::move(deleter)} {}
0075 
0076   void* m_vector{nullptr};          // Casted std::vector<T>*
0077   std::function<void()> m_deleter;  // Deletes the underlying vector
0078 };
0079 
0080 // === GENERIC DATA ORDERING ===
0081 
0082 // We want to check, in a single operation, how two pieces of data are ordered
0083 enum class Ordering { SMALLER, EQUAL, GREATER };
0084 
0085 // In general, any type which implements comparison operators that behave as a
0086 // mathematical total order can use this comparison function...
0087 template <typename T>
0088 Ordering compare(const T& x, const T& y) {
0089   if (x < y) {
0090     return Ordering::SMALLER;
0091   } else if (x == y) {
0092     return Ordering::EQUAL;
0093   } else {
0094     return Ordering::GREATER;
0095   }
0096 }
0097 
0098 // ...but we'll want to tweak that a little for floats, to handle NaNs better...
0099 template <typename T>
0100 Ordering compareFloat(const T& x, const T& y) {
0101   if (std::isless(x, y)) {
0102     return Ordering::SMALLER;
0103   } else if (std::isgreater(x, y)) {
0104     return Ordering::GREATER;
0105   } else {
0106     return Ordering::EQUAL;
0107   }
0108 }
0109 
0110 template <>
0111 Ordering compare(const float& x, const float& y) {
0112   return compareFloat(x, y);
0113 }
0114 
0115 template <>
0116 Ordering compare(const double& x, const double& y) {
0117   return compareFloat(x, y);
0118 }
0119 
0120 // ...and for vectors, where the default lexicographic comparison cannot
0121 // efficiently tell all of what we want in a single vector iteration pass.
0122 template <typename U>
0123 Ordering compare(const std::vector<U>& v1, const std::vector<U>& v2) {
0124   // First try to order by size...
0125   if (v1.size() < v2.size()) {
0126     return Ordering::SMALLER;
0127   } else if (v1.size() > v2.size()) {
0128     return Ordering::GREATER;
0129   }
0130   // ...if the size is identical...
0131   else {
0132     // ...then try to order by contents of increasing index...
0133     for (std::size_t i = 0; i < v1.size(); ++i) {
0134       if (v1[i] < v2[i]) {
0135         return Ordering::SMALLER;
0136       } else if (v1[i] > v2[i]) {
0137         return Ordering::GREATER;
0138       }
0139     }
0140 
0141     // ...and declare the vectors equal if the contents are equal
0142     return Ordering::EQUAL;
0143   }
0144 }
0145 
0146 // === GENERIC SORTING MECHANISM ===
0147 
0148 // The following functions are generic implementations of sorting algorithms,
0149 // which require only a comparison operator, a swapping operator, and an
0150 // inclusive range of indices to be sorted in order to operate
0151 using IndexComparator = std::function<Ordering(std::size_t, std::size_t)>;
0152 using IndexSwapper = std::function<void(std::size_t, std::size_t)>;
0153 
0154 // Selection sort has pertty bad asymptotic scaling, but it is non-recursive
0155 // and in-place, which makes it a good choice for smaller inputs
0156 void selectionSort(const std::size_t firstIndex, const std::size_t lastIndex,
0157                    const IndexComparator& compare, const IndexSwapper& swap) {
0158   for (std::size_t targetIndex = firstIndex; targetIndex < lastIndex;
0159        ++targetIndex) {
0160     std::size_t minIndex = targetIndex;
0161     for (std::size_t readIndex = targetIndex + 1; readIndex <= lastIndex;
0162          ++readIndex) {
0163       if (compare(readIndex, minIndex) == Ordering::SMALLER) {
0164         minIndex = readIndex;
0165       }
0166     }
0167     if (minIndex != targetIndex) {
0168       swap(minIndex, targetIndex);
0169     }
0170   }
0171 }
0172 
0173 // Quick sort is used as the top-level sorting algorithm for our datasets
0174 void quickSort(const std::size_t firstIndex, const std::size_t lastIndex,
0175                const IndexComparator& compare, const IndexSwapper& swap) {
0176   // We switch to non-recursive selection sort when the range becomes too small.
0177   // This optimization voids the need for detection of 0- and 1-element input.
0178   static const std::size_t NON_RECURSIVE_THRESHOLD = 25;
0179   if (lastIndex - firstIndex < NON_RECURSIVE_THRESHOLD) {
0180     selectionSort(firstIndex, lastIndex, compare, swap);
0181     return;
0182   }
0183 
0184   // We'll use the midpoint as a pivot. Later on, we can switch to more
0185   // elaborate pivot selection schemes if their usefulness for our use case
0186   // (pseudorandom events with thread-originated reordering) is demonstrated.
0187   std::size_t pivotIndex = firstIndex + (lastIndex - firstIndex) / 2;
0188 
0189   // Partition the data around the pivot using Hoare's scheme
0190   std::size_t splitIndex = 0;
0191   {
0192     // Start with two indices one step beyond each side of the array
0193     std::size_t i = firstIndex - 1;
0194     std::size_t j = lastIndex + 1;
0195     while (true) {
0196       // Move left index forward at least once, and until an element which is
0197       // greater than or equal to the pivot is detected.
0198       do {
0199         i = i + 1;
0200       } while (compare(i, pivotIndex) == Ordering::SMALLER);
0201 
0202       // Move right index backward at least once, and until an element which is
0203       // smaller than or equal to the pivot is detected
0204       do {
0205         j = j - 1;
0206       } while (compare(j, pivotIndex) == Ordering::GREATER);
0207 
0208       // By transitivity of inequality, the element at location i is greater
0209       // than or equal to the one at location j, and a swap could be required
0210       if (i < j) {
0211         // These elements are in the wrong order, swap them
0212         swap(i, j);
0213 
0214         // Don't forget to keep track the pivot's index along the way, as this
0215         // is currently the only way by which we can refer to the pivot element.
0216         if (i == pivotIndex) {
0217           pivotIndex = j;
0218         } else if (j == pivotIndex) {
0219           pivotIndex = i;
0220         }
0221       } else {
0222         // If i and j went past each other, our partitioning is done
0223         splitIndex = j;
0224         break;
0225       }
0226     }
0227   }
0228 
0229   // Now, we'll recursively sort both partitions using quicksort. We should
0230   // recurse in the smaller range first, so as to leverage compiler tail call
0231   // optimization if available.
0232   if (splitIndex - firstIndex <= lastIndex - splitIndex - 1) {
0233     quickSort(firstIndex, splitIndex, compare, swap);
0234     quickSort(splitIndex + 1, lastIndex, compare, swap);
0235   } else {
0236     quickSort(splitIndex + 1, lastIndex, compare, swap);
0237     quickSort(firstIndex, splitIndex, compare, swap);
0238   }
0239 }
0240 
0241 // === GENERIC TTREE BRANCH MANIPULATION MECHANISM ===
0242 
0243 // When comparing a pair of TTrees, we'll need to set up quite a few facilities
0244 // for each branch. Since this setup is dependent on the branch data type, which
0245 // is only known at runtime, it is quite involved, which is why we extracted it
0246 // to a separate struct and its constructor.
0247 struct BranchComparisonHarness {
0248   // We'll keep track of the branch name for debugging purposes
0249   std::string branchName;
0250 
0251   // Type-erased event data for the current branch, in both trees being compared
0252   HomogeneousPair<AnyVector> eventData;
0253 
0254   // Function which loads the active event data for the current branch. This is
0255   // to be performed for each branch and combined with TTreeReader-based event
0256   // iteration on both trees.
0257   void loadCurrentEvent() { (*m_eventLoaderPtr)(); }
0258 
0259   // Functors which compare two events within a given tree and order them
0260   // with respect to one another, and which swap two events. By combining such
0261   // functionality for each branch, a global tree order can be produced.
0262   HomogeneousPair<std::pair<IndexComparator, IndexSwapper>> sortHarness;
0263 
0264   // Functor which compares the current event data in *both* trees and tells
0265   // whether it is identical. The comparison is order-sensitive, so events
0266   // should previously have been sorted in a canonical order in both trees.
0267   // By combining the results for each branch, global tree equality is defined.
0268   using TreeComparator = std::function<bool()>;
0269   TreeComparator eventDataEqual;
0270 
0271   // Functor which dumps the event data for the active event side by side, in
0272   // two columns. This enables manual comparison during debugging.
0273   std::function<void()> dumpEventData;
0274 
0275   // General metadata about the tree which is identical for every branch
0276   struct TreeMetadata {
0277     TTreeReader& tree1Reader;
0278     TTreeReader& tree2Reader;
0279     const std::size_t entryCount;
0280   };
0281 
0282   // This exception will be thrown if an unsupported branch type is encountered
0283   class UnsupportedBranchType : public std::exception {};
0284 
0285   // Type-erased factory of branch comparison harnesses, taking ROOT run-time
0286   // type information as input in order to select an appropriate C++ constructor
0287   static BranchComparisonHarness create(TreeMetadata& treeMetadata,
0288                                         const std::string& branchName,
0289                                         const EDataType dataType,
0290                                         const std::string& className) {
0291     switch (dataType) {
0292       case kChar_t:
0293         return BranchComparisonHarness::create<char>(treeMetadata, branchName);
0294       case kUChar_t:
0295         return BranchComparisonHarness::create<unsigned char>(treeMetadata,
0296                                                               branchName);
0297       case kShort_t:
0298         return BranchComparisonHarness::create<short>(treeMetadata, branchName);
0299       case kUShort_t:
0300         return BranchComparisonHarness::create<unsigned short>(treeMetadata,
0301                                                                branchName);
0302       case kInt_t:
0303         return BranchComparisonHarness::create<int>(treeMetadata, branchName);
0304       case kUInt_t:
0305         return BranchComparisonHarness::create<unsigned int>(treeMetadata,
0306                                                              branchName);
0307       case kLong_t:
0308         return BranchComparisonHarness::create<long>(treeMetadata, branchName);
0309       case kULong_t:
0310         return BranchComparisonHarness::create<unsigned long>(treeMetadata,
0311                                                               branchName);
0312       case kULong64_t:
0313         return BranchComparisonHarness::create<unsigned long long>(treeMetadata,
0314                                                                    branchName);
0315 
0316       case kFloat_t:
0317         return BranchComparisonHarness::create<float>(treeMetadata, branchName);
0318       case kDouble_t:
0319         return BranchComparisonHarness::create<double>(treeMetadata,
0320                                                        branchName);
0321       case kBool_t:
0322         return BranchComparisonHarness::create<bool>(treeMetadata, branchName);
0323       case kOther_t:
0324         if (className.substr(0, 6) == "vector") {
0325           std::string elementType = className.substr(7, className.size() - 8);
0326           return BranchComparisonHarness::createVector(treeMetadata, branchName,
0327                                                        std::move(elementType));
0328         } else {
0329           throw UnsupportedBranchType();
0330         }
0331       default:
0332         throw UnsupportedBranchType();
0333     }
0334   }
0335 
0336  private:
0337   // Under the hood, the top-level factory calls the following function
0338   // template, parametrized with the proper C++ data type
0339   template <typename T>
0340   static BranchComparisonHarness create(TreeMetadata& treeMetadata,
0341                                         const std::string& branchName) {
0342     // Our result will eventually go there
0343     BranchComparisonHarness result;
0344 
0345     // Save the branch name for debugging purposes
0346     result.branchName = branchName;
0347 
0348     // Setup type-erased event data storage
0349     auto tree1DataStorage = AnyVector::create<T>();
0350     auto tree2DataStorage = AnyVector::create<T>();
0351     result.eventData = std::make_pair(std::move(tree1DataStorage.first),
0352                                       std::move(tree2DataStorage.first));
0353     std::vector<T>& tree1Data = *tree1DataStorage.second;
0354     std::vector<T>& tree2Data = *tree2DataStorage.second;
0355 
0356     // Use our advance knowledge of the event count to preallocate storage
0357     tree1Data.reserve(treeMetadata.entryCount);
0358     tree2Data.reserve(treeMetadata.entryCount);
0359 
0360     // Setup event data readout
0361     result.m_eventLoaderPtr.reset(
0362         new EventLoaderT<T>{treeMetadata.tree1Reader, treeMetadata.tree2Reader,
0363                             branchName, tree1Data, tree2Data});
0364 
0365     // Setup event comparison and swapping for each tree
0366     result.sortHarness = std::make_pair(
0367         std::make_pair(
0368             [&tree1Data](std::size_t i, std::size_t j) -> Ordering {
0369               return compare(tree1Data[i], tree1Data[j]);
0370             },
0371             [&tree1Data](std::size_t i, std::size_t j) {
0372               std::swap(tree1Data[i], tree1Data[j]);
0373             }),
0374         std::make_pair(
0375             [&tree2Data](std::size_t i, std::size_t j) -> Ordering {
0376               return compare(tree2Data[i], tree2Data[j]);
0377             },
0378             [&tree2Data](std::size_t i, std::size_t j) {
0379               std::swap(tree2Data[i], tree2Data[j]);
0380             }));
0381 
0382     // Setup order-sensitive tree comparison
0383     result.eventDataEqual = [&tree1Data, &tree2Data]() -> bool {
0384       for (std::size_t i = 0; i < tree1Data.size(); ++i) {
0385         if (compare(tree1Data[i], tree2Data[i]) != Ordering::EQUAL) {
0386           return false;
0387         }
0388       }
0389       return true;
0390     };
0391 
0392     // Add a debugging method to dump event data
0393     result.dumpEventData = [&tree1Data, &tree2Data] {
0394       std::cout << "File 1                \tFile 2" << std::endl;
0395       for (std::size_t i = 0; i < tree1Data.size(); ++i) {
0396         std::cout << toString(tree1Data[i]) << "      \t"
0397                   << toString(tree2Data[i]) << std::endl;
0398       }
0399     };
0400 
0401     // ...and we're good to go!
0402     return std::move(result);
0403   }
0404 
0405   // Because the people who created TTreeReaderValue could not bother to make it
0406   // movable (for moving it into a lambda), or even just virtually destructible
0407   // (for moving a unique_ptr into the lambda), loadEventData can only be
0408   // implemented through lots of unpleasant C++98-ish boilerplate.
0409   class IEventLoader {
0410    public:
0411     virtual ~IEventLoader() = default;
0412     virtual void operator()() = 0;
0413   };
0414 
0415   template <typename T>
0416   class EventLoaderT : public IEventLoader {
0417    public:
0418     EventLoaderT(TTreeReader& tree1Reader, TTreeReader& tree2Reader,
0419                  const std::string& branchName, std::vector<T>& tree1Data,
0420                  std::vector<T>& tree2Data)
0421         : branch1Reader{tree1Reader, branchName.c_str()},
0422           branch2Reader{tree2Reader, branchName.c_str()},
0423           branch1Data(tree1Data),
0424           branch2Data(tree2Data) {}
0425 
0426     void operator()() override {
0427       branch1Data.push_back(*branch1Reader);
0428       branch2Data.push_back(*branch2Reader);
0429     }
0430 
0431    private:
0432     TTreeReaderValue<T> branch1Reader, branch2Reader;
0433     std::vector<T>& branch1Data;
0434     std::vector<T>& branch2Data;
0435   };
0436 
0437   std::unique_ptr<IEventLoader> m_eventLoaderPtr;
0438 
0439   // This helper factory helps building branches associated with std::vectors
0440   // of data, which are the only STL collection that we support at the moment.
0441   static BranchComparisonHarness createVector(TreeMetadata& treeMetadata,
0442                                               const std::string& branchName,
0443                                               const std::string elemType) {
0444 // We support vectors of different types by switching across type (strings)
0445 #define CREATE_VECTOR__HANDLE_TYPE(type_name)                       \
0446   if (elemType == #type_name) {                                     \
0447     return BranchComparisonHarness::create<std::vector<type_name>>( \
0448         treeMetadata, branchName);                                  \
0449   }
0450 
0451     // Handle vectors of booleans
0452     CREATE_VECTOR__HANDLE_TYPE(bool)
0453 
0454     // Handle vectors of all standard floating-point types
0455     else CREATE_VECTOR__HANDLE_TYPE(float) else CREATE_VECTOR__HANDLE_TYPE(
0456         double)
0457 
0458 // For integer types, we'll want to handle both signed and unsigned versions
0459 #define CREATE_VECTOR__HANDLE_INTEGER_TYPE(integer_type_name) \
0460   CREATE_VECTOR__HANDLE_TYPE(integer_type_name)               \
0461   else CREATE_VECTOR__HANDLE_TYPE(unsigned integer_type_name)
0462 
0463         // Handle vectors of all standard integer types
0464         else CREATE_VECTOR__HANDLE_INTEGER_TYPE(char) else CREATE_VECTOR__HANDLE_INTEGER_TYPE(short) else CREATE_VECTOR__HANDLE_INTEGER_TYPE(
0465             int) else CREATE_VECTOR__HANDLE_INTEGER_TYPE(long) else CREATE_VECTOR__HANDLE_INTEGER_TYPE(long long)
0466 
0467         // Throw an exception if the vector element type is not recognized
0468         else throw UnsupportedBranchType();
0469   }
0470 
0471   // This helper method provides general string conversion for all supported
0472   // branch event data types.
0473   template <typename T>
0474   static std::string toString(const T& data) {
0475     std::ostringstream oss;
0476     oss << data;
0477     return oss.str();
0478   }
0479 
0480   template <typename U>
0481   static std::string toString(const std::vector<U>& vector) {
0482     std::ostringstream oss{"{ "};
0483     for (const auto& data : vector) {
0484       oss << data << "  \t";
0485     }
0486     oss << " }";
0487     return oss.str();
0488   }
0489 };