Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-05 08:10:11

0001 // This file is part of the Acts project.
0002 //
0003 // Copyright (C) 2017 CERN for the benefit of the Acts project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
0008 
// This ROOT script compares two ROOT files in an order-insensitive way. Its
// intended use is to compare the output of a single-threaded program and a
// multi-threaded program in order to check that results are perfectly
// reproducible.
0012 //
0013 // As a current limitation, which may be lifted in the future, the script does
0014 // all of its processing in RAM, which means that the input dataset must fit in
0015 // RAM. So do not try to run this on terabytes of data. You don't need that much
0016 // data to check that your multithreaded program runs well anyhow.
0017 //
0018 // Another limitation is that the comparison relies on perfect output
0019 // reproducibility, which is a very costly guarantee to achieve in a
0020 // multi-threaded environment. If you want to compare "slightly different"
0021 // outputs, this script will not work as currently written. I cannot think of a
0022 // way in which imperfect reproducibility could be checked in a manner which
0023 // doesn't depend on the details of the data being compared.
0024 
0025 #include <cstring>
0026 #include <map>
0027 #include <string>
0028 #include <utility>
0029 #include <vector>
0030 
0031 #include "TBranch.h"
0032 #include "TFile.h"
0033 #include "TKey.h"
0034 #include "TList.h"
0035 #include "TObject.h"
0036 #include "TTree.h"
0037 #include "TTreeReader.h"
0038 
0039 #include "compareRootFiles.hpp"
0040 
// Minimal mechanism for assertion checking and comparison
//
// All three macros are meant to be used inside a function returning an
// integer: when the check fails, they print a message to std::cout and make
// the enclosing function return 1.
//
// The bodies are wrapped in do { ... } while (0) so that each macro invocation
// behaves like a single statement (it requires a trailing semicolon and can be
// used safely as the unbraced branch of an if/else).

// Checks that `pred` holds. `msg` is deliberately left unparenthesized because
// callers pass a chain of stream insertions ("text" << value << ...).
#define CHECK(pred, msg)                                                       \
  do {                                                                         \
    if (!(pred)) {                                                             \
      std::cout << msg << std::endl;                                           \
      return 1;                                                                \
    }                                                                          \
  } while (0)

// Checks that `v1 == v2`. On failure, both values are evaluated a second time
// in order to be printed, so the arguments should be free of side effects.
#define CHECK_EQUAL(v1, v2, msg)                                               \
  CHECK((v1) == (v2), msg << "(" << (v1) << " vs " << (v2) << ") ")

// Checks that two C strings have equal contents, using strcmp(). On failure,
// both strings are evaluated a second time in order to be printed.
#define CHECK_STR_EQUAL(s1, s2, msg)                                           \
  CHECK(strcmp((s1), (s2)) == 0, msg << " (" << (s1) << " vs " << (s2) << ") ")
0053 
0054 // This script returns 0 if the files have identical contents except for event
0055 // ordering, and a nonzero result if the contents differ or an error occurred.
0056 //
0057 // If the optional dump_data_on_failure flag is set, it will also dump the
0058 // mismatching event data to stdout on failure for manual inspection.
0059 //
0060 // If the optional skip_unsupported_branches flag is set, the script will ignore
0061 // unsupported branch types in the input file instead of aborting.
0062 //
0063 
0064 int
0065 compareRootFiles(std::string file1,
0066                  std::string file2,
0067                  bool        dump_data_on_failure      = false,
0068                  bool        skip_unsupported_branches = false)
0069 {
0070   std::cout << "Comparing ROOT files " << file1 << " and " << file2
0071             << std::endl;
0072 
0073   std::cout << "* Opening the files..." << std::endl;
0074   HomogeneousPair<TFile> files{file1.c_str(), file2.c_str()};
0075   if (files.first.IsZombie()) {
0076     std::cout << "  - Could not open file " << file1 << "!" << std::endl;
0077     return 2;
0078   } else if (files.second.IsZombie()) {
0079     std::cout << "  - Could not open file " << file2 << "!" << std::endl;
0080     return 2;
0081   }
0082 
0083   std::cout << "* Extracting file keys..." << std::endl;
0084   HomogeneousPair<std::vector<TKey*>> fileKeys;
0085   {
0086     // This is how we would extract keys from one file
0087     const auto loadKeys = [](const TFile& file, std::vector<TKey*>& target) {
0088       const int keyCount = file.GetNkeys();
0089       target.reserve(keyCount);
0090       TIter keyIter{file.GetListOfKeys()};
0091       for (int i = 0; i < keyCount; ++i) {
0092         target.emplace_back(dynamic_cast<TKey*>(keyIter()));
0093       }
0094     };
0095 
0096     // Do it for each of our files
0097     loadKeys(files.first, fileKeys.first);
0098     loadKeys(files.second, fileKeys.second);
0099   }
0100 
0101   std::cout << "* Selecting the latest key cycle..." << std::endl;
0102   std::vector<HomogeneousPair<TKey*>> keyPairs;
0103   {
0104     // For each file and for each key name, we want to know what is the latest
0105     // key cycle, and who is the associated key object
0106     using KeyMetadata  = std::pair<short, TKey*>;
0107     using FileMetadata = std::map<std::string, KeyMetadata>;
0108     HomogeneousPair<FileMetadata> metadata;
0109 
0110     // This is how we compute this metadata for a single file
0111     const auto findLatestCycle
0112         = [](const std::vector<TKey*>& keys, FileMetadata& target) {
0113             // Iterate through the file's keys
0114             for (const auto key : keys) {
0115               // Extract information about the active key
0116               const std::string keyName{key->GetName()};
0117               const short       newCycle{key->GetCycle()};
0118 
0119               // Do we already know of a key with the same name?
0120               auto latestCycleIter = target.find(keyName);
0121               if (latestCycleIter != target.end()) {
0122                 // If so, keep the key with the most recent cycle number
0123                 auto& latestCycleMetadata = latestCycleIter->second;
0124                 if (newCycle > latestCycleMetadata.first) {
0125                   latestCycleMetadata = {newCycle, key};
0126                 }
0127               } else {
0128                 // If not, this is obviously the most recent key we've seen so
0129                 // far
0130                 target.emplace(keyName, KeyMetadata{newCycle, key});
0131               }
0132             }
0133           };
0134 
0135     // We'll compute this information for both of our files...
0136     std::cout << "  - Finding the latest cycle for each file..." << std::endl;
0137     findLatestCycle(fileKeys.first, metadata.first);
0138     findLatestCycle(fileKeys.second, metadata.second);
0139 
0140     // ...and then we'll group the latest keys by name, detect keys which only
0141     // exist in a single file along the way, and report that as an error
0142     std::cout << "  - Grouping per-file latest keys..." << std::endl;
0143     {
0144       // Make sure that both files have the same amount of keys once duplicate
0145       // versions are removed
0146       const auto f1KeyCount = metadata.first.size();
0147       const auto f2KeyCount = metadata.second.size();
0148       CHECK_EQUAL(
0149           f1KeyCount, f2KeyCount, "    o Number of keys does not match");
0150       keyPairs.reserve(f1KeyCount);
0151 
0152       // Iterate through the keys, in the same order (as guaranteed by std::map)
0153       for (auto f1MetadataIter = metadata.first.cbegin(),
0154                 f2MetadataIter = metadata.second.cbegin();
0155            f1MetadataIter != metadata.first.cend();
0156            ++f1MetadataIter, ++f2MetadataIter) {
0157         // Do the keys have the same name?
0158         const auto& f1KeyName = f1MetadataIter->first;
0159         const auto& f2KeyName = f2MetadataIter->first;
0160         CHECK_EQUAL(f1KeyName, f2KeyName, "    o Key names do not match");
0161 
0162         // If so, extract the associated key pair
0163         keyPairs.emplace_back(f1MetadataIter->second.second,
0164                               f2MetadataIter->second.second);
0165       }
0166     }
0167   }
0168 
0169   std::cout << "* Comparing key metadata..." << std::endl;
0170   for (const auto& keyPair : keyPairs) {
0171     const auto& key1 = keyPair.first;
0172     const auto& key2 = keyPair.second;
0173 
0174     CHECK_STR_EQUAL(key1->GetClassName(),
0175                     key2->GetClassName(),
0176                     "  - Class name does not match!");
0177     CHECK_STR_EQUAL(
0178         key1->GetTitle(), key2->GetTitle(), "  - Title does not match!");
0179     CHECK_EQUAL(key1->GetVersion(),
0180                 key2->GetVersion(),
0181                 "  - Key version does not match!");
0182   }
0183 
0184   // NOTE: The current version of this script only supports TTree file contents.
0185   //       It may be extended later if the need for other data formats arise.
0186   std::cout << "* Extracting TTrees..." << std::endl;
0187   std::vector<HomogeneousPair<TTree*>> treePairs;
0188   for (const auto& keyPair : keyPairs) {
0189     TObject* obj1 = keyPair.first->ReadObj();
0190     TObject* obj2 = keyPair.second->ReadObj();
0191 
0192     CHECK_STR_EQUAL(obj1->ClassName(),
0193                     obj2->ClassName(),
0194                     "  - Object type does not match!");
0195     CHECK_STR_EQUAL(
0196         obj1->ClassName(), "TTree", "  - Non-TTree input is not supported!");
0197 
0198     treePairs.emplace_back(dynamic_cast<TTree*>(obj1),
0199                            dynamic_cast<TTree*>(obj2));
0200   }
0201 
0202   std::cout << "* Comparing the trees..." << std::endl;
0203   for (const auto& treePair : treePairs) {
0204     const auto& tree1 = treePair.first;
0205     const auto& tree2 = treePair.second;
0206 
0207     std::cout << "  - Comparing tree " << tree1->GetName() << "..."
0208               << std::endl;
0209 
0210     std::cout << "    o Comparing tree-wide metadata..." << std::endl;
0211     const std::size_t t1EntryCount = tree1->GetEntries();
0212     {
0213       const std::size_t t2EntryCount = tree2->GetEntries();
0214       CHECK_EQUAL(t1EntryCount,
0215                   t2EntryCount,
0216                   "      ~ Number of entries does not match!");
0217     }
0218 
0219     if (t1EntryCount == 0) {
0220       std::cout << "    o Skipping empty tree!" << std::endl;
0221       continue;
0222     }
0223 
0224     std::cout << "    o Preparing for tree readout..." << std::endl;
0225     TTreeReader                           t1Reader(tree1);
0226     TTreeReader                           t2Reader(tree2);
0227     BranchComparisonHarness::TreeMetadata treeMetadata{
0228         t1Reader, t2Reader, t1EntryCount};
0229 
0230     std::cout << "    o Comparing branch metadata..." << std::endl;
0231     std::vector<HomogeneousPair<TBranch*>> branchPairs;
0232     {
0233       // Check number of branches and allocate branch storage
0234       const int t1BranchCount = tree1->GetNbranches();
0235       const int t2BranchCount = tree2->GetNbranches();
0236       CHECK_EQUAL(t1BranchCount,
0237                   t2BranchCount,
0238                   "      ~ Number of branches does not match!");
0239       branchPairs.reserve(t1BranchCount);
0240 
0241       // Extract branches using TTree::GetListOfBranches()
0242       TIter t1BranchIter{tree1->GetListOfBranches()};
0243       TIter t2BranchIter{tree2->GetListOfBranches()};
0244       for (int i = 0; i < t1BranchCount; ++i) {
0245         branchPairs.emplace_back(dynamic_cast<TBranch*>(t1BranchIter()),
0246                                  dynamic_cast<TBranch*>(t2BranchIter()));
0247       }
0248     }
0249 
0250     std::cout << "    o Setting up branch-specific processing..." << std::endl;
0251     std::vector<BranchComparisonHarness> branchComparisonHarnesses;
0252     branchComparisonHarnesses.reserve(branchPairs.size());
0253     for (const auto& branchPair : branchPairs) {
0254       const auto& branch1 = branchPair.first;
0255       const auto& branch2 = branchPair.second;
0256 
0257       std::cout << "      ~ Checking branch metadata..." << std::endl;
0258       std::string b1ClassName, b1BranchName;
0259       EDataType   b1DataType;
0260       {
0261         std::string b2ClassName, b2BranchName;
0262         EDataType   b2DataType;
0263         TClass*     unused;
0264 
0265         b1ClassName = branch1->GetClassName();
0266         b2ClassName = branch2->GetClassName();
0267         CHECK_EQUAL(
0268             b1ClassName, b2ClassName, "        + Class name does not match!");
0269         branch1->GetExpectedType(unused, b1DataType);
0270         branch2->GetExpectedType(unused, b2DataType);
0271         CHECK_EQUAL(
0272             b1DataType, b2DataType, "        + Raw data type does not match!");
0273         const int b1LeafCount = branch1->GetNleaves();
0274         const int b2LeafCount = branch2->GetNleaves();
0275         CHECK_EQUAL(b1LeafCount,
0276                     b2LeafCount,
0277                     "        + Number of leaves does not match!");
0278         CHECK_EQUAL(
0279             b1LeafCount,
0280             1,
0281             "        + Branches with several leaves are not supported!");
0282         b1BranchName = branch1->GetName();
0283         b2BranchName = branch2->GetName();
0284         CHECK_EQUAL(b1BranchName,
0285                     b2BranchName,
0286                     "        + Branch name does not match!");
0287       }
0288 
0289       std::cout << "      ~ Building comparison harness for branch "
0290                 << b1BranchName << "..." << std::endl;
0291       try {
0292         auto branchHarness = BranchComparisonHarness::create(
0293             treeMetadata, b1BranchName, b1DataType, b1ClassName);
0294         branchComparisonHarnesses.emplace_back(std::move(branchHarness));
0295       } catch (BranchComparisonHarness::UnsupportedBranchType) {
0296         // When encountering an unsupported branch type, we can either skip
0297         // the branch or abort depending on configuration
0298         std::cout << "        + Unsupported branch type! "
0299                   << "(eDataType: " << b1DataType << ", ClassName: \""
0300                   << b1ClassName << "\")" << std::endl;
0301         if (skip_unsupported_branches) {
0302           continue;
0303         } else {
0304           return 3;
0305         }
0306       }
0307     }
0308 
0309     std::cout << "    o Reading event data..." << std::endl;
0310     for (std::size_t i = 0; i < t1EntryCount; ++i) {
0311       // Move to the next TTree entry (= next event)
0312       t1Reader.Next();
0313       t2Reader.Next();
0314 
0315       // Load the data associated with each branch
0316       for (auto& branchHarness : branchComparisonHarnesses) {
0317         branchHarness.loadCurrentEvent();
0318       }
0319     }
0320 
0321     std::cout << "    o Sorting the first tree..." << std::endl;
0322     {
0323       std::cout << "      ~ Defining event comparison operator..." << std::endl;
0324       IndexComparator t1CompareEvents
0325           = [&branchComparisonHarnesses](std::size_t i,
0326                                          std::size_t j) -> Ordering {
0327         for (auto& branchHarness : branchComparisonHarnesses) {
0328           const auto order = branchHarness.sortHarness.first.first(i, j);
0329           if (order != Ordering::EQUAL) { return order; }
0330         }
0331         return Ordering::EQUAL;
0332       };
0333 
0334       std::cout << "      ~ Defining event swapping operator..." << std::endl;
0335       IndexSwapper t1SwapEvents
0336           = [&branchComparisonHarnesses](std::size_t i, std::size_t j) {
0337               for (auto& branchHarness : branchComparisonHarnesses) {
0338                 branchHarness.sortHarness.first.second(i, j);
0339               }
0340             };
0341 
0342       std::cout << "      ~ Running quicksort on the tree..." << std::endl;
0343       quickSort(0, t1EntryCount - 1, t1CompareEvents, t1SwapEvents);
0344     }
0345 
0346     std::cout << "    o Sorting the second tree..." << std::endl;
0347     {
0348       std::cout << "      ~ Defining event comparison operator..." << std::endl;
0349       IndexComparator t2CompareEvents
0350           = [&branchComparisonHarnesses](std::size_t i,
0351                                          std::size_t j) -> Ordering {
0352         for (auto& branchHarness : branchComparisonHarnesses) {
0353           const auto order = branchHarness.sortHarness.second.first(i, j);
0354           if (order != Ordering::EQUAL) { return order; }
0355         }
0356         return Ordering::EQUAL;
0357       };
0358 
0359       std::cout << "      ~ Defining event swapping operator..." << std::endl;
0360       IndexSwapper t2SwapEvents
0361           = [&branchComparisonHarnesses](std::size_t i, std::size_t j) {
0362               for (auto& branchHarness : branchComparisonHarnesses) {
0363                 branchHarness.sortHarness.second.second(i, j);
0364               }
0365             };
0366 
0367       std::cout << "      ~ Running quicksort on the tree..." << std::endl;
0368       quickSort(0, t1EntryCount - 1, t2CompareEvents, t2SwapEvents);
0369     }
0370 
0371     std::cout << "    o Checking that both trees are now equal..." << std::endl;
0372     for (auto& branchHarness : branchComparisonHarnesses) {
0373       std::cout << "      ~ Comparing branch " << branchHarness.branchName
0374                 << "..." << std::endl;
0375       if (!branchHarness.eventDataEqual()) {
0376         std::cout << "        + Branch contents do not match!" << std::endl;
0377         if (dump_data_on_failure) {
0378           std::cout << "        + Dumping branch contents:" << std::endl;
0379           branchHarness.dumpEventData();
0380         }
0381         return 4;
0382       }
0383     }
0384   }
0385 
0386   std::cout << "* Input files are equal, event order aside!" << std::endl;
0387   return 0;
0388 }