DAS  3.0
Das Analysis System
Flow.h
Go to the documentation of this file.
1 // SPDX-License-Identifier: GPLv3-or-later
2 //
3 // SPDX-FileCopyrightText: Patrick L.S. Connor <patrick.connor@desy.de>
4 // SPDX-FileCopyrightText: Louis Moureaux <louis.moureaux@cern.ch>
5 
6 #pragma once
7 
8 #include "exceptions.h"
9 #include "FriendUtils.h"
10 #include "Looper.h"
11 
12 #include <TChain.h>
13 #include <TFile.h>
14 #include <TH1.h>
15 #include <TTree.h>
16 
17 #include <any>
18 #include <array>
19 #include <filesystem>
20 #include <map>
21 #include <memory>
22 #include <ranges>
23 #include <source_location>
24 #include <string>
25 #include <vector>
26 
27 namespace Darwin::Tools {
28 
/// Policy applied when a requested branch cannot be mounted
/// (see `Flow::NoBranch()`: `facultative` yields a null pointer,
/// anything else raises an exception).
enum BranchMode {
    mandatory,   //!< mounting branch is mandatory
    facultative  //!< mounting branch is facultative
};
33 
34 using enum BranchMode;
35 
/// total number of slices (>0) / current slice index (>0)
using Slice = std::pair<int, int>;

/// Prints the current slice and the total number of slices,
/// formatted as `current/total`.
inline std::ostream& operator<< (std::ostream& Stream, const Slice& slice)
{
    // `first` holds the total number of slices, `second` the current index
    const auto& [total, current] = slice;
    Stream << current;
    Stream << '/';
    Stream << total;
    return Stream;
}
44 
78 class Flow {
79 
80  int steering;
81 
82  std::vector<std::filesystem::path> inputs;
83 
84  std::unique_ptr<ChainSlice> tIn;
85  std::shared_ptr<TFile> fOut;
86  std::unique_ptr<TTree> tOut;
87 
88  std::map<std::string, std::any> branches;
89 
92  template<typename T> requires std::is_compound<T>::value
93  std::shared_ptr<T*> branch_cast (std::any& a)
94  try {
95  return std::any_cast<std::shared_ptr<T*>>(a);
96  }
97  catch (const std::bad_any_cast& e) {
98  BOOST_THROW_EXCEPTION(e);
99  }
100 
103  template<typename T> requires std::is_fundamental<T>::value
104  std::shared_ptr<T> branch_cast (std::any& a)
105  try {
106  return std::any_cast<std::shared_ptr<T>>(a);
107  }
108  catch (const std::bad_any_cast& e) {
109  BOOST_THROW_EXCEPTION(e);
110  }
111 
114  template<typename T> requires std::is_compound<T>::value
115  T * GetBranchAddress (std::shared_ptr<T*> ptr)
116  {
117  return *ptr;
118  }
119 
122  template<typename T> requires std::is_fundamental<T>::value
123  T * GetBranchAddress (std::shared_ptr<T> ptr)
124  {
125  return ptr.get();
126  }
127 
130  template<typename T>
131  T * GetBranchAddress (const std::string& name)
132  {
133  using namespace std;
134  any& a = branches[name];
135  shared_ptr ptr = branch_cast<T>(a);
136  return GetBranchAddress<T>(ptr);
137  }
138 
141  template<typename T> T * NoBranch (const std::string& name, BranchMode mode)
142  {
143  if (mode == facultative)
144  return nullptr;
145 
146  namespace DE = Darwin::Exceptions;
147  std::string what = name + " branch could not be found";
148  BOOST_THROW_EXCEPTION( DE::BadInput(what.c_str(), *tIn) );
149  }
150 
151 public:
154  std::string DumpActiveBranches () const
155  {
156  auto keys = branches | std::views::keys;
157  return std::accumulate(keys.begin(), keys.end(), std::string(),
158  [](const std::string& str, const std::string& branch_name) {
159  return str + ' ' + branch_name;
160  });
161  }
162 
167  Flow (int = none,
168  const std::vector<std::filesystem::path>& = {}
169  );
170 
175  ~Flow ();
176 
179  ChainSlice * GetInputTree
180  (const Slice,
181  const std::string& = "events");
182 
185  ChainSlice * GetInputTree
186  (const std::string& = "events");
187 
193  template<typename THX = TH1, size_t N>
194  std::array<std::unique_ptr<THX>, N> GetInputHists
195  (const std::array<std::string, N>& names = {}
196  )
197  {
198  using namespace std;
199  namespace fs = filesystem;
200 
201  if (inputs.size() == 0)
202  BOOST_THROW_EXCEPTION( invalid_argument("Empty list of input files") );
203 
204  array<unique_ptr<THX>, N> sums;
205  for (const fs::path& input: inputs) {
206  auto fIn = make_unique<TFile>(input.c_str(), "READ");
207  for (size_t i = 0; i < N; ++i) {
208  const string& name = names[i];
209  unique_ptr<THX> h(fIn->Get<THX>(name.c_str()));
210  if (!h) {
211  namespace DE = Darwin::Exceptions;
212  BOOST_THROW_EXCEPTION(
213  DE::BadInput(Form("`%s` cannot be found in (one of) the "
214  " file(s).", name.c_str()), fIn));
215  }
216  if (sums[i])
217  sums[i]->Add(h.get());
218  else {
219  sums[i] = std::move(h);
220  sums[i]->SetDirectory(nullptr);
221  }
222  }
223  }
224  return sums;
225  }
226 
232  template<typename THX = TH1, typename... Args>
233  auto GetInputHists (const Args... args)
234  {
235  constexpr const size_t N = sizeof...(args);
236  std::array<std::string, N> names {{ args... }};
237  return GetInputHists<THX, N>(names);
238  }
239 
244  template<typename THX = TH1>
245  std::unique_ptr<THX> GetInputHist
246  (const std::string& name)
247  {
248  auto hists = GetInputHists<THX,1>({name});
249  THX * hist = hists.front().release();
250  return std::unique_ptr<THX>(hist);
251  }
252 
266  TTree * GetOutputTree (std::shared_ptr<TFile> = {},
267  const std::source_location = std::source_location::current());
268 
271  TTree * GetOutputTree (const std::filesystem::path&,
272  const std::source_location = std::source_location::current());
273 
276  void SetOutputFile (std::shared_ptr<TFile> fOut) { this->fOut = fOut; }
277 
280  TFile * GetOutputFile () { return fOut.get(); }
281 
284  std::pair<TFile *, TTree *> GetOutput (const std::filesystem::path&,
285  const std::source_location = std::source_location::current());
286 
297  template<typename T>
299  const std::string& name,
300  BranchMode mode = mandatory)
301  {
302  using namespace std;
303 
304  if (!tIn)
305  BOOST_THROW_EXCEPTION( invalid_argument("`GetInputTree()` should "
306  "be called before declaring a read-only branch") );
307 
308  if (!branches.contains(name)) {
309 
310  any branch;
311  if constexpr (is_compound<T>::value)
312  branch = make_shared<T*>();
313  else
314  branch = make_shared<T>();
315 
316  if (steering & verbose)
317  cout << "Flow: loading read-only branch `" << name << "`" << endl;
318 
319  if (tIn->GetBranch(name.c_str()) == nullptr)
320  return NoBranch<T>(name, mode);
321 
322  int err = tIn->SetBranchAddress(name.c_str(), branch_cast<T>(branch).get());
323  if (steering & verbose)
324  cout << "Flow: `TTree::SetBranchAddress()` returned " << to_string(err)
325  << " (check `TTree::ESetBranchAddressStatus` for the meaning)."
326  << endl;
327  if (err < 0) {
328  string what = "`"s + name + "` branch could not be set. "s;
329  if (mode == facultative) {
330  if (steering & verbose)
331  cout << orange << "Flow: " << what << def << endl;
332  return nullptr;
333  }
334 
335  namespace DE = Darwin::Exceptions;
336  BOOST_THROW_EXCEPTION( DE::BadInput(what.c_str(), *tIn) );
337  }
338  branches.emplace(name, branch);
339  }
340 
341  return GetBranchAddress<T>(name);
342  }
343 
352  template<typename T>
353  T * GetBranchWriteOnly (const std::string& name)
354  {
355  using namespace std;
356 
357  if (!tOut)
358  BOOST_THROW_EXCEPTION( invalid_argument("`GetOutputTree()` should "
359  "be called before") );
360 
361  if (!branches.contains(name)) {
362  if constexpr (is_compound<T>::value)
363  branches.emplace(name, make_shared<T*>());
364  else
365  branches.emplace(name, make_shared<T>());
366  }
367 
368  any branch = branches[name];
369  shared_ptr ptr = branch_cast<T>(branch);
370 
371  if (steering & verbose)
372  cout << "Flow: setting up a write-only branch for `" << name << "`" << endl;
373 
374  if (tOut->Branch(name.c_str(), ptr.get()) == nullptr) {
375  namespace DE = Darwin::Exceptions;
376  string what = name + " branch could not be set up";
377  BOOST_THROW_EXCEPTION( DE::BadInput(what.c_str(), *tOut) );
378  }
379 
380  return GetBranchAddress<T>(ptr);
381  }
382 
391  template<typename T>
392  T * GetBranchReadWrite (const std::string& name,
393  BranchMode mode = mandatory)
394  {
395  using namespace std;
396 
397  if (!tIn)
398  BOOST_THROW_EXCEPTION( invalid_argument("`GetInputTree()` should "
399  "be called before declaring a read-write branch") );
400 
401  if (!tOut)
402  BOOST_THROW_EXCEPTION( invalid_argument("`GetOutputTree()` should "
403  "be called before declaring a read-write branch") );
404 
405  if (!branches.contains(name)) {
406  if (GetBranchReadOnly<T>(name, mode) == nullptr)
407  return NoBranch<T>(name, mode);
408  if (steering & Friend)
409  GetBranchWriteOnly<T>(name);
410  }
411 
412  return GetBranchAddress<T>(name);
413  }
414 };
415 
416 } // namespace Darwin::Tools
417 
418 using Darwin::Tools::operator<<;
DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.name
name
Definition: DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.py:48
Darwin::Tools::Flow::branches
std::map< std::string, std::any > branches
pointers to mounted branches
Definition: Flow.h:88
Darwin::Tools::mandatory
@ mandatory
mounting branch is mandatory
Definition: Flow.h:30
Darwin::Tools::Flow
User-friendly handling of input and output n-tuples.
Definition: Flow.h:78
exceptions.h
Step::def
static const char * def
Definition: Step.h:36
Darwin::Tools::Friend
@ Friend
activate -F to only fill the new branches
Definition: Options.h:28
Ntupliser_cfg.args
args
Definition: Ntupliser_cfg.py:11
Darwin::Tools::Flow::GetBranchWriteOnly
T * GetBranchWriteOnly(const std::string &name)
Wrapper to initialise write-only branches.
Definition: Flow.h:353
Darwin::Tools::Slice
std::pair< int, int > Slice
total number of slices (>0) / current slice index (>0)
Definition: Flow.h:36
Darwin::Tools::Flow::tOut
std::unique_ptr< TTree > tOut
output tree
Definition: Flow.h:86
Darwin::Tools::Flow::fOut
std::shared_ptr< TFile > fOut
output ROOT file
Definition: Flow.h:85
Darwin::Tools::BranchMode
BranchMode
Definition: Flow.h:29
Darwin::Tools::none
@ none
default (for simple executables)
Definition: Options.h:24
Darwin::Tools::Flow::GetBranchAddress
requires std::is_fundamental< T >::value T * GetBranchAddress(std::shared_ptr< T > ptr)
Get branch address for fundamental types.
Definition: Flow.h:123
Darwin::Tools::Flow::GetBranchReadWrite
T * GetBranchReadWrite(const std::string &name, BranchMode mode=mandatory)
Wrapper to initialise read-write branches.
Definition: Flow.h:392
Darwin::Tools::Flow::branch_cast
requires std::is_fundamental< T >::value std::shared_ptr< T > branch_cast(std::any &a)
Casts from std::any to shared_ptr
Definition: Flow.h:104
Darwin::Tools::Flow::GetBranchAddress
requires std::is_compound< T >::value T * GetBranchAddress(std::shared_ptr< T * > ptr)
Get branch address for compound types.
Definition: Flow.h:115
Darwin::Tools::Flow::GetInputHists
std::array< std::unique_ptr< THX >, N > GetInputHists(const std::array< std::string, N > &names={})
Load ROOT histograms from a list of files.
Definition: Flow.h:195
Darwin::Tools::Flow::GetInputHists
auto GetInputHists(const Args... args)
Load ROOT histograms from a list of files.
Definition: Flow.h:233
Darwin::Tools::Flow::Flow
Flow(int=none, const std::vector< std::filesystem::path > &={})
Constructor.
Definition: Flow.cc:11
Looper.h
Darwin::Tools::Flow::NoBranch
T * NoBranch(const std::string &name, BranchMode mode)
Wrapper either to return a nullptr or to throw an error.
Definition: Flow.h:141
Darwin::Exceptions
Handling of exceptions.
Definition: darwin.h:36
Darwin::Tools::verbose
@ verbose
bit for debug mode (-v is always available)
Definition: Options.h:30
orange
static const char * orange
Definition: colours.h:6
DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.input
input
Definition: DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.py:35
Darwin::Tools::operator<<
std::ostream & operator<<(std::ostream &Stream, const Darwin::Tools::Slice &slice)
Prints the current slice and the total number of slices.
Definition: Flow.h:40
FriendUtils.h
Darwin::Tools::Flow::GetInputTree
ChainSlice * GetInputTree(const Slice, const std::string &="events")
Load chain from a list of files with non-empty trees.
Definition: Flow.cc:67
Darwin::Tools::Flow::DumpActiveBranches
std::string DumpActiveBranches() const
Returns a list of all active branches, formatted as a string.
Definition: Flow.h:154
Darwin::Tools::Flow::branch_cast
requires std::is_compound< T >::value std::shared_ptr< T * > branch_cast(std::any &a)
Casts from std::any to shared_ptr
Definition: Flow.h:93
Darwin::Tools::Flow::inputs
std::vector< std::filesystem::path > inputs
ROOT files or directories.
Definition: Flow.h:82
Darwin::Tools
Classes and functions related to the framework.
Definition: Darwin_dict.cxx:1144
Darwin::Tools::Flow::GetOutputTree
TTree * GetOutputTree(std::shared_ptr< TFile >={}, const std::source_location=std::source_location::current())
Create an output TTree object.
Definition: Flow.cc:89
Darwin::Tools::Flow::GetOutput
std::pair< TFile *, TTree * > GetOutput(const std::filesystem::path &, const std::source_location=std::source_location::current())
Get both the output file and the output tree in one go.
Definition: Flow.cc:164
Darwin::Tools::Flow::GetBranchReadOnly
T * GetBranchReadOnly(const std::string &name, BranchMode mode=mandatory)
Wrapper to initialise read-only branches.
Definition: Flow.h:298
Darwin::Tools::Flow::steering
int steering
steering from Options, mostly useful for friends
Definition: Flow.h:80
Darwin::Tools::Flow::~Flow
~Flow()
Destructor.
Definition: Flow.cc:19
Darwin::Tools::Flow::tIn
std::unique_ptr< ChainSlice > tIn
input chain
Definition: Flow.h:84
Darwin::Tools::Flow::GetBranchAddress
T * GetBranchAddress(const std::string &name)
Get branch address from name (universal)
Definition: Flow.h:131
Darwin::Tools::Flow::GetInputHist
std::unique_ptr< THX > GetInputHist(const std::string &name)
Load a single ROOT histogram from a list of files.
Definition: Flow.h:246
Darwin::Tools::Flow::SetOutputFile
void SetOutputFile(std::shared_ptr< TFile > fOut)
Set the output file where the output TTree should be saved.
Definition: Flow.h:276
Darwin::Tools::Flow::GetOutputFile
TFile * GetOutputFile()
Get a raw pointer to the output file.
Definition: Flow.h:280
Darwin::Exceptions::BadInput
Generic exception for ill-defined input (before the event loop).
Definition: exceptions.h:83
Darwin::Tools::facultative
@ facultative
mounting branch is facultative
Definition: Flow.h:31