DAS  3.0
Das Analysis System
Flow.h
Go to the documentation of this file.
1 // SPDX-License-Identifier: GPLv3-or-later
2 //
3 // SPDX-FileCopyrightText: Patrick L.S. Connor <patrick.connor@desy.de>
4 // SPDX-FileCopyrightText: Louis Moureaux <louis.moureaux@cern.ch>
5 
6 #pragma once
7 
#include "exceptions.h"
#include "FriendUtils.h"
#include "IPlugin.h"
#include "Looper.h"
#include "PluginLoader.h"

#include <TChain.h>
#include <TFile.h>
#include <TH1.h>
#include <TTree.h>

#include <any>
#include <array>
#include <cstddef>
#include <filesystem>
#include <iostream>
#include <map>
#include <memory>
#include <ranges>
#include <source_location>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
28 
29 namespace Darwin::Tools {
30 
////////////////////////////////////////////////////////////////////////////////
/// Tells the branch wrappers of `Flow` how to react when a branch cannot be
/// mounted: a `facultative` branch yields a null pointer, any other mode
/// results in an exception.
enum BranchMode {
    mandatory,  //!< mounting branch is mandatory
    facultative //!< mounting branch is facultative
};
35 
36 using enum BranchMode;
37 
/// total number of slices (>0) / current slice index (>0)
using Slice = std::pair<int, int>;

////////////////////////////////////////////////////////////////////////////////
/// Prints the current slice and the total number of slices, formatted as
/// `current/total` (i.e. the second member of the pair comes first).
inline std::ostream& operator<< (std::ostream& Stream, const Slice& slice)
{
    const auto& [total, current] = slice;
    return Stream << current << '/' << total;
}
46 
80 class Flow {
81 
82  int steering;
83 
84  std::vector<PluginLoader> plugin_loaders;
85  std::vector<std::filesystem::path> inputs;
86 
87  std::unique_ptr<ChainSlice> tIn;
88  std::shared_ptr<TFile> fOut;
89  std::unique_ptr<TTree> tOut;
90 
91  std::map<std::string, std::any> branches;
92 
95  template<typename T> requires std::is_compound<T>::value
96  std::shared_ptr<T*> branch_cast (std::any& a)
97  try {
98  return std::any_cast<std::shared_ptr<T*>>(a);
99  }
100  catch (const std::bad_any_cast& e) {
101  BOOST_THROW_EXCEPTION(e);
102  }
103 
106  template<typename T> requires std::is_fundamental<T>::value
107  std::shared_ptr<T> branch_cast (std::any& a)
108  try {
109  return std::any_cast<std::shared_ptr<T>>(a);
110  }
111  catch (const std::bad_any_cast& e) {
112  BOOST_THROW_EXCEPTION(e);
113  }
114 
117  template<typename T> requires std::is_compound<T>::value
118  T * GetBranchAddress (std::shared_ptr<T*> ptr)
119  {
120  return *ptr;
121  }
122 
125  template<typename T> requires std::is_fundamental<T>::value
126  T * GetBranchAddress (std::shared_ptr<T> ptr)
127  {
128  return ptr.get();
129  }
130 
133  template<typename T>
134  T * GetBranchAddress (const std::string& name)
135  {
136  using namespace std;
137  any& a = branches[name];
138  shared_ptr ptr = branch_cast<T>(a);
139  return GetBranchAddress<T>(ptr);
140  }
141 
144  template<typename T> T * NoBranch (const std::string& name, BranchMode mode)
145  {
146  if (mode == facultative)
147  return nullptr;
148 
149  namespace DE = Darwin::Exceptions;
150  std::string what = name + " branch could not be found";
151  BOOST_THROW_EXCEPTION( DE::BadInput(what.c_str(), *tIn) );
152  }
153 
154 public:
157  std::string DumpActiveBranches () const
158  {
159  auto keys = branches | std::views::keys;
160  return std::accumulate(keys.begin(), keys.end(), std::string(),
161  [](const std::string& str, const std::string& branch_name) {
162  return str + ' ' + branch_name;
163  });
164  }
165 
170  Flow (int = none,
171  const std::vector<std::filesystem::path>& = {}
172  );
173 
178  ~Flow ();
179 
182  ChainSlice * GetInputTree
183  (const Slice,
184  const std::string& = "events");
185 
188  ChainSlice * GetInputTree
189  (const std::string& = "events");
190 
196  template<typename THX = TH1, size_t N>
197  std::array<std::unique_ptr<THX>, N> GetInputHists
198  (const std::array<std::string, N>& names = {}
199  )
200  {
201  using namespace std;
202  namespace fs = filesystem;
203 
204  if (inputs.size() == 0)
205  BOOST_THROW_EXCEPTION( invalid_argument("Empty list of input files") );
206 
207  array<unique_ptr<THX>, N> sums;
208  for (const fs::path& input: inputs) {
209  auto fIn = make_unique<TFile>(input.c_str(), "READ");
210  for (size_t i = 0; i < N; ++i) {
211  const string& name = names[i];
212  unique_ptr<THX> h(fIn->Get<THX>(name.c_str()));
213  if (!h) {
214  namespace DE = Darwin::Exceptions;
215  BOOST_THROW_EXCEPTION(
216  DE::BadInput(Form("`%s` cannot be found in (one of) the "
217  " file(s).", name.c_str()), fIn));
218  }
219  if (sums[i])
220  sums[i]->Add(h.get());
221  else {
222  sums[i] = std::move(h);
223  sums[i]->SetDirectory(nullptr);
224  }
225  }
226  }
227  return sums;
228  }
229 
235  template<typename THX = TH1, typename... Args>
236  auto GetInputHists (const Args... args)
237  {
238  constexpr const size_t N = sizeof...(args);
239  std::array<std::string, N> names {{ args... }};
240  return GetInputHists<THX, N>(names);
241  }
242 
247  template<typename THX = TH1>
248  std::unique_ptr<THX> GetInputHist
249  (const std::string& name)
250  {
251  auto hists = GetInputHists<THX,1>({name});
252  THX * hist = hists.front().release();
253  return std::unique_ptr<THX>(hist);
254  }
255 
269  TTree * GetOutputTree (std::shared_ptr<TFile> = {},
270  const std::source_location = std::source_location::current());
271 
274  TTree * GetOutputTree (const std::filesystem::path&,
275  const std::source_location = std::source_location::current());
276 
279  void SetOutputFile (std::shared_ptr<TFile> fOut) { this->fOut = fOut; }
280 
283  TFile * GetOutputFile () { return fOut.get(); }
284 
287  std::pair<TFile *, TTree *> GetOutput (const std::filesystem::path&,
288  const std::source_location = std::source_location::current());
289 
300  template<typename T>
302  const std::string& name,
303  BranchMode mode = mandatory)
304  {
305  using namespace std;
306 
307  if (!tIn)
308  BOOST_THROW_EXCEPTION( invalid_argument("`GetInputTree()` should "
309  "be called before declaring a read-only branch") );
310 
311  if (!branches.contains(name)) {
312 
313  any branch;
314  if constexpr (is_compound<T>::value)
315  branch = make_shared<T*>();
316  else
317  branch = make_shared<T>();
318 
319  if (steering & verbose)
320  cout << "Flow: loading read-only branch `" << name << "`" << endl;
321 
322  if (tIn->GetBranch(name.c_str()) == nullptr)
323  return NoBranch<T>(name, mode);
324 
325  int err = tIn->SetBranchAddress(name.c_str(), branch_cast<T>(branch).get());
326  if (steering & verbose)
327  cout << "Flow: `TTree::SetBranchAddress()` returned " << to_string(err)
328  << " (check `TTree::ESetBranchAddressStatus` for the meaning)."
329  << endl;
330  if (err < 0) {
331  string what = "`"s + name + "` branch could not be set. "s;
332  if (mode == facultative) {
333  if (steering & verbose)
334  cout << orange << "Flow: " << what << def << endl;
335  return nullptr;
336  }
337 
338  namespace DE = Darwin::Exceptions;
339  BOOST_THROW_EXCEPTION( DE::BadInput(what.c_str(), *tIn) );
340  }
341  branches.emplace(name, branch);
342  }
343 
344  return GetBranchAddress<T>(name);
345  }
346 
355  template<typename T>
356  T * GetBranchWriteOnly (const std::string& name)
357  {
358  using namespace std;
359 
360  if (!tOut)
361  BOOST_THROW_EXCEPTION( invalid_argument("`GetOutputTree()` should "
362  "be called before") );
363 
364  if (!branches.contains(name)) {
365  if constexpr (is_compound<T>::value)
366  branches.emplace(name, make_shared<T*>());
367  else
368  branches.emplace(name, make_shared<T>());
369  }
370 
371  any branch = branches[name];
372  shared_ptr ptr = branch_cast<T>(branch);
373 
374  if (steering & verbose)
375  cout << "Flow: setting up a write-only branch for `" << name << "`" << endl;
376 
377  if (tOut->Branch(name.c_str(), ptr.get()) == nullptr) {
378  namespace DE = Darwin::Exceptions;
379  string what = name + " branch could not be set up";
380  BOOST_THROW_EXCEPTION( DE::BadInput(what.c_str(), *tOut) );
381  }
382 
383  return GetBranchAddress<T>(ptr);
384  }
385 
394  template<typename T>
395  T * GetBranchReadWrite (const std::string& name,
396  BranchMode mode = mandatory)
397  {
398  using namespace std;
399 
400  if (!tIn)
401  BOOST_THROW_EXCEPTION( invalid_argument("`GetInputTree()` should "
402  "be called before declaring a read-write branch") );
403 
404  if (!tOut)
405  BOOST_THROW_EXCEPTION( invalid_argument("`GetOutputTree()` should "
406  "be called before declaring a read-write branch") );
407 
408  if (!branches.contains(name)) {
409  if (GetBranchReadOnly<T>(name, mode) == nullptr)
410  return NoBranch<T>(name, mode);
411  if (steering & Friend)
412  GetBranchWriteOnly<T>(name);
413  }
414 
415  return GetBranchAddress<T>(name);
416  }
417 
424  template<Plugin P, std::ranges::range R> std::vector<P*> GetPlugins (R&& plugin_paths)
425  {
426  using namespace std;
427  namespace fs = filesystem;
428 
429  for (fs::path file: plugin_paths)
430  plugin_loaders.emplace_back(file, steering);
431 
432  auto plugins = plugin_loaders | views::transform(&PluginLoader::Get<P>)
433  | views::join
434  | views::common;
435  return vector<P*>(plugins.begin(), plugins.end());
436  }
437 };
438 
439 } // namespace Darwin::Tools
440 
441 using Darwin::Tools::operator<<;
DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.name
name
Definition: DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.py:48
Darwin::Tools::Flow::branches
std::map< std::string, std::any > branches
pointers to mounted branches
Definition: Flow.h:91
Darwin::Tools::mandatory
@ mandatory
mounting branch is mandatory
Definition: Flow.h:32
Darwin::Tools::Flow::plugin_loaders
std::vector< PluginLoader > plugin_loaders
Definition: Flow.h:84
Darwin::Tools::Flow
User-friendly handling of input and output n-tuples.
Definition: Flow.h:80
exceptions.h
Step::def
static const char * def
Definition: Step.h:36
Darwin::Tools::Friend
@ Friend
activate -F to only fill the new branches
Definition: Options.h:33
Ntupliser_cfg.args
args
Definition: Ntupliser_cfg.py:11
Darwin::Tools::Flow::GetBranchWriteOnly
T * GetBranchWriteOnly(const std::string &name)
Wrapper to initialise write-only branches.
Definition: Flow.h:356
Darwin::Tools::Slice
std::pair< int, int > Slice
total number of slices (>0) / current slice index (>0)
Definition: Flow.h:38
Darwin::Tools::Flow::tOut
std::unique_ptr< TTree > tOut
output tree
Definition: Flow.h:89
Darwin::Tools::Flow::fOut
std::shared_ptr< TFile > fOut
output ROOT file
Definition: Flow.h:88
Darwin::Tools::BranchMode
BranchMode
Definition: Flow.h:31
Darwin::Tools::none
@ none
default (for simple executables)
Definition: Options.h:28
Darwin::Tools::Flow::GetBranchAddress
requires std::is_fundamental< T >::value T * GetBranchAddress(std::shared_ptr< T > ptr)
Get branch address for fundamental types.
Definition: Flow.h:126
Darwin::Tools::Flow::GetBranchReadWrite
T * GetBranchReadWrite(const std::string &name, BranchMode mode=mandatory)
Wrapper to initialise read-write branches.
Definition: Flow.h:395
Darwin::Tools::Flow::branch_cast
requires std::is_fundamental< T >::value std::shared_ptr< T > branch_cast(std::any &a)
Casts from std::any to shared_ptr
Definition: Flow.h:107
Darwin::Tools::Flow::GetBranchAddress
requires std::is_compound< T >::value T * GetBranchAddress(std::shared_ptr< T * > ptr)
Get branch address for compound types.
Definition: Flow.h:118
Darwin::Tools::Flow::GetInputHists
std::array< std::unique_ptr< THX >, N > GetInputHists(const std::array< std::string, N > &names={})
Load ROOT histograms from a list of files.
Definition: Flow.h:198
Darwin::Tools::Flow::GetInputHists
auto GetInputHists(const Args... args)
Load ROOT histograms from a list of files.
Definition: Flow.h:236
Darwin::Tools::Flow::Flow
Flow(int=none, const std::vector< std::filesystem::path > &={})
Constructor.
Definition: Flow.cc:11
Looper.h
Darwin::Tools::Flow::NoBranch
T * NoBranch(const std::string &name, BranchMode mode)
Wrapper either to return a nullptr or to throw an error.
Definition: Flow.h:144
plugins
PluginsVec * plugins()
Definition: IPlugin.h:77
IPlugin.h
Darwin::Exceptions
Handling of exceptions.
Definition: darwin.h:37
Darwin::Tools::verbose
@ verbose
bit for debug mode (-v is always available)
Definition: Options.h:29
orange
static const char * orange
Definition: colours.h:6
DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.input
input
Definition: DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.py:35
Darwin::Tools::operator<<
std::ostream & operator<<(std::ostream &Stream, const Darwin::Tools::Slice &slice)
Prints the current slice and the total number of slices.
Definition: Flow.h:42
FriendUtils.h
Darwin::Tools::Flow::GetInputTree
ChainSlice * GetInputTree(const Slice, const std::string &="events")
Load chain from a list of files with non-empty trees.
Definition: Flow.cc:67
Darwin::Tools::Flow::DumpActiveBranches
std::string DumpActiveBranches() const
Returns a list of all active branches, formatted as a string.
Definition: Flow.h:157
Darwin::Tools::Flow::branch_cast
requires std::is_compound< T >::value std::shared_ptr< T * > branch_cast(std::any &a)
Casts from std::any to shared_ptr
Definition: Flow.h:96
PluginLoader.h
Darwin::Tools::Flow::inputs
std::vector< std::filesystem::path > inputs
ROOT files or directories.
Definition: Flow.h:85
Darwin::Tools
Classes and functions related to the framework.
Definition: Darwin_dict.cxx:1144
Darwin::Tools::Flow::GetOutputTree
TTree * GetOutputTree(std::shared_ptr< TFile >={}, const std::source_location=std::source_location::current())
Create an output TTree object.
Definition: Flow.cc:89
Darwin::Tools::Flow::GetOutput
std::pair< TFile *, TTree * > GetOutput(const std::filesystem::path &, const std::source_location=std::source_location::current())
Get both the output file and the output tree in one go.
Definition: Flow.cc:164
Darwin::Tools::Flow::GetBranchReadOnly
T * GetBranchReadOnly(const std::string &name, BranchMode mode=mandatory)
Wrapper to initialise read-only branches.
Definition: Flow.h:301
join
PseudoJet join(const std::vector< PseudoJet > &pieces)
Definition: fjcore.hh:1245
Darwin::Tools::Flow::GetPlugins
std::vector< P * > GetPlugins(R &&plugin_paths)
Load and get plugins.
Definition: Flow.h:424
Darwin::Tools::Flow::steering
int steering
steering from Options, mostly useful for friends
Definition: Flow.h:82
Darwin::Tools::Flow::~Flow
~Flow()
Destructor.
Definition: Flow.cc:19
Darwin::Tools::Flow::tIn
std::unique_ptr< ChainSlice > tIn
input chain
Definition: Flow.h:87
Darwin::Tools::Flow::GetBranchAddress
T * GetBranchAddress(const std::string &name)
Get branch address from name (universal)
Definition: Flow.h:134
Darwin::Tools::Flow::GetInputHist
std::unique_ptr< THX > GetInputHist(const std::string &name)
Load a single ROOT histogram from a list of files.
Definition: Flow.h:249
Darwin::Tools::Flow::SetOutputFile
void SetOutputFile(std::shared_ptr< TFile > fOut)
Set the output file where the output TTree should be saved.
Definition: Flow.h:279
Darwin::Tools::Flow::GetOutputFile
TFile * GetOutputFile()
Get a raw pointer to the output file.
Definition: Flow.h:283
Darwin::Exceptions::BadInput
Generic exception for ill-defined input (before the event loop).
Definition: exceptions.h:83
Darwin::Tools::facultative
@ facultative
mounting branch is facultative
Definition: Flow.h:33