DAS  3.0
Das Analysis System
Step Namespace Reference

Namespaces

 Basis
 

Classes

struct  Correlator
 
struct  FunctionalForm
 
struct  Polynomial
 
struct  Result
 

Enumerations

enum  EarlyStopping { None, Chi2ndf, fTest, xVal }
 

Functions

std::ostream & operator<< (std::ostream &stream, const Result &result)
 
double Ftest (const Result &former, const Result &current)
 
double Xval (const Result &current, const Result &former)
 
void PrintAllTests (std::function< double(const Result &, const Result &)> test, std::ostream &stream=std::cout, size_t best=0)
 
double identity (double x)
 
template<double(*)(double, int) T = Basis::Chebyshev, double(*)(double) Log = identity, double(*)(double) Exp = identity>
TF1 * GetSmoothFit (TH1 *h, TH2 *cov, int im, int iM, unsigned int maxdegree, EarlyStopping criterion=None, double stopParam=1, UInt_t nrep=0, std::ostream &stream=std::cout)
 
template<double(*)(double, int) T = Basis::Chebyshev, double(*)(double) Log = identity, double(*)(double) Exp = identity>
TF1 * GetSmoothFit (TH1 *h, TH2 *cov, unsigned int maxdegree, EarlyStopping criterion=EarlyStopping::None, double stopParam=1, UInt_t nrep=0, std::ostream &stream=std::cout)
 
template<double(*)(double, int) T = Basis::Chebyshev, double(*)(double) Log = identity, double(*)(double) Exp = identity>
TF1 * GetSmoothFit (TH1 *h, int im, int iM, unsigned int maxdegree, EarlyStopping criterion=EarlyStopping::None, double stopParam=1, UInt_t nrep=0, std::ostream &stream=std::cout)
 
template<double(*)(double, int) T = Basis::Chebyshev, double(*)(double) Log = identity, double(*)(double) Exp = identity>
TF1 * GetSmoothFit (TH1 *h, unsigned int maxdegree, EarlyStopping criterion=EarlyStopping::None, double stopParam=1, UInt_t nrep=0, std::ostream &stream=std::cout)
 

Variables

static const char * green = "\x1B[32m"
 
static const char * red = "\x1B[31m"
 
static const char * bold = "\e[1m"
 
static const char * def = "\e[0m"
 
static const auto eps = std::numeric_limits<double>::epsilon()
 
static const auto inf = std::numeric_limits<double>::infinity()
 
static bool verbose = false
 
static std::deque< Resultchi2s
 
static auto bestResult = chi2s.begin()
 

Detailed Description

Smoothness Test with Expansion of Polynmials.

Enumeration Type Documentation

◆ EarlyStopping

Enumerator
None 

iterate until max value is reached

Chi2ndf 

stop as soon as Chi2ndf has reached a plateau, or as soon as it is compatible with 1

fTest 

include additional parameter as long as F-test provides a p-value beyond a certain threshold

xVal 

include additional parameter as long as cross validation provides a p-value beyond a certain threshold

430  {
431  None,
432  Chi2ndf,
433  fTest,
434  xVal
435 };

Function Documentation

◆ Ftest()

double Step::Ftest ( const Result former,
const Result current 
)

F-test

Fisher, R. A. (1922). "On the interpretation of chi2 from contingency tables, and the calculation of P". Journal of the Royal Statistical Society. 85 (1): 87–94. doi:10.2307/2340521. JSTOR 2340521.

Wikipedia

108 {
109  using namespace std;
110  if (!former.valid || !current.valid) throw -1.;
111  double RSS1 = former.chi2,
112  RSS2 = current.chi2;
113  size_t p1 = former.X.size(),
114  p2 = current.X.size();
115  assert(p2 > p1);
116  int ndf = current.ndf; // = nbins - p2
117  double x = ((RSS1-RSS2)/(p2-p1))/(RSS2/ndf);
118  double pvalue = TMath::FDistI(x, p2-p1, ndf);
119  return pvalue;
120 }

◆ GetSmoothFit() [1/4]

TF1* Step::GetSmoothFit ( TH1 *  h,
int  im,
int  iM,
unsigned int  maxdegree,
EarlyStopping  criterion = EarlyStopping::None,
double  stopParam = 1,
UInt_t  nrep = 0,
std::ostream &  stream = std::cout 
)

Function overloading, in case of no bin-to-bin correlations.

667 {
668  return GetSmoothFit<T,Log,Exp>(h, nullptr, im, iM,
669  maxdegree, criterion, stopParam, nrep, stream);
670 }

◆ GetSmoothFit() [2/4]

TF1* Step::GetSmoothFit ( TH1 *  h,
TH2 *  cov,
int  im,
int  iM,
unsigned int  maxdegree,
EarlyStopping  criterion = None,
double  stopParam = 1,
UInt_t  nrep = 0,
std::ostream &  stream = std::cout 
)

Step::GetSmoothFit() is the only function to call to get the smooth fit. It assumes that the histogram and the covariance matrix are already normalised to the bin width (i.e. cross section, not count).

Todo:
and in case of F-test failure?
Parameters
hinput histogram
covcovariance matrix in histogram format
imfirst bin to consider in the fit
iMlast fbin to consider in the fit
maxdegreehighest possible degree in fit
criterionearly stopping criterion
stopParamearly stopping parameter: if nrep > 0, threshold for x-validation; else #sigmas to satisfy
nrepnumber of replicas
streamstream (e.g. `cout` or file)
455 {
456  using namespace std;
457 
458  assert(h != nullptr);
459 
460  if (verbose)
461  stream << "im = " << im << ", iM = " << iM << '\n';
462 
463  unsigned int nbins = iM-im+1;
464  if (verbose)
465  stream << "nbins = " << nbins << '\n';
466  assert(nbins > 2);
467 
468  double m = h->GetBinLowEdge(im),
469  M = h->GetBinLowEdge(iM+1);
470  if (verbose)
471  stream << "m = " << m << ", M = " << M << '\n';
472 
473  Polynomial<T,Log,Exp> pol(maxdegree, m, M, stream);
474  Correlator cor(h, cov, im, iM, pol, stream);
475  ROOT::Math::Functor functor(cor, pol.npars);
476 
477  auto minimiser = ROOT::Math::Factory::CreateMinimizer("Minuit2", "Migrad");
478  const int Ncalls = 1e6;
479  minimiser->SetMaxFunctionCalls(Ncalls);
480  minimiser->SetFunction(functor);
481  minimiser->SetTolerance(0.001);
482  minimiser->SetPrintLevel(verbose);
483 
484  double fm = Log(h->GetBinContent(im)),
485  fM = Log(h->GetBinContent(iM));
486  auto step = 0.001;
487  double p0 = (fM+fm)/2, p1 = (fM-fm)/2;
488  minimiser->SetVariable(0,"p0",p0,step);
489  minimiser->SetVariable(1,"p1",p1,step);
490  for (unsigned int d = 2; d <= maxdegree; ++d) {
491  minimiser->SetVariable(d,Form("p%d", d),0.,step);
492  minimiser->FixVariable(d);
493  }
494 
495  // running
496  chi2s.clear();
497  chi2s.push_back({false, inf, nbins-2, {p0,p1}});
498  bestResult = chi2s.begin();
499  bool found = false;
500  while (chi2s.size() <= maxdegree) {
501  unsigned int degree = chi2s.size();
502  minimiser->ReleaseVariable(degree);
503  bool valid = minimiser->Minimize();
504 
505  const double * X = minimiser->X();
506  assert(X != nullptr);
507  double chi2 = cor(X);
508 
509  assert(isnormal(chi2));
510  unsigned int npars = minimiser->NFree(),
511  ndf = nbins - npars;
512  assert(npars == degree+1);
513 
514  const auto& former = chi2s.back();
515  chi2s.push_back({valid, chi2, ndf, vector<double>(X,X+npars)});
516  auto& current = chi2s.back();
517 
518  switch (minimiser->Status()) {
519  case 0: stream << current;
520  {
521  // check EarlyStopping::Chi2ndf criterion
522  bool plateau = current.x2n() > former.x2n(),
523  goodchi2 = abs(current.x2n()-1) <= stopParam*current.x2nErr();
524  if (plateau)
525  stream << red << "Chi2/ndf has increased.\n" << def;
526  else if (goodchi2)
527  stream << green << "Chi2 ~ ndf.\n" << def;
528 
529  // check EarlyStopping::fTest criterion
530  double ftestRes = 0;
531  if (bestResult != chi2s.begin())
532  try {
533  ftestRes = Ftest(former, current);
534  stream << "F-test result: " << ftestRes << '\n';
535  }
536  catch (double) { stream << red << "No F-test possible.\n" << def; }
537  bool fvalid = ftestRes > stopParam;
538 
539  // check EarlyStopping::xVal criterion
540  double xvalFrac = 0;
541  if (nrep > 0) {
542  current.chi2sT.reserve(nrep);
543  current.chi2sV.reserve(nrep);
544  for (UInt_t seed = 1; seed <= nrep; ++seed) {
545  cor.SetReplica(seed);
546  for (unsigned int i = 0; i <= degree; ++i)
547  minimiser->SetVariableValue(i, current.X[i]);
548  const double * X = minimiser->X();
549  assert(X != nullptr);
550  current.chi2sT.push_back(cor(X));
551  cor.SetReplica(seed+nrep);
552  current.chi2sV.push_back(cor(X));
553  }
554  cor.UnsetReplica();
555  for (unsigned int i = 0; i <= degree; ++i)
556  minimiser->SetVariableValue(i, current.X[i]);
557  X = minimiser->X();
558  xvalFrac = Xval(current, former);
559  }
560  bool xvalid = 1.-xvalFrac > stopParam;
561  if (nrep > 0)
562  stream << "Fraction of validation replicas with better fit Chi2/ndf = " << (xvalid ? green : red) << xvalFrac << def << '\n';
563 
564  // apply early stopping criterion
565  if (!found)
566  switch (criterion) {
568  found = plateau || goodchi2;
569  if (plateau) --bestResult;
570  break;
572  found = !fvalid;
573  break;
574  case EarlyStopping::xVal:
575  assert(nrep > 0);
576  found = !xvalid;
577  break;
578  case EarlyStopping::None:
579  found = false;
580  }
581  }
582  break;
583  case 1: stream << red << "Warning: Fit parameter covariance was made positive defined\n" << def;
584  for (unsigned int i = 0; i < npars; ++i) {
585  for (unsigned int j = 0; j < npars; ++j)
586  stream << setw(12) << minimiser->CovMatrix(i,j);
587  stream << '\n';
588  }
589  break;
590  case 2: stream << red << "Warning: Hesse is invalid\n" << def;
591  {
592  double * hessian = nullptr;
593  minimiser->GetHessianMatrix(hessian);
594  assert(hessian != nullptr);
595  for (unsigned int i = 0; i < npars; ++i) {
596  for (unsigned int j = 0; j < npars; ++j)
597  stream << setw(12) << hessian[i*pol.npars + j];
598  stream << '\n';
599  }
600  }
601  break;
602  case 3: stream << red << "Warning: Expected Distance reached from the Minimum (EDM = " << minimiser->Edm() << ") is above max\n" << def;
603  break;
604  case 4: stream << red << "Warning: Reached call limit (Ncalls = " << Ncalls << ")\n" << def;
605  break;
606  case 5:
607  default: stream << red << "Warning: Unknown failure\n" << def;
608  }
609 
610  if (!found) ++bestResult;
611 
612  if (verbose) {
613  stream << "Correlations of the fit parameters:\n";
614  for (unsigned int i = 0; i < npars; ++i) {
615  for (unsigned int j = 0; j < npars; ++j)
616  stream << setw(12) << minimiser->Correlation(i,j);
617  stream << '\n';
618  }
619  }
620 
621  if (current.ndf == 2) {
622  stream << red << "Only 2 d.o.f. left. Stopping.\n" << def;
623  break;
624  }
625 
626  stream << "---\n";
627  } // end of `while`-loop on number of parameters
628  chi2s.pop_front(); // removing first entry, which is not a fit (chi2 = inf)
629 
630  const char * name = Form("smooth_%s", h->GetName());
631  TF1 * f = bestResult->GetTF1(name, pol, m, M);
632  stream << "Best result: " << f->GetTitle() << "\n-----\n";
633 
634  stream << bold << "Running F-test:\n" << def;
635  PrintAllTests(Ftest, stream, bestResult->degree());
636 
637  if (nrep > 0) {
638  stream << bold << "Running Cross-validation:\n" << def;
639  PrintAllTests(Xval, stream, bestResult->degree());
640  }
641 
642  return f;
643 }

◆ GetSmoothFit() [3/4]

TF1* Step::GetSmoothFit ( TH1 *  h,
TH2 *  cov,
unsigned int  maxdegree,
EarlyStopping  criterion = EarlyStopping::None,
double  stopParam = 1,
UInt_t  nrep = 0,
std::ostream &  stream = std::cout 
)

Function overloading, where the full range of the integral is used in the fit.

653 {
654  int im = 1, iM = h->GetNbinsX();
655  return GetSmoothFit<T,Log,Exp>(h, cov, im, iM,
656  maxdegree, criterion, stopParam, nrep, stream);
657 }

◆ GetSmoothFit() [4/4]

TF1* Step::GetSmoothFit ( TH1 *  h,
unsigned int  maxdegree,
EarlyStopping  criterion = EarlyStopping::None,
double  stopParam = 1,
UInt_t  nrep = 0,
std::ostream &  stream = std::cout 
)

Function overloading, in case of no bin-to-bin correlations and with fit in full range.

681 {
682  return GetSmoothFit<T,Log,Exp>(h, nullptr,
683  maxdegree, criterion, stopParam, nrep, stream);
684 }

◆ identity()

double Step::identity ( double  x)

Identical operation, used as default for template arguments of Step::GetSmoothFit().

188 { return x; }

◆ operator<<()

std::ostream& Step::operator<< ( std::ostream &  stream,
const Result result 
)

Operator overloading to print result from Step::chi2s.

77 {
78  using namespace std;
79  if (result.valid) {
80  stream << bold << "Chi2/ndf = " << result.chi2 << " / " << result.ndf
81  << " = " << result.x2n() << "\u00B1" << result.x2nErr()
82  << " (prob = " << result.prob() << ")\n" << def;
83  size_t nrep = result.chi2sT.size();
84  assert(nrep == result.chi2sV.size());
85  stream << result.X.size() << " parameters: ";
86  for (auto x: result.X) stream << std::setw(12) << x;
87  stream << '\n';
88  if (nrep > 0)
89  stream << nrep << " training replicas: Chi2T/" << result.ndf << " = " << result.x2nT() << '\n'
90  << nrep << " validation replicas: Chi2V/" << (result.ndf + result.X.size()) << " = " << result.x2nV() << '\n';
91  }
92  else stream << "Not a fit\n";
93  return stream;
94 }

◆ PrintAllTests()

void Step::PrintAllTests ( std::function< double(const Result &, const Result &)>  test,
std::ostream &  stream = std::cout,
size_t  best = 0 
)

Print all F-tests based on the direct-fit results stored in Step::chi2s.

148 {
149  using namespace std;
150 
151  int nGoodFits = 0;
152  for (auto result = begin(chi2s); result != end(chi2s); ++result)
153  if (result->valid) ++nGoodFits;
154  if (nGoodFits < 2) {
155  stream << red << "Only one valid fit. No test to run.\n" << def;
156  return;
157  }
158  stream << " " << bold;
159  for (auto result = begin(chi2s); result != prev(end(chi2s)); ++result) {
160  // loop over all results except the highest one
161  size_t deg = result->degree();
162  if (deg == best) stream << green;
163  if (!result->valid) stream << red;
164  stream << setw(10) << deg << def;
165  }
166  stream << '\n';
167  for (auto result2 = next(begin(chi2s)); result2 != end(chi2s); ++result2) {
168  // loop over all results
169  size_t deg2 = result2->degree();
170  if (deg2 == best) stream << green;
171  if (!result2->valid) stream << red;
172  stream << setw(3) << deg2 << def;
173  if (deg2 == best) stream << def;
174  stream << setprecision(4);
175  for (auto result1 = begin(chi2s); result1 != result2; ++result1) {
176  // compare to all results with less parameters
177  try { stream << setw(10) << test(*result1, *result2); }
178  catch (double) { stream << red << " -" << def; }
179  }
180  stream << bold << '\n';
181  }
182  stream << def;
183 }

◆ Xval()

double Step::Xval ( const Result current,
const Result former 
)

Cross-validation with replicas

Wikipedia

127 {
128  if (former.chi2sV.size() != current.chi2sV.size())
129  return 0;
130 
131  size_t nrep = former.chi2sV.size();
132  if (nrep == 0) return 0;
133 
134  int Nf = former .ndf + former .X.size(),
135  Nc = current.ndf + current.X.size();
136  double nBetter = 0;
137  for (size_t i = 0; i < nrep; ++i)
138  if (former.chi2sV[i]/Nf < current.chi2sV[i]/Nc)
139  ++nBetter;
140  nBetter /= nrep;
141  return nBetter;
142 }

Variable Documentation

◆ bestResult

auto bestResult = chi2s.begin()
static

best result from last call of Step::GetSmoothFit()

◆ bold

const char * bold = "\e[1m"
static

◆ chi2s

std::deque<Result> chi2s
static

results from last call of Step::GetSmoothFit()

◆ def

const char * def = "\e[0m"
static

◆ eps

const auto eps = std::numeric_limits<double>::epsilon()
static

◆ green

const char* green = "\x1B[32m"
static

◆ inf

const auto inf = std::numeric_limits<double>::infinity()
static

◆ red

const char * red = "\x1B[31m"
static

◆ verbose

bool verbose = false
static
DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.name
name
Definition: DYToLL_M-50_13TeV_pythia8_cff_GEN_SIM_RECOBEFMIX_DIGI_L1_DIGI2RAW_L1Reco_RECO.py:48
Step::Xval
double Xval(const Result &current, const Result &former)
Definition: Step.h:126
Step::xVal
@ xVal
include additional parameter as long as cross validation provides a p-value beyond a certain threshol...
Definition: Step.h:434
Step::verbose
static bool verbose
Definition: Step.h:40
Step::PrintAllTests
void PrintAllTests(std::function< double(const Result &, const Result &)> test, std::ostream &stream=std::cout, size_t best=0)
Print all F-tests based on the direct-fit results stored in Step::chi2s.
Definition: Step.h:146
Step::def
static const char * def
Definition: Step.h:36
Step::Ftest
double Ftest(const Result &former, const Result &current)
Definition: Step.h:107
Ntupliser_cfg.f
f
Definition: Ntupliser_cfg.py:255
Step::chi2s
static std::deque< Result > chi2s
results from last call of Step::GetSmoothFit()
Definition: Step.h:96
Step::red
static const char * red
Definition: Step.h:34
Step::bestResult
static auto bestResult
best result from last call of Step::GetSmoothFit()
Definition: Step.h:97
Step::green
static const char * green
Definition: Step.h:33
Step::Chi2ndf
@ Chi2ndf
stop as soon as Chi2ndf has reached a plateau, or as soon as it is compatible with 1
Definition: Step.h:432
Step::fTest
@ fTest
include additional parameter as long as F-test provides a p-value beyond a certain threshold
Definition: Step.h:433
Step::None
@ None
iterate until max value is reached
Definition: Step.h:431
Step::inf
static const auto inf
Definition: Step.h:39
Step::bold
static const char * bold
Definition: Step.h:35