makePUReWeightJSON Namespace Reference

Functions

def getHist (fName, hName="pileup")
 
def normAndExtract (hist, norm=1.)
 
def getRatio (numBins, numCont, denBins, denCont)
 
def main ()
 

Variables

 logger = logging.getLogger(__name__)
 
dictionary mcPUProfiles
 

Detailed Description

A script to generate a BinnedValues-JSON file for pileup reweighting of MC samples.
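As an overview, the weights are obtained by dividing the normalised data pileup profile by the MC one. A minimal sketch of the flow implemented by main(), assuming the module can be imported as makePUReWeightJSON (the file name and profile tag below are placeholders):

from makePUReWeightJSON import getHist, normAndExtract, getRatio, mcPUProfiles

mcPUBins, mcPUVals = mcPUProfiles["some_profile_tag"]   # placeholder tag, see --listmcprofiles
fNom, hNom = getHist("pileup_data_nominal.root")        # placeholder file, produced with pileupCalc.py
nomBins, nomCont = normAndExtract(hNom)                 # normalise and extract edges and contents
ratioBins, nomRatio = getRatio(nomBins, nomCont, mcPUBins, mcPUVals)  # per-bin data/MC weights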

Function Documentation

◆ getHist()

def makePUReWeightJSON.getHist(fName, hName="pileup")
def getHist(fName, hName="pileup"):
    from cppyy import gbl
    tf = gbl.TFile.Open(fName)
    if not tf:
        raise RuntimeError("Could not open file '{0}'".format(fName))
    hist = tf.Get(hName)
    if not hist:
        raise RuntimeError("No histogram with name '{0}' found in file '{1}'".format(hName, fName))
    # return the file together with the histogram: the histogram is owned by the file,
    # so the TFile must stay alive while the histogram is used
    return tf, hist
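The TFile is returned together with the histogram because ROOT histograms are owned by the file they were read from; a short usage sketch (the file name is a placeholder):

tf, hist = getHist("pileup_data_nominal.root", hName="pileup")
print(hist.GetNbinsX())
# keep a reference to tf for as long as hist is used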

◆ getRatio()

def makePUReWeightJSON.getRatio(numBins, numCont, denBins, denCont)
def getRatio(numBins, numCont, denBins, denCont):
    if not all(db in numBins for db in denBins):
        raise RuntimeError("Numerator (data) needs to have at least the bin edges that are in the denominator (MC)")
    xMinC, xMaxC = denBins[0], denBins[-1]
    inMn = np.where(numBins == xMinC)[0][0]
    inMx = np.where(numBins == xMaxC)[0][0]
    ratio = np.zeros((inMx-inMn,))
    di = 0
    # walk through the (finer) numerator bins, advancing the denominator bin index as needed
    for ni in range(inMn, inMx):
        if numBins[ni+1] > denBins[di+1]:
            di += 1
        assert ( denBins[di] <= numBins[ni] ) and ( numBins[ni+1] <= denBins[di+1] )
        if denCont[di] == 0.:
            ratio[ni-inMn] = 1.
        else:
            ratio[ni-inMn] = numCont[ni]/denCont[di]
    bR = np.array(numBins[inMn:inMx+1])
    # stretch the outermost edges to cover the full numerator range
    bR[0] = numBins[0]
    bR[-1] = numBins[-1]
    return bR, ratio
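For illustration (not part of the script), a small example where the numerator (data) binning refines the denominator (MC) binning; each fine data bin is divided by the coarse MC bin that contains it:

import numpy as np
numBins = np.array([0., 1., 2., 3., 4.]); numCont = np.array([3., 4., 2., 1.])
denBins = np.array([0., 2., 4.]);         denCont = np.array([10., 5.])
bR, ratio = getRatio(numBins, numCont, denBins, denCont)
# bR    -> [0. 1. 2. 3. 4.]
# ratio -> [0.3, 0.4, 0.4, 0.2]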

◆ main()

def makePUReWeightJSON.main()
def main():
    import argparse
    parser = argparse.ArgumentParser(description="Produce a BinnedValues-JSON file for pileup reweighting, using data pileup distributions obtained with `pileupCalc.py -i analysis-lumi-json.txt --inputLumiJSON pileup-json.txt --calcMode true --minBiasXsec MBXSECINNB --maxPileupBin NMAX --numPileupBins N outname.root` (see also https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJSONFileforData#Pileup_JSON_Files_For_Run_II)")
    parser.add_argument("-o", "--output", default="puweights.json", type=str, help="Output file name")
    parser.add_argument("-f", "--format", type=str, choices=["correctionlib", "cp3-llbb"], default="cp3-llbb", help="Output JSON format")
    parser.add_argument("--name", type=str, default="puweights", help="Name of the correction inside the CorrectionSet (only used for the correctionlib format)")
    parser.add_argument("--mcprofile", help="Pileup profile used to generate the MC sample (use --listmcprofiles to see the list of defined profiles)")
    parser.add_argument("--listmcprofiles", action="store_true", help="List the available MC pileup profiles")
    parser.add_argument("--nominal", type=str, help="File with the data (true) pileup distribution histogram assuming the nominal minimum bias cross-section value")
    parser.add_argument("--up", type=str, help="File with the data (true) pileup distribution histogram assuming the nominal+1sigma minimum bias cross-section value")
    parser.add_argument("--down", type=str, help="File with the data (true) pileup distribution histogram assuming the nominal-1sigma minimum bias cross-section value")
    parser.add_argument("--rebin", type=int, help="Factor to rebin the data histograms by")
    parser.add_argument("--makePlot", action="store_true", help="Make a plot of the PU profiles and weights (requires matplotlib)")
    parser.add_argument("mcfiles", nargs="*", help="MC ROOT files to extract a pileup profile from (if used)")
    parser.add_argument("--mctreename", type=str, default="Events", help="Name of the tree to use in mcfiles")
    parser.add_argument("--mcreweightvar", type=str, default="Pileup_nTrueInt", help="Name of the branch in the tree of the mcfiles to use for getting a histogram")
    parser.add_argument("-v", "--verbose", action="store_true", help="Print verbose output")
    parser.add_argument("--gzip", action="store_true", help="Save the output as gzip file")
    args = parser.parse_args()
    logging.basicConfig(level=(logging.DEBUG if args.verbose else logging.INFO))
    if args.makePlot:
        try:
            import matplotlib
            matplotlib.use("agg")
            from matplotlib import pyplot as plt
        except Exception as ex:
            logger.warning("matplotlib could not be imported, so no plot will be produced")
            args.makePlot = False
    if args.gzip:
        try:
            import gzip, io
        except Exception as ex:
            logger.warning("gzip or io could not be imported, output will be stored as a regular file")
            args.gzip = False

    # obtain the MC pileup profile: either from the MC files or from the predefined dictionary
    if args.listmcprofiles:
        logger.info("The known PU profiles are: {0}".format(", ".join(repr(k) for k in mcPUProfiles)))
        return
    elif args.mcfiles:
        if args.mcprofile:
            logger.warning("Both an MC PU profile and MC files were passed - extracting the profile from the files")
        logger.info("Extracting the MC profile from {0} in the {1} tree of: {2}".format(args.mcreweightvar, args.mctreename, ", ".join(args.mcfiles)))
        from cppyy import gbl
        tup = gbl.TChain(args.mctreename)
        for mcfn in args.mcfiles:
            tup.Add(mcfn)
        hMCPU = gbl.TH1F("hMCPU", "MC PU profile", 100, 0., 100.)
        tup.Draw(f"{args.mcreweightvar}>>hMCPU", "", "GOFF")
        mcPUBins, mcPUVals = normAndExtract(hMCPU)
    elif args.mcprofile:
        if args.mcprofile not in mcPUProfiles:
            raise ValueError("No MC PU profile with tag '{0}' is known".format(args.mcprofile))
        mcPUBins, mcPUVals = mcPUProfiles[args.mcprofile]
        if len(mcPUBins) != len(mcPUVals)+1:
            logger.debug("MC profile '{0}' has {1:d} bin edges for {2:d} values".format(args.mcprofile, len(mcPUBins), len(mcPUVals)))
    else:
        raise RuntimeError("Either one of --listmcprofiles or --mcprofile, or a list of MC files to extract a MC profile from, must be passed")

    if not args.nominal:
        raise RuntimeError("No --nominal argument")

    # data pileup profiles and data/MC ratios (nominal, and optionally up/down variations)
    fNom, hNom = getHist(args.nominal)
    if args.rebin:
        hNom.Rebin(args.rebin)
    nomBins, nomCont = normAndExtract(hNom)
    ratioBins, nomRatio = getRatio(nomBins, nomCont, mcPUBins, mcPUVals)

    upCont, upRatio, downCont, downRatio = None, None, None, None
    if bool(args.up) != bool(args.down):
        raise ValueError("If either one of --up and --down is specified, both should be")
    if args.up and args.down:
        fUp, hUp = getHist(args.up)
        if args.rebin:
            hUp.Rebin(args.rebin)
        upBins, upCont = normAndExtract(hUp)
        #if not all(ub == nb for ub, nb in zip(upBins, nomBins)):
        #    raise RuntimeError("Up-variation and nominal binning is different: {0} vs {1}".format(upBins, nomBins))
        _, upRatio = getRatio(upBins, upCont, mcPUBins, mcPUVals)
        fDown, hDown = getHist(args.down)
        if args.rebin:
            hDown.Rebin(args.rebin)
        downBins, downCont = normAndExtract(hDown)
        #if not all(db == nb for db, nb in zip(downBins, nomBins)):
        #    raise RuntimeError("Down-variation and nominal binning is different: {0} vs {1}".format(downBins, nomBins))
        _, downRatio = getRatio(downBins, downCont, mcPUBins, mcPUVals)

    # build the output in the requested JSON format
    if args.format == "correctionlib":
        out = {
            "schema_version": 2,
            "corrections": [{
                "name": args.name,
                "version": 0,
                "inputs": [
                    {
                        "name": "NumTrueInteractions",
                        "type": "real",
                        "description": "Number of true interactions"
                    },
                    {
                        "name": "weights",
                        "type": "string",
                        "description": "nominal, up, or down"
                    }
                ],
                "output": {
                    "name": "weight",
                    "type": "real",
                    "description": "Event weight for pileup reweighting"
                },
                "data": {
                    "nodetype": "category",
                    "input": "weights",
                    "content": ([{
                        "key": "nominal",
                        "value": {
                            "nodetype": "binning",
                            "input": "NumTrueInteractions",
                            "flow": "clamp",
                            "edges": list(ratioBins),
                            "content": list(nomRatio)
                        }}]+([{
                        "key": "up",
                        "value": {
                            "nodetype": "binning",
                            "input": "NumTrueInteractions",
                            "flow": "clamp",
                            "edges": list(ratioBins),
                            "content": list(upRatio)
                        }}] if upRatio is not None else [])+([{
                        "key": "down",
                        "value": {
                            "nodetype": "binning",
                            "input": "NumTrueInteractions",
                            "flow": "clamp",
                            "edges": list(ratioBins),
                            "content": list(downRatio)
                        }}] if downRatio is not None else []))
                }
            }]
        }
    elif args.format == "cp3-llbb":
        out = {
            "dimension": 1,
            "variables": ["NumTrueInteractions"],
            "binning": {"x": list(ratioBins)},
            "error_type": "absolute",
            "data": [
                {
                    "bin": [ratioBins[i], ratioBins[i+1]],
                    "value": nomRatio[i],
                    "error_low": (nomRatio[i]-downRatio[i] if downRatio is not None else 0.),
                    "error_high": (upRatio[i]-nomRatio[i] if upRatio is not None else 0.)
                } for i in range(nomRatio.shape[0])
            ],
        }
    else:
        raise ValueError(f"Unsupported output format: {args.format}")

    # write out, optionally gzip-compressed
    if args.gzip:
        outN = args.output
        if not outN.endswith(".gz"):
            outN = outN+".gz"
        with gzip.open(outN, "wb") as outF, io.TextIOWrapper(outF, encoding="utf-8") as outE:
            json.dump(out, outE)
    else:
        with open(args.output, "w") as outF:
            json.dump(out, outF)

    # optional control plot: profiles on top, weights below
    if args.makePlot:
        fig, (ax, rax) = plt.subplots(2, 1, figsize=(6, 6), sharex=True)
        rax.set_yscale("log", nonposy="clip")
        #rax = ax.twinx()
        dBinCenters = .5*(mcPUBins[:-1]+mcPUBins[1:])
        nBinCenters = .5*(nomBins[:-1]+nomBins[1:])
        rBinCenters = .5*(ratioBins[:-1]+ratioBins[1:])
        ax.hist(dBinCenters, bins=mcPUBins, weights=mcPUVals, histtype="step", label="MC")
        ax.hist(nBinCenters, bins=nomBins, weights=nomCont, histtype="step", label="Nominal", color="k")
        rax.hist(rBinCenters, bins=ratioBins, weights=nomRatio, histtype="step", color="k")
        if upCont is not None:
            ax.hist(nBinCenters, bins=nomBins, weights=upCont, histtype="step", label="Up", color="r")
            ax.hist(nBinCenters, bins=nomBins, weights=downCont, histtype="step", label="Down", color="b")
            rax.hist(rBinCenters, bins=ratioBins, weights=upRatio, histtype="step", color="r")
            rax.hist(rBinCenters, bins=ratioBins, weights=downRatio, histtype="step", color="b")
        rax.axhline(1.)
        ax.legend()
        rax.set_ylim(.02, 2.)
        rax.set_xlim(ratioBins[0], ratioBins[-1])
        if args.mcfiles:
            rax.set_xlabel(args.mcreweightvar)
        elif args.mcprofile:
            ax.set_title(args.mcprofile)
        if args.output.endswith(".json"):
            plt.savefig(args.output.replace(".json", ".png"))
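When the correctionlib format is chosen, the resulting file can be evaluated with the correctionlib Python bindings; a sketch assuming the default output file name and correction name (puweights):

import correctionlib
cset = correctionlib.CorrectionSet.from_file("puweights.json")
w_nominal = cset["puweights"].evaluate(25.0, "nominal")
w_up = cset["puweights"].evaluate(25.0, "up")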

◆ normAndExtract()

def makePUReWeightJSON.normAndExtract(hist, norm=1.)
def normAndExtract(hist, norm=1.):
    nB = hist.GetNbinsX()
    xAx = hist.GetXaxis()
    if norm:
        # normalise as a density: after scaling, the sum of contents times the bin width equals norm
        hist.Scale(norm/(hist.Integral()*(xAx.GetXmax()-xAx.GetXmin())/nB))
    bEdges = np.array([ xAx.GetBinLowEdge(i) for i in range(1, nB+1) ]+[ xAx.GetBinUpEdge(nB) ])
    contents = np.array([ hist.GetBinContent(i) for i in range(1, nB+1) ])
    return bEdges, contents
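An illustration of the normalisation with a hypothetical histogram with uniform bins (after scaling, the sum of the bin contents times the bin width equals norm, i.e. the profile is treated as a density):

from cppyy import gbl
h = gbl.TH1F("hExample", "example", 4, 0., 4.)
for x, w in [(0.5, 1.), (1.5, 3.), (2.5, 4.), (3.5, 2.)]:
    h.Fill(x, w)
edges, vals = normAndExtract(h)
# edges -> [0. 1. 2. 3. 4.], vals -> [0.1 0.3 0.4 0.2]; sum(vals) * (bin width of 1.) == 1.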

Variable Documentation

◆ logger

logger = logging.getLogger(__name__)

◆ mcPUProfiles

dictionary mcPUProfiles
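Dictionary of predefined MC pileup profiles, keyed by a profile tag (listed with --listmcprofiles). As used in main(), each entry is a pair of bin edges and bin values; a sketch with a hypothetical tag:

mcPUBins, mcPUVals = mcPUProfiles["some_profile_tag"]  # hypothetical tag
assert len(mcPUBins) == len(mcPUVals) + 1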