makePUReWeightJSON Namespace Reference

Functions

def getHist (fName, hName="pileup")
 
def normAndExtract (hist, norm=1.)
 
def getRatio (numBins, numCont, denBins, denCont)
 
def main ()
 

Variables

 logger = logging.getLogger(__name__)
 
dictionary mcPUProfiles
 

Detailed Description

A script to generate a BinnedValues-JSON file for pileup reweighting of MC samples.
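As an overview, the weights are obtained by dividing the normalised data pileup profile by the MC one. A minimal sketch of the flow implemented by main(), assuming the module can be imported as makePUReWeightJSON (the file name and profile tag below are placeholders):

from makePUReWeightJSON import getHist, normAndExtract, getRatio, mcPUProfiles

mcPUBins, mcPUVals = mcPUProfiles["some_profile_tag"]   # placeholder tag, see --listmcprofiles
fNom, hNom = getHist("pileup_data_nominal.root")        # placeholder file, produced with pileupCalc.py
nomBins, nomCont = normAndExtract(hNom)                 # normalise and extract edges and contents
ratioBins, nomRatio = getRatio(nomBins, nomCont, mcPUBins, mcPUVals)  # per-bin data/MC weights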

Function Documentation

◆ getHist()

def makePUReWeightJSON.getHist(fName, hName="pileup")
def getHist(fName, hName="pileup"):
    from cppyy import gbl
    tf = gbl.TFile.Open(fName)
    if not tf:
        raise RuntimeError("Could not open file '{0}'".format(fName))
    hist = tf.Get(hName)
    if not hist:
        raise RuntimeError("No histogram with name '{0}' found in file '{1}'".format(hName, fName))
    # return the file together with the histogram: the histogram is owned by the file,
    # so the TFile must stay alive while the histogram is used
    return tf, hist
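The TFile is returned together with the histogram because ROOT histograms are owned by the file they were read from; a short usage sketch (the file name is a placeholder):

tf, hist = getHist("pileup_data_nominal.root", hName="pileup")
print(hist.GetNbinsX())
# keep a reference to tf for as long as hist is used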

◆ getRatio()

def makePUReWeightJSON.getRatio(numBins, numCont, denBins, denCont)
def getRatio(numBins, numCont, denBins, denCont):
    if not all(db in numBins for db in denBins):
        raise RuntimeError("Numerator (data) needs to have at least the bin edges that are in the denominator (MC)")
    xMinC, xMaxC = denBins[0], denBins[-1]
    inMn = np.where(numBins == xMinC)[0][0]
    inMx = np.where(numBins == xMaxC)[0][0]
    ratio = np.zeros((inMx-inMn,))
    di = 0
    # walk through the (finer) numerator bins, advancing the denominator bin index as needed
    for ni in range(inMn, inMx):
        if numBins[ni+1] > denBins[di+1]:
            di += 1
        assert ( denBins[di] <= numBins[ni] ) and ( numBins[ni+1] <= denBins[di+1] )
        if denCont[di] == 0.:
            ratio[ni-inMn] = 1.
        else:
            ratio[ni-inMn] = numCont[ni]/denCont[di]
    bR = np.array(numBins[inMn:inMx+1])
    # stretch the outermost edges to cover the full numerator range
    bR[0] = numBins[0]
    bR[-1] = numBins[-1]
    return bR, ratio
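For illustration (not part of the script), a small example where the numerator (data) binning refines the denominator (MC) binning; each fine data bin is divided by the coarse MC bin that contains it:

import numpy as np
numBins = np.array([0., 1., 2., 3., 4.]); numCont = np.array([3., 4., 2., 1.])
denBins = np.array([0., 2., 4.]);         denCont = np.array([10., 5.])
bR, ratio = getRatio(numBins, numCont, denBins, denCont)
# bR    -> [0. 1. 2. 3. 4.]
# ratio -> [0.3, 0.4, 0.4, 0.2]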

◆ main()

def makePUReWeightJSON.main()
def main():
    import argparse
    parser = argparse.ArgumentParser(description="Produce a BinnedValues-JSON file for pileup reweighting, using data pileup distributions obtained with `pileupCalc.py -i analysis-lumi-json.txt --inputLumiJSON pileup-json.txt --calcMode true --minBiasXsec MBXSECINNB --maxPileupBin NMAX --numPileupBins N outname.root` (see also https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJSONFileforData#Pileup_JSON_Files_For_Run_II)")
    parser.add_argument("-o", "--output", default="puweights.json", type=str, help="Output file name")
    parser.add_argument("-f", "--format", type=str, choices=["correctionlib", "cp3-llbb"], default="cp3-llbb", help="Output JSON format")
    parser.add_argument("--name", type=str, default="puweights", help="Name of the correction inside the CorrectionSet (only used for the correctionlib format)")
    parser.add_argument("--mcprofile", help="Pileup profile used to generate the MC sample (use --listmcprofiles to see the list of defined profiles)")
    parser.add_argument("--listmcprofiles", action="store_true", help="List the available MC pileup profiles")
    parser.add_argument("--nominal", type=str, help="File with the data (true) pileup distribution histogram assuming the nominal minimum bias cross-section value")
    parser.add_argument("--up", type=str, help="File with the data (true) pileup distribution histogram assuming the nominal+1sigma minimum bias cross-section value")
    parser.add_argument("--down", type=str, help="File with the data (true) pileup distribution histogram assuming the nominal-1sigma minimum bias cross-section value")
    parser.add_argument("--rebin", type=int, help="Factor to rebin the data histograms by")
    parser.add_argument("--makePlot", action="store_true", help="Make a plot of the PU profiles and weights (requires matplotlib)")
    parser.add_argument("mcfiles", nargs="*", help="MC ROOT files to extract a pileup profile from (if used)")
    parser.add_argument("--mctreename", type=str, default="Events", help="Name of the tree to use in mcfiles")
    parser.add_argument("--mcreweightvar", type=str, default="Pileup_nTrueInt", help="Name of the branch in the tree of the mcfiles to use for getting a histogram")
    parser.add_argument("-v", "--verbose", action="store_true", help="Print verbose output")
    parser.add_argument("--gzip", action="store_true", help="Save the output as gzip file")
    args = parser.parse_args()
    logging.basicConfig(level=(logging.DEBUG if args.verbose else logging.INFO))
    if args.makePlot:
        try:
            import matplotlib
            matplotlib.use("agg")
            from matplotlib import pyplot as plt
        except Exception as ex:
            logger.warning("matplotlib could not be imported, so no plot will be produced")
            args.makePlot = False
    if args.gzip:
        try:
            import gzip, io
        except Exception as ex:
            logger.warning("gzip or io could not be imported, output will be stored as a regular file")
            args.gzip = False

    # obtain the MC pileup profile: either from the MC files or from the predefined dictionary
    if args.listmcprofiles:
        logger.info("The known PU profiles are: {0}".format(", ".join(repr(k) for k in mcPUProfiles)))
        return
    elif args.mcfiles:
        if args.mcprofile:
            logger.warning("Both an MC PU profile and MC files were passed - extracting the profile from the files")
        logger.info("Extracting the MC profile from {0} in the {1} tree of: {2}".format(args.mcreweightvar, args.mctreename, ", ".join(args.mcfiles)))
        from cppyy import gbl
        tup = gbl.TChain(args.mctreename)
        for mcfn in args.mcfiles:
            tup.Add(mcfn)
        hMCPU = gbl.TH1F("hMCPU", "MC PU profile", 100, 0., 100.)
        tup.Draw(f"{args.mcreweightvar}>>hMCPU", "", "GOFF")
        mcPUBins, mcPUVals = normAndExtract(hMCPU)
    elif args.mcprofile:
        if args.mcprofile not in mcPUProfiles:
            raise ValueError("No MC PU profile with tag '{0}' is known".format(args.mcprofile))
        mcPUBins, mcPUVals = mcPUProfiles[args.mcprofile]
        if len(mcPUBins) != len(mcPUVals)+1:
            logger.debug("MC profile '{0}' has {1:d} bin edges for {2:d} values".format(args.mcprofile, len(mcPUBins), len(mcPUVals)))
    else:
        raise RuntimeError("Either one of --listmcprofiles or --mcprofile, or a list of MC files to extract a MC profile from, must be passed")

    if not args.nominal:
        raise RuntimeError("No --nominal argument")

    # data pileup profiles and data/MC ratios (nominal, and optionally up/down variations)
    fNom, hNom = getHist(args.nominal)
    if args.rebin:
        hNom.Rebin(args.rebin)
    nomBins, nomCont = normAndExtract(hNom)
    ratioBins, nomRatio = getRatio(nomBins, nomCont, mcPUBins, mcPUVals)

    upCont, upRatio, downCont, downRatio = None, None, None, None
    if bool(args.up) != bool(args.down):
        raise ValueError("If either one of --up and --down is specified, both should be")
    if args.up and args.down:
        fUp, hUp = getHist(args.up)
        if args.rebin:
            hUp.Rebin(args.rebin)
        upBins, upCont = normAndExtract(hUp)
        #if not all(ub == nb for ub, nb in zip(upBins, nomBins)):
        #    raise RuntimeError("Up-variation and nominal binning is different: {0} vs {1}".format(upBins, nomBins))
        _, upRatio = getRatio(upBins, upCont, mcPUBins, mcPUVals)
        fDown, hDown = getHist(args.down)
        if args.rebin:
            hDown.Rebin(args.rebin)
        downBins, downCont = normAndExtract(hDown)
        #if not all(db == nb for db, nb in zip(downBins, nomBins)):
        #    raise RuntimeError("Down-variation and nominal binning is different: {0} vs {1}".format(downBins, nomBins))
        _, downRatio = getRatio(downBins, downCont, mcPUBins, mcPUVals)

    # build the output in the requested JSON format
    if args.format == "correctionlib":
        out = {
            "schema_version": 2,
            "corrections": [{
                "name": args.name,
                "version": 0,
                "inputs": [
                    {
                        "name": "NumTrueInteractions",
                        "type": "real",
                        "description": "Number of true interactions"
                    },
                    {
                        "name": "weights",
                        "type": "string",
                        "description": "nominal, up, or down"
                    }
                ],
                "output": {
                    "name": "weight",
                    "type": "real",
                    "description": "Event weight for pileup reweighting"
                },
                "data": {
                    "nodetype": "category",
                    "input": "weights",
                    "content": ([{
                        "key": "nominal",
                        "value": {
                            "nodetype": "binning",
                            "input": "NumTrueInteractions",
                            "flow": "clamp",
                            "edges": list(ratioBins),
                            "content": list(nomRatio)
                        }}]+([{
                        "key": "up",
                        "value": {
                            "nodetype": "binning",
                            "input": "NumTrueInteractions",
                            "flow": "clamp",
                            "edges": list(ratioBins),
                            "content": list(upRatio)
                        }}] if upRatio is not None else [])+([{
                        "key": "down",
                        "value": {
                            "nodetype": "binning",
                            "input": "NumTrueInteractions",
                            "flow": "clamp",
                            "edges": list(ratioBins),
                            "content": list(downRatio)
                        }}] if downRatio is not None else []))
                }
            }]
        }
    elif args.format == "cp3-llbb":
        out = {
            "dimension": 1,
            "variables": ["NumTrueInteractions"],
            "binning": {"x": list(ratioBins)},
            "error_type": "absolute",
            "data": [
                {
                    "bin": [ratioBins[i], ratioBins[i+1]],
                    "value": nomRatio[i],
                    "error_low": (nomRatio[i]-downRatio[i] if downRatio is not None else 0.),
                    "error_high": (upRatio[i]-nomRatio[i] if upRatio is not None else 0.)
                } for i in range(nomRatio.shape[0])
            ],
        }
    else:
        raise ValueError(f"Unsupported output format: {args.format}")

    # write out, optionally gzip-compressed
    if args.gzip:
        outN = args.output
        if not outN.endswith(".gz"):
            outN = outN+".gz"
        with gzip.open(outN, "wb") as outF, io.TextIOWrapper(outF, encoding="utf-8") as outE:
            json.dump(out, outE)
    else:
        with open(args.output, "w") as outF:
            json.dump(out, outF)

    # optional control plot: profiles on top, weights below
    if args.makePlot:
        fig, (ax, rax) = plt.subplots(2, 1, figsize=(6, 6), sharex=True)
        rax.set_yscale("log", nonposy="clip")
        #rax = ax.twinx()
        dBinCenters = .5*(mcPUBins[:-1]+mcPUBins[1:])
        nBinCenters = .5*(nomBins[:-1]+nomBins[1:])
        rBinCenters = .5*(ratioBins[:-1]+ratioBins[1:])
        ax.hist(dBinCenters, bins=mcPUBins, weights=mcPUVals, histtype="step", label="MC")
        ax.hist(nBinCenters, bins=nomBins, weights=nomCont, histtype="step", label="Nominal", color="k")
        rax.hist(rBinCenters, bins=ratioBins, weights=nomRatio, histtype="step", color="k")
        if upCont is not None:
            ax.hist(nBinCenters, bins=nomBins, weights=upCont, histtype="step", label="Up", color="r")
            ax.hist(nBinCenters, bins=nomBins, weights=downCont, histtype="step", label="Down", color="b")
            rax.hist(rBinCenters, bins=ratioBins, weights=upRatio, histtype="step", color="r")
            rax.hist(rBinCenters, bins=ratioBins, weights=downRatio, histtype="step", color="b")
        rax.axhline(1.)
        ax.legend()
        rax.set_ylim(.02, 2.)
        rax.set_xlim(ratioBins[0], ratioBins[-1])
        if args.mcfiles:
            rax.set_xlabel(args.mcreweightvar)
        elif args.mcprofile:
            ax.set_title(args.mcprofile)
        if args.output.endswith(".json"):
            plt.savefig(args.output.replace(".json", ".png"))
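When the correctionlib format is chosen, the resulting file can be evaluated with the correctionlib Python bindings; a sketch assuming the default output file name and correction name (puweights):

import correctionlib
cset = correctionlib.CorrectionSet.from_file("puweights.json")
w_nominal = cset["puweights"].evaluate(25.0, "nominal")
w_up = cset["puweights"].evaluate(25.0, "up")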

◆ normAndExtract()

def makePUReWeightJSON.normAndExtract(hist, norm=1.)
def normAndExtract(hist, norm=1.):
    nB = hist.GetNbinsX()
    xAx = hist.GetXaxis()
    if norm:
        # normalise as a density: after scaling, the sum of contents times the bin width equals norm
        hist.Scale(norm/(hist.Integral()*(xAx.GetXmax()-xAx.GetXmin())/nB))
    bEdges = np.array([ xAx.GetBinLowEdge(i) for i in range(1, nB+1) ]+[ xAx.GetBinUpEdge(nB) ])
    contents = np.array([ hist.GetBinContent(i) for i in range(1, nB+1) ])
    return bEdges, contents
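An illustration of the normalisation with a hypothetical histogram with uniform bins (after scaling, the sum of the bin contents times the bin width equals norm, i.e. the profile is treated as a density):

from cppyy import gbl
h = gbl.TH1F("hExample", "example", 4, 0., 4.)
for x, w in [(0.5, 1.), (1.5, 3.), (2.5, 4.), (3.5, 2.)]:
    h.Fill(x, w)
edges, vals = normAndExtract(h)
# edges -> [0. 1. 2. 3. 4.], vals -> [0.1 0.3 0.4 0.2]; sum(vals) * (bin width of 1.) == 1.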

Variable Documentation

◆ logger

logger = logging.getLogger(__name__)

◆ mcPUProfiles

dictionary mcPUProfiles
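Dictionary of predefined MC pileup profiles, keyed by a profile tag (listed with --listmcprofiles). As used in main(), each entry is a pair of bin edges and bin values; a sketch with a hypothetical tag:

mcPUBins, mcPUVals = mcPUProfiles["some_profile_tag"]  # hypothetical tag
assert len(mcPUBins) == len(mcPUVals) + 1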