Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat : add ratioplot #416

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/mplhep/__init__.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
histplot,
make_square_add_cbar,
mpl_magic,
ratioplot,
rescale_to_axessize,
sort_legend,
ylow,
@@ -63,6 +64,7 @@
# Log plot functions
"histplot",
"hist2dplot",
"ratioplot",
"mpl_magic",
"yscale_legend",
"yscale_anchored_text",
56 changes: 56 additions & 0 deletions src/mplhep/error_estimation.py
Original file line number Diff line number Diff line change
@@ -52,3 +52,59 @@ def poisson_interval(sumw, sumw2, coverage=_coverage1sd):
interval = np.array([lo, hi])
interval[interval == np.nan] = 0.0 # chi2.ppf produces nan for counts=0
return interval


def normal_interval(pw, tw, pw2, tw2, coverage=_coverage1sd):
"""Compute errors based on the expansion of pass/(pass + fail), possibly weighted
Parameters
----------
pw : np.ndarray
Numerator, or number of (weighted) successes, vectorized
tw : np.ndarray
Denominator or number of (weighted) trials, vectorized
pw2 : np.ndarray
Numerator sum of weights squared, vectorized
tw2 : np.ndarray
Denominator sum of weights squared, vectorized
coverage : float, optional
Central coverage interval, defaults to 68%
c.f. https://root.cern.ch/doc/master/TEfficiency_8cxx_source.html#l02515
"""

eff = pw / tw

variance = (pw2 * (1 - 2 * eff) + tw2 * eff**2) / (tw**2)
sigma = np.sqrt(variance)

prob = 0.5 * (1 - coverage)
delta = np.zeros_like(sigma)
delta[sigma != 0] = scipy.stats.norm.ppf(prob, scale=sigma[sigma != 0])

lo = eff - np.minimum(eff + delta, np.ones_like(eff))
hi = np.maximum(eff - delta, np.zeros_like(eff)) - eff

return np.array([lo, hi])


def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
"""Compute Clopper-Pearson coverage interval for a binomial distribution
Parameters
----------
num : numpy.ndarray
Numerator, or number of successes, vectorized
denom : numpy.ndarray
Denominator or number of trials, vectorized
coverage : float, optional
Central coverage interval, defaults to 68%
c.f. http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
"""
if np.any(num > denom):
raise ValueError(
"Found numerator larger than denominator while calculating binomial uncertainty"
)
lo = scipy.stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1)
hi = scipy.stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num)
interval = np.array([lo, hi])
interval[:, num == 0.0] = 0.0
interval[1, num == denom] = 1.0
return interval
293 changes: 293 additions & 0 deletions src/mplhep/plot.py
Original file line number Diff line number Diff line change
@@ -13,8 +13,14 @@
from matplotlib.transforms import Bbox
from mpl_toolkits.axes_grid1 import axes_size, make_axes_locatable

from .error_estimation import (
clopper_pearson_interval,
normal_interval,
poisson_interval,
)
from .utils import (
Plottable,
compatible,
get_histogram_axes_title,
get_plottable_protocol_bins,
hist_object_handler,
@@ -920,6 +926,293 @@ def hist2dplot(
return ColormeshArtists(pc, cb_obj, text_artists)


# copy functions coffea.hist.plotratio https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py to boost-hist
def ratioplot(
num,
denom,
ax=None,
clear=True,
flow=None,
xerr=False,
error_opts=None,
denom_fill_opts=None,
guide_opts=None,
unc="num",
label=None,
ext_denom_error=None,
):
"""Create a ratio plot, dividing two compatible histograms
Parameters
----------
num : Hist
Numerator, a single-axis histogram
denom : Hist
Denominator, a single-axis histogram
ax : matplotlib.axes.Axes, optional
Axes object (if None, one is created)
clear : bool, optional
Whether to clear Axes before drawing (if passed); if False, this function will skip drawing the legend
flow : str, optional {None, "show", "sum"}
Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin.
xerr: bool, optional
If true, then error bars are drawn for x-axis to indicate the size of the bin.
error_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.errorbar <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.errorbar.html>`_ call
internal to this function. Leave blank for defaults. Some special options are interpreted by
this function and not passed to matplotlib: 'emarker' (default: '') specifies the marker type
to place at cap of the errorbar.
denom_fill_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.fill_between <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.fill_between.html>`_ call
internal to this function, filling the denominator uncertainty band. Leave blank for defaults.
guide_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.axhline <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.axhline.html>`_ call
internal to this function, to plot a horizontal guide line at ratio of 1. Leave blank for defaults.
unc : str, optional
Uncertainty calculation option: 'clopper-pearson' interval for efficiencies; 'poisson-ratio' interval
for ratio of poisson distributions; 'num' poisson interval of numerator scaled by denominator value
(common for data/mc, for better or worse).
label : str, optional
Associate a label to this entry (note: y axis label set by ``num.label``)
ext_denom_error: list of np.array[error_up,error_down], optional
External MC errors not stored in the original histogram
Returns
-------
ax : matplotlib.axes.Axes
A matplotlib `Axes <https://matplotlib.org/3.1.1/api/axes_api.html>`_ object
"""
if ax is None:
fig, ax = plt.subplots(1, 1)
else:
if not isinstance(ax, plt.Axes):
raise ValueError("ax must be a matplotlib Axes object")
if clear:
ax.clear()
if not compatible(num, denom):
raise ValueError(
"numerator and denominator histograms have incompatible axis definitions"
)
if len(num.axes) > 1:
raise ValueError("ratioplot() can only support one-dimensional histograms")
if error_opts is None and denom_fill_opts is None and guide_opts is None:
error_opts = {}
denom_fill_opts = {}
guide_opts = {}
axis = num.axes[0]

ax.set_xlabel(axis.label)
ax.set_ylabel(num.label)
edges = axis.edges
if flow == "show":
edges = np.array(
[
edges[0] - (edges[1] - edges[0]) * 3,
edges[0] - (edges[1] - edges[0]),
*edges,
edges[-1] + (edges[1] - edges[0]),
edges[-1] + (edges[1] - edges[0]) * 3,
]
)
centers = (edges[1:] + edges[:-1]) / 2
ranges = (edges[1:] - edges[:-1]) / 2 if xerr else None
sumw_num, sumw2_num = num.values(), num.variances()
sumw_denom, sumw2_denom = denom.values(), denom.variances()

if flow == "show":
sumw_num, sumw2_num = (
num.view(flow=True)["value"],
num.view(flow=True)["variance"],
)
sumw_num, sumw2_num = np.insert(sumw_num, -1, 0), np.insert(sumw2_num, -1, 0)
sumw_num, sumw2_num = np.insert(sumw_num, 1, 0), np.insert(sumw2_num, 1, 0)
sumw_denom, sumw2_denom = (
denom.view(flow=True)["value"],
denom.view(flow=True)["variance"],
)
sumw_denom, sumw2_denom = (
np.insert(sumw_denom, -1, 0),
np.insert(sumw2_denom, -1, 0),
)
sumw_denom, sumw2_denom = (
np.insert(sumw_denom, 1, 0),
np.insert(sumw2_denom, 1, 0),
)

elif flow == "sum":
sumw_num[0], sumw2_num[0] = (
sumw_num[0] + num.view(flow=True)["value"][0],
sumw2_num[0] + num.view(flow=True)["value"][0],
)
sumw_num[-1], sumw2_num[-1] = (
sumw_num[-1] + num.view(flow=True)["value"][-1],
sumw2_num[-1] + num.view(flow=True)["value"][-1],
)
sumw_denom[0], sumw2_denom[0] = (
sumw_denom[0] + denom.view(flow=True)["value"][0],
sumw2_denom[0] + denom.view(flow=True)["value"][0],
)
sumw_denom[-1], sumw2_denom[-1] = (
sumw_denom[-1] + denom.view(flow=True)["value"][-1],
sumw2_denom[-1] + denom.view(flow=True)["value"][-1],
)
else:
sumw_num, sumw2_num = num.values(), num.variances()
sumw_denom, sumw2_denom = denom.values(), denom.variances()
rsumw = sumw_num / sumw_denom
if unc == "clopper-pearson":
rsumw_err = np.abs(clopper_pearson_interval(sumw_num, sumw_denom) - rsumw)
elif unc == "poisson-ratio":
# poisson ratio n/m is equivalent to binomial n/(n+m)
rsumw_err = np.abs(
clopper_pearson_interval(sumw_num, sumw_num + sumw_denom) - rsumw
)
elif unc == "num":
rsumw_err = np.abs(poisson_interval(rsumw, sumw2_num / sumw_denom**2) - rsumw)
elif unc == "efficiency":
rsumw_err = np.abs(
normal_interval(sumw_num, sumw_denom, sumw2_num, sumw2_denom)
)
else:
raise ValueError("Unrecognized uncertainty option: %r" % unc)

# if additional uncertainties
if ext_denom_error is not None:
if denom_fill_opts is {}:
raise ValueError("suggest to use different style for additional error")
if np.shape(rsumw_err) != np.shape(ext_denom_error / sumw_denom):
raise ValueError("Incompatible error length")
rsumw_err = np.sqrt(rsumw_err**2 + (ext_denom_error / sumw_denom) ** 2)

if error_opts is not None:
opts = {
"label": label,
"linestyle": "none",
"lw": 1,
"marker": "o",
"color": "k",
}
opts.update(error_opts)
emarker = opts.pop("emarker", "")
errbar = ax.errorbar(x=centers, y=rsumw, xerr=ranges, yerr=rsumw_err, **opts)
plt.setp(errbar[1], "marker", emarker)
if denom_fill_opts is not None:
unity = np.ones_like(sumw_denom)
denom_unc = poisson_interval(unity, sumw2_denom / sumw_denom**2)
opts = {
"hatch": "////",
"facecolor": "none",
"lw": 0,
"color": "k",
"alpha": 0.4,
}
if ext_denom_error is not None:
denom_unc[0] = (
unity[0]
- np.sqrt(
(denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
)[0]
)
denom_unc[1] = (
unity[1]
+ np.sqrt(
(denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
)[1]
)
opts = denom_fill_opts
ax.stairs(denom_unc[0], edges=edges, baseline=denom_unc[1], **opts)
if guide_opts is not None:
opts = {"linestyle": "--", "color": (0, 0, 0, 0.5), "linewidth": 1}
opts.update(guide_opts)
if clear is not False:
ax.axhline(1.0, **opts)

if clear:
ax.autoscale(axis="x", tight=True)
ax.set_ylim(0, None)
if flow == "hint" or flow == "show":
d = 0.9 # proportion of vertical to horizontal extent of the slanted line
trans = mpl.transforms.blended_transform_factory(ax.transData, ax.transAxes)
kwargs = dict(
marker=[(-0.5, -d), (0.5, d)],
markersize=15,
linestyle="none",
color="k",
mec="k",
mew=1,
clip_on=False,
transform=trans,
)
xticks = ax.get_xticks().tolist()
if sumw_num[0] > 0.0 or sumw_denom[0] > 0.0:
if flow == "hint":
ax.plot(
[
edges[0] - (edges[1] - edges[0]) / 2.0,
edges[0],
],
[0, 0],
**kwargs,
)
ax.plot(
[
edges[0] - (edges[1] - edges[0]) / 2.0,
edges[0],
],
[1, 1],
**kwargs,
)
if flow == "show":
ax.plot(
[edges[1], edges[2]],
[0, 0],
**kwargs,
)
ax.plot(
[edges[1], edges[2]],
[1, 1],
**kwargs,
)
xticks[0] = ""
xticks[1] = f"<{edges[2]}"

ax.set_xticklabels(xticks)
if sumw_num[-1] > 0.0 or sumw_denom[-1] > 0.0:
if flow == "hint":
ax.plot(
[
edges[-1],
edges[-1] + (edges[1] - edges[0]) / 2.0,
],
[0, 0],
**kwargs,
)
ax.plot(
[
edges[-1],
edges[-1] + (edges[1] - edges[0]) / 2.0,
],
[1, 1],
**kwargs,
)
if flow == "show":
ax.plot(
[edges[-3], edges[-2]],
[0, 0],
**kwargs,
)
ax.plot(
[edges[-3], edges[-2]],
[1, 1],
**kwargs,
)
xticks[-1] = ""
xticks[-2] = f">{edges[-3]}"
ax.set_xticklabels(xticks)
return ax


#############################################
# Utils
def overlap(ax, bbox, get_vertices=False):
10 changes: 10 additions & 0 deletions src/mplhep/utils.py
Original file line number Diff line number Diff line change
@@ -403,3 +403,13 @@ def to_padded2d(h, variances=False):
return padded, padded_varis
else:
return padded


def compatible(self, other):
"""Checks if this histogram is compatible with another, i.e. they have identical binning"""
if len(self.axes) != len(other.axes):
return False
if set(self.axes.name) != set(other.axes.name):
return False

return True
62 changes: 62 additions & 0 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -122,7 +122,7 @@
return fig


@pytest.mark.mpl_image_compare(style="default")

Check failure on line 125 in tests/test_basic.py

GitHub Actions / 🐍 3.8 • ubuntu-latest

test_ratioplot TypeError: Cannot give bins with existing histogram

Check failure on line 125 in tests/test_basic.py

GitHub Actions / 🐍 3.10 • ubuntu-latest

test_ratioplot TypeError: Cannot give bins with existing histogram

Check failure on line 125 in tests/test_basic.py

GitHub Actions / 🐍 3.8 • macOS-latest

test_ratioplot TypeError: Cannot give bins with existing histogram
def test_histplot_flow():
np.random.seed(0)
h = hist.new.Reg(20, 5, 15, name="x").Weight()
@@ -589,6 +589,68 @@
return fig


@pytest.mark.mpl_image_compare(style="default")
def test_ratioplot():
np.random.seed(0)
h, bins = np.histogram(np.random.normal(10, 3, 1000), bins=np.geomspace(1, 20, 10))
h = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
hdata = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
hdata.fill(np.random.poisson(h.view(flow=True)["value"] * 3))
fig, ((ax), (rax)) = plt.subplots(
2, 1, figsize=(10, 10), gridspec_kw={"height_ratios": (3, 1)}, sharex=True
)
a, b, c = h, h * 2, hdata

hep.histplot(
[a, b], bins=bins, ax=ax, stack=True, histtype="fill", label=["MC1", "MC2"]
)
hep.histplot([c], bins=bins, ax=ax, yerr=True, histtype="errorbar", label="Data")
hep.ratioplot(c, a + b, ax=rax, yerr=True)

ax.set_title("stacked")
rax.set_ylabel("Data/MC")
ax.set_ylabel("Events")

return fig


def test_ratioplot_flow():
np.random.seed(0)
h, bins = np.histogram(np.random.normal(10, 3, 1000), bins=np.geomspace(1, 20, 10))
h = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
hdata = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
hdata.fill(np.random.poisson(h.view(flow=True)["value"] * 3))
fig, ((ax), (rax)) = plt.subplots(

Check failure on line 623 in tests/test_basic.py

GitHub Actions / 🐍 3.8 • ubuntu-latest

test_ratioplot_flow ValueError: too many values to unpack (expected 2)

Check failure on line 623 in tests/test_basic.py

GitHub Actions / 🐍 3.10 • ubuntu-latest

test_ratioplot_flow ValueError: too many values to unpack (expected 2)

Check failure on line 623 in tests/test_basic.py

GitHub Actions / 🐍 3.8 • macOS-latest

test_ratioplot_flow ValueError: too many values to unpack (expected 2)
4,
2,
figsize=(10, 10),
gridspec_kw={"height_ratios": (3, 1, 3, 1)},
sharex=True,
sharey=True,
)
a, b, c = h, h * 2, hdata

hep.histplot(
[a, b],
bins=bins,
ax=ax,
stack=True,
histtype="fill",
label=["MC1", "MC2"],
flow=None,
)
hep.histplot(
[c], bins=bins, ax=ax, yerr=True, histtype="errorbar", label="Data", flow=None
)
hep.ratioplot(c, a + b, ax=rax, yerr=True, flow="")

ax.set_title("stacked")
rax.set_ylabel("Data/MC")
ax.set_ylabel("Events")

return fig


@pytest.mark.mpl_image_compare(style="default", remove_text=True)
def test_histplot_w2():
fig, ax = plt.subplots()