scikit-hep · Ming-Yan · Apr 3, 2023
diff --git a/src/mplhep/__init__.py b/src/mplhep/__init__.py
@@ -19,6 +19,7 @@
     histplot,
     make_square_add_cbar,
     mpl_magic,
+    ratioplot,
     rescale_to_axessize,
     sort_legend,
     ylow,
@@ -63,6 +64,7 @@
     # Log plot functions
     "histplot",
     "hist2dplot",
+    "ratioplot",
     "mpl_magic",
     "yscale_legend",
     "yscale_anchored_text",

diff --git a/src/mplhep/error_estimation.py b/src/mplhep/error_estimation.py
@@ -52,3 +52,59 @@ def poisson_interval(sumw, sumw2, coverage=_coverage1sd):
     interval = np.array([lo, hi])
     interval[interval == np.nan] = 0.0  # chi2.ppf produces nan for counts=0
     return interval
+
+
+def normal_interval(pw, tw, pw2, tw2, coverage=_coverage1sd):
+    """Compute errors based on the expansion of pass/(pass + fail), possibly weighted
+    Parameters
+    ----------
+    pw : np.ndarray
+        Numerator, or number of (weighted) successes, vectorized
+    tw : np.ndarray
+        Denominator or number of (weighted) trials, vectorized
+    pw2 : np.ndarray
+        Numerator sum of weights squared, vectorized
+    tw2 : np.ndarray
+        Denominator sum of weights squared, vectorized
+    coverage : float, optional
+        Central coverage interval, defaults to 68%
+    c.f. https://root.cern.ch/doc/master/TEfficiency_8cxx_source.html#l02515
+    """
+
+    eff = pw / tw
+
+    variance = (pw2 * (1 - 2 * eff) + tw2 * eff**2) / (tw**2)
+    sigma = np.sqrt(variance)
+
+    prob = 0.5 * (1 - coverage)
+    delta = np.zeros_like(sigma)
+    delta[sigma != 0] = scipy.stats.norm.ppf(prob, scale=sigma[sigma != 0])
+
+    lo = eff - np.minimum(eff + delta, np.ones_like(eff))
+    hi = np.maximum(eff - delta, np.zeros_like(eff)) - eff
+
+    return np.array([lo, hi])
+
+
+def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
+    """Compute Clopper-Pearson coverage interval for a binomial distribution
+    Parameters
+    ----------
+        num : numpy.ndarray
+            Numerator, or number of successes, vectorized
+        denom : numpy.ndarray
+            Denominator or number of trials, vectorized
+        coverage : float, optional
+            Central coverage interval, defaults to 68%
+    c.f. http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
+    """
+    if np.any(num > denom):
+        raise ValueError(
+            "Found numerator larger than denominator while calculating binomial uncertainty"
+        )
+    lo = scipy.stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1)
+    hi = scipy.stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num)
+    interval = np.array([lo, hi])
+    interval[:, num == 0.0] = 0.0
+    interval[1, num == denom] = 1.0
+    return interval
diff --git a/src/mplhep/plot.py b/src/mplhep/plot.py
@@ -13,8 +13,14 @@
 from matplotlib.transforms import Bbox
 from mpl_toolkits.axes_grid1 import axes_size, make_axes_locatable
 
+from .error_estimation import (
+    clopper_pearson_interval,
+    normal_interval,
+    poisson_interval,
+)
 from .utils import (
     Plottable,
+    compatible,
     get_histogram_axes_title,
     get_plottable_protocol_bins,
     hist_object_handler,
@@ -920,6 +926,293 @@ def hist2dplot(
     return ColormeshArtists(pc, cb_obj, text_artists)
 
 
+# copy functions coffea.hist.plotratio https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py to boost-hist
+def ratioplot(
+    num,
+    denom,
+    ax=None,
+    clear=True,
+    flow=None,
+    xerr=False,
+    error_opts=None,
+    denom_fill_opts=None,
+    guide_opts=None,
+    unc="num",
+    label=None,
+    ext_denom_error=None,
+):
+    """Create a ratio plot, dividing two compatible histograms
+    Parameters
+    ----------
+        num : Hist
+            Numerator, a single-axis histogram
+        denom : Hist
+            Denominator, a single-axis histogram
+        ax : matplotlib.axes.Axes, optional
+            Axes object (if None, one is created)
+        clear : bool, optional
+            Whether to clear Axes before drawing (if passed); if False, this function will skip drawing the legend
+        flow : str, optional {None, "show", "sum"}
+            Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin.
+        xerr: bool, optional
+            If true, then error bars are drawn for x-axis to indicate the size of the bin.
+        error_opts : dict, optional
+            A dictionary of options to pass to the matplotlib
+            `ax.errorbar <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.errorbar.html>`_ call
+            internal to this function.  Leave blank for defaults.  Some special options are interpreted by
+            this function and not passed to matplotlib: 'emarker' (default: '') specifies the marker type
+            to place at cap of the errorbar.
+        denom_fill_opts : dict, optional
+            A dictionary of options to pass to the matplotlib
+            `ax.fill_between <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.fill_between.html>`_ call
+            internal to this function, filling the denominator uncertainty band.  Leave blank for defaults.
+        guide_opts : dict, optional
+            A dictionary of options to pass to the matplotlib
+            `ax.axhline <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.axhline.html>`_ call
+            internal to this function, to plot a horizontal guide line at ratio of 1.  Leave blank for defaults.
+        unc : str, optional
+            Uncertainty calculation option: 'clopper-pearson' interval for efficiencies; 'poisson-ratio' interval
+            for ratio of poisson distributions; 'num' poisson interval of numerator scaled by denominator value
+            (common for data/mc, for better or worse).
+        label : str, optional
+            Associate a label to this entry (note: y axis label set by ``num.label``)
+        ext_denom_error: list of np.array[error_up,error_down], optional
+            External MC errors not stored in the original histogram
+    Returns
+    -------
+        ax : matplotlib.axes.Axes
+            A matplotlib `Axes <https://matplotlib.org/3.1.1/api/axes_api.html>`_ object
+    """
+    if ax is None:
+        fig, ax = plt.subplots(1, 1)
+    else:
+        if not isinstance(ax, plt.Axes):
+            raise ValueError("ax must be a matplotlib Axes object")
+        if clear:
+            ax.clear()
+    if not compatible(num, denom):
+        raise ValueError(
+            "numerator and denominator histograms have incompatible axis definitions"
+        )
+    if len(num.axes) > 1:
+        raise ValueError("ratioplot() can only support one-dimensional histograms")
+    if error_opts is None and denom_fill_opts is None and guide_opts is None:
+        error_opts = {}
+        denom_fill_opts = {}
+        guide_opts = {}
+    axis = num.axes[0]
+
+    ax.set_xlabel(axis.label)
+    ax.set_ylabel(num.label)
+    edges = axis.edges
+    if flow == "show":
+        edges = np.array(
+            [
+                edges[0] - (edges[1] - edges[0]) * 3,
+                edges[0] - (edges[1] - edges[0]),
+                *edges,
+                edges[-1] + (edges[1] - edges[0]),
+                edges[-1] + (edges[1] - edges[0]) * 3,
+            ]
+        )
+    centers = (edges[1:] + edges[:-1]) / 2
+    ranges = (edges[1:] - edges[:-1]) / 2 if xerr else None
+    sumw_num, sumw2_num = num.values(), num.variances()
+    sumw_denom, sumw2_denom = denom.values(), denom.variances()
+
+    if flow == "show":
+        sumw_num, sumw2_num = (
+            num.view(flow=True)["value"],
+            num.view(flow=True)["variance"],
+        )
+        sumw_num, sumw2_num = np.insert(sumw_num, -1, 0), np.insert(sumw2_num, -1, 0)
+        sumw_num, sumw2_num = np.insert(sumw_num, 1, 0), np.insert(sumw2_num, 1, 0)
+        sumw_denom, sumw2_denom = (
+            denom.view(flow=True)["value"],
+            denom.view(flow=True)["variance"],
+        )
+        sumw_denom, sumw2_denom = (
+            np.insert(sumw_denom, -1, 0),
+            np.insert(sumw2_denom, -1, 0),
+        )
+        sumw_denom, sumw2_denom = (
+            np.insert(sumw_denom, 1, 0),
+            np.insert(sumw2_denom, 1, 0),
+        )
+
+    elif flow == "sum":
+        sumw_num[0], sumw2_num[0] = (
+            sumw_num[0] + num.view(flow=True)["value"][0],
+            sumw2_num[0] + num.view(flow=True)["value"][0],
+        )
+        sumw_num[-1], sumw2_num[-1] = (
+            sumw_num[-1] + num.view(flow=True)["value"][-1],
+            sumw2_num[-1] + num.view(flow=True)["value"][-1],
+        )
+        sumw_denom[0], sumw2_denom[0] = (
+            sumw_denom[0] + denom.view(flow=True)["value"][0],
+            sumw2_denom[0] + denom.view(flow=True)["value"][0],
+        )
+        sumw_denom[-1], sumw2_denom[-1] = (
+            sumw_denom[-1] + denom.view(flow=True)["value"][-1],
+            sumw2_denom[-1] + denom.view(flow=True)["value"][-1],
+        )
+    else:
+        sumw_num, sumw2_num = num.values(), num.variances()
+        sumw_denom, sumw2_denom = denom.values(), denom.variances()
+    rsumw = sumw_num / sumw_denom
+    if unc == "clopper-pearson":
+        rsumw_err = np.abs(clopper_pearson_interval(sumw_num, sumw_denom) - rsumw)
+    elif unc == "poisson-ratio":
+        # poisson ratio n/m is equivalent to binomial n/(n+m)
+        rsumw_err = np.abs(
+            clopper_pearson_interval(sumw_num, sumw_num + sumw_denom) - rsumw
+        )
+    elif unc == "num":
+        rsumw_err = np.abs(poisson_interval(rsumw, sumw2_num / sumw_denom**2) - rsumw)
+    elif unc == "efficiency":
+        rsumw_err = np.abs(
+            normal_interval(sumw_num, sumw_denom, sumw2_num, sumw2_denom)
+        )
+    else:
+        raise ValueError("Unrecognized uncertainty option: %r" % unc)
+
+    # if additional uncertainties
+    if ext_denom_error is not None:
+        if denom_fill_opts is {}:
+            raise ValueError("suggest to use different style for additional error")
+        if np.shape(rsumw_err) != np.shape(ext_denom_error / sumw_denom):
+            raise ValueError("Incompatible error length")
+        rsumw_err = np.sqrt(rsumw_err**2 + (ext_denom_error / sumw_denom) ** 2)
+
+    if error_opts is not None:
+        opts = {
+            "label": label,
+            "linestyle": "none",
+            "lw": 1,
+            "marker": "o",
+            "color": "k",
+        }
+        opts.update(error_opts)
+        emarker = opts.pop("emarker", "")
+        errbar = ax.errorbar(x=centers, y=rsumw, xerr=ranges, yerr=rsumw_err, **opts)
+        plt.setp(errbar[1], "marker", emarker)
+    if denom_fill_opts is not None:
+        unity = np.ones_like(sumw_denom)
+        denom_unc = poisson_interval(unity, sumw2_denom / sumw_denom**2)
+        opts = {
+            "hatch": "////",
+            "facecolor": "none",
+            "lw": 0,
+            "color": "k",
+            "alpha": 0.4,
+        }
+        if ext_denom_error is not None:
+            denom_unc[0] = (
+                unity[0]
+                - np.sqrt(
+                    (denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
+                )[0]
+            )
+            denom_unc[1] = (
+                unity[1]
+                + np.sqrt(
+                    (denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
+                )[1]
+            )
+            opts = denom_fill_opts
+        ax.stairs(denom_unc[0], edges=edges, baseline=denom_unc[1], **opts)
+    if guide_opts is not None:
+        opts = {"linestyle": "--", "color": (0, 0, 0, 0.5), "linewidth": 1}
+        opts.update(guide_opts)
+        if clear is not False:
+            ax.axhline(1.0, **opts)
+
+    if clear:
+        ax.autoscale(axis="x", tight=True)
+        ax.set_ylim(0, None)
+    if flow == "hint" or flow == "show":
+        d = 0.9  # proportion of vertical to horizontal extent of the slanted line
+        trans = mpl.transforms.blended_transform_factory(ax.transData, ax.transAxes)
+        kwargs = dict(
+            marker=[(-0.5, -d), (0.5, d)],
+            markersize=15,
+            linestyle="none",
+            color="k",
+            mec="k",
+            mew=1,
+            clip_on=False,
+            transform=trans,
+        )
+        xticks = ax.get_xticks().tolist()
+        if sumw_num[0] > 0.0 or sumw_denom[0] > 0.0:
+            if flow == "hint":
+                ax.plot(
+                    [
+                        edges[0] - (edges[1] - edges[0]) / 2.0,
+                        edges[0],
+                    ],
+                    [0, 0],
+                    **kwargs,
+                )
+                ax.plot(
+                    [
+                        edges[0] - (edges[1] - edges[0]) / 2.0,
+                        edges[0],
+                    ],
+                    [1, 1],
+                    **kwargs,
+                )
+            if flow == "show":
+                ax.plot(
+                    [edges[1], edges[2]],
+                    [0, 0],
+                    **kwargs,
+                )
+                ax.plot(
+                    [edges[1], edges[2]],
+                    [1, 1],
+                    **kwargs,
+                )
+                xticks[0] = ""
+                xticks[1] = f"<{edges[2]}"
+
+                ax.set_xticklabels(xticks)
+        if sumw_num[-1] > 0.0 or sumw_denom[-1] > 0.0:
+            if flow == "hint":
+                ax.plot(
+                    [
+                        edges[-1],
+                        edges[-1] + (edges[1] - edges[0]) / 2.0,
+                    ],
+                    [0, 0],
+                    **kwargs,
+                )
+                ax.plot(
+                    [
+                        edges[-1],
+                        edges[-1] + (edges[1] - edges[0]) / 2.0,
+                    ],
+                    [1, 1],
+                    **kwargs,
+                )
+            if flow == "show":
+                ax.plot(
+                    [edges[-3], edges[-2]],
+                    [0, 0],
+                    **kwargs,
+                )
+                ax.plot(
+                    [edges[-3], edges[-2]],
+                    [1, 1],
+                    **kwargs,
+                )
+                xticks[-1] = ""
+                xticks[-2] = f">{edges[-3]}"
+                ax.set_xticklabels(xticks)
+    return ax
+
+
 #############################################
 # Utils
 def overlap(ax, bbox, get_vertices=False):

diff --git a/src/mplhep/utils.py b/src/mplhep/utils.py
@@ -403,3 +403,13 @@ def to_padded2d(h, variances=False):
         return padded, padded_varis
     else:
         return padded
+
+
+def compatible(self, other):
+    """Checks if this histogram is compatible with another, i.e. they have identical binning"""
+    if len(self.axes) != len(other.axes):
+        return False
+    if set(self.axes.name) != set(other.axes.name):
+        return False
+
+    return True
diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -122,7 +122,7 @@
    return fig


 @pytest.mark.mpl_image_compare(style="default")
 def test_histplot_flow():
    np.random.seed(0)
    h = hist.new.Reg(20, 5, 15, name="x").Weight()
@@ -589,6 +589,68 @@
     return fig
 
 
+@pytest.mark.mpl_image_compare(style="default")
+def test_ratioplot():
+    np.random.seed(0)
+    h, bins = np.histogram(np.random.normal(10, 3, 1000), bins=np.geomspace(1, 20, 10))
+    h = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
+    hdata = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
+    hdata.fill(np.random.poisson(h.view(flow=True)["value"] * 3))
+    fig, ((ax), (rax)) = plt.subplots(
+        2, 1, figsize=(10, 10), gridspec_kw={"height_ratios": (3, 1)}, sharex=True
+    )
+    a, b, c = h, h * 2, hdata
+
+    hep.histplot(
+        [a, b], bins=bins, ax=ax, stack=True, histtype="fill", label=["MC1", "MC2"]
+    )
+    hep.histplot([c], bins=bins, ax=ax, yerr=True, histtype="errorbar", label="Data")
+    hep.ratioplot(c, a + b, ax=rax, yerr=True)
+
+    ax.set_title("stacked")
+    rax.set_ylabel("Data/MC")
+    ax.set_ylabel("Events")
+
+    return fig
+
+
+def test_ratioplot_flow():
+    np.random.seed(0)
+    h, bins = np.histogram(np.random.normal(10, 3, 1000), bins=np.geomspace(1, 20, 10))
+    h = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
+    hdata = hist.Hist(hist.axis.Regular(20, 5, 15, name="x"), hist.storage.Weight())
+    hdata.fill(np.random.poisson(h.view(flow=True)["value"] * 3))
+    fig, ((ax), (rax)) = plt.subplots(
+        4,
+        2,
+        figsize=(10, 10),
+        gridspec_kw={"height_ratios": (3, 1, 3, 1)},
+        sharex=True,
+        sharey=True,
+    )
+    a, b, c = h, h * 2, hdata
+
+    hep.histplot(
+        [a, b],
+        bins=bins,
+        ax=ax,
+        stack=True,
+        histtype="fill",
+        label=["MC1", "MC2"],
+        flow=None,
+    )
+    hep.histplot(
+        [c], bins=bins, ax=ax, yerr=True, histtype="errorbar", label="Data", flow=None
+    )
+    hep.ratioplot(c, a + b, ax=rax, yerr=True, flow="")
+
+    ax.set_title("stacked")
+    rax.set_ylabel("Data/MC")
+    ax.set_ylabel("Events")
+
+    return fig
+
+
 @pytest.mark.mpl_image_compare(style="default", remove_text=True)
 def test_histplot_w2():
     fig, ax = plt.subplots()