Pandas 1.1 update (#115)

* wakey wakey github tests
* tests showing the issue lies with the weights of NestedSamples objects
* Tests pass locally
* Added credit to Lukas Hergt
* Updated badge for travis readme org -> com
* Changed samples.weight to be a numpy array
* Adjusted version number to be 2.0.0-beta
* Updated level_values(1) to weight
* Updated hashing function to be more intuitive
* Converted weight -> weights
* Removed defunct array_to_seed code
* remove FutureWarning for types alongside old types interface, adjust tests accordingly
* use index.repeat instead of np.repeat to keep MultiIndex structure
* add tests for WeightedDataFrame.hist and WeightedSeries.hist
* Closed plots for weighted series

Co-authored-by: Will Handley <[email protected]>
handley-lab · Jul 31, 2020 · c1202ce · c1202ce
1 parent 2351380
commit c1202ce
Show file tree

Hide file tree

Showing 10 changed files with 197 additions and 206 deletions.
diff --git a/README.rst b/README.rst
@@ -2,13 +2,13 @@
 anesthetic: nested sampling visualisation
 =========================================
 :anesthetic: nested sampling visualisation
-:Author: Will Handley
-:Version: 1.3.6
+:Author: Will Handley and Lukas Hergt
+:Version: 2.0.0-beta.1
 :Homepage: https://github.com/williamjameshandley/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 
-.. image:: https://travis-ci.org/williamjameshandley/anesthetic.svg?branch=master
-   :target: https://travis-ci.org/williamjameshandley/anesthetic
+.. image:: https://travis-ci.com/williamjameshandley/anesthetic.svg?branch=master
+   :target: https://travis-ci.com/williamjameshandley/anesthetic
    :alt: Build Status
 .. image:: https://circleci.com/gh/williamjameshandley/anesthetic.svg?style=svg
    :target: https://circleci.com/gh/williamjameshandley/anesthetic

diff --git a/anesthetic/convert.py b/anesthetic/convert.py
@@ -16,7 +16,7 @@ def to_getdist(nested_samples):
     """
     import getdist
     samples = nested_samples.values
-    weights = nested_samples.weight.values
+    weights = nested_samples.weights
     loglikes = -2*nested_samples.logL.values
     names = nested_samples.columns
     ranges = {name: nested_samples._limits(name) for name in names}

diff --git a/anesthetic/samples.py b/anesthetic/samples.py
@@ -39,7 +39,7 @@ class MCMCSamples(WeightedDataFrame):
     columns: list(str)
         reference names of parameters
 
-    weight: np.array
+    weights: np.array
         weights of samples.
 
     logL: np.array
@@ -78,12 +78,12 @@ def __init__(self, *args, **kwargs):
                                  "MCMCSamples and more. MCMCSamples should be "
                                  "used for MCMC chains only." % root)
             burn_in = kwargs.pop('burn_in', False)
-            weight, logL, samples = reader.samples(burn_in=burn_in)
+            weights, logL, samples = reader.samples(burn_in=burn_in)
             params, tex = reader.paramnames()
             columns = kwargs.pop('columns', params)
             limits = reader.limits()
             kwargs['label'] = kwargs.get('label', os.path.basename(root))
-            self.__init__(data=samples, columns=columns, weight=weight,
+            self.__init__(data=samples, columns=columns, weights=weights,
                           logL=logL, tex=tex, limits=limits, *args, **kwargs)
             self.root = root
         else:
@@ -101,10 +101,6 @@ def __init__(self, *args, **kwargs):
                 self['logL'] = logL
                 self.tex['logL'] = r'$\log\mathcal{L}$'
 
-            if self._weight is not None:
-                self['weight'] = self.weight
-                self.tex['weight'] = r'MCMC weight'
-
             self._set_automatic_limits()
 
     def _set_automatic_limits(self):
@@ -174,15 +170,15 @@ def plot(self, ax, paramname_x, paramname_y=None, *args, **kwargs):
                     if ncompress is None:
                         ncompress = 1000
                     return kde_plot_1d(ax, self[paramname_x],
-                                       weights=self.weight,
+                                       weights=self.weights,
                                        ncompress=ncompress,
                                        *args, **kwargs)
                 elif plot_type == 'fastkde':
                     x = self[paramname_x].compress(ncompress)
                     return fastkde_plot_1d(ax, x, *args, **kwargs)
                 elif plot_type == 'hist':
                     return hist_plot_1d(ax, self[paramname_x],
-                                        weights=self.weight,
+                                        weights=self.weights,
                                         *args, **kwargs)
                 elif plot_type == 'astropyhist':
                     x = self[paramname_x].compress(ncompress)
@@ -210,7 +206,7 @@ def plot(self, ax, paramname_x, paramname_y=None, *args, **kwargs):
                         ncompress = 1000
                     x = self[paramname_x]
                     y = self[paramname_y]
-                    return kde_contour_plot_2d(ax, x, y, weights=self.weight,
+                    return kde_contour_plot_2d(ax, x, y, weights=self.weights,
                                                ncompress=ncompress,
                                                *args, **kwargs)
                 elif plot_type == 'fastkde':
@@ -227,7 +223,7 @@ def plot(self, ax, paramname_x, paramname_y=None, *args, **kwargs):
                 elif plot_type == 'hist':
                     x = self[paramname_x]
                     y = self[paramname_y]
-                    return hist_plot_2d(ax, x, y, weights=self.weight,
+                    return hist_plot_2d(ax, x, y, weights=self.weights,
                                         *args, **kwargs)
                 else:
                     raise NotImplementedError("plot_type is '%s', but must be"
@@ -320,27 +316,6 @@ def plot_2d(self, axes, *args, **kwargs):
         """
         default_types = {'diagonal': 'kde', 'lower': 'kde', 'upper': 'scatter'}
         types = kwargs.pop('types', default_types)
-        diagonal = kwargs.pop('diagonal', True)
-        if isinstance(types, list) or isinstance(types, str):
-            from warnings import warn
-            warn("MCMCSamples.plot_2d's argument 'types' might stop accepting "
-                 "str or list(str) as input in the future. It takes a "
-                 "dictionary as input, now, with keys 'diagonal' for the 1D "
-                 "plots and 'lower' and 'upper' for the 2D plots. 'diagonal' "
-                 "accepts the values 'kde' or 'hist' and both 'lower' and "
-                 "'upper' accept the values 'kde' or 'scatter'. "
-                 "Default: {'diagonal': 'kde', 'lower': 'kde'}.",
-                 FutureWarning)
-
-            if isinstance(types, str):
-                types = {'lower': types}
-                if diagonal:
-                    types['diagonal'] = types['lower']
-            elif isinstance(types, list):
-                types = {'lower': types[0], 'upper': types[-1]}
-                if diagonal:
-                    types['diagonal'] = types['lower']
-
         local_kwargs = {pos: kwargs.pop('%s_kwargs' % pos, {})
                         for pos in default_types}
 
@@ -471,11 +446,7 @@ def beta(self, beta):
         self._beta = beta
         logw = self.dlogX() + np.where(self.logL == -np.inf, -np.inf,
                                        self.beta * self.logL)
-        self._weight = np.exp(logw - logw.max())
-
-        if self._weight is not None:
-            self['weight'] = self.weight
-            self.tex['weight'] = r'MCMC weight'
+        self.weights = np.exp(logw - logw.max())
 
     def set_beta(self, beta, inplace=False):
         """Change the inverse temperature.
@@ -593,9 +564,11 @@ def live_points(self, logL=None):
         """
         if logL is None:
             logL = self.logL_birth.max()
-        elif is_int(logL):
-            logL = self.logL[logL]
-
+        else:
+            try:
+                logL = float(self.logL[logL])
+            except KeyError:
+                pass
         return self[(self.logL > logL) & (self.logL_birth <= logL)]
 
     def posterior_points(self, beta=1):
@@ -635,9 +608,9 @@ def dlogX(self, nsamples=None):
 
         if nsamples is None:
             dlogX = np.squeeze(dlogX)
-            return WeightedSeries(dlogX, self.index, weight=self.weight)
+            return WeightedSeries(dlogX, self.index, weights=self.weights)
         else:
-            return WeightedDataFrame(dlogX, self.index, weight=self.weight)
+            return WeightedDataFrame(dlogX, self.index, weights=self.weights)
 
     def _compute_nlive(self, logL_birth):
         if is_int(logL_birth):

diff --git a/anesthetic/utils.py b/anesthetic/utils.py
@@ -5,6 +5,7 @@
 from scipy.interpolate import interp1d
 from scipy.stats import kstwobign
 from matplotlib.tri import Triangulation
+import contextlib
 
 
 def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False):
@@ -464,3 +465,14 @@ def insertion_p_value(indexes, nlive, batch=0):
         if ks_result["p-value"] == 0.:
             ks_result["p-value"] = p * n
         return ks_result
+
+
+@contextlib.contextmanager
+def temporary_seed(seed):
+    """Context for temporarily setting a numpy seed."""
+    state = np.random.get_state()
+    np.random.seed(seed)
+    try:
+        yield
+    finally:
+        np.random.set_state(state)
diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
@@ -2,31 +2,31 @@
 
 import numpy as np
 import pandas
-from warnings import warn
-from anesthetic.utils import compress_weights, channel_capacity, quantile
+from anesthetic.utils import (compress_weights, channel_capacity, quantile,
+                              temporary_seed)
 
 
 class _WeightedObject(object):
     @property
-    def weight(self):
+    def weights(self):
         """Sample weights."""
-        if self._weight is None:
+        if self.index.nlevels == 1:
             return pandas.Series(index=self.index, data=1.)
         else:
-            return self._weight[self.index]
+            return self.index.get_level_values('weights').to_numpy()
+
+    @weights.setter
+    def weights(self, weights):
+        if weights is not None:
+            self.index = [self.index.get_level_values(0), weights]
+            self.index.set_names(['#', 'weights'], inplace=True)
 
     @property
     def _rand(self):
         """Random number for consistent compression."""
-        return self._rand_[self.index]
-
-    def _construct_weights(self, weight):
-        if weight is not None:
-            self._weight = pandas.Series(index=self.index, data=weight)
-        else:
-            self._weight = None
-        rand = np.random.rand(len(self))
-        self._rand_ = pandas.Series(index=self.index, data=rand)
+        seed = pandas.util.hash_pandas_object(self.index).sum() % 2**32
+        with temporary_seed(seed):
+            return np.random.rand(len(self))
 
     def std(self):
         """Weighted standard deviation of the sampled distribution."""
@@ -38,39 +38,35 @@ def median(self):
 
     def neff(self):
         """Effective number of samples."""
-        return channel_capacity(self.weight)
+        return channel_capacity(self.weights)
 
 
 class WeightedSeries(_WeightedObject, pandas.Series):
     """Weighted version of pandas.Series."""
 
     def __init__(self, *args, **kwargs):
-        if 'w' in kwargs:
-            warn("'w' as a kwarg will be deprecated in the future. "
-                 "Please use 'weight'", FutureWarning)
-        weight = kwargs.pop('w', None)
-        weight = kwargs.pop('weight', weight)
+        weights = kwargs.pop('weights', None)
         super(WeightedSeries, self).__init__(*args, **kwargs)
-        self._construct_weights(weight)
+        self.weights = weights
 
     def mean(self):
         """Weighted mean of the sampled distribution."""
-        nonzero = self.weight != 0
-        return np.average(self[nonzero], weights=self.weight[nonzero])
+        nonzero = self.weights != 0
+        return np.average(self[nonzero], weights=self.weights[nonzero])
 
     def var(self):
         """Weighted variance of the sampled distribution."""
-        nonzero = self.weight != 0
+        nonzero = self.weights != 0
         return np.average((self[nonzero]-self.mean())**2,
-                          weights=self.weight[nonzero])
+                          weights=self.weights[nonzero])
 
     def quantile(self, q=0.5):
         """Weighted quantile of the sampled distribution."""
-        return quantile(self.values, q, self.weight.values)
+        return quantile(self.values, q, self.weights)
 
     def hist(self, *args, **kwargs):
         """Weighted histogram of the sampled distribution."""
-        return super(WeightedSeries, self).hist(weights=self.weight,
+        return super(WeightedSeries, self).hist(weights=self.weights,
                                                 *args, **kwargs)
 
     def compress(self, nsamples=None):
@@ -84,53 +80,43 @@ def compress(self, nsamples=None):
             compression). If <=0, then compress so that all weights are unity.
 
         """
-        i = compress_weights(self.weight, self._rand, nsamples)
+        i = compress_weights(self.weights, self._rand, nsamples)
         return self.repeat(i)
 
-    _metadata = ['_weight', '_rand_']
-
     @property
     def _constructor(self):
         return WeightedSeries
 
     @property
     def _constructor_expanddim(self):
-        def __constructor_expanddim(*args, **kwargs):
-            frame = WeightedDataFrame(*args, weight=self._weight, **kwargs)
-            frame._rand_ = self._rand_
-            return frame
-        return __constructor_expanddim
+        return WeightedDataFrame
 
 
 class WeightedDataFrame(_WeightedObject, pandas.DataFrame):
     """Weighted version of pandas.DataFrame."""
 
     def __init__(self, *args, **kwargs):
-        if 'w' in kwargs:
-            warn("'w' as a kwarg will be deprecated in the future. "
-                 "Please use 'weight'", FutureWarning)
-        weight = kwargs.pop('w', None)
-        weight = kwargs.pop('weight', weight)
+        weights = kwargs.pop('weights', None)
         super(WeightedDataFrame, self).__init__(*args, **kwargs)
-        self._construct_weights(weight)
+        self.weights = weights
 
     def mean(self):
         """Weighted mean of the sampled distribution."""
-        nonzero = self.weight != 0
-        mean = np.average(self[nonzero], weights=self.weight[nonzero], axis=0)
+        nonzero = self.weights != 0
+        mean = np.average(self[nonzero], weights=self.weights[nonzero], axis=0)
         return pandas.Series(mean, index=self.columns)
 
     def var(self):
         """Weighted variance of the sampled distribution."""
-        nonzero = self.weight != 0
+        nonzero = self.weights != 0
         var = np.average((self[nonzero]-self.mean())**2,
-                         weights=self.weight[nonzero], axis=0)
+                         weights=self.weights[nonzero], axis=0)
         return pandas.Series(var, index=self.columns)
 
     def cov(self):
         """Weighted covariance of the sampled distribution."""
-        nonzero = self.weight != 0
-        cov = np.cov(self[nonzero].T, aweights=self.weight[nonzero])
+        nonzero = self.weights != 0
+        cov = np.cov(self[nonzero].T, aweights=self.weights[nonzero])
         return pandas.DataFrame(cov, index=self.columns, columns=self.columns)
 
     def quantile(self, q=0.5):
@@ -143,7 +129,7 @@ def quantile(self, q=0.5):
 
     def hist(self, *args, **kwargs):
         """Weighted histogram of the sampled distribution."""
-        return super(WeightedDataFrame, self).hist(weights=self.weight,
+        return super(WeightedDataFrame, self).hist(weights=self.weights,
                                                    *args, **kwargs)
 
     def compress(self, nsamples=None):
@@ -157,24 +143,16 @@ def compress(self, nsamples=None):
             compression). If <=0, then compress so that all weights are unity.
 
         """
-        i = compress_weights(self.weight, self._rand, nsamples)
+        i = compress_weights(self.weights, self._rand, nsamples)
         data = np.repeat(self.values, i, axis=0)
-        index = np.repeat(self.index.values, i)
+        index = self.index.repeat(i)
         df = pandas.DataFrame(data=data, index=index, columns=self.columns)
-        if 'weight' in self:
-            return df.drop(columns='weight')
-        else:
-            return df
-
-    _metadata = ['_weight', '_rand_']
+        df.index = df.index.get_level_values('#')
+        return df
 
     @property
     def _constructor_sliced(self):
-        def __constructor_sliced(*args, **kwargs):
-            series = WeightedSeries(*args, weight=self._weight, **kwargs)
-            series._rand_ = self._rand_
-            return series
-        return __constructor_sliced
+        return WeightedSeries
 
     @property
     def _constructor(self):

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
 scipy>=1.2.0
 numpy
-pandas<=1.0.5
+pandas
 matplotlib>=3.1.2