Skip to content

Commit

Permalink
Pandas 1.1 update (#115)
Browse files Browse the repository at this point in the history
* wakey wakey github tests

* tests showing the issue lies with the weights of NestedSamples objects

* Tests pass locally

* Added credit to Lukas Hergt

* Updated the Travis badge in the README from travis-ci.org to travis-ci.com

* Changed samples.weight to be a numpy array

* Adjusted version number to be 2.0.0-beta

* Updated level_values(1) to weight

* Updated hashing function to be more intuitive

* Converted weight -> weights

* Removed defunct array_to_seed code

* Remove the FutureWarning for `types` together with the old str/list `types` interface, and adjust the tests accordingly

* use index.repeat instead of np.repeat to keep MultiIndex structure

* add tests for WeightedDataFrame.hist and WeightedSeries.hist

* Closed plots for weighted series

Co-authored-by: Will Handley <[email protected]>
  • Loading branch information
Lukas Hergt and williamjameshandley authored Jul 31, 2020
1 parent 2351380 commit c1202ce
Show file tree
Hide file tree
Showing 10 changed files with 197 additions and 206 deletions.
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
anesthetic: nested sampling visualisation
=========================================
:anesthetic: nested sampling visualisation
:Author: Will Handley
:Version: 1.3.6
:Author: Will Handley and Lukas Hergt
:Version: 2.0.0-beta.1
:Homepage: https://github.com/williamjameshandley/anesthetic
:Documentation: http://anesthetic.readthedocs.io/

.. image:: https://travis-ci.org/williamjameshandley/anesthetic.svg?branch=master
:target: https://travis-ci.org/williamjameshandley/anesthetic
.. image:: https://travis-ci.com/williamjameshandley/anesthetic.svg?branch=master
:target: https://travis-ci.com/williamjameshandley/anesthetic
:alt: Build Status
.. image:: https://circleci.com/gh/williamjameshandley/anesthetic.svg?style=svg
:target: https://circleci.com/gh/williamjameshandley/anesthetic
Expand Down
2 changes: 1 addition & 1 deletion anesthetic/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def to_getdist(nested_samples):
"""
import getdist
samples = nested_samples.values
weights = nested_samples.weight.values
weights = nested_samples.weights
loglikes = -2*nested_samples.logL.values
names = nested_samples.columns
ranges = {name: nested_samples._limits(name) for name in names}
Expand Down
57 changes: 15 additions & 42 deletions anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class MCMCSamples(WeightedDataFrame):
columns: list(str)
reference names of parameters
weight: np.array
weights: np.array
weights of samples.
logL: np.array
Expand Down Expand Up @@ -78,12 +78,12 @@ def __init__(self, *args, **kwargs):
"MCMCSamples and more. MCMCSamples should be "
"used for MCMC chains only." % root)
burn_in = kwargs.pop('burn_in', False)
weight, logL, samples = reader.samples(burn_in=burn_in)
weights, logL, samples = reader.samples(burn_in=burn_in)
params, tex = reader.paramnames()
columns = kwargs.pop('columns', params)
limits = reader.limits()
kwargs['label'] = kwargs.get('label', os.path.basename(root))
self.__init__(data=samples, columns=columns, weight=weight,
self.__init__(data=samples, columns=columns, weights=weights,
logL=logL, tex=tex, limits=limits, *args, **kwargs)
self.root = root
else:
Expand All @@ -101,10 +101,6 @@ def __init__(self, *args, **kwargs):
self['logL'] = logL
self.tex['logL'] = r'$\log\mathcal{L}$'

if self._weight is not None:
self['weight'] = self.weight
self.tex['weight'] = r'MCMC weight'

self._set_automatic_limits()

def _set_automatic_limits(self):
Expand Down Expand Up @@ -174,15 +170,15 @@ def plot(self, ax, paramname_x, paramname_y=None, *args, **kwargs):
if ncompress is None:
ncompress = 1000
return kde_plot_1d(ax, self[paramname_x],
weights=self.weight,
weights=self.weights,
ncompress=ncompress,
*args, **kwargs)
elif plot_type == 'fastkde':
x = self[paramname_x].compress(ncompress)
return fastkde_plot_1d(ax, x, *args, **kwargs)
elif plot_type == 'hist':
return hist_plot_1d(ax, self[paramname_x],
weights=self.weight,
weights=self.weights,
*args, **kwargs)
elif plot_type == 'astropyhist':
x = self[paramname_x].compress(ncompress)
Expand Down Expand Up @@ -210,7 +206,7 @@ def plot(self, ax, paramname_x, paramname_y=None, *args, **kwargs):
ncompress = 1000
x = self[paramname_x]
y = self[paramname_y]
return kde_contour_plot_2d(ax, x, y, weights=self.weight,
return kde_contour_plot_2d(ax, x, y, weights=self.weights,
ncompress=ncompress,
*args, **kwargs)
elif plot_type == 'fastkde':
Expand All @@ -227,7 +223,7 @@ def plot(self, ax, paramname_x, paramname_y=None, *args, **kwargs):
elif plot_type == 'hist':
x = self[paramname_x]
y = self[paramname_y]
return hist_plot_2d(ax, x, y, weights=self.weight,
return hist_plot_2d(ax, x, y, weights=self.weights,
*args, **kwargs)
else:
raise NotImplementedError("plot_type is '%s', but must be"
Expand Down Expand Up @@ -320,27 +316,6 @@ def plot_2d(self, axes, *args, **kwargs):
"""
default_types = {'diagonal': 'kde', 'lower': 'kde', 'upper': 'scatter'}
types = kwargs.pop('types', default_types)
diagonal = kwargs.pop('diagonal', True)
if isinstance(types, list) or isinstance(types, str):
from warnings import warn
warn("MCMCSamples.plot_2d's argument 'types' might stop accepting "
"str or list(str) as input in the future. It takes a "
"dictionary as input, now, with keys 'diagonal' for the 1D "
"plots and 'lower' and 'upper' for the 2D plots. 'diagonal' "
"accepts the values 'kde' or 'hist' and both 'lower' and "
"'upper' accept the values 'kde' or 'scatter'. "
"Default: {'diagonal': 'kde', 'lower': 'kde'}.",
FutureWarning)

if isinstance(types, str):
types = {'lower': types}
if diagonal:
types['diagonal'] = types['lower']
elif isinstance(types, list):
types = {'lower': types[0], 'upper': types[-1]}
if diagonal:
types['diagonal'] = types['lower']

local_kwargs = {pos: kwargs.pop('%s_kwargs' % pos, {})
for pos in default_types}

Expand Down Expand Up @@ -471,11 +446,7 @@ def beta(self, beta):
self._beta = beta
logw = self.dlogX() + np.where(self.logL == -np.inf, -np.inf,
self.beta * self.logL)
self._weight = np.exp(logw - logw.max())

if self._weight is not None:
self['weight'] = self.weight
self.tex['weight'] = r'MCMC weight'
self.weights = np.exp(logw - logw.max())

def set_beta(self, beta, inplace=False):
"""Change the inverse temperature.
Expand Down Expand Up @@ -593,9 +564,11 @@ def live_points(self, logL=None):
"""
if logL is None:
logL = self.logL_birth.max()
elif is_int(logL):
logL = self.logL[logL]

else:
try:
logL = float(self.logL[logL])
except KeyError:
pass
return self[(self.logL > logL) & (self.logL_birth <= logL)]

def posterior_points(self, beta=1):
Expand Down Expand Up @@ -635,9 +608,9 @@ def dlogX(self, nsamples=None):

if nsamples is None:
dlogX = np.squeeze(dlogX)
return WeightedSeries(dlogX, self.index, weight=self.weight)
return WeightedSeries(dlogX, self.index, weights=self.weights)
else:
return WeightedDataFrame(dlogX, self.index, weight=self.weight)
return WeightedDataFrame(dlogX, self.index, weights=self.weights)

def _compute_nlive(self, logL_birth):
if is_int(logL_birth):
Expand Down
12 changes: 12 additions & 0 deletions anesthetic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from scipy.interpolate import interp1d
from scipy.stats import kstwobign
from matplotlib.tri import Triangulation
import contextlib


def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False):
Expand Down Expand Up @@ -464,3 +465,14 @@ def insertion_p_value(indexes, nlive, batch=0):
if ks_result["p-value"] == 0.:
ks_result["p-value"] = p * n
return ks_result


@contextlib.contextmanager
def temporary_seed(seed):
    """Context for temporarily setting a numpy seed.

    On entry the state of numpy's global random number generator is saved
    and the generator is re-seeded with ``seed``.  On exit the saved state
    is put back, also when the body of the ``with`` block raises, so random
    draws made afterwards continue as if the block had never run.

    Parameters
    ----------
    seed:
        Passed unchanged to ``np.random.seed``.

    Yields
    ------
    None
    """
    # Snapshot first so the caller's random stream can be resumed later.
    state = np.random.get_state()
    np.random.seed(seed)
    try:
        yield
    finally:
        # Runs on normal exit and on exceptions alike.
        np.random.set_state(state)
100 changes: 39 additions & 61 deletions anesthetic/weighted_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,31 @@

import numpy as np
import pandas
from warnings import warn
from anesthetic.utils import compress_weights, channel_capacity, quantile
from anesthetic.utils import (compress_weights, channel_capacity, quantile,
temporary_seed)


class _WeightedObject(object):
@property
def weight(self):
def weights(self):
"""Sample weights."""
if self._weight is None:
if self.index.nlevels == 1:
return pandas.Series(index=self.index, data=1.)
else:
return self._weight[self.index]
return self.index.get_level_values('weights').to_numpy()

@weights.setter
def weights(self, weights):
if weights is not None:
self.index = [self.index.get_level_values(0), weights]
self.index.set_names(['#', 'weights'], inplace=True)

@property
def _rand(self):
"""Random number for consistent compression."""
return self._rand_[self.index]

def _construct_weights(self, weight):
if weight is not None:
self._weight = pandas.Series(index=self.index, data=weight)
else:
self._weight = None
rand = np.random.rand(len(self))
self._rand_ = pandas.Series(index=self.index, data=rand)
seed = pandas.util.hash_pandas_object(self.index).sum() % 2**32
with temporary_seed(seed):
return np.random.rand(len(self))

def std(self):
"""Weighted standard deviation of the sampled distribution."""
Expand All @@ -38,39 +38,35 @@ def median(self):

def neff(self):
"""Effective number of samples."""
return channel_capacity(self.weight)
return channel_capacity(self.weights)


class WeightedSeries(_WeightedObject, pandas.Series):
"""Weighted version of pandas.Series."""

def __init__(self, *args, **kwargs):
if 'w' in kwargs:
warn("'w' as a kwarg will be deprecated in the future. "
"Please use 'weight'", FutureWarning)
weight = kwargs.pop('w', None)
weight = kwargs.pop('weight', weight)
weights = kwargs.pop('weights', None)
super(WeightedSeries, self).__init__(*args, **kwargs)
self._construct_weights(weight)
self.weights = weights

def mean(self):
"""Weighted mean of the sampled distribution."""
nonzero = self.weight != 0
return np.average(self[nonzero], weights=self.weight[nonzero])
nonzero = self.weights != 0
return np.average(self[nonzero], weights=self.weights[nonzero])

def var(self):
"""Weighted variance of the sampled distribution."""
nonzero = self.weight != 0
nonzero = self.weights != 0
return np.average((self[nonzero]-self.mean())**2,
weights=self.weight[nonzero])
weights=self.weights[nonzero])

def quantile(self, q=0.5):
"""Weighted quantile of the sampled distribution."""
return quantile(self.values, q, self.weight.values)
return quantile(self.values, q, self.weights)

def hist(self, *args, **kwargs):
"""Weighted histogram of the sampled distribution."""
return super(WeightedSeries, self).hist(weights=self.weight,
return super(WeightedSeries, self).hist(weights=self.weights,
*args, **kwargs)

def compress(self, nsamples=None):
Expand All @@ -84,53 +80,43 @@ def compress(self, nsamples=None):
compression). If <=0, then compress so that all weights are unity.
"""
i = compress_weights(self.weight, self._rand, nsamples)
i = compress_weights(self.weights, self._rand, nsamples)
return self.repeat(i)

_metadata = ['_weight', '_rand_']

@property
def _constructor(self):
return WeightedSeries

@property
def _constructor_expanddim(self):
def __constructor_expanddim(*args, **kwargs):
frame = WeightedDataFrame(*args, weight=self._weight, **kwargs)
frame._rand_ = self._rand_
return frame
return __constructor_expanddim
return WeightedDataFrame


class WeightedDataFrame(_WeightedObject, pandas.DataFrame):
"""Weighted version of pandas.DataFrame."""

def __init__(self, *args, **kwargs):
if 'w' in kwargs:
warn("'w' as a kwarg will be deprecated in the future. "
"Please use 'weight'", FutureWarning)
weight = kwargs.pop('w', None)
weight = kwargs.pop('weight', weight)
weights = kwargs.pop('weights', None)
super(WeightedDataFrame, self).__init__(*args, **kwargs)
self._construct_weights(weight)
self.weights = weights

def mean(self):
"""Weighted mean of the sampled distribution."""
nonzero = self.weight != 0
mean = np.average(self[nonzero], weights=self.weight[nonzero], axis=0)
nonzero = self.weights != 0
mean = np.average(self[nonzero], weights=self.weights[nonzero], axis=0)
return pandas.Series(mean, index=self.columns)

def var(self):
"""Weighted variance of the sampled distribution."""
nonzero = self.weight != 0
nonzero = self.weights != 0
var = np.average((self[nonzero]-self.mean())**2,
weights=self.weight[nonzero], axis=0)
weights=self.weights[nonzero], axis=0)
return pandas.Series(var, index=self.columns)

def cov(self):
"""Weighted covariance of the sampled distribution."""
nonzero = self.weight != 0
cov = np.cov(self[nonzero].T, aweights=self.weight[nonzero])
nonzero = self.weights != 0
cov = np.cov(self[nonzero].T, aweights=self.weights[nonzero])
return pandas.DataFrame(cov, index=self.columns, columns=self.columns)

def quantile(self, q=0.5):
Expand All @@ -143,7 +129,7 @@ def quantile(self, q=0.5):

def hist(self, *args, **kwargs):
"""Weighted histogram of the sampled distribution."""
return super(WeightedDataFrame, self).hist(weights=self.weight,
return super(WeightedDataFrame, self).hist(weights=self.weights,
*args, **kwargs)

def compress(self, nsamples=None):
Expand All @@ -157,24 +143,16 @@ def compress(self, nsamples=None):
compression). If <=0, then compress so that all weights are unity.
"""
i = compress_weights(self.weight, self._rand, nsamples)
i = compress_weights(self.weights, self._rand, nsamples)
data = np.repeat(self.values, i, axis=0)
index = np.repeat(self.index.values, i)
index = self.index.repeat(i)
df = pandas.DataFrame(data=data, index=index, columns=self.columns)
if 'weight' in self:
return df.drop(columns='weight')
else:
return df

_metadata = ['_weight', '_rand_']
df.index = df.index.get_level_values('#')
return df

@property
def _constructor_sliced(self):
def __constructor_sliced(*args, **kwargs):
series = WeightedSeries(*args, weight=self._weight, **kwargs)
series._rand_ = self._rand_
return series
return __constructor_sliced
return WeightedSeries

@property
def _constructor(self):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
scipy>=1.2.0
numpy
pandas<=1.0.5
pandas
matplotlib>=3.1.2
Loading

0 comments on commit c1202ce

Please sign in to comment.