diff --git a/CHANGELOG.md b/CHANGELOG.md index 97dded7a7..06f98bd8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 3.5.x + +- Detect and fix incomplete last lines when resuming or minimizing from existing runs (#378) +- Added functions module and refactored some numerical functions into it + ## 3.5.4 - Allow classes to have both yaml and class attributes as long as no duplicate keys diff --git a/cobaya/collection.py b/cobaya/collection.py index 8da770d6c..e0c5d11ca 100644 --- a/cobaya/collection.py +++ b/cobaya/collection.py @@ -96,7 +96,7 @@ def compute_temperature(logpost, logprior, loglike, check=True, extra_tolerance= """ Returns the temperature of a sample. - If ``check=True`` and the log-probabilites passed are arrays, checks consistency + If ``check=True`` and the log-probabilities passed are arrays, checks consistency of the sample temperature, and raises ``AssertionError`` if inconsistent. """ temp = (logprior + loglike) / logpost @@ -286,8 +286,7 @@ def __init__(self, model, output=None, cache_size=_default_cache_size, name=None self.reset() # If loaded, check sample weights, consistent logp sums, # and temperature (ignores the given one) - samples_loaded = len(self) > 0 - if samples_loaded: + if len(self) > 0: try: try: self.temperature = self._check_logps(extra_tolerance=False) @@ -499,8 +498,9 @@ def _check_logps(self, temperature_only=False, extra_tolerance=False): check=True, extra_tolerance=extra_tolerance) except AssertionError as excpt: raise LoggedError( - self.log, "The sample seems to have an inconsistent temperature.") \ - from excpt + self.log, "The sample seems to have an inconsistent temperature. " + "This could be due to input file truncation on the last line " + "due to crash/being killed before complete.") from excpt if not temperature_only: tols = { "rtol": 1e-4 * (10 if extra_tolerance else 1), diff --git a/cobaya/functions.py b/cobaya/functions.py index ea97837d5..791478d8e 100644 --- a/cobaya/functions.py +++ b/cobaya/functions.py @@ -1,5 +1,6 @@ import numpy as np import logging +import scipy try: import numba @@ -13,9 +14,6 @@ random_SO_N = special_ortho_group.rvs - _fast_chi_squared = None - _sym_chi_squared = None - else: import warnings @@ -65,25 +63,29 @@ def _rvs(dim, xx, H): H[:, :] = (D * H.T).T - @numba.njit(parallel=True) - def _fast_chi_squared(c_inv, delta): - """ - Calculate chi-squared from inverse matrix and delta vector, - using symmetry. Note parallel is slower for small sizes. - - """ - n = delta.shape[0] - chi2 = 0.0 - - for j in numba.prange(n): - z_temp = np.dot(c_inv[j, j + 1:], delta[j + 1:]) - chi2 += (2 * z_temp + c_inv[j, j] * delta[j]) * delta[j] - - return chi2 - - def chi_squared(c_inv, delta): - if len(delta) < 1500 or not _fast_chi_squared: + """ + Compute chi squared, i.e. delta.T @ c_inv @ delta + + :param c_inv: symmetric positive definite inverse covariance matrix + :param delta: 1D array + :return: delta.T @ c_inv @ delta + """ + if len(delta) < 1500: return c_inv.dot(delta).dot(delta) else: - return _fast_chi_squared(c_inv, delta) + # use symmetry + return scipy.linalg.blas.dsymv(alpha=1.0, + a=c_inv if np.isfortran(c_inv) else c_inv.T, + x=delta, lower=0).dot(delta) + + +def inverse_cholesky(cov): + """ + Get inverse of Cholesky decomposition + + :param cov: symmetric positive definite matrix + :return: L^{-1} where cov = L L^T + """ + cholesky = np.linalg.cholesky(cov) + return scipy.linalg.lapack.dtrtri(cholesky, lower=True)[0] diff --git a/cobaya/likelihoods/base_classes/DataSetLikelihood.py b/cobaya/likelihoods/base_classes/DataSetLikelihood.py index cac4db05b..86ca331c8 100644 --- a/cobaya/likelihoods/base_classes/DataSetLikelihood.py +++ b/cobaya/likelihoods/base_classes/DataSetLikelihood.py @@ -14,8 +14,6 @@ from cobaya.typing import InfoDict from cobaya.conventions import packages_path_input from .InstallableLikelihood import InstallableLikelihood -# import _fast_chi_square for backwards compatibility -from .InstallableLikelihood import ChiSquaredFunctionLoader as _fast_chi_square # noqa class DataSetLikelihood(InstallableLikelihood): diff --git a/cobaya/likelihoods/base_classes/InstallableLikelihood.py b/cobaya/likelihoods/base_classes/InstallableLikelihood.py index 6834e1016..42e3a7af4 100644 --- a/cobaya/likelihoods/base_classes/InstallableLikelihood.py +++ b/cobaya/likelihoods/base_classes/InstallableLikelihood.py @@ -17,28 +17,7 @@ from cobaya.install import _version_filename from cobaya.component import ComponentNotInstalledError from cobaya.tools import VersionCheckError, resolve_packages_path - - -class ChiSquaredFunctionLoader: - - def __init__(self, method_name='_fast_chi_squared'): - """ - On first use will replace method_name with direct reference to chi2 function - :param method_name: name of the property that chi_squared() is assigned to - """ - self._method_name = method_name - - def __get__(self, instance, owner): - # delay testing active camb until run time - from cobaya.functions import numba, chi_squared - camb_chi_squared = None - if numba is None: - try: - from camb.mathutils import chi_squared as camb_chi_squared - except ImportError: - pass - setattr(instance, self._method_name, camb_chi_squared or chi_squared) - return chi_squared +from cobaya.functions import chi_squared class InstallableLikelihood(Likelihood): @@ -50,7 +29,7 @@ class InstallableLikelihood(Likelihood): # fast convenience function, to get chi-squared (exploiting symmetry); can call # self._fast_chi_squared(cov_inv, delta) - _fast_chi_squared = ChiSquaredFunctionLoader() + _fast_chi_squared = staticmethod(chi_squared) def __init__(self, *args, **kwargs): # Ensure check for install and version errors diff --git a/cobaya/likelihoods/base_classes/__init__.py b/cobaya/likelihoods/base_classes/__init__.py index 6955b94fd..dec992bf6 100644 --- a/cobaya/likelihoods/base_classes/__init__.py +++ b/cobaya/likelihoods/base_classes/__init__.py @@ -1,6 +1,6 @@ from .InstallableLikelihood import InstallableLikelihood from .bao import BAO -from .DataSetLikelihood import DataSetLikelihood, _fast_chi_square +from .DataSetLikelihood import DataSetLikelihood from .cmblikes import CMBlikes, make_forecast_cmb_dataset from .des import DES from .H0 import H0 diff --git a/cobaya/likelihoods/gaussian_mixture/gaussian_mixture.py b/cobaya/likelihoods/gaussian_mixture/gaussian_mixture.py index 385300bf1..a4ab4457f 100644 --- a/cobaya/likelihoods/gaussian_mixture/gaussian_mixture.py +++ b/cobaya/likelihoods/gaussian_mixture/gaussian_mixture.py @@ -16,6 +16,7 @@ from cobaya.log import LoggedError from cobaya.mpi import share_mpi, is_main_process from cobaya.typing import InputDict, Union, Sequence +from cobaya.functions import inverse_cholesky derived_suffix = "_derived" @@ -111,7 +112,7 @@ def initialize_with_params(self): self.weights = 1 / len(self.gaussians) # Prepare the transformation(s) for the derived parameters - self.inv_choleskyL = [np.linalg.inv(np.linalg.cholesky(cov)) for cov in self.covs] + self.inv_choleskyL = [inverse_cholesky(cov) for cov in self.covs] def logp(self, **params_values): """ diff --git a/cobaya/samplers/mcmc/mcmc.py b/cobaya/samplers/mcmc/mcmc.py index 00b8ecca8..7f7d51c79 100644 --- a/cobaya/samplers/mcmc/mcmc.py +++ b/cobaya/samplers/mcmc/mcmc.py @@ -24,6 +24,7 @@ from cobaya.samplers.mcmc.proposal import BlockedProposer from cobaya.log import LoggedError, always_stop_exceptions from cobaya.tools import get_external_function, NumberWithUnits, load_DataFrame +from cobaya.functions import inverse_cholesky from cobaya.yaml import yaml_dump_file from cobaya.model import LogPosterior from cobaya import mpi @@ -724,7 +725,7 @@ def check_convergence_and_learn_proposal(self): converged_means = False # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L try: - L = np.linalg.cholesky(norm_mean_of_covs) + Linv = inverse_cholesky(norm_mean_of_covs) except np.linalg.LinAlgError: self.log.warning( "Negative covariance eigenvectors. " @@ -732,7 +733,6 @@ def check_convergence_and_learn_proposal(self): "contain enough information at this point. " "Skipping learning a new covmat for now.") else: - Linv = np.linalg.inv(L) try: eigvals = np.linalg.eigvalsh(Linv.dot(corr_of_means).dot(Linv.T)) success_means = True diff --git a/cobaya/samplers/mcmc/proposal.py b/cobaya/samplers/mcmc/proposal.py index 093597e0f..715ff2052 100644 --- a/cobaya/samplers/mcmc/proposal.py +++ b/cobaya/samplers/mcmc/proposal.py @@ -20,7 +20,7 @@ import numpy as np from cobaya.log import LoggedError, HasLogger -from cobaya.tools import choleskyL +from cobaya.tools import choleskyL_corr from cobaya.functions import random_SO_N @@ -217,7 +217,7 @@ def set_covariance(self, propose_matrix): "symmetric square matrix.") self.propose_matrix = propose_matrix.copy() propose_matrix_j_sorted = self.propose_matrix[np.ix_(self.i_of_j, self.i_of_j)] - sigmas_diag, L = choleskyL(propose_matrix_j_sorted, return_scale_free=True) + sigmas_diag, L = choleskyL_corr(propose_matrix_j_sorted) # Store the basis as transformation matrices self.transform = [] for j_start, bp in zip(self.j_start, self.proposer): diff --git a/cobaya/tools.py b/cobaya/tools.py index 9cb21ead7..7858a1091 100644 --- a/cobaya/tools.py +++ b/cobaya/tools.py @@ -457,6 +457,19 @@ def read_dnumber(n: Any, dim: int): return NumberWithUnits(n, "d", dtype=int, scale=dim).value +def truncate_to_end_line(file_name): + with open(file_name, "r+b") as inp: + # Find the last complete line + inp.seek(0, 2) # Go to the end of the file + pos = inp.tell() - 1 + while pos > 0 and inp.read(1) != b"\n": + pos -= 1 + inp.seek(pos, 0) + if pos > 0: + inp.seek(pos + 1, 0) + inp.truncate() + + def load_DataFrame(file_name, skip=0, root_file_name=None): """ Loads a `pandas.DataFrame` from a text file @@ -493,7 +506,17 @@ def load_DataFrame(file_name, skip=0, root_file_name=None): inp, sep=" ", header=None, names=cols, comment="#", skipinitialspace=True, skiprows=skip, index_col=False) - return data + if not data.empty: + # Check if the last row contains any NaNs + if data.iloc[-1].isna().any(): + log.warning("Last row of %s is incomplete or contains NaNs", file_name) + # If the second-to-last row exists and doesn't contain NaNs, + # delete the last row assuming this was due to crash on write + if len(data) > 1 and not data.iloc[-2].isna().any(): + data = data.iloc[:-1] + log.info(f"Saving {file_name} deleting last (in)complete line") + truncate_to_end_line(file_name) + return data def prepare_comment(comment): @@ -610,14 +633,14 @@ def fast_logpdf(x): def _KL_norm(m1, S1, m2, S2): - """Performs the Guassian KL computation, without input testing.""" + """Performs the Gaussian KL computation, without input testing.""" dim = S1.shape[0] S2inv = np.linalg.inv(S2) return 0.5 * (np.trace(S2inv.dot(S1)) + (m1 - m2).dot(S2inv).dot(m1 - m2) - - dim + np.log(np.linalg.det(S2) / np.linalg.det(S1))) + dim + np.linalg.slogdet(S2)[1] - np.linalg.slogdet(S1)[1]) -def KL_norm(m1=None, S1=np.array([]), m2=None, S2=np.array([]), symmetric=False): +def KL_norm(m1=None, S1=(), m2=None, S2=(), symmetric=False): """Kullback-Leibler divergence between 2 gaussians.""" S1, S2 = [np.atleast_2d(S) for S in [S1, S2]] assert S1.shape[0], "Must give at least S1" @@ -634,37 +657,34 @@ def KL_norm(m1=None, S1=np.array([]), m2=None, S2=np.array([]), symmetric=False) return _KL_norm(m1, S1, m2, S2) -def choleskyL(M, return_scale_free=False): +def choleskyL_corr(M): r""" Gets the Cholesky lower triangular matrix :math:`L` (defined as :math:`M=LL^T`) - of a given matrix ``M``. + for the matrix ``M``, in the form :math:`L = S L^\prime` where S is diagonal. Can be used to create an affine transformation that decorrelates a sample - :math:`x=\{x_i\}` as :math:`y=Lx`, where :math:`L` is extracted from the - covariance of the sample. + :math:`x=\{x_i\}` with covariance M, as :math:`x=Ly`, + where :math:`L` is extracted from M and y has identity covariance. - If ``return_scale_free=True`` (default: ``False``), returns a tuple of - a matrix :math:`S` containing the square roots of the diagonal of the input matrix - (the standard deviations, if a covariance is given), and the scale-free - :math:`L^\prime=S^{-1}L`. + Returns a tuple of a matrix :math:`S` containing the square roots of the diagonal + of the input matrix (the standard deviations, if a covariance is given), + and the scale-free :math:`L^\prime=S^{-1}L`. + (could just use Cholesky directly for proposal) """ std_diag, corr = cov_to_std_and_corr(M) - Lprime = np.linalg.cholesky(corr) - if return_scale_free: - return std_diag, Lprime - else: - return np.linalg.inv(std_diag).dot(Lprime) + return np.diag(std_diag), np.linalg.cholesky(corr) def cov_to_std_and_corr(cov): """ - Gets the standard deviations (as a diagonal matrix) + Gets the standard deviations (as a 1D array and the correlation matrix of a covariance matrix. """ - std_diag = np.diag(np.sqrt(np.diag(cov))) - invstd_diag = np.linalg.inv(std_diag) - corr = invstd_diag.dot(cov).dot(invstd_diag) - return std_diag, corr + std = np.sqrt(np.diag(cov)) + inv_std = 1 / std + corr = inv_std[:, np.newaxis] * cov * inv_std[np.newaxis, :] + np.fill_diagonal(corr, 1.0) + return std, corr def are_different_params_lists(list_A, list_B, name_A="A", name_B="B"):