From 92e66ad3041fb38e45192c3ae8f96dee35ab8703 Mon Sep 17 00:00:00 2001
From: John Vouvakis Manousakis
Date: Fri, 1 Mar 2024 05:14:10 -0800
Subject: [PATCH 01/48] Split `model.py` into subcomponents.

- This commit separates the objects defined in `model.py` and places them
  into dedicated files. We utilize `__init__.py` to maintain the same
  import syntax. `model.py` is removed.
- Tests and linting are passing.
- This commit may have interfered with the `autosummary` directive for the
  documentation, and we will revisit this later.
---
 .pylintrc                      |    2 +-
 pelicun/model.py               | 3998 --------------------------------
 pelicun/model/__init__.py      |   49 +
 pelicun/model/asset_model.py   |  436 ++++
 pelicun/model/damage_model.py  | 1590 +++++++++++++
 pelicun/model/demand_model.py  |  851 +++++++
 pelicun/model/loss_model.py    | 1129 +++++++++
 pelicun/model/pelicun_model.py |  227 ++
 pelicun/tests/test_model.py    |    4 +-
 9 files changed, 4285 insertions(+), 4001 deletions(-)
 delete mode 100644 pelicun/model.py
 create mode 100644 pelicun/model/__init__.py
 create mode 100644 pelicun/model/asset_model.py
 create mode 100644 pelicun/model/damage_model.py
 create mode 100644 pelicun/model/demand_model.py
 create mode 100644 pelicun/model/loss_model.py
 create mode 100644 pelicun/model/pelicun_model.py

diff --git a/.pylintrc b/.pylintrc
index 75c9aedcf..bc8ac23d4 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -152,7 +152,7 @@ notes=FIXME,XXX,TODO
 [SIMILARITIES]
 
 # Minimum lines number of a similarity.
-min-similarity-lines=6
+min-similarity-lines=8
 
 # Ignore comments when computing similarities.
 ignore-comments=yes
diff --git a/pelicun/model.py b/pelicun/model.py
deleted file mode 100644
index 22655a8e4..000000000
--- a/pelicun/model.py
+++ /dev/null
@@ -1,3998 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2018 Leland Stanford Junior University
-# Copyright (c) 2018 The Regents of the University of California
-#
-# This file is part of pelicun.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#
-# You should have received a copy of the BSD 3-Clause License along with
-# pelicun. If not, see <http://www.opensource.org/licenses/>.
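
For orientation, the "same import syntax" mentioned in the commit message is preserved by re-exporting the relocated classes from the new package's `__init__.py`. The sketch below illustrates that pattern; the module and class names are inferred from the file list and the autosummary further down, and the actual contents of `pelicun/model/__init__.py` are not reproduced in this excerpt.

    # pelicun/model/__init__.py (illustrative sketch, not the actual file)
    from pelicun.model.pelicun_model import PelicunModel
    from pelicun.model.demand_model import DemandModel
    from pelicun.model.asset_model import AssetModel
    from pelicun.model.damage_model import DamageModel
    from pelicun.model.loss_model import LossModel, BldgRepairModel

    # downstream code keeps working unchanged, e.g.
    # from pelicun.model import DemandModel

Relative imports (`from .demand_model import DemandModel`) would achieve the same effect.
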
-# -# Contributors: -# Adam Zsarnóczay -# John Vouvakis Manousakis - -""" -This module has classes and methods that define and access the model used for -loss assessment. - -.. rubric:: Contents - -.. autosummary:: - - prep_constant_median_DV - prep_bounded_multilinear_median_DV - - DemandModel - AssetModel - DamageModel - LossModel - BldgRepairModel - -""" - -from itertools import product -from copy import deepcopy -import numpy as np -import pandas as pd -from . import base -from . import uq -from . import file_io - - -idx = base.idx - - -class PelicunModel: - """ - Generic model class to manage methods shared between all models in Pelicun. - - """ - - def __init__(self, assessment): - - # link the PelicunModel object to its Assessment object - self._asmnt = assessment - - # link logging methods as attributes enabling more - # concise syntax - self.log_msg = self._asmnt.log.msg - self.log_div = self._asmnt.log.div - - def convert_marginal_params(self, marginal_params, units, arg_units=None): - """ - Converts the parameters of marginal distributions in a model to SI units. - - Parameters - ---------- - marginal_params: DataFrame - Each row corresponds to a marginal distribution with Theta - parameters and TruncateLower, TruncateUpper truncation limits - identified in separate columns. - units: Series - Identifies the input units of each marginal. The index shall be - identical to the index of the marginal_params argument. The values - are strings that correspond to the units listed in base.py. - arg_units: Series - Identifies the size of a reference entity for the marginal - parameters. For example, when the parameters refer to a component - repair cost, the reference size is the component block size the - repair cost corresponds to. When the parameters refer to a capacity, - demand, or component quantity, the reference size can be omitted - and the default value will ensure that the corresponding scaling is - skipped. This Series provides the units of the reference entities - for each component. Use '1 EA' if you want to skip such scaling for - select components but provide arg units for others. - - Returns - ------- - marginal_params: DataFrame - Same structure as the input DataFrame but with values scaled to - represent internal Standard International units. 
- - """ - assert np.all(marginal_params.index == units.index) - if arg_units is not None: - assert np.all( - marginal_params.index == arg_units.index) - - # preserve the columns in the input marginal_params - original_cols = marginal_params.columns - - # add extra columns if they are not available in the marginals - for col_name in ('Family', - 'Theta_0', 'Theta_1', 'Theta_2', - 'TruncateLower', 'TruncateUpper'): - if col_name not in marginal_params.columns: - - marginal_params[col_name] = np.nan - - # get a list of unique units - unique_units = units.unique() - - # for each unit - for unit_name in unique_units: - - # get the scale factor for converting from the source unit - unit_factor = self._asmnt.calc_unit_scale_factor(unit_name) - - # get the variables that use the given unit - unit_ids = marginal_params.loc[units == unit_name].index - - # for each variable - for row_id in unit_ids: - - # pull the parameters of the marginal distribution - family = marginal_params.at[row_id, 'Family'] - - if family == 'empirical': - continue - - # load the theta values - theta = marginal_params.loc[ - row_id, ['Theta_0', 'Theta_1', 'Theta_2']].values - - # for each theta - args = [] - for t_i, theta_i in enumerate(theta): - - # if theta_i evaluates to NaN, it is considered undefined - if pd.isna(theta_i): - args.append([]) - continue - - try: - # if theta is a scalar, just store it - theta[t_i] = float(theta_i) - args.append([]) - - except ValueError: - - # otherwise, we assume it is a string using SimCenter - # array notation to identify coordinates of a - # multilinear function - values = [val.split(',') for val in theta_i.split('|')] - - # the first set of values defines the ordinates that - # need to be passed to the distribution scaling method - theta[t_i] = np.array(values[0], dtype=float) - - # the second set of values defines the abscissae that - # we will use after the distribution scaling - args.append(np.array(values[1], dtype=float)) - - # load the truncation limits - tr_limits = marginal_params.loc[ - row_id, ['TruncateLower', 'TruncateUpper']] - - arg_unit_factor = 1.0 - - # check if there is a need to scale due to argument units - if not (arg_units is None): - - # get the argument unit for the given marginal - arg_unit = arg_units.get(row_id) - - if arg_unit != '1 EA': - - # get the scale factor - arg_unit_factor = self._asmnt.calc_unit_scale_factor( - arg_unit - ) - - # scale arguments, if needed - for a_i, arg in enumerate(args): - - if isinstance(arg, np.ndarray): - args[a_i] = arg * arg_unit_factor - - # convert the distribution parameters to SI - theta, tr_limits = uq.scale_distribution( - unit_factor / arg_unit_factor, family, theta, tr_limits) - - # convert multilinear function parameters back into strings - for a_i, arg in enumerate(args): - - if len(arg) > 0: - - theta[a_i] = '|'.join( - [','.join([f'{val:g}' for val in vals]) - for vals in (theta[a_i], args[a_i])]) - - # and update the values in the DF - marginal_params.loc[ - row_id, ['Theta_0', 'Theta_1', 'Theta_2']] = theta - - marginal_params.loc[ - row_id, ['TruncateLower', 'TruncateUpper']] = tr_limits - - # remove the added columns - marginal_params = marginal_params[original_cols] - - return marginal_params - - -class DemandModel(PelicunModel): - """ - Manages demand information used in assessments. - - Parameters - ---------- - marginal_params: DataFrame - Available after the model has been calibrated or calibration data has - been imported. Defines the marginal distribution of each demand - variable. 
- correlation: DataFrame - Available after the model has been calibrated or calibration data has - been imported. Defines the correlation between the demand variables in - standard normal space. That is, the variables are sampled in standard - normal space and then transformed into the space of their respective - distributions and the correlation matrix corresponds to the space where - they are sampled. - empirical_data: DataFrame - Available after the model has been calibrated or calibration data has - been imported. It provides an empirical dataset for the demand - variables that are modeled with an empirical distribution. - sample: DataFrame - Available after a sample has been generated. Demand variables are - listed in columns and each row provides an independent realization of - the joint demand distribution. - units: Series - Available after any demand data has been loaded. The index identifies - the demand variables and the values provide the unit for each variable. - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self.marginal_params = None - self.correlation = None - self.empirical_data = None - self.units = None - - self._RVs = None - self.sample = None - - def save_sample(self, filepath=None, save_units=False): - """ - Save demand sample to a csv file or return it in a DataFrame - - """ - - self.log_div() - if filepath is not None: - self.log_msg('Saving demand sample...') - - res = file_io.save_to_csv( - self.sample, filepath, units=self.units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=(filepath is not None), - log=self._asmnt.log) - - if filepath is not None: - self.log_msg('Demand sample successfully saved.', - prepend_timestamp=False) - return None - - # else: - units = res.loc["Units"] - res.drop("Units", inplace=True) - - if save_units: - return res.astype(float), units - - # else: - return res.astype(float) - - def load_sample(self, filepath): - """ - Load demand sample data and parse it. - - Besides parsing the sample, the method also reads and saves the units - specified for each demand variable. If no units are specified, Standard - Units are assumed. - - Parameters - ---------- - filepath: string or DataFrame - Location of the file with the demand sample. - - """ - - def parse_header(raw_header): - - old_MI = raw_header - - # The first number (event_ID) in the demand labels is optional and - # currently not used. We remove it if it was in the raw data. 
- if old_MI.nlevels == 4: - - if self._asmnt.log.verbose: - self.log_msg('Removing event_ID from header...', - prepend_timestamp=False) - - new_column_index_array = np.array( - [old_MI.get_level_values(i) for i in range(1, 4)]) - - else: - new_column_index_array = np.array( - [old_MI.get_level_values(i) for i in range(3)]) - - # Remove whitespace to avoid ambiguity - - if self._asmnt.log.verbose: - self.log_msg('Removing whitespace from header...', - prepend_timestamp=False) - - wspace_remove = np.vectorize(lambda name: str(name).replace(' ', '')) - - new_column_index = wspace_remove(new_column_index_array) - - # Creating new, cleaned-up header - - new_MI = pd.MultiIndex.from_arrays( - new_column_index, names=['type', 'loc', 'dir']) - - return new_MI - - self.log_div() - self.log_msg('Loading demand data...') - - demand_data, units = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - return_units=True, log=self._asmnt.log) - - parsed_data = demand_data.copy() - - # start with cleaning up the header - - parsed_data.columns = parse_header(parsed_data.columns) - - # Remove errors, if needed - if 'ERROR' in parsed_data.columns.get_level_values(0): - - self.log_msg('Removing errors from the raw data...', - prepend_timestamp=False) - - error_list = parsed_data.loc[:, idx['ERROR', :, :]].values.astype(bool) - - parsed_data = parsed_data.loc[~error_list, :].copy() - parsed_data.drop('ERROR', level=0, axis=1, inplace=True) - - self.log_msg("\nBased on the values in the ERROR column, " - f"{np.sum(error_list)} demand samples were removed.\n", - prepend_timestamp=False) - - self.sample = parsed_data - - self.log_msg('Demand data successfully parsed.', prepend_timestamp=False) - - # parse the index for the units - units.index = parse_header(units.index) - - self.units = units - - self.log_msg('Demand units successfully parsed.', prepend_timestamp=False) - - def estimate_RID(self, demands, params, method='FEMA P58'): - """ - Estimate residual drift realizations based on other demands - - Parameters - ---------- - demands: DataFrame - Sample of demands required for the method to estimate the RID values - params: dict - Parameters required for the method to estimate the RID values - method: {'FEMA P58'}, default: 'FEMA P58' - Method to use for the estimation - currently, only one is available. - """ - - if method == 'FEMA P58': - - # method is described in FEMA P-58 Volume 1 Section 5.4 & Appendix C - - # the provided demands shall be PID values at various loc-dir pairs - PID = demands - - # there's only one parameter needed: the yield drift - yield_drift = params['yield_drift'] - - # three subdomains of demands are identified - small = PID < yield_drift - medium = PID < 4 * yield_drift - large = PID >= 4 * yield_drift - - # convert PID to RID in each subdomain - RID = PID.copy() - RID[large] = PID[large] - 3 * yield_drift - RID[medium] = 0.3 * (PID[medium] - yield_drift) - RID[small] = 0. 
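
The vectorized block above implements the FEMA P-58 conversion from peak interstory drift (PID) to residual interstory drift (RID). A scalar restatement, with a function name of our own choosing rather than anything defined in pelicun, reads:

    def fema_p58_rid(pid, yield_drift):
        # mirrors the three subdomains used in estimate_RID() above
        if pid < yield_drift:
            return 0.0
        if pid < 4.0 * yield_drift:
            return 0.3 * (pid - yield_drift)
        return pid - 3.0 * yield_drift

The code that follows adds lognormal dispersion (a normal epsilon with scale 0.2 in log space) to the nonzero values and finally caps each RID at the corresponding PID.
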
- - # add extra uncertainty to nonzero values - rng = self._asmnt.options.rng - eps = rng.normal(scale=0.2, size=RID.shape) - RID[RID > 0] = np.exp(np.log(RID[RID > 0]) + eps) - - # finally, make sure the RID values are never larger than the PIDs - RID = pd.DataFrame( - np.minimum(PID.values, RID.values), - columns=pd.DataFrame( - 1, index=['RID', ], - columns=PID.columns).stack(level=[0, 1]).index, - index=PID.index) - - else: - RID = None - - # return the generated drift realizations - return RID - - def calibrate_model(self, config): - """ - Calibrate a demand model to describe the raw demand data - - The raw data shall be parsed first to ensure that it follows the - schema expected by this method. The calibration settings define the - characteristics of the multivariate distribution that is fit to the - raw data. - - Parameters - ---------- - config: dict - A dictionary, typically read from a json file, that specifies the - distribution family, truncation and censoring limits, and other - settings for the calibration. - - """ - - def parse_settings(settings, demand_type): - - def parse_str_to_float(in_str, context_string): - - try: - out_float = float(in_str) - - except ValueError: - - self.log_msg(f"WARNING: Could not parse {in_str} provided as " - f"{context_string}. Using NaN instead.", - prepend_timestamp=False) - - out_float = np.nan - - return out_float - - active_d_types = ( - demand_sample.columns.get_level_values('type').unique()) - - if demand_type == 'ALL': - cols = tuple(active_d_types) - - else: - cols_lst = [] - - for d_type in active_d_types: - if d_type.split('_')[0] == demand_type: - cols_lst.append(d_type) - - cols = tuple(cols_lst) - - # load the distribution family - cal_df.loc[idx[cols, :, :], 'Family'] = settings['DistributionFamily'] - - # load limits - for lim in ('CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper'): - - if lim in settings.keys(): - val = parse_str_to_float(settings[lim], lim) - if not pd.isna(val): - cal_df.loc[idx[cols, :, :], lim] = val - - # scale the censor and truncation limits, if needed - scale_factor = self._asmnt.scale_factor(settings.get('Unit', None)) - - rows_to_scale = ['CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper'] - cal_df.loc[idx[cols, :, :], rows_to_scale] *= scale_factor - - # load the prescribed additional uncertainty - if 'AddUncertainty' in settings.keys(): - - sig_increase = parse_str_to_float(settings['AddUncertainty'], - 'AddUncertainty') - - # scale the sig value if the target distribution family is normal - if settings['DistributionFamily'] == 'normal': - sig_increase *= scale_factor - - cal_df.loc[idx[cols, :, :], 'SigIncrease'] = sig_increase - - def get_filter_mask(lower_lims, upper_lims): - - demands_of_interest = demand_sample.iloc[:, pd.notna(upper_lims)] - limits_of_interest = upper_lims[pd.notna(upper_lims)] - upper_mask = np.all(demands_of_interest < limits_of_interest, - axis=1) - - demands_of_interest = demand_sample.iloc[:, pd.notna(lower_lims)] - limits_of_interest = lower_lims[pd.notna(lower_lims)] - lower_mask = np.all(demands_of_interest > limits_of_interest, - axis=1) - - return np.all([lower_mask, upper_mask], axis=0) - - self.log_div() - self.log_msg('Calibrating demand model...') - - demand_sample = self.sample - - # initialize a DataFrame that contains calibration information - cal_df = pd.DataFrame( - columns=['Family', - 'CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper', - 'SigIncrease', 'Theta_0', 'Theta_1'], - index=demand_sample.columns, - 
dtype=float - ) - - cal_df['Family'] = cal_df['Family'].astype(str) - - # start by assigning the default option ('ALL') to every demand column - parse_settings(config['ALL'], 'ALL') - - # then parse the additional settings and make the necessary adjustments - for demand_type in config.keys(): - if demand_type != 'ALL': - parse_settings(config[demand_type], demand_type) - - if self._asmnt.log.verbose: - self.log_msg( - "\nCalibration settings successfully parsed:\n" + str(cal_df), - prepend_timestamp=False) - else: - self.log_msg( - "\nCalibration settings successfully parsed:\n", - prepend_timestamp=False) - - # save the settings - model_params = cal_df.copy() - - # Remove the samples outside of censoring limits - # Currently, non-empirical demands are assumed to have some level of - # correlation, hence, a censored value in any demand triggers the - # removal of the entire sample from the population. - upper_lims = cal_df.loc[:, 'CensorUpper'].values - lower_lims = cal_df.loc[:, 'CensorLower'].values - - if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): - - censor_mask = get_filter_mask(lower_lims, upper_lims) - censored_count = np.sum(~censor_mask) - - demand_sample = demand_sample.loc[censor_mask, :] - - self.log_msg("\nBased on the provided censoring limits, " - f"{censored_count} samples were censored.", - prepend_timestamp=False) - else: - censored_count = 0 - - # Check if there is any sample outside of truncation limits - # If yes, that suggests an error either in the samples or the - # configuration. We handle such errors gracefully: the analysis is not - # terminated, but we show an error in the log file. - upper_lims = cal_df.loc[:, 'TruncateUpper'].values - lower_lims = cal_df.loc[:, 'TruncateLower'].values - - if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): - - truncate_mask = get_filter_mask(lower_lims, upper_lims) - truncated_count = np.sum(~truncate_mask) - - if truncated_count > 0: - - demand_sample = demand_sample.loc[truncate_mask, :] - - self.log_msg("\nBased on the provided truncation limits, " - f"{truncated_count} samples were removed before demand " - "calibration.", - prepend_timestamp=False) - - # Separate and save the demands that are kept empirical -> i.e., no - # fitting. Currently, empirical demands are decoupled from those that - # have a distribution fit to their samples. The correlation between - # empirical and other demands is not preserved in the demand model. 
- empirical_edps = [] - for edp in cal_df.index: - if cal_df.loc[edp, 'Family'] == 'empirical': - empirical_edps.append(edp) - - self.empirical_data = demand_sample.loc[:, empirical_edps].copy() - - # remove the empirical demands from the samples used for calibration - demand_sample = demand_sample.drop(empirical_edps, axis=1) - - # and the calibration settings - cal_df = cal_df.drop(empirical_edps, axis=0) - - if self._asmnt.log.verbose: - self.log_msg(f"\nDemand data used for calibration:\n{demand_sample}", - prepend_timestamp=False) - - # fit the joint distribution - self.log_msg("\nFitting the prescribed joint demand distribution...", - prepend_timestamp=False) - - demand_theta, demand_rho = uq.fit_distribution_to_sample( - raw_samples=demand_sample.values.T, - distribution=cal_df.loc[:, 'Family'].values, - censored_count=censored_count, - detection_limits=cal_df.loc[ - :, ['CensorLower', 'CensorUpper']].values, - truncation_limits=cal_df.loc[ - :, ['TruncateLower', 'TruncateUpper']].values, - multi_fit=False, - logger_object=self._asmnt.log - ) - # fit the joint distribution - self.log_msg("\nCalibration successful, processing results...", - prepend_timestamp=False) - - # save the calibration results - model_params.loc[cal_df.index, ['Theta_0', 'Theta_1']] = demand_theta - - # increase the variance of the marginal distributions, if needed - if ~np.all(pd.isna(model_params.loc[:, 'SigIncrease'].values)): - - self.log_msg("\nIncreasing demand variance...", - prepend_timestamp=False) - - sig_inc = np.nan_to_num(model_params.loc[:, 'SigIncrease'].values) - sig_0 = model_params.loc[:, 'Theta_1'].values - - model_params.loc[:, 'Theta_1'] = ( - np.sqrt(sig_0 ** 2. + sig_inc ** 2.)) - - # remove unneeded fields from model_params - for col in ('SigIncrease', 'CensorLower', 'CensorUpper'): - model_params = model_params.drop(col, axis=1) - - # reorder the remaining fields for clarity - model_params = model_params[[ - 'Family', 'Theta_0', 'Theta_1', 'TruncateLower', 'TruncateUpper']] - - self.marginal_params = model_params - - self.log_msg("\nCalibrated demand model marginal distributions:\n" - + str(model_params), - prepend_timestamp=False) - - # save the correlation matrix - self.correlation = pd.DataFrame(demand_rho, - columns=cal_df.index, - index=cal_df.index) - - self.log_msg("\nCalibrated demand model correlation matrix:\n" - + str(self.correlation), - prepend_timestamp=False) - - def save_model(self, file_prefix): - """ - Save parameters of the demand model to a set of csv files - - """ - - self.log_div() - self.log_msg('Saving demand model...') - - # save the correlation and empirical data - file_io.save_to_csv(self.correlation, file_prefix + '_correlation.csv') - file_io.save_to_csv( - self.empirical_data, - file_prefix + '_empirical.csv', - units=self.units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - log=self._asmnt.log, - ) - - # the log standard deviations in the marginal parameters need to be - # scaled up before feeding to the saving method where they will be - # scaled back down and end up being saved unscaled to the target file - - marginal_params = self.marginal_params.copy() - - log_rows = marginal_params['Family'] == 'lognormal' - log_demands = marginal_params.loc[log_rows, :] - - for label in log_demands.index: - - if label in self.units.index: - - unit_factor = self._asmnt.calc_unit_scale_factor(self.units[label]) - - marginal_params.loc[label, 'Theta_1'] *= unit_factor - - file_io.save_to_csv( - marginal_params, - file_prefix + '_marginals.csv', - 
units=self.units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - orientation=1, - log=self._asmnt.log, - ) - - self.log_msg('Demand model successfully saved.', prepend_timestamp=False) - - def load_model(self, data_source): - """ - Load the model that describes demands on the asset. - - Parameters - ---------- - data_source: string or dict - If string, the data_source is a file prefix ( in the - following description) that identifies the following files: - _marginals.csv, _empirical.csv, - _correlation.csv. If dict, the data source is a dictionary - with the following optional keys: 'marginals', 'empirical', and - 'correlation'. The value under each key shall be a DataFrame. - """ - - self.log_div() - self.log_msg('Loading demand model...') - - # prepare the marginal data source variable to load the data - if isinstance(data_source, dict): - marginal_data_source = data_source.get('marginals') - empirical_data_source = data_source.get('empirical', None) - correlation_data_source = data_source.get('correlation', None) - else: - marginal_data_source = data_source + '_marginals.csv' - empirical_data_source = data_source + '_empirical.csv' - correlation_data_source = data_source + '_correlation.csv' - - if empirical_data_source is not None: - self.empirical_data = file_io.load_data( - empirical_data_source, - self._asmnt.unit_conversion_factors, - log=self._asmnt.log, - ) - if not self.empirical_data.empty: - self.empirical_data.columns.set_names( - ['type', 'loc', 'dir'], inplace=True - ) - else: - self.empirical_data = None - else: - self.empirical_data = None - - if correlation_data_source is not None: - self.correlation = file_io.load_data( - correlation_data_source, - self._asmnt.unit_conversion_factors, - reindex=False, log=self._asmnt.log) - self.correlation.index.set_names(['type', 'loc', 'dir'], inplace=True) - self.correlation.columns.set_names(['type', 'loc', 'dir'], inplace=True) - else: - self.correlation = None - - # the log standard deviations in the marginal parameters need to be - # adjusted after getting the data from the loading method where they - # were scaled according to the units of the corresponding variable - - # Note that a data source without marginal information is not valid - marginal_params, units = file_io.load_data( - marginal_data_source, - None, - orientation=1, - reindex=False, - return_units=True, - log=self._asmnt.log, - ) - marginal_params.index.set_names(['type', 'loc', 'dir'], inplace=True) - - marginal_params = self.convert_marginal_params(marginal_params.copy(), - units) - - self.marginal_params = marginal_params - self.units = units - - self.log_msg('Demand model successfully loaded.', prepend_timestamp=False) - - def _create_RVs(self, preserve_order=False): - """ - Create a random variable registry for the joint distribution of demands. 
- - """ - - # initialize the registry - RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - - # add a random variable for each demand variable - for rv_params in self.marginal_params.itertuples(): - - edp = rv_params.Index - rv_tag = f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' - family = getattr(rv_params, "Family", np.nan) - - if family == 'empirical': - - if preserve_order: - dist_family = 'coupled_empirical' - else: - dist_family = 'empirical' - - # empirical RVs need the data points - RV_reg.add_RV(uq.RandomVariable( - name=rv_tag, - distribution=dist_family, - raw_samples=self.empirical_data.loc[:, edp].values - )) - - else: - - # all other RVs need parameters of their distributions - RV_reg.add_RV(uq.RandomVariable( - name=rv_tag, - distribution=family, - theta=[getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3)], - truncation_limits=[ - getattr(rv_params, f"Truncate{side}", np.nan) - for side in ("Lower", "Upper")], - - - )) - - self.log_msg(f"\n{self.marginal_params.shape[0]} random variables created.", - prepend_timestamp=False) - - # add an RV set to consider the correlation between demands, if needed - if self.correlation is not None: - rv_set_tags = [f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' - for edp in self.correlation.index.values] - - RV_reg.add_RV_set(uq.RandomVariableSet( - 'EDP_set', list(RV_reg.RVs(rv_set_tags).values()), - self.correlation.values)) - - self.log_msg( - f"\nCorrelations between {len(rv_set_tags)} random variables " - "successfully defined.", - prepend_timestamp=False) - - self._RVs = RV_reg - - def clone_demands(self, demand_cloning): - """ - Clones demands. This means copying over columns of the - original demand sample and assigning given names to them. The - columns to be copied over and the names to assign to the - copies are defined as the keys and values of the - `demand_cloning` dictionary, respectively. - The method modifies `sample` inplace. - - Parameters - ---------- - demand_cloning: dict - Keys correspond to the columns of the original sample to - be copied over and the values correspond to the intended - names for the copies. Caution: It's possible to define a - dictionary with duplicate keys, and Python will just keep - the last entry without warning. Users need to be careful - enough to avoid duplicate keys, because we can't validate - them. - E.g.: x = {'1': 1.00, '1': 2.00} results in x={'1': 2.00}. - - Raises - ------ - ValueError - In multiple instances of invalid demand_cloning entries. - - """ - - # it's impossible to have duplicate keys, because - # demand_cloning is a dictionary. - new_columns_list = demand_cloning.values() - # The following prevents duplicate entries in the values - # corresponding to a single cloned demand (1), but - # also the same column being specified as the cloned - # entry of multiple demands (2). - # e.g. - # (1): {'PGV-0-1': ['PGV-1-1', 'PGV-1-1', ...]} - # (2): {'PGV-0-1': ['PGV-1-1', ...], 'PGV-0-2': ['PGV-1-1', ...]} - flat_list = [] - for new_columns in new_columns_list: - flat_list.extend(new_columns) - if len(set(flat_list)) != len(flat_list): - raise ValueError( - 'Duplicate entries in demand cloning ' - 'configuration.' 
- ) - - # turn the config entries to tuples - def turn_to_tuples(demand_cloning): - demand_cloning_tuples = {} - for key, values in demand_cloning.items(): - demand_cloning_tuples[tuple(key.split('-'))] = [ - tuple(x.split('-')) for x in values - ] - return demand_cloning_tuples - - demand_cloning = turn_to_tuples(demand_cloning) - - # The demand cloning confuguration should not include - # columns that are not present in the orignal sample. - warn_columns = [] - for column in demand_cloning: - if column not in self.sample.columns: - warn_columns.append(column) - if warn_columns: - warn_columns = ['-'.join(x) for x in warn_columns] - self.log_msg( - "\nWARNING: The demand cloning configuration lists " - "columns that are not present in the original demand sample's " - f"columns: {warn_columns}.\n", - prepend_timestamp=False, - ) - - # we iterate over the existing columns of the sample and try - # to locate columns that need to be copied as required by the - # demand cloning configuration. If a column does not need - # to be cloned it is left as is. Otherwise, we keep track - # of its initial index location (in `column_index`) and the - # number of times it needs to be replicated, along with the - # new names of its copies (in `column_values`). - column_index = [] - column_values = [] - for i, column in enumerate(self.sample.columns): - if column not in demand_cloning: - column_index.append(i) - column_values.append(column) - else: - new_column_values = demand_cloning[column] - column_index.extend([i] * len(new_column_values)) - column_values.extend(new_column_values) - # copy the columns - self.sample = self.sample.iloc[:, column_index] - # update the column index - self.sample.columns = pd.MultiIndex.from_tuples(column_values) - - def generate_sample(self, config): - """ - Generates an RV sample with the specified configuration. - """ - - if self.marginal_params is None: - raise ValueError('Model parameters have not been specified. Either' - 'load parameters from a file or calibrate the ' - 'model using raw demand data.') - - self.log_div() - self.log_msg('Generating sample from demand variables...') - - self._create_RVs( - preserve_order=config.get('PreserveRawOrder', False)) - - sample_size = config['SampleSize'] - self._RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) - - # replace the potentially existing raw sample with the generated one - assert self._RVs is not None - assert self._RVs.RV_sample is not None - sample = pd.DataFrame(self._RVs.RV_sample) - sample.sort_index(axis=0, inplace=True) - sample.sort_index(axis=1, inplace=True) - - sample = base.convert_to_MultiIndex(sample, axis=1)['EDP'] - - sample.columns.names = ['type', 'loc', 'dir'] - self.sample = sample - - if config.get('DemandCloning', False): - self.clone_demands(config['DemandCloning']) - - self.log_msg(f"\nSuccessfully generated {sample_size} realizations.", - prepend_timestamp=False) - - -class AssetModel(PelicunModel): - """ - Manages asset information used in assessments. - - Parameters - ---------- - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self.cmp_marginal_params = None - self.cmp_units = None - - self._cmp_RVs = None - self._cmp_sample = None - - @property - def cmp_sample(self): - """ - Assigns the _cmp_sample attribute if it is None and returns - the component sample. 
- """ - - if self._cmp_sample is None: - - cmp_sample = pd.DataFrame(self._cmp_RVs.RV_sample) - cmp_sample.sort_index(axis=0, inplace=True) - cmp_sample.sort_index(axis=1, inplace=True) - - cmp_sample = base.convert_to_MultiIndex(cmp_sample, axis=1)['CMP'] - - cmp_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - - self._cmp_sample = cmp_sample - - else: - cmp_sample = self._cmp_sample - - return cmp_sample - - def save_cmp_sample(self, filepath=None, save_units=False): - """ - Save component quantity sample to a csv file - - """ - - self.log_div() - if filepath is not None: - self.log_msg('Saving asset components sample...') - - # prepare a units array - sample = self.cmp_sample - - units = pd.Series(name='Units', index=sample.columns, dtype=object) - - for cmp_id, unit_name in self.cmp_units.items(): - units.loc[cmp_id, :] = unit_name - - res = file_io.save_to_csv( - sample, filepath, units=units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=(filepath is not None), - log=self._asmnt.log) - - if filepath is not None: - self.log_msg('Asset components sample successfully saved.', - prepend_timestamp=False) - return None - # else: - units = res.loc["Units"] - res.drop("Units", inplace=True) - - if save_units: - return res.astype(float), units - - return res.astype(float) - - def load_cmp_sample(self, filepath): - """ - Load component quantity sample from a csv file - - """ - - self.log_div() - self.log_msg('Loading asset components sample...') - - sample, units = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - return_units=True, log=self._asmnt.log) - - sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - - self._cmp_sample = sample - - self.cmp_units = units.groupby(level=0).first() - - self.log_msg('Asset components sample successfully loaded.', - prepend_timestamp=False) - - def load_cmp_model(self, data_source): - """ - Load the model that describes component quantities in the asset. - - Parameters - ---------- - data_source: string or dict - If string, the data_source is a file prefix ( in the - following description) that identifies the following files: - _marginals.csv, _empirical.csv, - _correlation.csv. If dict, the data source is a dictionary - with the following optional keys: 'marginals', 'empirical', and - 'correlation'. The value under each key shall be a DataFrame. 
- """ - - def get_locations(loc_str): - - try: - res = str(int(loc_str)) - return np.array([res, ]) - - except ValueError as exc: - - stories = self._asmnt.stories - - if "--" in loc_str: - s_low, s_high = loc_str.split('--') - s_low = get_locations(s_low) - s_high = get_locations(s_high) - return np.arange(int(s_low[0]), int(s_high[0]) + 1).astype(str) - - if "," in loc_str: - return np.array(loc_str.split(','), dtype=int).astype(str) - - if loc_str == "all": - return np.arange(1, stories + 1).astype(str) - - if loc_str == "top": - return np.array([stories, ]).astype(str) - - if loc_str == "roof": - return np.array([stories + 1, ]).astype(str) - - raise ValueError(f"Cannot parse location string: " - f"{loc_str}") from exc - - def get_directions(dir_str): - - if pd.isnull(dir_str): - return np.ones(1).astype(str) - - # else: - try: - res = str(int(dir_str)) - return np.array([res, ]) - - except ValueError as exc: - - if "," in dir_str: - return np.array(dir_str.split(','), dtype=int).astype(str) - - if "--" in dir_str: - d_low, d_high = dir_str.split('--') - d_low = get_directions(d_low) - d_high = get_directions(d_high) - return np.arange( - int(d_low[0]), int(d_high[0]) + 1).astype(str) - - # else: - raise ValueError(f"Cannot parse direction string: " - f"{dir_str}") from exc - - def get_attribute(attribute_str, dtype=float, default=np.nan): - - if pd.isnull(attribute_str): - return default - - # else: - - try: - - res = dtype(attribute_str) - return res - - except ValueError as exc: - - if "," in attribute_str: - # a list of weights - w = np.array(attribute_str.split(','), dtype=float) - - # return a normalized vector - return w / np.sum(w) - - # else: - raise ValueError(f"Cannot parse Blocks string: " - f"{attribute_str}") from exc - - self.log_div() - self.log_msg('Loading component model...') - - # Currently, we assume independent component distributions are defined - # throughout the building. Correlations may be added afterward or this - # method can be extended to read correlation matrices too if needed. 
- - # prepare the marginal data source variable to load the data - if isinstance(data_source, dict): - marginal_data_source = data_source['marginals'] - else: - marginal_data_source = data_source + '_marginals.csv' - - marginal_params, units = file_io.load_data( - marginal_data_source, - None, - orientation=1, - reindex=False, - return_units=True, - log=self._asmnt.log, - ) - - # group units by cmp id to avoid redundant entries - self.cmp_units = units.copy().groupby(level=0).first() - - marginal_params = pd.concat([marginal_params, units], axis=1) - - cmp_marginal_param_dct = { - 'Family': [], 'Theta_0': [], 'Theta_1': [], 'Theta_2': [], - 'TruncateLower': [], 'TruncateUpper': [], 'Blocks': [], - 'Units': [] - } - index_list = [] - for row in marginal_params.itertuples(): - locs = get_locations(row.Location) - dirs = get_directions(row.Direction) - indices = list(product((row.Index, ), locs, dirs)) - num_vals = len(indices) - for col, cmp_marginal_param in cmp_marginal_param_dct.items(): - if col == 'Blocks': - cmp_marginal_param.extend( - [ - get_attribute( - getattr(row, 'Blocks', np.nan), - dtype=int, - default=1.0, - ) - ] - * num_vals - ) - elif col == 'Units': - cmp_marginal_param.extend( - [self.cmp_units[row.Index]] * num_vals - ) - elif col == 'Family': - cmp_marginal_param.extend( - [getattr(row, col, np.nan)] * num_vals - ) - else: - cmp_marginal_param.extend( - [get_attribute(getattr(row, col, np.nan))] * num_vals - ) - index_list.extend(indices) - index = pd.MultiIndex.from_tuples(index_list, names=['cmp', 'loc', 'dir']) - dtypes = { - 'Family': object, 'Theta_0': float, 'Theta_1': float, - 'Theta_2': float, 'TruncateLower': float, - 'TruncateUpper': float, 'Blocks': int, 'Units': object - } - cmp_marginal_param_series = [] - for col, cmp_marginal_param in cmp_marginal_param_dct.items(): - cmp_marginal_param_series.append( - pd.Series( - cmp_marginal_param, - dtype=dtypes[col], name=col, index=index)) - - cmp_marginal_params = pd.concat( - cmp_marginal_param_series, axis=1 - ) - - assert not cmp_marginal_params['Theta_0'].isnull().values.any() - - cmp_marginal_params.dropna(axis=1, how='all', inplace=True) - - self.log_msg("Model parameters successfully parsed. " - f"{cmp_marginal_params.shape[0]} performance groups identified", - prepend_timestamp=False) - - # Now we can take care of converting the values to base units - self.log_msg("Converting model parameters to internal units...", - prepend_timestamp=False) - - # ensure that the index has unique entries by introducing an - # internal component uid - base.dedupe_index(cmp_marginal_params) - - cmp_marginal_params = self.convert_marginal_params( - cmp_marginal_params, cmp_marginal_params['Units'] - ) - - self.cmp_marginal_params = cmp_marginal_params.drop('Units', axis=1) - - self.log_msg("Model parameters successfully loaded.", - prepend_timestamp=False) - - self.log_msg("\nComponent model marginal distributions:\n" - + str(cmp_marginal_params), - prepend_timestamp=False) - - # the empirical data and correlation files can be added later, if needed - - def _create_cmp_RVs(self): - """ - Defines the RVs used for sampling component quantities. 
- """ - - # initialize the registry - RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - - # add a random variable for each component quantity variable - for rv_params in self.cmp_marginal_params.itertuples(): - - cmp = rv_params.Index - - # create a random variable and add it to the registry - RV_reg.add_RV(uq.RandomVariable( - name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', - distribution=getattr(rv_params, "Family", np.nan), - theta=[getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3)], - truncation_limits=[getattr(rv_params, f"Truncate{side}", np.nan) - for side in ("Lower", "Upper")], - )) - - self.log_msg(f"\n{self.cmp_marginal_params.shape[0]} " - "random variables created.", - prepend_timestamp=False) - - self._cmp_RVs = RV_reg - - def generate_cmp_sample(self, sample_size=None): - """ - Generates component quantity realizations. If a sample_size - is not specified, the sample size found in the demand model is - used. - """ - - if self.cmp_marginal_params is None: - raise ValueError('Model parameters have not been specified. Load' - 'parameters from a file before generating a ' - 'sample.') - - self.log_div() - self.log_msg('Generating sample from component quantity variables...') - - if sample_size is None: - if self._asmnt.demand.sample is None: - raise ValueError( - 'Sample size was not specified, ' - 'and it cannot be determined from ' - 'the demand model.') - sample_size = self._asmnt.demand.sample.shape[0] - - self._create_cmp_RVs() - - self._cmp_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) - - # replace the potentially existing sample with the generated one - self._cmp_sample = None - - self.log_msg(f"\nSuccessfully generated {sample_size} realizations.", - prepend_timestamp=False) - - -class DamageModel(PelicunModel): - """ - Manages damage information used in assessments. - - This class contains the following methods: - - - save_sample() - - load_sample() - - load_damage_model() - - calculate() - - _get_pg_batches() - - _generate_dmg_sample() - - _create_dmg_rvs() - - _get_required_demand_type() - - _assemble_required_demand_data() - - _evaluate_damage_state() - - _prepare_dmg_quantities() - - _perform_dmg_task() - - _apply_dmg_funcitons() - - Parameters - ---------- - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self.damage_params = None - self.sample = None - - def save_sample(self, filepath=None, save_units=False): - """ - Save damage sample to a csv file - - """ - self.log_div() - self.log_msg('Saving damage sample...') - - cmp_units = self._asmnt.asset.cmp_units - qnt_units = pd.Series(index=self.sample.columns, name='Units', - dtype='object') - for cmp in cmp_units.index: - qnt_units.loc[cmp] = cmp_units.loc[cmp] - - res = file_io.save_to_csv( - self.sample, filepath, - units=qnt_units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=(filepath is not None), - log=self._asmnt.log) - - if filepath is not None: - self.log_msg('Damage sample successfully saved.', - prepend_timestamp=False) - return None - - # else: - units = res.loc["Units"] - res.drop("Units", inplace=True) - res.index = res.index.astype('int64') - - if save_units: - return res.astype(float), units - - return res.astype(float) - - def load_sample(self, filepath): - """ - Load damage state sample data. 
- - """ - self.log_div() - self.log_msg('Loading damage sample...') - - self.sample = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - log=self._asmnt.log) - - # set the names of the columns - self.sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] - - self.log_msg('Damage sample successfully loaded.', - prepend_timestamp=False) - - def load_damage_model(self, data_paths): - """ - Load limit state damage model parameters and damage state assignments - - Parameters - ---------- - data_paths: list of string - List of paths to data files with damage model information. Default - XY datasets can be accessed as PelicunDefault/XY. - """ - - self.log_div() - self.log_msg('Loading damage model...') - - # replace default flag with default data path - for d_i, data_path in enumerate(data_paths): - - if 'PelicunDefault/' in data_path: - data_paths[d_i] = data_path.replace( - 'PelicunDefault/', - f'{base.pelicun_path}/resources/SimCenterDBDL/', - ) - - data_list = [] - # load the data files one by one - for data_path in data_paths: - - data = file_io.load_data( - data_path, None, orientation=1, reindex=False, log=self._asmnt.log - ) - - data_list.append(data) - - damage_params = pd.concat(data_list, axis=0) - - # drop redefinitions of components - damage_params = damage_params.groupby(damage_params.index).first() - - # get the component types defined in the asset model - cmp_labels = self._asmnt.asset.cmp_sample.columns - - # only keep the damage model parameters for the components in the model - cmp_unique = cmp_labels.unique(level=0) - cmp_mask = damage_params.index.isin(cmp_unique, level=0) - - damage_params = damage_params.loc[cmp_mask, :] - - if np.sum(cmp_mask) != len(cmp_unique): - - cmp_list = cmp_unique[ - np.isin(cmp_unique, damage_params.index.values, - invert=True)].to_list() - - self.log_msg("\nWARNING: The damage model does not provide " - "vulnerability information for the following component(s) " - f"in the asset model: {cmp_list}.\n", - prepend_timestamp=False) - - # TODO: load defaults for Demand-Offset and Demand-Directional - - # Now convert model parameters to base units - for LS_i in damage_params.columns.unique(level=0): - if LS_i.startswith('LS'): - - damage_params.loc[:, LS_i] = self.convert_marginal_params( - damage_params.loc[:, LS_i].copy(), - damage_params[('Demand', 'Unit')], - ).values - - # check for components with incomplete damage model information - cmp_incomplete_list = damage_params.loc[ - damage_params[('Incomplete', '')] == 1].index - - damage_params.drop(cmp_incomplete_list, inplace=True) - - if len(cmp_incomplete_list) > 0: - self.log_msg(f"\nWARNING: Damage model information is incomplete for " - f"the following component(s) {cmp_incomplete_list}. They " - f"were removed from the analysis.\n", - prepend_timestamp=False) - - self.damage_params = damage_params - - self.log_msg("Damage model parameters successfully parsed.", - prepend_timestamp=False) - - def _handle_operation(self, initial_value, operation, other_value): - """ - This method is used in `_create_dmg_RVs` to apply capacity - adjustment operations whenever required. It is defined as a - safer alternative to directly using `eval`. 
- - Parameters - ---------- - initial_value: float - Value before operation - operation: str - Any of +, -, *, / - other_value: float - Value used to apply the operation - - Returns - ------- - result: float - The result of the operation - - """ - if operation == '+': - return initial_value + other_value - if operation == '-': - return initial_value - other_value - if operation == '*': - return initial_value * other_value - if operation == '/': - return initial_value / other_value - raise ValueError(f'Invalid operation: {operation}') - - def _create_dmg_RVs(self, PGB, scaling_specification=None): - """ - Creates random variables required later for the damage calculation. - - The method initializes two random variable registries, - capacity_RV_reg and lsds_RV_reg, and loops through each - performance group in the input performance group block (PGB) - dataframe. For each performance group, it retrieves the - component sample and blocks and checks if the limit state is - defined for the component. If the limit state is defined, the - method gets the list of limit states and the parameters for - each limit state. The method assigns correlation between limit - state random variables, adds the limit state random variables - to the capacity_RV_reg registry, and adds LSDS assignments to - the lsds_RV_reg registry. After looping through all - performance groups, the method returns the two registries. - - Parameters - ---------- - PGB : DataFrame - A DataFrame that groups performance groups into batches - for efficient damage assessment. - scaling_specification: dict, optional - A dictionary defining the shift in median. - Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} - The keys are individual components that should be present - in the `capacity_sample`. The values should be strings - containing an operation followed by the value formatted as - a float. The operation can be '+' for addition, '-' for - subtraction, '*' for multiplication, and '/' for division. - - """ - - def assign_lsds(ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag): - """ - Prepare random variables to handle mutually exclusive damage states. - - """ - - # If the limit state has a single damage state assigned - # to it, we don't need random sampling - if pd.isnull(ds_weights): - - ds_id += 1 - - lsds_RV_reg.add_RV( - uq.RandomVariable( - name=lsds_rv_tag, - distribution='deterministic', - theta=ds_id, - ) - ) - - # Otherwise, we create a multinomial random variable - else: - - # parse the DS weights - ds_weights = np.array( - ds_weights.replace(" ", "").split('|'), dtype=float - ) - - def map_ds(values, offset=int(ds_id + 1)): - return values + offset - - lsds_RV_reg.add_RV( - uq.RandomVariable( - name=lsds_rv_tag, - distribution='multinomial', - theta=ds_weights, - f_map=map_ds, - ) - ) - - ds_id += len(ds_weights) - - return ds_id - - if self._asmnt.log.verbose: - self.log_msg('Generating capacity variables ...', prepend_timestamp=True) - - # initialize the registry - capacity_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - lsds_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - - # capacity adjustment: - # ensure the scaling_specification is a dictionary - if not scaling_specification: - scaling_specification = {} - else: - # if there are contents, ensure they are valid. - # See docstring for an example of what is expected. 
- parsed_scaling_specification = {} - # validate contents - for key, value in scaling_specification.items(): - css = 'capacity adjustment specification' - if not isinstance(value, str): - raise ValueError( - f'Invalud entry in {css}: {value}. It has to be a string. ' - f'See docstring of DamageModel._create_dmg_RVs.' - ) - capacity_adjustment_operation = value[0] - number = value[1::] - if capacity_adjustment_operation not in ('+', '-', '*', '/'): - raise ValueError( - f'Invalid operation in {css}: ' - f'{capacity_adjustment_operation}' - ) - fnumber = base.float_or_None(number) - if fnumber is None: - raise ValueError(f'Invalid number in {css}: {number}') - parsed_scaling_specification[key] = ( - capacity_adjustment_operation, - fnumber, - ) - scaling_specification = parsed_scaling_specification - - # get the component sample and blocks from the asset model - for PG in PGB.index: - - # determine demand capacity adjustment operation, if required - cmp_loc_dir = '-'.join(PG[0:3]) - capacity_adjustment_operation = scaling_specification.get( - cmp_loc_dir, None - ) - - cmp_id = PG[0] - blocks = PGB.loc[PG, 'Blocks'] - - # if the number of blocks is provided, calculate the weights - if np.atleast_1d(blocks).shape[0] == 1: - blocks = np.full(int(blocks), 1.0 / blocks) - # otherwise, assume that the list contains the weights - - # initialize the damaged quantity sample variable - - assert self.damage_params is not None - if cmp_id in self.damage_params.index: - - frg_params = self.damage_params.loc[cmp_id, :] - - # get the list of limit states - limit_states = [] - - for val in frg_params.index.get_level_values(0).unique(): - if 'LS' in val: - limit_states.append(val[2:]) - - ds_id = 0 - - frg_rv_set_tags = [[] for b in blocks] - anchor_RVs = [] - - for ls_id in limit_states: - - frg_params_LS = frg_params[f'LS{ls_id}'] - - theta_0 = frg_params_LS.get('Theta_0', np.nan) - family = frg_params_LS.get('Family', np.nan) - ds_weights = frg_params_LS.get('DamageStateWeights', np.nan) - - # check if the limit state is defined for the component - if pd.isna(theta_0): - continue - - theta = [ - frg_params_LS.get(f"Theta_{t_i}", np.nan) for t_i in range(3) - ] - - if capacity_adjustment_operation: - if family in {'normal', 'lognormal'}: - theta[0] = self._handle_operation( - theta[0], - capacity_adjustment_operation[0], - capacity_adjustment_operation[1], - ) - else: - self.log_msg( - f'\nWARNING: Capacity adjustment is only supported ' - f'for `normal` or `lognormal` distributions. ' - f'Ignoring: {cmp_loc_dir}, which is {family}', - prepend_timestamp=False, - ) - - tr_lims = [ - frg_params_LS.get(f"Truncate{side}", np.nan) - for side in ("Lower", "Upper") - ] - - for block_i, _ in enumerate(blocks): - - frg_rv_tag = ( - 'FRG-' - f'{PG[0]}-' # cmp_id - f'{PG[1]}-' # loc - f'{PG[2]}-' # dir - f'{PG[3]}-' # uid - f'{block_i+1}-' # block - f'{ls_id}' - ) - - # Assign correlation between limit state random - # variables - # Note that we assume perfectly correlated limit - # state random variables here. This approach is in - # line with how mainstream PBE calculations are - # performed. Assigning more sophisticated - # correlations between limit state RVs is possible, - # if needed. Please let us know through the - # SimCenter Message Board if you are interested in - # such a feature. 
- # Anchor all other limit state random variables to - # the first one to consider the perfect correlation - # between capacities in each LS - if ls_id == limit_states[0]: - anchor = None - else: - anchor = anchor_RVs[block_i] - - # parse theta values for multilinear_CDF - if family == 'multilinear_CDF': - theta = np.column_stack( - ( - np.array( - theta[0].split('|')[0].split(','), - dtype=float, - ), - np.array( - theta[0].split('|')[1].split(','), - dtype=float, - ), - ) - ) - - RV = uq.RandomVariable( - name=frg_rv_tag, - distribution=family, - theta=theta, - truncation_limits=tr_lims, - anchor=anchor, - ) - - capacity_RV_reg.add_RV(RV) - - # add the RV to the set of correlated variables - frg_rv_set_tags[block_i].append(frg_rv_tag) - - if ls_id == limit_states[0]: - anchor_RVs.append(RV) - - # Now add the LS->DS assignments - lsds_rv_tag = ( - 'LSDS-' - f'{PG[0]}-' # cmp_id - f'{PG[1]}-' # loc - f'{PG[2]}-' # dir - f'{PG[3]}-' # uid - f'{block_i+1}-' # block - f'{ls_id}' - ) - - ds_id_next = assign_lsds( - ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag - ) - - ds_id = ds_id_next - - if self._asmnt.log.verbose: - rv_count = len(lsds_RV_reg.RV) - self.log_msg( - f"2x{rv_count} random variables created.", prepend_timestamp=False - ) - - return capacity_RV_reg, lsds_RV_reg - - def _generate_dmg_sample(self, sample_size, PGB, scaling_specification=None): - """ - This method generates a damage sample by creating random - variables (RVs) for capacities and limit-state-damage-states - (lsds), and then sampling from these RVs. The sample size and - performance group batches (PGB) are specified as inputs. The - method returns the capacity sample and the lsds sample. - - Parameters - ---------- - sample_size : int - The number of realizations to generate. - PGB : DataFrame - A DataFrame that groups performance groups into batches - for efficient damage assessment. - scaling_specification: dict, optional - A dictionary defining the shift in median. - Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} - The keys are individual components that should be present - in the `capacity_sample`. The values should be strings - containing an operation followed by the value formatted as - a float. The operation can be '+' for addition, '-' for - subtraction, '*' for multiplication, and '/' for division. - - Returns - ------- - capacity_sample : DataFrame - A DataFrame that represents the capacity sample. - lsds_sample : DataFrame - A DataFrame that represents the . - - Raises - ------ - ValueError - If the damage parameters have not been specified. - - """ - - # Check if damage model parameters have been specified - if self.damage_params is None: - raise ValueError('Damage model parameters have not been specified. 
' - 'Load parameters from the default damage model ' - 'databases or provide your own damage model ' - 'definitions before generating a sample.') - - # Create capacity and LSD RVs for each performance group - capacity_RVs, lsds_RVs = self._create_dmg_RVs(PGB, scaling_specification) - - if self._asmnt.log.verbose: - self.log_msg('Sampling capacities...', - prepend_timestamp=True) - - # Generate samples for capacity RVs - capacity_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) - - # Generate samples for LSD RVs - lsds_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) - - if self._asmnt.log.verbose: - self.log_msg("Raw samples are available", - prepend_timestamp=True) - - # get the capacity and lsds samples - capacity_sample = pd.DataFrame( - capacity_RVs.RV_sample).sort_index( - axis=0).sort_index(axis=1) - capacity_sample = base.convert_to_MultiIndex( - capacity_sample, axis=1)['FRG'] - capacity_sample.columns.names = [ - 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] - - lsds_sample = pd.DataFrame( - lsds_RVs.RV_sample).sort_index( - axis=0).sort_index(axis=1).astype(int) - lsds_sample = base.convert_to_MultiIndex( - lsds_sample, axis=1)['LSDS'] - lsds_sample.columns.names = [ - 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] - - if self._asmnt.log.verbose: - self.log_msg( - f"Successfully generated {sample_size} realizations.", - prepend_timestamp=True) - - return capacity_sample, lsds_sample - - def _get_required_demand_type(self, PGB): - """ - Returns the id of the demand needed to calculate damage to a - component. We assume that a damage model sample is available. - - This method returns the demand type and its properties - required to calculate the damage to a component. The - properties include whether the demand is directional, the - offset, and the type of the demand. The method takes as input - a dataframe PGB that contains information about the component - groups in the asset. For each component group PG in the PGB - dataframe, the method retrieves the relevant damage parameters - from the damage_params dataframe and parses the demand type - into its properties. If the demand type has a subtype, the - method splits it and adds the subtype to the demand type to - form the EDP (engineering demand parameter) type. The method - also considers the default offset for the demand type, if it - is specified in the options attribute of the assessment, and - adds the offset to the EDP. If the demand is directional, the - direction is added to the EDP. The method collects all the - unique EDPs for each component group and returns them as a - dictionary where each key is an EDP and its value is a list of - component groups that require that EDP. 
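A hypothetical example of the returned structure (component, location, direction, and uid labels are made up; the exact EDP key strings depend on base.EDP_to_demand_type and the configured offsets):

    EDP_req = {
        'PFA-1-1': [('CMP.A', '1', '1', '1')],
        'PID-2-0': [('CMP.B', '2', '1', '1'), ('CMP.C', '2', '2', '1')],
    }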
- - Parameters - ---------- - `PGB`: pd.DataFrame - A pandas DataFrame with the block information for - each component - - Returns - ------- - EDP_req: dict - A dictionary of EDP requirements, where each key is the EDP - string (e.g., "Peak Ground Acceleration-0-0"), and the - corresponding value is a list of tuples (component_id, - location, direction) - - """ - - # Assign the damage_params attribute to a local variable `DP` - DP = self.damage_params - - # Check if verbose logging is enabled in `self._asmnt.log` - if self._asmnt.log.verbose: - # If verbose logging is enabled, log a message indicating - # that we are collecting demand information - self.log_msg('Collecting required demand information...', - prepend_timestamp=True) - - # Initialize an empty dictionary to store the unique EDP - # requirements - EDP_req = {} - - # Iterate over the index of the `PGB` DataFrame - for PG in PGB.index: - # Get the component name from the first element of the - # `PG` tuple - cmp = PG[0] - - # Get the directional, offset, and demand_type parameters - # from the `DP` DataFrame - directional, offset, demand_type = DP.loc[ - cmp, [('Demand', 'Directional'), - ('Demand', 'Offset'), - ('Demand', 'Type')]] - - # Parse the demand type - - # Check if there is a subtype included in the demand_type - # string - if '|' in demand_type: - # If there is a subtype, split the demand_type string - # on the '|' character - demand_type, subtype = demand_type.split('|') - # Convert the demand type to the corresponding EDP - # type using `base.EDP_to_demand_type` - demand_type = base.EDP_to_demand_type[demand_type] - # Concatenate the demand type and subtype to form the - # EDP type - EDP_type = f'{demand_type}_{subtype}' - else: - # If there is no subtype, convert the demand type to - # the corresponding EDP type using - # `base.EDP_to_demand_type` - demand_type = base.EDP_to_demand_type[demand_type] - # Assign the EDP type to be equal to the demand type - EDP_type = demand_type - - # Consider the default offset, if needed - if demand_type in self._asmnt.options.demand_offset.keys(): - # If the demand type has a default offset in - # `self._asmnt.options.demand_offset`, add the offset - # to the default offset - offset = int(offset + self._asmnt.options.demand_offset[demand_type]) - else: - # If the demand type does not have a default offset in - # `self._asmnt.options.demand_offset`, convert the - # offset to an integer - offset = int(offset) - - # Determine the direction - if directional: - # If the demand is directional, use the third element - # of the `PG` tuple as the direction - direction = PG[2] - else: - # If the demand is not directional, use '0' as the - # direction - direction = '0' - - # Concatenate the EDP type, offset, and direction to form - # the EDP key - EDP = f"{EDP_type}-{str(int(PG[1]) + offset)}-{direction}" - - # If the EDP key is not already in the `EDP_req` - # dictionary, add it and initialize it with an empty list - if EDP not in EDP_req: - EDP_req.update({EDP: []}) - - # Add the current PG (performance group) to the list of - # PGs associated with the current EDP key - EDP_req[EDP].append(PG) - - # Return the unique EDP requirements - return EDP_req - - def _assemble_required_demand_data(self, EDP_req): - """ - Assembles demand data for damage state determination. 
- - The method takes the maximum of all available directions for - non-directional demand, scaling it using the non-directional - multiplier specified in self._asmnt.options, and returning the - result as a dictionary with keys in the format of - '--' and values as arrays of - demand values. If demand data is not found, logs a warning - message and skips the corresponding damages calculation. - - Parameters - ---------- - EDP_req : dict - A dictionary of unique EDP requirements - - Returns - ------- - demand_dict : dict - A dictionary of assembled demand data for calculation - - Raises - ------ - KeyError - If demand data for a given EDP cannot be found - - """ - - if self._asmnt.log.verbose: - self.log_msg('Assembling demand data for calculation...', - prepend_timestamp=True) - - demand_source = self._asmnt.demand.sample - - demand_dict = {} - - for EDP in EDP_req.keys(): - - EDP = EDP.split('-') - - # if non-directional demand is requested... - if EDP[2] == '0': - - # assume that the demand at the given location is available - try: - # take the maximum of all available directions and scale it - # using the nondirectional multiplier specified in the - # self._asmnt.options (the default value is 1.2) - demand = demand_source.loc[ - :, (EDP[0], EDP[1])].max(axis=1).values - demand = demand * self._asmnt.options.nondir_multi(EDP[0]) - - except KeyError: - - demand = None - - else: - demand = demand_source[(EDP[0], EDP[1], EDP[2])].values - - if demand is None: - - self.log_msg(f'\nWARNING: Cannot find demand data for {EDP}. The ' - 'corresponding damages cannot be calculated.', - prepend_timestamp=False) - else: - demand_dict.update({f'{EDP[0]}-{EDP[1]}-{EDP[2]}': demand}) - - return demand_dict - - def _evaluate_damage_state( - self, demand_dict, EDP_req, capacity_sample, lsds_sample): - """ - Use the demand and LS capacity sample to evaluate damage states - - Parameters - ---------- - demand_dict: dict - Dictionary containing the demand of each demand type. - EDP_req: dict - Dictionary containing the EDPs assigned to each demand - type. - capacity_sample: DataFrame - Provides a sample of the capacity. - lsds_sample: DataFrame - Provides the mapping between limit states and damage - states. - - Returns - ------- - dmg_sample: DataFrame - Assigns a Damage State to each component block in the - asset model. 
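A simplified, standalone numpy illustration of the capacity-versus-demand comparison; it assumes each exceeded limit state maps to the damage state with the same index, which glosses over the LSDS mapping used here:

    import numpy as np

    capacity = np.array([[0.02, 0.040],    # realization 1: LS1, LS2 capacities
                         [0.01, 0.025]])   # realization 2
    demand = 0.03
    exceeded = (capacity - demand) < 0     # [[True, False], [True, True]]
    ds = exceeded.sum(axis=1)              # highest exceeded limit state
    print(ds)                              # [1 2]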
- """ - - # Log a message indicating that damage states are being - # evaluated - - if self._asmnt.log.verbose: - self.log_msg('Evaluating damage states...', prepend_timestamp=True) - - # Create an empty dataframe with columns and index taken from - # the input capacity sample - dmg_eval = pd.DataFrame(columns=capacity_sample.columns, - index=capacity_sample.index) - - # Initialize an empty list to store demand data - demand_df = [] - - # For each demand type in the demand dictionary - for demand_name, demand_vals in demand_dict.items(): - - # Get the list of PGs assigned to this demand type - PG_list = EDP_req[demand_name] - - # Create a list of columns for the demand data - # corresponding to each PG in the PG_list - PG_cols = pd.concat( - [dmg_eval.loc[:1, PG_i] for PG_i in PG_list], axis=1, keys=PG_list - ).columns - PG_cols.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] - # Create a dataframe with demand values repeated for the - # number of PGs and assign the columns as PG_cols - demand_df.append(pd.concat([pd.Series(demand_vals)] * len(PG_cols), - axis=1, keys=PG_cols)) - - # Concatenate all demand dataframes into a single dataframe - demand_df = pd.concat(demand_df, axis=1) - # Sort the columns of the demand dataframe - demand_df.sort_index(axis=1, inplace=True) - - # Evaluate the damage exceedance by subtracting demand from - # capacity and checking if the result is less than zero - dmg_eval = (capacity_sample - demand_df) < 0 - - # Remove any columns with NaN values from the damage - # exceedance dataframe - dmg_eval.dropna(axis=1, inplace=True) - - # initialize the DataFrames that store the damage states and - # quantities - ds_sample = capacity_sample.groupby(level=[0, 1, 2, 3, 4], axis=1).first() - ds_sample.loc[:, :] = np.zeros(ds_sample.shape, dtype=int) - - # get a list of limit state ids among all components in the damage model - ls_list = dmg_eval.columns.get_level_values(5).unique() - - # for each consecutive limit state... - for LS_id in ls_list: - # get all cmp - loc - dir - block where this limit state occurs - dmg_e_ls = dmg_eval.loc[:, idx[:, :, :, :, :, LS_id]].dropna(axis=1) - - # Get the damage states corresponding to this limit state in each - # block - # Note that limit states with a set of mutually exclusive damage - # states options have their damage state picked here. - lsds = lsds_sample.loc[:, dmg_e_ls.columns] - - # Drop the limit state level from the columns to make the damage - # exceedance DataFrame compatible with the other DataFrames in the - # following steps - dmg_e_ls.columns = dmg_e_ls.columns.droplevel(5) - - # Same thing for the lsds DataFrame - lsds.columns = dmg_e_ls.columns - - # Update the damage state in the result with the values from the - # lsds DF if the limit state was exceeded according to the - # dmg_e_ls DF. - # This one-liner updates the given Limit State exceedance in the - # entire damage model. If subsequent Limit States are also exceeded, - # those cells in the result matrix will get overwritten by higher - # damage states. - ds_sample.loc[:, dmg_e_ls.columns] = ( - ds_sample.loc[:, dmg_e_ls.columns].mask(dmg_e_ls, lsds)) - - return ds_sample - - def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): - """ - Combine component quantity and damage state information in one - DataFrame. - - This method assumes that a component quantity sample is - available in the asset model and a damage state sample is - available in the damage model. 
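A minimal numpy sketch of that combination, assuming a single component split into two equal blocks (all numbers are hypothetical):

    import numpy as np

    block_weights = np.array([0.5, 0.5])
    cmp_qnt = np.array([[10.0, 10.0],      # component quantity repeated per block
                        [10.0, 10.0]])
    ds_sample = np.array([[1, 2],          # damage state of each block
                          [0, 1]])
    dmg_qnt = cmp_qnt * block_weights      # quantity carried by each block
    print(np.where(ds_sample == 1, dmg_qnt, 0).sum(axis=1))  # quantity in DS1: [5. 5.]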
- - Parameters - ---------- - PGB: DataFrame - A DataFrame that contains the Block identifier for each - component. - ds_sample: DataFrame - A DataFrame that assigns a damage state to each component - block in the asset model. - dropzero: bool, optional, default: True - If True, the quantity of non-damaged components is not - saved. - - Returns - ------- - res_df: DataFrame - A DataFrame that combines the component quantity and - damage state information. - - Raises - ------ - ValueError - If the number of blocks is not provided or if the list of - weights does not contain the same number of elements as - the number of blocks. - - """ - - # Log a message indicating that the calculation of damage - # quantities is starting - if self._asmnt.log.verbose: - self.log_msg('Calculating damage quantities...', - prepend_timestamp=True) - - # Store the damage state sample as a local variable - dmg_ds = ds_sample - - # Retrieve the component quantity information from the asset - # model - cmp_qnt = self._asmnt.asset.cmp_sample # .values - # Retrieve the component marginal parameters from the asset - # model - cmp_params = self._asmnt.asset.cmp_marginal_params - - # Combine the component quantity information for the columns - # in the damage state sample - dmg_qnt = pd.concat( - [cmp_qnt[PG[:4]] for PG in dmg_ds.columns], - axis=1, keys=dmg_ds.columns) - - # Initialize a list to store the block weights - block_weights = [] - - # For each component in the list of PG blocks - for PG in PGB.index: - - # Set the number of blocks to 1, unless specified - # otherwise in the component marginal parameters - blocks = 1 - if cmp_params is not None: - if 'Blocks' in cmp_params.columns: - - blocks = cmp_params.loc[PG, 'Blocks'] - - # If the number of blocks is specified, calculate the - # weights as the reciprocal of the number of blocks - if np.atleast_1d(blocks).shape[0] == 1: - blocks_array = np.full(int(blocks), 1. 
/ blocks) - - # Otherwise, assume that the list contains the weights - block_weights += blocks_array.tolist() - - # Broadcast the block weights to match the shape of the damage - # quantity DataFrame - block_weights = np.broadcast_to( - block_weights, - (dmg_qnt.shape[0], len(block_weights))) - - # Multiply the damage quantities by the block weights - dmg_qnt *= block_weights - - # Get the unique damage states from the damage state sample - # Note that these might be fewer than all possible Damage - # States - ds_list = np.unique(dmg_ds.values) - # Filter out any NaN values from the list of damage states - ds_list = ds_list[pd.notna(ds_list)].astype(int) - - # If the dropzero option is True, remove the zero damage state - # from the list of damage states - if dropzero: - - ds_list = ds_list[ds_list != 0] - - # Only proceed with the calculation if there is at least one - # damage state in the list - if len(ds_list) > 0: - - # Create a list of DataFrames, where each DataFrame stores - # the damage quantities for a specific damage state - res_list = [pd.DataFrame( - np.where(dmg_ds == ds_i, dmg_qnt, 0), - columns=dmg_ds.columns, - index=dmg_ds.index - ) for ds_i in ds_list] - - # Combine the damage quantity DataFrames into a single - # DataFrame - res_df = pd.concat( - res_list, axis=1, - keys=[f'{ds_i:g}' for ds_i in ds_list]) - res_df.columns.names = ['ds', *res_df.columns.names[1::]] - # remove the block level from the columns - res_df.columns = res_df.columns.reorder_levels([1, 2, 3, 4, 0, 5]) - res_df = res_df.groupby(level=[0, 1, 2, 3, 4], axis=1).sum() - - # The damage states with no damaged quantities are dropped - # Note that some of these are not even valid DSs at the given PG - res_df = res_df.iloc[:, np.where(res_df.sum(axis=0) != 0)[0]] - - return res_df - - def _perform_dmg_task(self, task, qnt_sample): - """ - Perform a task from a damage process. - - The method performs a task from a damage process on a given - quantity sample. The method first checks if the source - component specified in the task exists among the available - components in the quantity sample. If the source component is - not found, a warning message is logged and the method returns - the original quantity sample unchanged. Otherwise, the method - executes the events specified in the task. The events can be - triggered by a limit state exceedance or a damage state - occurrence. If the event is triggered by a damage state, the - method moves all quantities of the target component(s) into - the target damage state in pre-selected realizations. If the - target event is "NA", the method removes quantity information - from the target components in the pre-selected - realizations. After executing the events, the method returns - the updated quantity sample. - - Parameters - ---------- - task : list - A list representing a task from the damage process. The - list contains two elements: - - The first element is a string representing the source - component, e.g., `'CMP_A'`. - - The second element is a dictionary representing the - events triggered by the damage state of the source - component. The keys of the dictionary are strings that - represent the damage state of the source component, - e.g., `'DS1'`. The values are lists of strings - representing the target component(s) and event(s), e.g., - `['CMP_B', 'CMP_C']`. - qnt_sample : pandas DataFrame - A DataFrame representing the quantities of the components - in the damage sample. 
It is modified in place to represent - the quantities of the components in the damage sample - after the task has been performed. - - Raises - ------ - ValueError - If the source component is not found among the components - in the damage sample - ValueError - If the source event is not a limit state (LS) or damage - state (DS) - ValueError - If the target event is not a limit state (LS), damage - state (DS), or not available (NA) - ValueError - If the target event is a limit state (LS) - - """ - - if self._asmnt.log.verbose: - self.log_msg('Applying task...', - prepend_timestamp=True) - - # get the list of available components - cmp_list = qnt_sample.columns.get_level_values(0).unique().tolist() - - # get the component quantities - cmp_qnt = self._asmnt.asset.cmp_sample - - # get the source component - source_cmp = task[0].split('_')[1] - - # check if it exists among the available ones - if source_cmp not in cmp_list: - - self.log_msg( - f"WARNING: Source component {source_cmp} in the prescribed " - "damage process not found among components in the damage " - "sample. The corresponding part of the damage process is " - "skipped.", prepend_timestamp=False) - - return - - # get the damage quantities for the source component - source_cmp_df = qnt_sample.loc[:, source_cmp] - - # execute the prescribed events - for source_event, target_infos in task[1].items(): - - # events triggered by limit state exceedance - if source_event.startswith('LS'): - - # ls_i = int(source_event[2:]) - # TODO: implement source LS support - raise ValueError('LS not supported yet.') - - # events triggered by damage state occurrence - if source_event.startswith('DS'): - - # get the ID of the damage state that triggers the event - ds_list = [source_event[2:], ] - - # if we are only looking for a single DS - if len(ds_list) == 1: - - ds_target = ds_list[0] - - # get the realizations with non-zero quantity of the target DS - source_ds_vals = source_cmp_df.groupby( - level=[3], axis=1).max() - - if ds_target in source_ds_vals.columns: - source_ds_vals = source_ds_vals[ds_target] - source_mask = source_cmp_df.loc[source_ds_vals > 0.0].index - else: - # if tge source_cmp is not in ds_target in any of the - # realizations, the prescribed event is not triggered - continue - - else: - pass # TODO: implement multiple DS support - - else: - raise ValueError(f"Unable to parse source event in damage " - f"process: {source_event}") - - # get the information about the events - target_infos = np.atleast_1d(target_infos) - - # for each event - for target_info in target_infos: - - # get the target component and event type - target_cmp, target_event = target_info.split('_') - - # ALL means all, but the source component - if target_cmp == 'ALL': - - # copy the list of available components - target_cmp = deepcopy(cmp_list) - - # remove the source component - if source_cmp in target_cmp: - target_cmp.remove(source_cmp) - - # otherwise we target a specific component - elif target_cmp in cmp_list: - target_cmp = [target_cmp, ] - - # trigger a limit state - if target_event.startswith('LS'): - - # ls_i = int(target_event[2:]) - # TODO: implement target LS support - raise ValueError('LS not supported yet.') - - # trigger a damage state - if target_event.startswith('DS'): - - # get the target damage state ID - ds_i = target_event[2:] - - # move all quantities of the target component(s) into the - # target damage state in the pre-selected realizations - qnt_sample.loc[source_mask, target_cmp] = 0.0 - - for target_cmp_i in target_cmp: - locs = 
cmp_qnt[target_cmp_i].columns.get_level_values(0) - dirs = cmp_qnt[target_cmp_i].columns.get_level_values(1) - uids = cmp_qnt[target_cmp_i].columns.get_level_values(2) - for loc, direction, uid in zip(locs, dirs, uids): - # because we cannot be certain that ds_i had been - # triggered earlier, we have to add this damage - # state manually for each PG of each component, if needed - if ds_i not in qnt_sample[ - (target_cmp_i, loc, direction, uid)].columns: - qnt_sample[ - (target_cmp_i, loc, direction, uid, ds_i)] = 0.0 - - qnt_sample.loc[ - source_mask, - (target_cmp_i, loc, direction, uid, ds_i)] = ( - cmp_qnt.loc[ - source_mask, - (target_cmp_i, loc, direction, uid)].values) - - # clear all damage information - elif target_event == 'NA': - - # remove quantity information from the target components - # in the pre-selected realizations - qnt_sample.loc[source_mask, target_cmp] = np.nan - - else: - raise ValueError(f"Unable to parse target event in damage " - f"process: {target_event}") - - if self._asmnt.log.verbose: - self.log_msg('Damage process task successfully applied.', - prepend_timestamp=False) - - def _get_pg_batches(self, block_batch_size): - """ - Group performance groups into batches for efficient damage assessment. - - The method takes as input the block_batch_size, which - specifies the maximum number of blocks per batch. The method - first checks if performance groups have been defined in the - cmp_marginal_params dataframe, and if so, it uses the 'Blocks' - column as the performance group information. If performance - groups have not been defined in cmp_marginal_params, the - method uses the cmp_sample dataframe to define the performance - groups, with each performance group having a single block. - - The method then checks if the performance groups are available - in the damage parameters dataframe, and removes any - performance groups that are not found in the damage - parameters. The method then groups the performance groups - based on the locations and directions of the components, and - calculates the cumulative sum of the blocks for each - group. The method then divides the performance groups into - batches of size specified by block_batch_size and assigns a - batch number to each group. Finally, the method groups the - performance groups by batch number, component, location, and - direction, and returns a dataframe that shows the number of - blocks for each batch. - - """ - - # Get the marginal parameters for the components from the - # asset model - cmp_marginals = self._asmnt.asset.cmp_marginal_params - - # Initialize the batch dataframe - pg_batch = None - - # If marginal parameters are available, use the 'Blocks' - # column to initialize the batch dataframe - if cmp_marginals is not None: - - # Check if the "Blocks" column exists in the component - # marginal parameters - if 'Blocks' in cmp_marginals.columns: - pg_batch = cmp_marginals['Blocks'].to_frame() - - # If the "Blocks" column doesn't exist, create a new dataframe - # with "Blocks" column filled with ones, using the component - # sample as the index. - if pg_batch is None: - cmp_sample = self._asmnt.asset.cmp_sample - pg_batch = pd.DataFrame(np.ones(cmp_sample.shape[1]), - index=cmp_sample.columns, - columns=['Blocks']) - - # Check if the damage model information exists for each - # performance group If not, remove the performance group from - # the analysis and log a warning message. 
- first_time = True - for pg_i in pg_batch.index: - - if np.any(np.isin(pg_i, self.damage_params.index)): - - blocks_i = pg_batch.loc[pg_i, 'Blocks'] - - # If the "Blocks" column contains a list of block - # weights, get the number of blocks from the shape of - # the list. - if np.atleast_1d(blocks_i).shape[0] != 1: - blocks_i = np.atleast_1d(blocks_i).shape[0] - - pg_batch.loc[pg_i, 'Blocks'] = blocks_i - - else: - pg_batch.drop(pg_i, inplace=True) - - if first_time: - self.log_msg("\nWARNING: Damage model information is " - "incomplete for some of the performance groups " - "and they had to be removed from the analysis:", - prepend_timestamp=False) - - first_time = False - - self.log_msg(f"{pg_i}", prepend_timestamp=False) - - # Convert the data types of the dataframe to be efficient - pg_batch = pg_batch.convert_dtypes() - - # Sum up the number of blocks for each performance group - pg_batch = pg_batch.groupby(['loc', 'dir', 'cmp', 'uid']).sum() - pg_batch.sort_index(axis=0, inplace=True) - - # Calculate cumulative sum of blocks - pg_batch['CBlocks'] = np.cumsum(pg_batch['Blocks'].values.astype(int)) - pg_batch['Batch'] = 0 - - # Group the performance groups into batches - for batch_i in range(1, pg_batch.shape[0] + 1): - - # Find the mask for blocks that are less than the batch - # size and greater than 0 - batch_mask = np.all( - np.array([pg_batch['CBlocks'] <= block_batch_size, - pg_batch['CBlocks'] > 0]), - axis=0) - - if np.sum(batch_mask) < 1: - batch_mask = np.full(batch_mask.shape, False) - batch_mask[np.where(pg_batch['CBlocks'] > 0)[0][0]] = True - - pg_batch.loc[batch_mask, 'Batch'] = batch_i - - # Decrement the cumulative block count by the max count in - # the current batch - pg_batch['CBlocks'] -= pg_batch.loc[ - pg_batch['Batch'] == batch_i, 'CBlocks'].max() - - # If the maximum cumulative block count is 0, exit the - # loop - if pg_batch['CBlocks'].max() == 0: - break - - # Group the performance groups by batch, component, location, - # and direction, and keep only the number of blocks for each - # group - pg_batch = pg_batch.groupby( - ['Batch', 'cmp', 'loc', 'dir', 'uid']).sum().loc[:, 'Blocks'].to_frame() - - return pg_batch - - def _complete_ds_cols(self, dmg_sample): - """ - Completes the damage sample dataframe with all possible damage - states for each component. - - Parameters - ---------- - dmg_sample : DataFrame - A DataFrame containing the damage state information for - each component block in the asset model. The columns are - MultiIndexed with levels corresponding to component - information ('cmp', 'loc', 'dir', 'uid') and the damage - state ('ds'). - - Returns - ------- - DataFrame - A DataFrame similar to `dmg_sample` but with additional - columns for missing damage states for each component, - ensuring that all possible damage states are - represented. The new columns are filled with zeros, - indicating no occurrence of those damage states in the - sample. - - Notes - ----- - - The method assumes that the damage model parameters - (`self.damage_params`) are available and contain the - necessary information to determine the total number of - damage states for each component. 
- - """ - # get a shortcut for the damage model parameters - DP = self.damage_params - - # Get the header for the results that we can use to identify - # cmp-loc-dir-uid sets - dmg_header = ( - dmg_sample.groupby(level=[0, 1, 2, 3], axis=1).first().iloc[:2, :] - ) - - # get the number of possible limit states - ls_list = [col for col in DP.columns.unique(level=0) if 'LS' in col] - - # initialize the result dataframe - res = pd.DataFrame() - - # walk through all components that have damage parameters provided - for cmp_id in DP.index: - - # get the component-specific parameters - cmp_data = DP.loc[cmp_id] - - # and initialize the damage state counter - ds_count = 0 - - # walk through all limit states for the component - for ls in ls_list: - - # check if the given limit state is defined - if not pd.isna(cmp_data[(ls, 'Theta_0')]): - - # check if there is only one damage state - if pd.isna(cmp_data[(ls, 'DamageStateWeights')]): - - ds_count += 1 - - else: - - # or if there are more than one, how many - ds_count += len( - cmp_data[(ls, 'DamageStateWeights')].split('|')) - - # get the list of valid cmp-loc-dir-uid sets - cmp_header = dmg_header.loc[:, [cmp_id, ]] - - # Create a dataframe where they are repeated ds_count times in the - # columns. The keys put the DS id in the first level of the - # multiindexed column - cmp_headers = pd.concat( - [cmp_header for ds_i in range(ds_count + 1)], - keys=[str(r) for r in range(0, ds_count + 1)], - axis=1) - cmp_headers.columns.names = ['ds', *cmp_headers.columns.names[1::]] - - # add these new columns to the result dataframe - res = pd.concat([res, cmp_headers], axis=1) - - # Fill the result dataframe with zeros and reorder its columns to have - # the damage states at the lowest like - matching the dmg_sample input - res = pd.DataFrame( - 0.0, - columns=res.columns.reorder_levels([1, 2, 3, 4, 0]), - index=dmg_sample.index, - ) - - # replace zeros wherever the dmg_sample has results - res.loc[:, dmg_sample.columns.to_list()] = dmg_sample - - return res - - def calculate( - self, dmg_process=None, block_batch_size=1000, scaling_specification=None - ): - """ - Calculate the damage state of each component block in the asset. - - """ - - self.log_div() - self.log_msg('Calculating damages...') - - sample_size = self._asmnt.demand.sample.shape[0] - - # Break up damage calculation and perform it by performance group. - # Compared to the simultaneous calculation of all PGs, this approach - # reduces demands on memory and increases the load on CPU. This leads - # to a more balanced workload on most machines for typical problems. - # It also allows for a straightforward extension with parallel - # computing. 
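For reference, the dmg_process argument accepted by this method uses the format parsed by _perform_dmg_task above; a hypothetical example with made-up component names:

    # if CMP.A reaches DS1, force CMP.B into DS1 and discard damage data of CMP.C
    dmg_process = {
        "1_CMP.A": {"DS1": "CMP.B_DS1"},
        "2_CMP.A": {"DS1": "CMP.C_NA"},
    }
    # used as: damage_model.calculate(dmg_process=dmg_process)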
- - # get the list of performance groups - qnt_samples = [] - - self.log_msg(f'Number of Performance Groups in Asset Model:' - f' {self._asmnt.asset.cmp_sample.shape[1]}', - prepend_timestamp=False) - - pg_batch = self._get_pg_batches(block_batch_size) - batches = pg_batch.index.get_level_values(0).unique() - - self.log_msg(f'Number of Component Blocks: {pg_batch["Blocks"].sum()}', - prepend_timestamp=False) - - self.log_msg(f"{len(batches)} batches of Performance Groups prepared " - "for damage assessment", - prepend_timestamp=False) - - # for PG_i in self._asmnt.asset.cmp_sample.columns: - for PGB_i in batches: - - PGB = pg_batch.loc[PGB_i] - - self.log_msg(f"Calculating damage for PG batch {PGB_i} with " - f"{int(PGB['Blocks'].sum())} blocks") - - # Generate an array with component capacities for each block and - # generate a second array that assigns a specific damage state to - # each component limit state. The latter is primarily needed to - # handle limit states with multiple, mutually exclusive DS options - capacity_sample, lsds_sample = self._generate_dmg_sample( - sample_size, PGB, scaling_specification) - - # Get the required demand types for the analysis - EDP_req = self._get_required_demand_type(PGB) - - # Create the demand vector - demand_dict = self._assemble_required_demand_data(EDP_req) - - # Evaluate the Damage State of each Component Block - ds_sample = self._evaluate_damage_state( - demand_dict, EDP_req, - capacity_sample, lsds_sample) - qnt_sample = self._prepare_dmg_quantities(PGB, ds_sample, dropzero=False) - - qnt_samples.append(qnt_sample) - - qnt_sample = pd.concat(qnt_samples, axis=1) - - # Create a comprehensive table with all possible DSs to have a robust - # input for the damage processes evaluation below - qnt_sample = self._complete_ds_cols(qnt_sample) - qnt_sample.sort_index(axis=1, inplace=True) - - self.log_msg("Raw damage calculation successful.", - prepend_timestamp=False) - - # Apply the prescribed damage process, if any - if dmg_process is not None: - self.log_msg("Applying damage processes...") - - # sort the processes - dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} - - for task in dmg_process.items(): - - self._perform_dmg_task(task, qnt_sample) - - self.log_msg("Damage processes successfully applied.", - prepend_timestamp=False) - - # If requested, remove columns with no damage from the sample - if self._asmnt.options.list_all_ds is False: - qnt_sample = qnt_sample.iloc[:, np.where(qnt_sample.sum(axis=0) != 0)[0]] - - self.sample = qnt_sample - - self.log_msg('Damage calculation successfully completed.') - - -class LossModel(PelicunModel): - """ - Parent object for loss models. - - All loss assessment methods should be children of this class. 
- - Parameters - ---------- - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self._sample = None - self.loss_map = None - self.loss_params = None - self.loss_type = 'Generic' - - @property - def sample(self): - """ - sample property - """ - return self._sample - - def save_sample(self, filepath=None, save_units=False): - """ - Save loss sample to a csv file - - """ - self.log_div() - if filepath is not None: - self.log_msg('Saving loss sample...') - - cmp_units = self.loss_params[('DV', 'Unit')] - dv_units = pd.Series(index=self.sample.columns, name='Units', - dtype='object') - - for cmp_id, dv_type in cmp_units.index: - dv_units.loc[(dv_type, cmp_id)] = cmp_units.at[(cmp_id, dv_type)] - - res = file_io.save_to_csv( - self.sample, filepath, units=dv_units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=(filepath is not None), - log=self._asmnt.log) - - if filepath is not None: - self.log_msg('Loss sample successfully saved.', - prepend_timestamp=False) - return None - - # else: - units = res.loc["Units"] - res.drop("Units", inplace=True) - - if save_units: - return res.astype(float), units - - return res.astype(float) - - def load_sample(self, filepath): - """ - Load damage sample data. - - """ - self.log_div() - self.log_msg('Loading loss sample...') - - self._sample = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log) - - self.log_msg('Loss sample successfully loaded.', prepend_timestamp=False) - - def load_model(self, data_paths, mapping_path, decision_variables=None): - """ - Load the list of prescribed consequence models and their parameters - - Parameters - ---------- - data_paths: list of string or DataFrame - List of paths to data files with consequence model - parameters. Default XY datasets can be accessed as - PelicunDefault/XY. The list can also contain DataFrame - objects, in which case that data is used directly. - mapping_path: string - Path to a csv file that maps drivers (i.e., damage or edp data) to - loss models. - decision_variables: list of string, optional - List of decision variables to include in the analysis. If None, - all variables provided in the consequence models are included. When - a list is provided, only variables in the list will be included. 
- """ - - self.log_div() - self.log_msg(f'Loading loss map for {self.loss_type}...') - - loss_map = file_io.load_data( - mapping_path, None, orientation=1, reindex=False, log=self._asmnt.log - ) - - loss_map['Driver'] = loss_map.index.values - loss_map['Consequence'] = loss_map[self.loss_type] - loss_map.index = np.arange(loss_map.shape[0]) - loss_map = loss_map.loc[:, ['Driver', 'Consequence']] - loss_map.dropna(inplace=True) - - self.loss_map = loss_map - - self.log_msg("Loss map successfully parsed.", prepend_timestamp=False) - - self.log_div() - self.log_msg(f'Loading loss parameters for {self.loss_type}...') - - # replace default flag with default data path - for d_i, data_path in enumerate(data_paths): - - if 'PelicunDefault/' in data_path: - data_paths[d_i] = data_path.replace( - 'PelicunDefault/', - f'{base.pelicun_path}/resources/SimCenterDBDL/') - - data_list = [] - # load the data files one by one - for data_path in data_paths: - data = file_io.load_data( - data_path, None, orientation=1, reindex=False, log=self._asmnt.log - ) - - data_list.append(data) - - loss_params = pd.concat(data_list, axis=0) - - # drop redefinitions of components - loss_params = loss_params.groupby( - level=[0, 1]).first().transform(lambda x: x.fillna(np.nan)) - # note: .groupby introduces None entries. We replace them with - # NaN for consistency. - - # keep only the relevant data - loss_cmp = np.unique(self.loss_map['Consequence'].values) - - available_cmp = loss_params.index.unique(level=0) - missing_cmp = [] - for cmp in loss_cmp: - if cmp not in available_cmp: - missing_cmp.append(cmp) - - if len(missing_cmp) > 0: - self.log_msg("\nWARNING: The loss model does not provide " - "consequence information for the following component(s) " - f"in the loss map: {missing_cmp}. They are removed from " - "further analysis\n", - prepend_timestamp=False) - - self.loss_map = self.loss_map.loc[ - ~loss_map['Consequence'].isin(missing_cmp)] - loss_cmp = np.unique(self.loss_map['Consequence'].values) - - loss_params = loss_params.loc[idx[loss_cmp, :], :] - - # drop unused damage states - DS_list = loss_params.columns.get_level_values(0).unique() - DS_to_drop = [] - for DS in DS_list: - if np.all(pd.isna(loss_params.loc[:, idx[DS, :]].values)) is True: - DS_to_drop.append(DS) - - loss_params.drop(columns=DS_to_drop, level=0, inplace=True) - - # convert values to internal base units - for DS in loss_params.columns.unique(level=0): - if DS.startswith('DS'): - loss_params.loc[:, DS] = self.convert_marginal_params( - loss_params.loc[:, DS].copy(), - loss_params[('DV', 'Unit')], - loss_params[('Quantity', 'Unit')] - ).values - - # check for components with incomplete loss information - cmp_incomplete_list = loss_params.loc[ - loss_params[('Incomplete', '')] == 1].index - - if len(cmp_incomplete_list) > 0: - loss_params.drop(cmp_incomplete_list, inplace=True) - - self.log_msg( - "\n" - "WARNING: Loss information is incomplete for the " - f"following component(s) {cmp_incomplete_list}. " - "They were removed from the analysis." 
- "\n", - prepend_timestamp=False) - - # filter decision variables, if needed - if decision_variables is not None: - - loss_params = loss_params.reorder_levels([1, 0]) - - available_DVs = loss_params.index.unique(level=0) - filtered_DVs = [] - - for DV_i in decision_variables: - - if DV_i in available_DVs: - filtered_DVs.append(DV_i) - - loss_params = loss_params.loc[filtered_DVs, :].reorder_levels([1, 0]) - - self.loss_params = loss_params.sort_index(axis=1) - - self.log_msg("Loss parameters successfully parsed.", - prepend_timestamp=False) - - def aggregate_losses(self): - """ - This is placeholder method. - - The method of aggregating the Decision Variable sample is specific to - each DV and needs to be implemented in every child of the LossModel - independently. - """ - raise NotImplementedError - - def _generate_DV_sample(self, dmg_quantities, sample_size): - """ - This is placeholder method. - - The method of sampling decision variables in Decision - Variable-specific and needs to be implemented in every child - of the LossModel independently. - """ - raise NotImplementedError - - def calculate(self): - """ - Calculate the consequences of each component block damage in - the asset. - - """ - - self.log_div() - self.log_msg("Calculating losses...") - - drivers = [d for d, _ in self.loss_map['Driver']] - - if 'DMG' in drivers: - sample_size = self._asmnt.damage.sample.shape[0] - elif 'DEM' in drivers: - sample_size = self._asmnt.demand.sample.shape[0] - else: - raise ValueError( - 'Invalid loss drivers. Check the specified loss map.') - - # First, get the damaged quantities in each damage state for - # each component of interest. - dmg_q = self._asmnt.damage.sample.copy() - - # Now sample random Decision Variables - # Note that this method is DV-specific and needs to be - # implemented in every child of the LossModel independently. - self._generate_DV_sample(dmg_q, sample_size) - - self.log_msg("Loss calculation successful.") - - -class BldgRepairModel(LossModel): - """ - Manages building repair consequence assessments. - - Parameters - ---------- - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self.loss_type = 'BldgRepair' - - # def load_model(self, data_paths, mapping_path): - - # super().load_model(data_paths, mapping_path) - - # def calculate(self): - - # super().calculate() - - def _create_DV_RVs(self, case_list): - """ - Prepare the random variables used for repair cost and time simulation. - - Parameters - ---------- - case_list: MultiIndex - Index with cmp-loc-dir-ds descriptions that identify the RVs - we need for the simulation. - - Raises - ------ - ValueError - When any Loss Driver is not recognized. 
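Conceptually, the cost-time correlation assigned further below can be mimicked with a short numpy sketch; rho and the lognormal dispersion are hypothetical values, and the actual sampling is handled by the uq module:

    import numpy as np

    rng = np.random.default_rng(0)
    rho = 0.7
    z = rng.multivariate_normal([0.0, 0.0], [[1.0, rho], [rho, 1.0]], size=1000)
    cost_dev, time_dev = np.exp(0.3 * z).T  # correlated deviations around a median of 1.0
    print(round(np.corrcoef(np.log(cost_dev), np.log(time_dev))[0, 1], 2))  # ~0.7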
- """ - - RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - LP = self.loss_params - - # make ds the second level in the MultiIndex - case_DF = pd.DataFrame( - index=case_list.reorder_levels([0, 4, 1, 2, 3]), columns=[0, ]) - case_DF.sort_index(axis=0, inplace=True) - driver_cmps = case_list.get_level_values(0).unique() - - rv_count = 0 - - # for each loss component - for loss_cmp_id in self.loss_map.index.values: - - # load the corresponding parameters - driver_type, driver_cmp_id = self.loss_map.loc[loss_cmp_id, 'Driver'] - conseq_cmp_id = self.loss_map.loc[loss_cmp_id, 'Consequence'] - - # currently, we only support DMG-based loss calculations - # but this will be extended in the very near future - if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not recognized: " - f"{driver_type}") - - # load the parameters - # TODO: remove specific DV_type references and make the code below - # generate parameters for any DV_types provided - if (conseq_cmp_id, 'Cost') in LP.index: - cost_params = LP.loc[(conseq_cmp_id, 'Cost'), :] - else: - cost_params = None - - if (conseq_cmp_id, 'Time') in LP.index: - time_params = LP.loc[(conseq_cmp_id, 'Time'), :] - else: - time_params = None - - if (conseq_cmp_id, 'Carbon') in LP.index: - carbon_params = LP.loc[(conseq_cmp_id, 'Carbon'), :] - else: - carbon_params = None - - if (conseq_cmp_id, 'Energy') in LP.index: - energy_params = LP.loc[(conseq_cmp_id, 'Energy'), :] - else: - energy_params = None - - if driver_cmp_id not in driver_cmps: - continue - - for ds in case_DF.loc[driver_cmp_id, :].index.unique(level=0): - - if ds == '0': - continue - - if cost_params is not None: - - cost_params_DS = cost_params[f'DS{ds}'] - - cost_family = cost_params_DS.get('Family', np.nan) - cost_theta = [cost_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3)] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(cost_theta[0]): - # if isinstance(cost_theta[0], str): - cost_theta[0] = 1.0 - - else: - cost_family = np.nan - - if time_params is not None: - - time_params_DS = time_params[f'DS{ds}'] - - time_family = time_params_DS.get('Family', np.nan) - time_theta = [time_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3)] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(time_theta[0]): - # if isinstance(time_theta[0], str): - time_theta[0] = 1.0 - - else: - time_family = np.nan - - if carbon_params is not None: - - carbon_params_DS = carbon_params[f'DS{ds}'] - - carbon_family = carbon_params_DS.get('Family', np.nan) - carbon_theta = [ - carbon_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3) - ] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(carbon_theta[0]): - # if isinstance(carbon_theta[0], str): - carbon_theta[0] = 1.0 - - else: - carbon_family = np.nan - - if energy_params is not None: - - energy_params_DS = energy_params[f'DS{ds}'] - - energy_family = energy_params_DS.get('Family', np.nan) - energy_theta = [ - energy_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3) - ] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(energy_theta[0]): - # if isinstance(energy_theta[0], str): - energy_theta[0] = 1.0 - - else: - 
energy_family = np.nan - - # If neither of the DV_types has a stochastic model assigned, - # we do not need random variables for this DS - if ( - (pd.isna(cost_family)) - and (pd.isna(time_family)) - and (pd.isna(carbon_family)) - and (pd.isna(energy_family)) - ): - continue - - # Otherwise, load the loc-dir cases - loc_dir_uid = case_DF.loc[(driver_cmp_id, ds)].index.values - - for loc, direction, uid in loc_dir_uid: - - # assign cost RV - if pd.isna(cost_family) is False: - - cost_rv_tag = ( - f'Cost-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV( - uq.RandomVariable( - name=cost_rv_tag, - distribution=cost_family, - theta=cost_theta, - truncation_limits=[0., np.nan] - ) - ) - rv_count += 1 - - # assign time RV - if pd.isna(time_family) is False: - time_rv_tag = ( - f'Time-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV(uq.RandomVariable( - name=time_rv_tag, - distribution=time_family, - theta=time_theta, - truncation_limits=[0., np.nan] - )) - rv_count += 1 - - # assign time RV - if pd.isna(carbon_family) is False: - carbon_rv_tag = ( - f'Carbon-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV(uq.RandomVariable( - name=carbon_rv_tag, - distribution=carbon_family, - theta=carbon_theta, - truncation_limits=[0., np.nan] - )) - rv_count += 1 - - # assign time RV - if pd.isna(energy_family) is False: - energy_rv_tag = ( - f'Energy-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV(uq.RandomVariable( - name=energy_rv_tag, - distribution=energy_family, - theta=energy_theta, - truncation_limits=[0., np.nan] - )) - rv_count += 1 - - # assign correlation between RVs across DV_types - # TODO: add more DV_types and handle cases with only a - # subset of them being defined - if ((pd.isna(cost_family) is False) and ( - pd.isna(time_family) is False) and ( - self._asmnt.options.rho_cost_time != 0.0)): - - rho = self._asmnt.options.rho_cost_time - - RV_reg.add_RV_set(uq.RandomVariableSet( - f'DV-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}_set', - list(RV_reg.RVs([cost_rv_tag, time_rv_tag]).values()), - np.array([[1.0, rho], [rho, 1.0]]))) - - self.log_msg(f"\n{rv_count} random variables created.", - prepend_timestamp=False) - - if rv_count > 0: - return RV_reg - # else: - return None - - def _calc_median_consequence(self, eco_qnt): - """ - Calculate the median repair consequence for each loss component. 
- - """ - - medians = {} - - DV_types = self.loss_params.index.unique(level=1) - - # for DV_type, DV_type_scase in zip(['COST', 'TIME'], ['Cost', 'Time']): - for DV_type in DV_types: - - cmp_list = [] - median_list = [] - - for loss_cmp_id in self.loss_map.index: - - driver_type, driver_cmp = self.loss_map.loc[ - loss_cmp_id, 'Driver'] - loss_cmp_name = self.loss_map.loc[loss_cmp_id, 'Consequence'] - - # check if the given DV type is available as an output for the - # selected component - if (loss_cmp_name, DV_type) not in self.loss_params.index: - continue - - if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not recognized: " - f"{driver_type}") - - if driver_cmp not in eco_qnt.columns.get_level_values( - 0).unique(): - continue - - ds_list = [] - sub_medians = [] - - for ds in self.loss_params.columns.get_level_values(0).unique(): - - if not ds.startswith('DS'): - continue - - ds_id = ds[2:] - - if ds_id == '0': - continue - - loss_params_DS = self.loss_params.loc[ - (loss_cmp_name, DV_type), - ds] - - # check if theta_0 is defined - theta_0 = loss_params_DS.get('Theta_0', np.nan) - - if pd.isna(theta_0): - continue - - # check if the distribution type is supported - family = loss_params_DS.get('Family', np.nan) - - if ((not pd.isna(family)) and ( - family not in [ - 'normal', 'lognormal', 'deterministic'])): - raise ValueError(f"Loss Distribution of type {family} " - f"not supported.") - - # If theta_0 is a scalar - try: - theta_0 = float(theta_0) - - if pd.isna(loss_params_DS.get('Family', np.nan)): - - # if theta_0 is constant, then use it directly - f_median = prep_constant_median_DV(theta_0) - - else: - - # otherwise use a constant 1.0 as the median - # The random variable will be generated as a - # variation from this 1.0 and added in a later step. 
- f_median = prep_constant_median_DV(1.0) - - except ValueError: - - # otherwise, use the multilinear function - all_vals = np.array( - [val.split(',') for val in theta_0.split('|')], - dtype=float) - medns = all_vals[0] - qnts = all_vals[1] - f_median = prep_bounded_multilinear_median_DV( - medns, qnts) - - # get the corresponding aggregate damage quantities - # to consider economies of scale - if 'ds' in eco_qnt.columns.names: - - avail_ds = ( - eco_qnt.loc[:, driver_cmp].columns.unique(level=0)) - - if (ds_id not in avail_ds): - continue - - eco_qnt_i = eco_qnt.loc[:, (driver_cmp, ds_id)].copy() - - else: - eco_qnt_i = eco_qnt.loc[:, driver_cmp].copy() - - if isinstance(eco_qnt_i, pd.Series): - eco_qnt_i = eco_qnt_i.to_frame() - eco_qnt_i.columns = ['X'] - eco_qnt_i.columns.name = 'del' - - # generate the median values for each realization - eco_qnt_i.loc[:, :] = f_median(eco_qnt_i.values) - - sub_medians.append(eco_qnt_i) - ds_list.append(ds_id) - - if len(ds_list) > 0: - - # combine medians across damage states into one DF - median_list.append(pd.concat(sub_medians, axis=1, - keys=ds_list)) - cmp_list.append(loss_cmp_id) - - if len(cmp_list) > 0: - - # combine medians across components into one DF - result = pd.concat(median_list, axis=1, keys=cmp_list) - - # remove the extra column header level - if 'del' in result.columns.names: - result.columns = result.columns.droplevel('del') - - # name the remaining column header levels - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - result.columns.names = ['cmp', 'ds'] - - else: - result.columns.names = ['cmp', 'ds', 'loc'] - - # save the results to the returned dictionary - medians.update({DV_type: result}) - - return medians - - def aggregate_losses(self): - """ - Aggregates repair consequences across components. - - Repair costs are simply summed up for each realization while repair - times are aggregated to provide lower and upper limits of the total - repair time using the assumption of parallel and sequential repair of - floors, respectively. Repairs within each floor are assumed to occur - sequentially. 
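A small pandas illustration of the two aggregation assumptions described above, using hypothetical repair times for two locations:

    import pandas as pd

    repair_time = pd.DataFrame({'loc1': [5.0, 3.0], 'loc2': [2.0, 4.0]})
    sequential = repair_time.sum(axis=1)  # floors repaired one after the other
    parallel = repair_time.max(axis=1)    # all floors repaired simultaneously
    print(sequential.tolist(), parallel.tolist())  # [7.0, 7.0] [5.0, 4.0]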
- """ - - self.log_div() - self.log_msg("Aggregating repair consequences...") - - DV = self.sample - - # group results by DV type and location - DVG = DV.groupby(level=[0, 4], axis=1).sum() - - # create the summary DF - df_agg = pd.DataFrame(index=DV.index, - columns=['repair_cost', - 'repair_time-parallel', - 'repair_time-sequential', - 'repair_carbon', - 'repair_energy']) - - if 'Cost' in DVG.columns: - df_agg['repair_cost'] = DVG['Cost'].sum(axis=1) - else: - df_agg = df_agg.drop('repair_cost', axis=1) - - if 'Time' in DVG.columns: - df_agg['repair_time-sequential'] = DVG['Time'].sum(axis=1) - - df_agg['repair_time-parallel'] = DVG['Time'].max(axis=1) - else: - df_agg = df_agg.drop(['repair_time-parallel', - 'repair_time-sequential'], - axis=1) - - if 'Carbon' in DVG.columns: - df_agg['repair_carbon'] = DVG['Carbon'].sum(axis=1) - else: - df_agg = df_agg.drop('repair_carbon', axis=1) - - if 'Energy' in DVG.columns: - df_agg['repair_energy'] = DVG['Energy'].sum(axis=1) - else: - df_agg = df_agg.drop('repair_energy', axis=1) - - # convert units - - cmp_units = self.loss_params[('DV', 'Unit')].groupby(level=[1, ]).agg( - lambda x: x.value_counts().index[0]) - - dv_units = pd.Series(index=df_agg.columns, name='Units', dtype='object') - - if 'Cost' in DVG.columns: - dv_units['repair_cost'] = cmp_units['Cost'] - - if 'Time' in DVG.columns: - dv_units['repair_time-parallel'] = cmp_units['Time'] - dv_units['repair_time-sequential'] = cmp_units['Time'] - - if 'Carbon' in DVG.columns: - dv_units['repair_carbon'] = cmp_units['Carbon'] - - if 'Energy' in DVG.columns: - dv_units['repair_energy'] = cmp_units['Energy'] - - df_agg = file_io.save_to_csv( - df_agg, None, units=dv_units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=False, - log=self._asmnt.log) - - df_agg.drop("Units", inplace=True) - - # convert header - - df_agg = base.convert_to_MultiIndex(df_agg, axis=1) - - self.log_msg("Repair consequences successfully aggregated.") - - return df_agg.astype(float) - - def _generate_DV_sample(self, dmg_quantities, sample_size): - """ - Generate a sample of repair costs and times. - - Parameters - ---------- - dmg_quantities: DataFrame - A table with the quantity of damage experienced in each damage state - of each performance group at each location and direction. You can use - the prepare_dmg_quantities method in the DamageModel to get such a - DF. - sample_size: integer - The number of realizations to generate. - - Raises - ------ - ValueError - When any Loss Driver is not recognized. 
- """ - - # calculate the quantities for economies of scale - self.log_msg("\nAggregating damage quantities...", - prepend_timestamp=False) - - if self._asmnt.options.eco_scale["AcrossFloors"]: - - if self._asmnt.options.eco_scale["AcrossDamageStates"]: - - eco_levels = [0, ] - eco_columns = ['cmp', ] - - else: - - eco_levels = [0, 4] - eco_columns = ['cmp', 'ds'] - - elif self._asmnt.options.eco_scale["AcrossDamageStates"]: - - eco_levels = [0, 1] - eco_columns = ['cmp', 'loc'] - - else: - - eco_levels = [0, 1, 4] - eco_columns = ['cmp', 'loc', 'ds'] - - eco_group = dmg_quantities.groupby(level=eco_levels, axis=1) - eco_qnt = eco_group.sum().mask(eco_group.count() == 0, np.nan) - assert eco_qnt.columns.names == eco_columns - - self.log_msg("Successfully aggregated damage quantities.", - prepend_timestamp=False) - - # apply the median functions, if needed, to get median consequences for - # each realization - self.log_msg("\nCalculating the median repair consequences...", - prepend_timestamp=False) - - medians = self._calc_median_consequence(eco_qnt) - - self.log_msg("Successfully determined median repair consequences.", - prepend_timestamp=False) - - # combine the median consequences with the samples of deviation from the - # median to get the consequence realizations. - self.log_msg("\nConsidering deviations from the median values to obtain " - "random DV sample...") - - self.log_msg("Preparing random variables for repair cost and time...", - prepend_timestamp=False) - RV_reg = self._create_DV_RVs(dmg_quantities.columns) - - if RV_reg is not None: - RV_reg.generate_sample( - sample_size=sample_size, method=self._asmnt.options.sampling_method) - - std_sample = base.convert_to_MultiIndex( - pd.DataFrame(RV_reg.RV_sample), axis=1).sort_index(axis=1) - std_sample.columns.names = ['dv', 'cmp', 'ds', 'loc', 'dir', 'uid'] - - # convert column names to int - std_idx = std_sample.columns.levels - - std_sample.columns = std_sample.columns.set_levels( - [ - std_idx[0], - std_idx[1].astype(int), - std_idx[2], - std_idx[3], - std_idx[4], - std_idx[5], - ] - ) - - std_sample.sort_index(axis=1, inplace=True) - - else: - std_sample = None - - self.log_msg(f"\nSuccessfully generated {sample_size} realizations of " - "deviation from the median consequences.", - prepend_timestamp=False) - - res_list = [] - key_list = [] - - dmg_quantities.columns = dmg_quantities.columns.reorder_levels( - [0, 4, 1, 2, 3] - ) - dmg_quantities.sort_index(axis=1, inplace=True) - - DV_types = self.loss_params.index.unique(level=1) - - if isinstance(std_sample, pd.DataFrame): - std_DV_types = std_sample.columns.unique(level=0) - else: - std_DV_types = [] - - # for DV_type, _ in zip(['COST', 'TIME'], ['Cost', 'Time']): - for DV_type in DV_types: - - if DV_type in std_DV_types: - prob_cmp_list = std_sample[DV_type].columns.unique(level=0) - else: - prob_cmp_list = [] - - cmp_list = [] - - if DV_type not in medians: - continue - - for cmp_i in medians[DV_type].columns.unique(level=0): - - # check if there is damage in the component - driver_type, dmg_cmp_i = self.loss_map.loc[cmp_i, 'Driver'] - loss_cmp_i = self.loss_map.loc[cmp_i, 'Consequence'] - - if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not " - f"recognized: {driver_type}") - - if not (dmg_cmp_i - in dmg_quantities.columns.unique(level=0)): - continue - - ds_list = [] - - for ds in medians[DV_type].loc[:, cmp_i].columns.unique(level=0): - - loc_list = [] - - for loc_id, loc in enumerate( - dmg_quantities.loc[ - :, (dmg_cmp_i, 
ds)].columns.unique(level=0)): - - if ((self._asmnt.options.eco_scale[ - "AcrossFloors"] is True) and ( - loc_id > 0)): - break - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - median_i = medians[DV_type].loc[:, (cmp_i, ds)] - dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds)] - - if cmp_i in prob_cmp_list: - std_i = std_sample.loc[:, (DV_type, cmp_i, ds)] - else: - std_i = None - - else: - median_i = medians[DV_type].loc[:, (cmp_i, ds, loc)] - dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds, loc)] - - if cmp_i in prob_cmp_list: - std_i = std_sample.loc[:, (DV_type, cmp_i, ds, loc)] - else: - std_i = None - - if std_i is not None: - res_list.append(dmg_i.mul(median_i, axis=0) * std_i) - else: - res_list.append(dmg_i.mul(median_i, axis=0)) - - loc_list.append(loc) - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - ds_list += [ds, ] - else: - ds_list += [(ds, loc) for loc in loc_list] - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - cmp_list += [(loss_cmp_i, dmg_cmp_i, ds) for ds in ds_list] - else: - cmp_list += [ - (loss_cmp_i, dmg_cmp_i, ds, loc) for ds, loc in ds_list] - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds) - for loss_cmp_i, dmg_cmp_i, ds in cmp_list] - else: - key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds, loc) - for loss_cmp_i, dmg_cmp_i, ds, loc in cmp_list] - - lvl_names = ['dv', 'loss', 'dmg', 'ds', 'loc', 'dir', 'uid'] - DV_sample = pd.concat(res_list, axis=1, keys=key_list, - names=lvl_names) - - DV_sample = DV_sample.fillna(0).convert_dtypes() - DV_sample.columns.names = lvl_names - - # Get the flags for replacement consequence trigger - DV_sum = DV_sample.groupby(level=[1, ], axis=1).sum() - if 'replacement' in DV_sum.columns: - - # When the 'replacement' consequence is triggered, all - # local repair consequences are discarded. Note that - # global consequences are assigned to location '0'. - - id_replacement = DV_sum['replacement'] > 0 - - # get the list of non-zero locations - locs = DV_sample.columns.get_level_values(4).unique().values - - locs = locs[locs != '0'] - - DV_sample.loc[id_replacement, idx[:, :, :, :, locs]] = 0.0 - - self._sample = DV_sample - - self.log_msg("Successfully obtained DV sample.", - prepend_timestamp=False) - - -def prep_constant_median_DV(median): - """ - Returns a constant median Decision Variable (DV) function. - - Parameters - ---------- - median: float - The median DV for a consequence function with fixed median. - - Returns - ------- - f: callable - A function that returns the constant median DV for all component - quantities. - """ - def f(*args): - # pylint: disable=unused-argument - return median - - return f - - -def prep_bounded_multilinear_median_DV(medians, quantities): - """ - Returns a bounded multilinear median Decision Variable (DV) function. - - The median DV equals the min and max values when the quantity is - outside of the prescribed quantity bounds. When the quantity is within the - bounds, the returned median is calculated by linear interpolation. - - Parameters - ---------- - medians: ndarray - Series of values that define the y coordinates of the multilinear DV - function. - quantities: ndarray - Series of values that define the component quantities corresponding to - the series of medians and serving as the x coordinates of the - multilinear DV function. - - Returns - ------- - f: callable - A function that returns the median DV given the quantity of damaged - components. 
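# A minimal sketch of the bounded multilinear median DV function described
# above: it is a plain linear interpolation over (quantity, median) pairs
# that returns the edge values outside the bounds. Numbers are hypothetical.
import numpy as np

quantities = np.array([1.0, 5.0, 10.0])    # x coordinates (component quantity)
medians = np.array([300.0, 250.0, 200.0])  # y coordinates (median DV)
print(np.interp([0.5, 5.0, 7.5, 20.0], quantities, medians))
# -> [300., 250., 225., 200.]  (clamped below 1.0 and above 10.0)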
- """ - def f(quantity): - if quantity is None: - raise ValueError( - 'A bounded linear median Decision Variable function called ' - 'without specifying the quantity of damaged components') - - q_array = np.asarray(quantity, dtype=np.float64) - - # calculate the median consequence given the quantity of damaged - # components - output = np.interp(q_array, quantities, medians) - - return output - - return f diff --git a/pelicun/model/__init__.py b/pelicun/model/__init__.py new file mode 100644 index 000000000..f30053fdd --- /dev/null +++ b/pelicun/model/__init__.py @@ -0,0 +1,49 @@ +""" +-*- coding: utf-8 -*- + +Copyright (c) 2018 Leland Stanford Junior University +Copyright (c) 2018 The Regents of the University of California + +This file is part of pelicun. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +You should have received a copy of the BSD 3-Clause License along with +pelicun. If not, see . + +Contributors: +Adam Zsarnóczay +""" + +# flake8: noqa + +from .pelicun_model import PelicunModel +from .demand_model import DemandModel +from .asset_model import AssetModel +from .damage_model import DamageModel +from .loss_model import LossModel +from .loss_model import BldgRepairModel diff --git a/pelicun/model/asset_model.py b/pelicun/model/asset_model.py new file mode 100644 index 000000000..6c4ab94ea --- /dev/null +++ b/pelicun/model/asset_model.py @@ -0,0 +1,436 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the AssetModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + AssetModel + +""" + +from itertools import product +import numpy as np +import pandas as pd +from .pelicun_model import PelicunModel +from .. import base +from .. import uq +from .. import file_io + + +idx = base.idx + + +class AssetModel(PelicunModel): + """ + Manages asset information used in assessments. + + Parameters + ---------- + + """ + + def __init__(self, assessment): + + super().__init__(assessment) + + self.cmp_marginal_params = None + self.cmp_units = None + + self._cmp_RVs = None + self._cmp_sample = None + + @property + def cmp_sample(self): + """ + Assigns the _cmp_sample attribute if it is None and returns + the component sample. 
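# A minimal sketch of the four-level column index of the component sample
# returned by this property; the component id and quantities are hypothetical.
import pandas as pd

cols = pd.MultiIndex.from_tuples(
    [('CMP.A', '1', '1', '0'), ('CMP.A', '2', '1', '0')],
    names=['cmp', 'loc', 'dir', 'uid'])
cmp_sample = pd.DataFrame([[8.0, 6.0], [8.0, 6.0]], columns=cols)
print(cmp_sample['CMP.A'])  # quantities of one component, by loc/dir/uid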
+ """ + + if self._cmp_sample is None: + + cmp_sample = pd.DataFrame(self._cmp_RVs.RV_sample) + cmp_sample.sort_index(axis=0, inplace=True) + cmp_sample.sort_index(axis=1, inplace=True) + + cmp_sample = base.convert_to_MultiIndex(cmp_sample, axis=1)['CMP'] + + cmp_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + self._cmp_sample = cmp_sample + + else: + cmp_sample = self._cmp_sample + + return cmp_sample + + def save_cmp_sample(self, filepath=None, save_units=False): + """ + Save component quantity sample to a csv file + + """ + + self.log_div() + if filepath is not None: + self.log_msg('Saving asset components sample...') + + # prepare a units array + sample = self.cmp_sample + + units = pd.Series(name='Units', index=sample.columns, dtype=object) + + for cmp_id, unit_name in self.cmp_units.items(): + units.loc[cmp_id, :] = unit_name + + res = file_io.save_to_csv( + sample, filepath, units=units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log) + + if filepath is not None: + self.log_msg('Asset components sample successfully saved.', + prepend_timestamp=False) + return None + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + + if save_units: + return res.astype(float), units + + return res.astype(float) + + def load_cmp_sample(self, filepath): + """ + Load component quantity sample from a csv file + + """ + + self.log_div() + self.log_msg('Loading asset components sample...') + + sample, units = file_io.load_data( + filepath, self._asmnt.unit_conversion_factors, + return_units=True, log=self._asmnt.log) + + sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + self._cmp_sample = sample + + self.cmp_units = units.groupby(level=0).first() + + self.log_msg('Asset components sample successfully loaded.', + prepend_timestamp=False) + + def load_cmp_model(self, data_source): + """ + Load the model that describes component quantities in the asset. + + Parameters + ---------- + data_source: string or dict + If string, the data_source is a file prefix ( in the + following description) that identifies the following files: + _marginals.csv, _empirical.csv, + _correlation.csv. If dict, the data source is a dictionary + with the following optional keys: 'marginals', 'empirical', and + 'correlation'. The value under each key shall be a DataFrame. 
+ """ + + def get_locations(loc_str): + + try: + res = str(int(loc_str)) + return np.array([res, ]) + + except ValueError as exc: + + stories = self._asmnt.stories + + if "--" in loc_str: + s_low, s_high = loc_str.split('--') + s_low = get_locations(s_low) + s_high = get_locations(s_high) + return np.arange(int(s_low[0]), int(s_high[0]) + 1).astype(str) + + if "," in loc_str: + return np.array(loc_str.split(','), dtype=int).astype(str) + + if loc_str == "all": + return np.arange(1, stories + 1).astype(str) + + if loc_str == "top": + return np.array([stories, ]).astype(str) + + if loc_str == "roof": + return np.array([stories + 1, ]).astype(str) + + raise ValueError(f"Cannot parse location string: " + f"{loc_str}") from exc + + def get_directions(dir_str): + + if pd.isnull(dir_str): + return np.ones(1).astype(str) + + # else: + try: + res = str(int(dir_str)) + return np.array([res, ]) + + except ValueError as exc: + + if "," in dir_str: + return np.array(dir_str.split(','), dtype=int).astype(str) + + if "--" in dir_str: + d_low, d_high = dir_str.split('--') + d_low = get_directions(d_low) + d_high = get_directions(d_high) + return np.arange( + int(d_low[0]), int(d_high[0]) + 1).astype(str) + + # else: + raise ValueError(f"Cannot parse direction string: " + f"{dir_str}") from exc + + def get_attribute(attribute_str, dtype=float, default=np.nan): + + if pd.isnull(attribute_str): + return default + + # else: + + try: + + res = dtype(attribute_str) + return res + + except ValueError as exc: + + if "," in attribute_str: + # a list of weights + w = np.array(attribute_str.split(','), dtype=float) + + # return a normalized vector + return w / np.sum(w) + + # else: + raise ValueError(f"Cannot parse Blocks string: " + f"{attribute_str}") from exc + + self.log_div() + self.log_msg('Loading component model...') + + # Currently, we assume independent component distributions are defined + # throughout the building. Correlations may be added afterward or this + # method can be extended to read correlation matrices too if needed. 
+ + # prepare the marginal data source variable to load the data + if isinstance(data_source, dict): + marginal_data_source = data_source['marginals'] + else: + marginal_data_source = data_source + '_marginals.csv' + + marginal_params, units = file_io.load_data( + marginal_data_source, + None, + orientation=1, + reindex=False, + return_units=True, + log=self._asmnt.log, + ) + + # group units by cmp id to avoid redundant entries + self.cmp_units = units.copy().groupby(level=0).first() + + marginal_params = pd.concat([marginal_params, units], axis=1) + + cmp_marginal_param_dct = { + 'Family': [], 'Theta_0': [], 'Theta_1': [], 'Theta_2': [], + 'TruncateLower': [], 'TruncateUpper': [], 'Blocks': [], + 'Units': [] + } + index_list = [] + for row in marginal_params.itertuples(): + locs = get_locations(row.Location) + dirs = get_directions(row.Direction) + indices = list(product((row.Index, ), locs, dirs)) + num_vals = len(indices) + for col, cmp_marginal_param in cmp_marginal_param_dct.items(): + if col == 'Blocks': + cmp_marginal_param.extend( + [ + get_attribute( + getattr(row, 'Blocks', np.nan), + dtype=int, + default=1.0, + ) + ] + * num_vals + ) + elif col == 'Units': + cmp_marginal_param.extend( + [self.cmp_units[row.Index]] * num_vals + ) + elif col == 'Family': + cmp_marginal_param.extend( + [getattr(row, col, np.nan)] * num_vals + ) + else: + cmp_marginal_param.extend( + [get_attribute(getattr(row, col, np.nan))] * num_vals + ) + index_list.extend(indices) + index = pd.MultiIndex.from_tuples(index_list, names=['cmp', 'loc', 'dir']) + dtypes = { + 'Family': object, 'Theta_0': float, 'Theta_1': float, + 'Theta_2': float, 'TruncateLower': float, + 'TruncateUpper': float, 'Blocks': int, 'Units': object + } + cmp_marginal_param_series = [] + for col, cmp_marginal_param in cmp_marginal_param_dct.items(): + cmp_marginal_param_series.append( + pd.Series( + cmp_marginal_param, + dtype=dtypes[col], name=col, index=index)) + + cmp_marginal_params = pd.concat( + cmp_marginal_param_series, axis=1 + ) + + assert not cmp_marginal_params['Theta_0'].isnull().values.any() + + cmp_marginal_params.dropna(axis=1, how='all', inplace=True) + + self.log_msg("Model parameters successfully parsed. " + f"{cmp_marginal_params.shape[0]} performance groups identified", + prepend_timestamp=False) + + # Now we can take care of converting the values to base units + self.log_msg("Converting model parameters to internal units...", + prepend_timestamp=False) + + # ensure that the index has unique entries by introducing an + # internal component uid + base.dedupe_index(cmp_marginal_params) + + cmp_marginal_params = self.convert_marginal_params( + cmp_marginal_params, cmp_marginal_params['Units'] + ) + + self.cmp_marginal_params = cmp_marginal_params.drop('Units', axis=1) + + self.log_msg("Model parameters successfully loaded.", + prepend_timestamp=False) + + self.log_msg("\nComponent model marginal distributions:\n" + + str(cmp_marginal_params), + prepend_timestamp=False) + + # the empirical data and correlation files can be added later, if needed + + def _create_cmp_RVs(self): + """ + Defines the RVs used for sampling component quantities. 
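# A minimal sketch of the expansion performed in load_cmp_model above: each
# marginal row becomes one performance group per (cmp, loc, dir) combination.
# The component id and the location/direction lists are hypothetical.
from itertools import product

locs = ['1', '2', '3']
dirs = ['1', '2']
index_tuples = list(product(('CMP.A',), locs, dirs))
print(len(index_tuples))   # 6 performance groups from a single input row
print(index_tuples[0])     # ('CMP.A', '1', '1')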
+ """ + + # initialize the registry + RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + + # add a random variable for each component quantity variable + for rv_params in self.cmp_marginal_params.itertuples(): + + cmp = rv_params.Index + + # create a random variable and add it to the registry + RV_reg.add_RV(uq.RandomVariable( + name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', + distribution=getattr(rv_params, "Family", np.nan), + theta=[getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3)], + truncation_limits=[getattr(rv_params, f"Truncate{side}", np.nan) + for side in ("Lower", "Upper")], + )) + + self.log_msg(f"\n{self.cmp_marginal_params.shape[0]} " + "random variables created.", + prepend_timestamp=False) + + self._cmp_RVs = RV_reg + + def generate_cmp_sample(self, sample_size=None): + """ + Generates component quantity realizations. If a sample_size + is not specified, the sample size found in the demand model is + used. + """ + + if self.cmp_marginal_params is None: + raise ValueError('Model parameters have not been specified. Load' + 'parameters from a file before generating a ' + 'sample.') + + self.log_div() + self.log_msg('Generating sample from component quantity variables...') + + if sample_size is None: + if self._asmnt.demand.sample is None: + raise ValueError( + 'Sample size was not specified, ' + 'and it cannot be determined from ' + 'the demand model.') + sample_size = self._asmnt.demand.sample.shape[0] + + self._create_cmp_RVs() + + self._cmp_RVs.generate_sample( + sample_size=sample_size, + method=self._asmnt.options.sampling_method) + + # replace the potentially existing sample with the generated one + self._cmp_sample = None + + self.log_msg(f"\nSuccessfully generated {sample_size} realizations.", + prepend_timestamp=False) diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py new file mode 100644 index 000000000..0035e3f3b --- /dev/null +++ b/pelicun/model/damage_model.py @@ -0,0 +1,1590 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the DamageModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + DamageModel + +""" + +from copy import deepcopy +import numpy as np +import pandas as pd +from .pelicun_model import PelicunModel +from .. import base +from .. import uq +from .. import file_io + + +idx = base.idx + + +class DamageModel(PelicunModel): + """ + Manages damage information used in assessments. + + This class contains the following methods: + + - save_sample() + - load_sample() + - load_damage_model() + - calculate() + - _get_pg_batches() + - _generate_dmg_sample() + - _create_dmg_rvs() + - _get_required_demand_type() + - _assemble_required_demand_data() + - _evaluate_damage_state() + - _prepare_dmg_quantities() + - _perform_dmg_task() + - _apply_dmg_funcitons() + + Parameters + ---------- + + """ + + def __init__(self, assessment): + + super().__init__(assessment) + + self.damage_params = None + self.sample = None + + def save_sample(self, filepath=None, save_units=False): + """ + Save damage sample to a csv file + + """ + self.log_div() + self.log_msg('Saving damage sample...') + + cmp_units = self._asmnt.asset.cmp_units + qnt_units = pd.Series(index=self.sample.columns, name='Units', + dtype='object') + for cmp in cmp_units.index: + qnt_units.loc[cmp] = cmp_units.loc[cmp] + + res = file_io.save_to_csv( + self.sample, filepath, + units=qnt_units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log) + + if filepath is not None: + self.log_msg('Damage sample successfully saved.', + prepend_timestamp=False) + return None + + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + res.index = res.index.astype('int64') + + if save_units: + return res.astype(float), units + + return res.astype(float) + + def load_sample(self, filepath): + """ + Load damage state sample data. + + """ + self.log_div() + self.log_msg('Loading damage sample...') + + self.sample = file_io.load_data( + filepath, self._asmnt.unit_conversion_factors, + log=self._asmnt.log) + + # set the names of the columns + self.sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] + + self.log_msg('Damage sample successfully loaded.', + prepend_timestamp=False) + + def load_damage_model(self, data_paths): + """ + Load limit state damage model parameters and damage state assignments + + Parameters + ---------- + data_paths: list of string + List of paths to data files with damage model information. Default + XY datasets can be accessed as PelicunDefault/XY. 
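# A minimal usage sketch, assuming `asmnt` is an existing Assessment object;
# the file names are hypothetical. Paths that start with 'PelicunDefault/'
# are redirected to the bundled resource folder (see the code below), so a
# default database can be combined with user-provided CSV files.
# asmnt.damage.load_damage_model([
#     'PelicunDefault/damage_DB_FEMA_P58_2nd.csv',
#     'my_own_damage_models.csv',
# ])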
+ """ + + self.log_div() + self.log_msg('Loading damage model...') + + # replace default flag with default data path + for d_i, data_path in enumerate(data_paths): + + if 'PelicunDefault/' in data_path: + data_paths[d_i] = data_path.replace( + 'PelicunDefault/', + f'{base.pelicun_path}/resources/SimCenterDBDL/', + ) + + data_list = [] + # load the data files one by one + for data_path in data_paths: + + data = file_io.load_data( + data_path, None, orientation=1, reindex=False, log=self._asmnt.log + ) + + data_list.append(data) + + damage_params = pd.concat(data_list, axis=0) + + # drop redefinitions of components + damage_params = damage_params.groupby(damage_params.index).first() + + # get the component types defined in the asset model + cmp_labels = self._asmnt.asset.cmp_sample.columns + + # only keep the damage model parameters for the components in the model + cmp_unique = cmp_labels.unique(level=0) + cmp_mask = damage_params.index.isin(cmp_unique, level=0) + + damage_params = damage_params.loc[cmp_mask, :] + + if np.sum(cmp_mask) != len(cmp_unique): + + cmp_list = cmp_unique[ + np.isin(cmp_unique, damage_params.index.values, + invert=True)].to_list() + + self.log_msg("\nWARNING: The damage model does not provide " + "vulnerability information for the following component(s) " + f"in the asset model: {cmp_list}.\n", + prepend_timestamp=False) + + # TODO: load defaults for Demand-Offset and Demand-Directional + + # Now convert model parameters to base units + for LS_i in damage_params.columns.unique(level=0): + if LS_i.startswith('LS'): + + damage_params.loc[:, LS_i] = self.convert_marginal_params( + damage_params.loc[:, LS_i].copy(), + damage_params[('Demand', 'Unit')], + ).values + + # check for components with incomplete damage model information + cmp_incomplete_list = damage_params.loc[ + damage_params[('Incomplete', '')] == 1].index + + damage_params.drop(cmp_incomplete_list, inplace=True) + + if len(cmp_incomplete_list) > 0: + self.log_msg(f"\nWARNING: Damage model information is incomplete for " + f"the following component(s) {cmp_incomplete_list}. They " + f"were removed from the analysis.\n", + prepend_timestamp=False) + + self.damage_params = damage_params + + self.log_msg("Damage model parameters successfully parsed.", + prepend_timestamp=False) + + def _handle_operation(self, initial_value, operation, other_value): + """ + This method is used in `_create_dmg_RVs` to apply capacity + adjustment operations whenever required. It is defined as a + safer alternative to directly using `eval`. + + Parameters + ---------- + initial_value: float + Value before operation + operation: str + Any of +, -, *, / + other_value: float + Value used to apply the operation + + Returns + ------- + result: float + The result of the operation + + """ + if operation == '+': + return initial_value + other_value + if operation == '-': + return initial_value - other_value + if operation == '*': + return initial_value * other_value + if operation == '/': + return initial_value / other_value + raise ValueError(f'Invalid operation: {operation}') + + def _create_dmg_RVs(self, PGB, scaling_specification=None): + """ + Creates random variables required later for the damage calculation. + + The method initializes two random variable registries, + capacity_RV_reg and lsds_RV_reg, and loops through each + performance group in the input performance group block (PGB) + dataframe. 
For each performance group, it retrieves the + component sample and blocks and checks if the limit state is + defined for the component. If the limit state is defined, the + method gets the list of limit states and the parameters for + each limit state. The method assigns correlation between limit + state random variables, adds the limit state random variables + to the capacity_RV_reg registry, and adds LSDS assignments to + the lsds_RV_reg registry. After looping through all + performance groups, the method returns the two registries. + + Parameters + ---------- + PGB : DataFrame + A DataFrame that groups performance groups into batches + for efficient damage assessment. + scaling_specification: dict, optional + A dictionary defining the shift in median. + Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} + The keys are individual components that should be present + in the `capacity_sample`. The values should be strings + containing an operation followed by the value formatted as + a float. The operation can be '+' for addition, '-' for + subtraction, '*' for multiplication, and '/' for division. + + """ + + def assign_lsds(ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag): + """ + Prepare random variables to handle mutually exclusive damage states. + + """ + + # If the limit state has a single damage state assigned + # to it, we don't need random sampling + if pd.isnull(ds_weights): + + ds_id += 1 + + lsds_RV_reg.add_RV( + uq.RandomVariable( + name=lsds_rv_tag, + distribution='deterministic', + theta=ds_id, + ) + ) + + # Otherwise, we create a multinomial random variable + else: + + # parse the DS weights + ds_weights = np.array( + ds_weights.replace(" ", "").split('|'), dtype=float + ) + + def map_ds(values, offset=int(ds_id + 1)): + return values + offset + + lsds_RV_reg.add_RV( + uq.RandomVariable( + name=lsds_rv_tag, + distribution='multinomial', + theta=ds_weights, + f_map=map_ds, + ) + ) + + ds_id += len(ds_weights) + + return ds_id + + if self._asmnt.log.verbose: + self.log_msg('Generating capacity variables ...', prepend_timestamp=True) + + # initialize the registry + capacity_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + lsds_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + + # capacity adjustment: + # ensure the scaling_specification is a dictionary + if not scaling_specification: + scaling_specification = {} + else: + # if there are contents, ensure they are valid. + # See docstring for an example of what is expected. + parsed_scaling_specification = {} + # validate contents + for key, value in scaling_specification.items(): + css = 'capacity adjustment specification' + if not isinstance(value, str): + raise ValueError( + f'Invalud entry in {css}: {value}. It has to be a string. ' + f'See docstring of DamageModel._create_dmg_RVs.' 
+ ) + capacity_adjustment_operation = value[0] + number = value[1::] + if capacity_adjustment_operation not in ('+', '-', '*', '/'): + raise ValueError( + f'Invalid operation in {css}: ' + f'{capacity_adjustment_operation}' + ) + fnumber = base.float_or_None(number) + if fnumber is None: + raise ValueError(f'Invalid number in {css}: {number}') + parsed_scaling_specification[key] = ( + capacity_adjustment_operation, + fnumber, + ) + scaling_specification = parsed_scaling_specification + + # get the component sample and blocks from the asset model + for PG in PGB.index: + + # determine demand capacity adjustment operation, if required + cmp_loc_dir = '-'.join(PG[0:3]) + capacity_adjustment_operation = scaling_specification.get( + cmp_loc_dir, None + ) + + cmp_id = PG[0] + blocks = PGB.loc[PG, 'Blocks'] + + # if the number of blocks is provided, calculate the weights + if np.atleast_1d(blocks).shape[0] == 1: + blocks = np.full(int(blocks), 1.0 / blocks) + # otherwise, assume that the list contains the weights + + # initialize the damaged quantity sample variable + + assert self.damage_params is not None + if cmp_id in self.damage_params.index: + + frg_params = self.damage_params.loc[cmp_id, :] + + # get the list of limit states + limit_states = [] + + for val in frg_params.index.get_level_values(0).unique(): + if 'LS' in val: + limit_states.append(val[2:]) + + ds_id = 0 + + frg_rv_set_tags = [[] for b in blocks] + anchor_RVs = [] + + for ls_id in limit_states: + + frg_params_LS = frg_params[f'LS{ls_id}'] + + theta_0 = frg_params_LS.get('Theta_0', np.nan) + family = frg_params_LS.get('Family', np.nan) + ds_weights = frg_params_LS.get('DamageStateWeights', np.nan) + + # check if the limit state is defined for the component + if pd.isna(theta_0): + continue + + theta = [ + frg_params_LS.get(f"Theta_{t_i}", np.nan) for t_i in range(3) + ] + + if capacity_adjustment_operation: + if family in {'normal', 'lognormal'}: + theta[0] = self._handle_operation( + theta[0], + capacity_adjustment_operation[0], + capacity_adjustment_operation[1], + ) + else: + self.log_msg( + f'\nWARNING: Capacity adjustment is only supported ' + f'for `normal` or `lognormal` distributions. ' + f'Ignoring: {cmp_loc_dir}, which is {family}', + prepend_timestamp=False, + ) + + tr_lims = [ + frg_params_LS.get(f"Truncate{side}", np.nan) + for side in ("Lower", "Upper") + ] + + for block_i, _ in enumerate(blocks): + + frg_rv_tag = ( + 'FRG-' + f'{PG[0]}-' # cmp_id + f'{PG[1]}-' # loc + f'{PG[2]}-' # dir + f'{PG[3]}-' # uid + f'{block_i+1}-' # block + f'{ls_id}' + ) + + # Assign correlation between limit state random + # variables + # Note that we assume perfectly correlated limit + # state random variables here. This approach is in + # line with how mainstream PBE calculations are + # performed. Assigning more sophisticated + # correlations between limit state RVs is possible, + # if needed. Please let us know through the + # SimCenter Message Board if you are interested in + # such a feature. 
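# A minimal sketch of the capacity adjustment applied above: the parsed
# (operation, value) pair shifts the median capacity Theta_0 of a normal or
# lognormal fragility before the RV is built. The numbers are hypothetical.
theta_0 = 0.30                      # median capacity from the damage model
operation, value = '*', 1.2         # parsed from {'CMP.A-1-1': '*1.2'}
if operation == '*':
    theta_0 = theta_0 * value       # scaled median used for the fragility RV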
+ # Anchor all other limit state random variables to + # the first one to consider the perfect correlation + # between capacities in each LS + if ls_id == limit_states[0]: + anchor = None + else: + anchor = anchor_RVs[block_i] + + # parse theta values for multilinear_CDF + if family == 'multilinear_CDF': + theta = np.column_stack( + ( + np.array( + theta[0].split('|')[0].split(','), + dtype=float, + ), + np.array( + theta[0].split('|')[1].split(','), + dtype=float, + ), + ) + ) + + RV = uq.RandomVariable( + name=frg_rv_tag, + distribution=family, + theta=theta, + truncation_limits=tr_lims, + anchor=anchor, + ) + + capacity_RV_reg.add_RV(RV) + + # add the RV to the set of correlated variables + frg_rv_set_tags[block_i].append(frg_rv_tag) + + if ls_id == limit_states[0]: + anchor_RVs.append(RV) + + # Now add the LS->DS assignments + lsds_rv_tag = ( + 'LSDS-' + f'{PG[0]}-' # cmp_id + f'{PG[1]}-' # loc + f'{PG[2]}-' # dir + f'{PG[3]}-' # uid + f'{block_i+1}-' # block + f'{ls_id}' + ) + + ds_id_next = assign_lsds( + ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag + ) + + ds_id = ds_id_next + + if self._asmnt.log.verbose: + rv_count = len(lsds_RV_reg.RV) + self.log_msg( + f"2x{rv_count} random variables created.", prepend_timestamp=False + ) + + return capacity_RV_reg, lsds_RV_reg + + def _generate_dmg_sample(self, sample_size, PGB, scaling_specification=None): + """ + This method generates a damage sample by creating random + variables (RVs) for capacities and limit-state-damage-states + (lsds), and then sampling from these RVs. The sample size and + performance group batches (PGB) are specified as inputs. The + method returns the capacity sample and the lsds sample. + + Parameters + ---------- + sample_size : int + The number of realizations to generate. + PGB : DataFrame + A DataFrame that groups performance groups into batches + for efficient damage assessment. + scaling_specification: dict, optional + A dictionary defining the shift in median. + Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} + The keys are individual components that should be present + in the `capacity_sample`. The values should be strings + containing an operation followed by the value formatted as + a float. The operation can be '+' for addition, '-' for + subtraction, '*' for multiplication, and '/' for division. + + Returns + ------- + capacity_sample : DataFrame + A DataFrame that represents the capacity sample. + lsds_sample : DataFrame + A DataFrame that represents the . + + Raises + ------ + ValueError + If the damage parameters have not been specified. + + """ + + # Check if damage model parameters have been specified + if self.damage_params is None: + raise ValueError('Damage model parameters have not been specified. 
' + 'Load parameters from the default damage model ' + 'databases or provide your own damage model ' + 'definitions before generating a sample.') + + # Create capacity and LSD RVs for each performance group + capacity_RVs, lsds_RVs = self._create_dmg_RVs(PGB, scaling_specification) + + if self._asmnt.log.verbose: + self.log_msg('Sampling capacities...', + prepend_timestamp=True) + + # Generate samples for capacity RVs + capacity_RVs.generate_sample( + sample_size=sample_size, + method=self._asmnt.options.sampling_method) + + # Generate samples for LSD RVs + lsds_RVs.generate_sample( + sample_size=sample_size, + method=self._asmnt.options.sampling_method) + + if self._asmnt.log.verbose: + self.log_msg("Raw samples are available", + prepend_timestamp=True) + + # get the capacity and lsds samples + capacity_sample = pd.DataFrame( + capacity_RVs.RV_sample).sort_index( + axis=0).sort_index(axis=1) + capacity_sample = base.convert_to_MultiIndex( + capacity_sample, axis=1)['FRG'] + capacity_sample.columns.names = [ + 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + + lsds_sample = pd.DataFrame( + lsds_RVs.RV_sample).sort_index( + axis=0).sort_index(axis=1).astype(int) + lsds_sample = base.convert_to_MultiIndex( + lsds_sample, axis=1)['LSDS'] + lsds_sample.columns.names = [ + 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + + if self._asmnt.log.verbose: + self.log_msg( + f"Successfully generated {sample_size} realizations.", + prepend_timestamp=True) + + return capacity_sample, lsds_sample + + def _get_required_demand_type(self, PGB): + """ + Returns the id of the demand needed to calculate damage to a + component. We assume that a damage model sample is available. + + This method returns the demand type and its properties + required to calculate the damage to a component. The + properties include whether the demand is directional, the + offset, and the type of the demand. The method takes as input + a dataframe PGB that contains information about the component + groups in the asset. For each component group PG in the PGB + dataframe, the method retrieves the relevant damage parameters + from the damage_params dataframe and parses the demand type + into its properties. If the demand type has a subtype, the + method splits it and adds the subtype to the demand type to + form the EDP (engineering demand parameter) type. The method + also considers the default offset for the demand type, if it + is specified in the options attribute of the assessment, and + adds the offset to the EDP. If the demand is directional, the + direction is added to the EDP. The method collects all the + unique EDPs for each component group and returns them as a + dictionary where each key is an EDP and its value is a list of + component groups that require that EDP. 
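# A minimal sketch of the EDP key assembled by this method: demand type,
# offset-adjusted location, and direction (or '0' when non-directional).
# The values are hypothetical.
EDP_type, loc, offset, directional, direction = 'PFA', '2', 1, True, '1'
EDP = f"{EDP_type}-{int(loc) + offset}-{direction if directional else '0'}"
print(EDP)  # 'PFA-3-1'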
+ + Parameters + ---------- + `PGB`: pd.DataFrame + A pandas DataFrame with the block information for + each component + + Returns + ------- + EDP_req: dict + A dictionary of EDP requirements, where each key is the EDP + string (e.g., "Peak Ground Acceleration-0-0"), and the + corresponding value is a list of tuples (component_id, + location, direction) + + """ + + # Assign the damage_params attribute to a local variable `DP` + DP = self.damage_params + + # Check if verbose logging is enabled in `self._asmnt.log` + if self._asmnt.log.verbose: + # If verbose logging is enabled, log a message indicating + # that we are collecting demand information + self.log_msg('Collecting required demand information...', + prepend_timestamp=True) + + # Initialize an empty dictionary to store the unique EDP + # requirements + EDP_req = {} + + # Iterate over the index of the `PGB` DataFrame + for PG in PGB.index: + # Get the component name from the first element of the + # `PG` tuple + cmp = PG[0] + + # Get the directional, offset, and demand_type parameters + # from the `DP` DataFrame + directional, offset, demand_type = DP.loc[ + cmp, [('Demand', 'Directional'), + ('Demand', 'Offset'), + ('Demand', 'Type')]] + + # Parse the demand type + + # Check if there is a subtype included in the demand_type + # string + if '|' in demand_type: + # If there is a subtype, split the demand_type string + # on the '|' character + demand_type, subtype = demand_type.split('|') + # Convert the demand type to the corresponding EDP + # type using `base.EDP_to_demand_type` + demand_type = base.EDP_to_demand_type[demand_type] + # Concatenate the demand type and subtype to form the + # EDP type + EDP_type = f'{demand_type}_{subtype}' + else: + # If there is no subtype, convert the demand type to + # the corresponding EDP type using + # `base.EDP_to_demand_type` + demand_type = base.EDP_to_demand_type[demand_type] + # Assign the EDP type to be equal to the demand type + EDP_type = demand_type + + # Consider the default offset, if needed + if demand_type in self._asmnt.options.demand_offset.keys(): + # If the demand type has a default offset in + # `self._asmnt.options.demand_offset`, add the offset + # to the default offset + offset = int(offset + self._asmnt.options.demand_offset[demand_type]) + else: + # If the demand type does not have a default offset in + # `self._asmnt.options.demand_offset`, convert the + # offset to an integer + offset = int(offset) + + # Determine the direction + if directional: + # If the demand is directional, use the third element + # of the `PG` tuple as the direction + direction = PG[2] + else: + # If the demand is not directional, use '0' as the + # direction + direction = '0' + + # Concatenate the EDP type, offset, and direction to form + # the EDP key + EDP = f"{EDP_type}-{str(int(PG[1]) + offset)}-{direction}" + + # If the EDP key is not already in the `EDP_req` + # dictionary, add it and initialize it with an empty list + if EDP not in EDP_req: + EDP_req.update({EDP: []}) + + # Add the current PG (performance group) to the list of + # PGs associated with the current EDP key + EDP_req[EDP].append(PG) + + # Return the unique EDP requirements + return EDP_req + + def _assemble_required_demand_data(self, EDP_req): + """ + Assembles demand data for damage state determination. 
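# A minimal sketch of the non-directional demand handling described below:
# the maximum over the available directions is scaled by the non-directional
# multiplier (1.2 by default). The demand values are hypothetical.
import pandas as pd

pfa = pd.DataFrame({'1': [0.4, 0.5], '2': [0.6, 0.3]})    # two directions
nondirectional_demand = pfa.max(axis=1) * 1.2   # max across directions, x1.2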
+ + The method takes the maximum of all available directions for + non-directional demand, scaling it using the non-directional + multiplier specified in self._asmnt.options, and returning the + result as a dictionary with keys in the format of + '--' and values as arrays of + demand values. If demand data is not found, logs a warning + message and skips the corresponding damages calculation. + + Parameters + ---------- + EDP_req : dict + A dictionary of unique EDP requirements + + Returns + ------- + demand_dict : dict + A dictionary of assembled demand data for calculation + + Raises + ------ + KeyError + If demand data for a given EDP cannot be found + + """ + + if self._asmnt.log.verbose: + self.log_msg('Assembling demand data for calculation...', + prepend_timestamp=True) + + demand_source = self._asmnt.demand.sample + + demand_dict = {} + + for EDP in EDP_req.keys(): + + EDP = EDP.split('-') + + # if non-directional demand is requested... + if EDP[2] == '0': + + # assume that the demand at the given location is available + try: + # take the maximum of all available directions and scale it + # using the nondirectional multiplier specified in the + # self._asmnt.options (the default value is 1.2) + demand = demand_source.loc[ + :, (EDP[0], EDP[1])].max(axis=1).values + demand = demand * self._asmnt.options.nondir_multi(EDP[0]) + + except KeyError: + + demand = None + + else: + demand = demand_source[(EDP[0], EDP[1], EDP[2])].values + + if demand is None: + + self.log_msg(f'\nWARNING: Cannot find demand data for {EDP}. The ' + 'corresponding damages cannot be calculated.', + prepend_timestamp=False) + else: + demand_dict.update({f'{EDP[0]}-{EDP[1]}-{EDP[2]}': demand}) + + return demand_dict + + def _evaluate_damage_state( + self, demand_dict, EDP_req, capacity_sample, lsds_sample): + """ + Use the demand and LS capacity sample to evaluate damage states + + Parameters + ---------- + demand_dict: dict + Dictionary containing the demand of each demand type. + EDP_req: dict + Dictionary containing the EDPs assigned to each demand + type. + capacity_sample: DataFrame + Provides a sample of the capacity. + lsds_sample: DataFrame + Provides the mapping between limit states and damage + states. + + Returns + ------- + dmg_sample: DataFrame + Assigns a Damage State to each component block in the + asset model. 
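# A minimal sketch of the damage state evaluation: demands are compared
# against sampled limit state capacities, and exceeding a higher limit state
# overwrites the lower damage state. This is a simplified picture with one
# damage state per limit state and ordered capacities; all values are
# hypothetical.
import numpy as np

demand = np.array([0.25, 0.55, 0.90])           # one EDP, three realizations
ls_capacities = np.array([[0.30, 0.60],         # LS1, LS2 capacities
                          [0.35, 0.70],
                          [0.20, 0.45]])
exceeded = demand[:, None] > ls_capacities      # capacity - demand < 0
ds = exceeded.sum(axis=1)
print(ds.tolist())  # [0, 1, 2]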
+ """ + + # Log a message indicating that damage states are being + # evaluated + + if self._asmnt.log.verbose: + self.log_msg('Evaluating damage states...', prepend_timestamp=True) + + # Create an empty dataframe with columns and index taken from + # the input capacity sample + dmg_eval = pd.DataFrame(columns=capacity_sample.columns, + index=capacity_sample.index) + + # Initialize an empty list to store demand data + demand_df = [] + + # For each demand type in the demand dictionary + for demand_name, demand_vals in demand_dict.items(): + + # Get the list of PGs assigned to this demand type + PG_list = EDP_req[demand_name] + + # Create a list of columns for the demand data + # corresponding to each PG in the PG_list + PG_cols = pd.concat( + [dmg_eval.loc[:1, PG_i] for PG_i in PG_list], axis=1, keys=PG_list + ).columns + PG_cols.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + # Create a dataframe with demand values repeated for the + # number of PGs and assign the columns as PG_cols + demand_df.append(pd.concat([pd.Series(demand_vals)] * len(PG_cols), + axis=1, keys=PG_cols)) + + # Concatenate all demand dataframes into a single dataframe + demand_df = pd.concat(demand_df, axis=1) + # Sort the columns of the demand dataframe + demand_df.sort_index(axis=1, inplace=True) + + # Evaluate the damage exceedance by subtracting demand from + # capacity and checking if the result is less than zero + dmg_eval = (capacity_sample - demand_df) < 0 + + # Remove any columns with NaN values from the damage + # exceedance dataframe + dmg_eval.dropna(axis=1, inplace=True) + + # initialize the DataFrames that store the damage states and + # quantities + ds_sample = capacity_sample.groupby(level=[0, 1, 2, 3, 4], axis=1).first() + ds_sample.loc[:, :] = np.zeros(ds_sample.shape, dtype=int) + + # get a list of limit state ids among all components in the damage model + ls_list = dmg_eval.columns.get_level_values(5).unique() + + # for each consecutive limit state... + for LS_id in ls_list: + # get all cmp - loc - dir - block where this limit state occurs + dmg_e_ls = dmg_eval.loc[:, idx[:, :, :, :, :, LS_id]].dropna(axis=1) + + # Get the damage states corresponding to this limit state in each + # block + # Note that limit states with a set of mutually exclusive damage + # states options have their damage state picked here. + lsds = lsds_sample.loc[:, dmg_e_ls.columns] + + # Drop the limit state level from the columns to make the damage + # exceedance DataFrame compatible with the other DataFrames in the + # following steps + dmg_e_ls.columns = dmg_e_ls.columns.droplevel(5) + + # Same thing for the lsds DataFrame + lsds.columns = dmg_e_ls.columns + + # Update the damage state in the result with the values from the + # lsds DF if the limit state was exceeded according to the + # dmg_e_ls DF. + # This one-liner updates the given Limit State exceedance in the + # entire damage model. If subsequent Limit States are also exceeded, + # those cells in the result matrix will get overwritten by higher + # damage states. + ds_sample.loc[:, dmg_e_ls.columns] = ( + ds_sample.loc[:, dmg_e_ls.columns].mask(dmg_e_ls, lsds)) + + return ds_sample + + def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): + """ + Combine component quantity and damage state information in one + DataFrame. + + This method assumes that a component quantity sample is + available in the asset model and a damage state sample is + available in the damage model. 
+ + Parameters + ---------- + PGB: DataFrame + A DataFrame that contains the Block identifier for each + component. + ds_sample: DataFrame + A DataFrame that assigns a damage state to each component + block in the asset model. + dropzero: bool, optional, default: True + If True, the quantity of non-damaged components is not + saved. + + Returns + ------- + res_df: DataFrame + A DataFrame that combines the component quantity and + damage state information. + + Raises + ------ + ValueError + If the number of blocks is not provided or if the list of + weights does not contain the same number of elements as + the number of blocks. + + """ + + # Log a message indicating that the calculation of damage + # quantities is starting + if self._asmnt.log.verbose: + self.log_msg('Calculating damage quantities...', + prepend_timestamp=True) + + # Store the damage state sample as a local variable + dmg_ds = ds_sample + + # Retrieve the component quantity information from the asset + # model + cmp_qnt = self._asmnt.asset.cmp_sample # .values + # Retrieve the component marginal parameters from the asset + # model + cmp_params = self._asmnt.asset.cmp_marginal_params + + # Combine the component quantity information for the columns + # in the damage state sample + dmg_qnt = pd.concat( + [cmp_qnt[PG[:4]] for PG in dmg_ds.columns], + axis=1, keys=dmg_ds.columns) + + # Initialize a list to store the block weights + block_weights = [] + + # For each component in the list of PG blocks + for PG in PGB.index: + + # Set the number of blocks to 1, unless specified + # otherwise in the component marginal parameters + blocks = 1 + if cmp_params is not None: + if 'Blocks' in cmp_params.columns: + + blocks = cmp_params.loc[PG, 'Blocks'] + + # If the number of blocks is specified, calculate the + # weights as the reciprocal of the number of blocks + if np.atleast_1d(blocks).shape[0] == 1: + blocks_array = np.full(int(blocks), 1. 
/ blocks) + + # Otherwise, assume that the list contains the weights + block_weights += blocks_array.tolist() + + # Broadcast the block weights to match the shape of the damage + # quantity DataFrame + block_weights = np.broadcast_to( + block_weights, + (dmg_qnt.shape[0], len(block_weights))) + + # Multiply the damage quantities by the block weights + dmg_qnt *= block_weights + + # Get the unique damage states from the damage state sample + # Note that these might be fewer than all possible Damage + # States + ds_list = np.unique(dmg_ds.values) + # Filter out any NaN values from the list of damage states + ds_list = ds_list[pd.notna(ds_list)].astype(int) + + # If the dropzero option is True, remove the zero damage state + # from the list of damage states + if dropzero: + + ds_list = ds_list[ds_list != 0] + + # Only proceed with the calculation if there is at least one + # damage state in the list + if len(ds_list) > 0: + + # Create a list of DataFrames, where each DataFrame stores + # the damage quantities for a specific damage state + res_list = [pd.DataFrame( + np.where(dmg_ds == ds_i, dmg_qnt, 0), + columns=dmg_ds.columns, + index=dmg_ds.index + ) for ds_i in ds_list] + + # Combine the damage quantity DataFrames into a single + # DataFrame + res_df = pd.concat( + res_list, axis=1, + keys=[f'{ds_i:g}' for ds_i in ds_list]) + res_df.columns.names = ['ds', *res_df.columns.names[1::]] + # remove the block level from the columns + res_df.columns = res_df.columns.reorder_levels([1, 2, 3, 4, 0, 5]) + res_df = res_df.groupby(level=[0, 1, 2, 3, 4], axis=1).sum() + + # The damage states with no damaged quantities are dropped + # Note that some of these are not even valid DSs at the given PG + res_df = res_df.iloc[:, np.where(res_df.sum(axis=0) != 0)[0]] + + return res_df + + def _perform_dmg_task(self, task, qnt_sample): + """ + Perform a task from a damage process. + + The method performs a task from a damage process on a given + quantity sample. The method first checks if the source + component specified in the task exists among the available + components in the quantity sample. If the source component is + not found, a warning message is logged and the method returns + the original quantity sample unchanged. Otherwise, the method + executes the events specified in the task. The events can be + triggered by a limit state exceedance or a damage state + occurrence. If the event is triggered by a damage state, the + method moves all quantities of the target component(s) into + the target damage state in pre-selected realizations. If the + target event is "NA", the method removes quantity information + from the target components in the pre-selected + realizations. After executing the events, the method returns + the updated quantity sample. + + Parameters + ---------- + task : list + A list representing a task from the damage process. The + list contains two elements: + - The first element is a string representing the source + component, e.g., `'CMP_A'`. + - The second element is a dictionary representing the + events triggered by the damage state of the source + component. The keys of the dictionary are strings that + represent the damage state of the source component, + e.g., `'DS1'`. The values are lists of strings + representing the target component(s) and event(s), e.g., + `['CMP_B', 'CMP_C']`. + qnt_sample : pandas DataFrame + A DataFrame representing the quantities of the components + in the damage sample. 
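# A minimal sketch of a damage process task of the form parsed below: the
# source component is the part of the key after the underscore, and each
# target string splits into a component and an event. Component names are
# hypothetical.
task = ('1_CMP.A', {'DS1': ['CMP.B_DS1', 'CMP.C_NA']})
source_cmp = task[0].split('_')[1]               # 'CMP.A'
for source_event, target_infos in task[1].items():
    for target_info in target_infos:
        target_cmp, target_event = target_info.split('_')
        print(source_cmp, source_event, target_cmp, target_event)
# CMP.A DS1 CMP.B DS1
# CMP.A DS1 CMP.C NA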
It is modified in place to represent + the quantities of the components in the damage sample + after the task has been performed. + + Raises + ------ + ValueError + If the source component is not found among the components + in the damage sample + ValueError + If the source event is not a limit state (LS) or damage + state (DS) + ValueError + If the target event is not a limit state (LS), damage + state (DS), or not available (NA) + ValueError + If the target event is a limit state (LS) + + """ + + if self._asmnt.log.verbose: + self.log_msg('Applying task...', + prepend_timestamp=True) + + # get the list of available components + cmp_list = qnt_sample.columns.get_level_values(0).unique().tolist() + + # get the component quantities + cmp_qnt = self._asmnt.asset.cmp_sample + + # get the source component + source_cmp = task[0].split('_')[1] + + # check if it exists among the available ones + if source_cmp not in cmp_list: + + self.log_msg( + f"WARNING: Source component {source_cmp} in the prescribed " + "damage process not found among components in the damage " + "sample. The corresponding part of the damage process is " + "skipped.", prepend_timestamp=False) + + return + + # get the damage quantities for the source component + source_cmp_df = qnt_sample.loc[:, source_cmp] + + # execute the prescribed events + for source_event, target_infos in task[1].items(): + + # events triggered by limit state exceedance + if source_event.startswith('LS'): + + # ls_i = int(source_event[2:]) + # TODO: implement source LS support + raise ValueError('LS not supported yet.') + + # events triggered by damage state occurrence + if source_event.startswith('DS'): + + # get the ID of the damage state that triggers the event + ds_list = [source_event[2:], ] + + # if we are only looking for a single DS + if len(ds_list) == 1: + + ds_target = ds_list[0] + + # get the realizations with non-zero quantity of the target DS + source_ds_vals = source_cmp_df.groupby( + level=[3], axis=1).max() + + if ds_target in source_ds_vals.columns: + source_ds_vals = source_ds_vals[ds_target] + source_mask = source_cmp_df.loc[source_ds_vals > 0.0].index + else: + # if tge source_cmp is not in ds_target in any of the + # realizations, the prescribed event is not triggered + continue + + else: + pass # TODO: implement multiple DS support + + else: + raise ValueError(f"Unable to parse source event in damage " + f"process: {source_event}") + + # get the information about the events + target_infos = np.atleast_1d(target_infos) + + # for each event + for target_info in target_infos: + + # get the target component and event type + target_cmp, target_event = target_info.split('_') + + # ALL means all, but the source component + if target_cmp == 'ALL': + + # copy the list of available components + target_cmp = deepcopy(cmp_list) + + # remove the source component + if source_cmp in target_cmp: + target_cmp.remove(source_cmp) + + # otherwise we target a specific component + elif target_cmp in cmp_list: + target_cmp = [target_cmp, ] + + # trigger a limit state + if target_event.startswith('LS'): + + # ls_i = int(target_event[2:]) + # TODO: implement target LS support + raise ValueError('LS not supported yet.') + + # trigger a damage state + if target_event.startswith('DS'): + + # get the target damage state ID + ds_i = target_event[2:] + + # move all quantities of the target component(s) into the + # target damage state in the pre-selected realizations + qnt_sample.loc[source_mask, target_cmp] = 0.0 + + for target_cmp_i in target_cmp: + locs = 
cmp_qnt[target_cmp_i].columns.get_level_values(0) + dirs = cmp_qnt[target_cmp_i].columns.get_level_values(1) + uids = cmp_qnt[target_cmp_i].columns.get_level_values(2) + for loc, direction, uid in zip(locs, dirs, uids): + # because we cannot be certain that ds_i had been + # triggered earlier, we have to add this damage + # state manually for each PG of each component, if needed + if ds_i not in qnt_sample[ + (target_cmp_i, loc, direction, uid)].columns: + qnt_sample[ + (target_cmp_i, loc, direction, uid, ds_i)] = 0.0 + + qnt_sample.loc[ + source_mask, + (target_cmp_i, loc, direction, uid, ds_i)] = ( + cmp_qnt.loc[ + source_mask, + (target_cmp_i, loc, direction, uid)].values) + + # clear all damage information + elif target_event == 'NA': + + # remove quantity information from the target components + # in the pre-selected realizations + qnt_sample.loc[source_mask, target_cmp] = np.nan + + else: + raise ValueError(f"Unable to parse target event in damage " + f"process: {target_event}") + + if self._asmnt.log.verbose: + self.log_msg('Damage process task successfully applied.', + prepend_timestamp=False) + + def _get_pg_batches(self, block_batch_size): + """ + Group performance groups into batches for efficient damage assessment. + + The method takes as input the block_batch_size, which + specifies the maximum number of blocks per batch. The method + first checks if performance groups have been defined in the + cmp_marginal_params dataframe, and if so, it uses the 'Blocks' + column as the performance group information. If performance + groups have not been defined in cmp_marginal_params, the + method uses the cmp_sample dataframe to define the performance + groups, with each performance group having a single block. + + The method then checks if the performance groups are available + in the damage parameters dataframe, and removes any + performance groups that are not found in the damage + parameters. The method then groups the performance groups + based on the locations and directions of the components, and + calculates the cumulative sum of the blocks for each + group. The method then divides the performance groups into + batches of size specified by block_batch_size and assigns a + batch number to each group. Finally, the method groups the + performance groups by batch number, component, location, and + direction, and returns a dataframe that shows the number of + blocks for each batch. + + """ + + # Get the marginal parameters for the components from the + # asset model + cmp_marginals = self._asmnt.asset.cmp_marginal_params + + # Initialize the batch dataframe + pg_batch = None + + # If marginal parameters are available, use the 'Blocks' + # column to initialize the batch dataframe + if cmp_marginals is not None: + + # Check if the "Blocks" column exists in the component + # marginal parameters + if 'Blocks' in cmp_marginals.columns: + pg_batch = cmp_marginals['Blocks'].to_frame() + + # If the "Blocks" column doesn't exist, create a new dataframe + # with "Blocks" column filled with ones, using the component + # sample as the index. + if pg_batch is None: + cmp_sample = self._asmnt.asset.cmp_sample + pg_batch = pd.DataFrame(np.ones(cmp_sample.shape[1]), + index=cmp_sample.columns, + columns=['Blocks']) + + # Check if the damage model information exists for each + # performance group If not, remove the performance group from + # the analysis and log a warning message. 
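# A simplified sketch of the batching idea described above: performance
# groups are packed into batches by accumulating block counts and starting a
# new batch whenever the running total would exceed block_batch_size. Block
# counts are hypothetical; the actual method works on cumulative sums of a
# grouped DataFrame.
import numpy as np

blocks = np.array([400, 500, 300, 900])   # blocks per performance group
batch_size = 1000
batch, batches, running = 1, [], 0
for b in blocks:
    if running + b > batch_size and running > 0:
        batch += 1
        running = 0
    running += b
    batches.append(batch)
print(batches)  # [1, 1, 2, 3]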
+ first_time = True + for pg_i in pg_batch.index: + + if np.any(np.isin(pg_i, self.damage_params.index)): + + blocks_i = pg_batch.loc[pg_i, 'Blocks'] + + # If the "Blocks" column contains a list of block + # weights, get the number of blocks from the shape of + # the list. + if np.atleast_1d(blocks_i).shape[0] != 1: + blocks_i = np.atleast_1d(blocks_i).shape[0] + + pg_batch.loc[pg_i, 'Blocks'] = blocks_i + + else: + pg_batch.drop(pg_i, inplace=True) + + if first_time: + self.log_msg("\nWARNING: Damage model information is " + "incomplete for some of the performance groups " + "and they had to be removed from the analysis:", + prepend_timestamp=False) + + first_time = False + + self.log_msg(f"{pg_i}", prepend_timestamp=False) + + # Convert the data types of the dataframe to be efficient + pg_batch = pg_batch.convert_dtypes() + + # Sum up the number of blocks for each performance group + pg_batch = pg_batch.groupby(['loc', 'dir', 'cmp', 'uid']).sum() + pg_batch.sort_index(axis=0, inplace=True) + + # Calculate cumulative sum of blocks + pg_batch['CBlocks'] = np.cumsum(pg_batch['Blocks'].values.astype(int)) + pg_batch['Batch'] = 0 + + # Group the performance groups into batches + for batch_i in range(1, pg_batch.shape[0] + 1): + + # Find the mask for blocks that are less than the batch + # size and greater than 0 + batch_mask = np.all( + np.array([pg_batch['CBlocks'] <= block_batch_size, + pg_batch['CBlocks'] > 0]), + axis=0) + + if np.sum(batch_mask) < 1: + batch_mask = np.full(batch_mask.shape, False) + batch_mask[np.where(pg_batch['CBlocks'] > 0)[0][0]] = True + + pg_batch.loc[batch_mask, 'Batch'] = batch_i + + # Decrement the cumulative block count by the max count in + # the current batch + pg_batch['CBlocks'] -= pg_batch.loc[ + pg_batch['Batch'] == batch_i, 'CBlocks'].max() + + # If the maximum cumulative block count is 0, exit the + # loop + if pg_batch['CBlocks'].max() == 0: + break + + # Group the performance groups by batch, component, location, + # and direction, and keep only the number of blocks for each + # group + pg_batch = pg_batch.groupby( + ['Batch', 'cmp', 'loc', 'dir', 'uid']).sum().loc[:, 'Blocks'].to_frame() + + return pg_batch + + def _complete_ds_cols(self, dmg_sample): + """ + Completes the damage sample dataframe with all possible damage + states for each component. + + Parameters + ---------- + dmg_sample : DataFrame + A DataFrame containing the damage state information for + each component block in the asset model. The columns are + MultiIndexed with levels corresponding to component + information ('cmp', 'loc', 'dir', 'uid') and the damage + state ('ds'). + + Returns + ------- + DataFrame + A DataFrame similar to `dmg_sample` but with additional + columns for missing damage states for each component, + ensuring that all possible damage states are + represented. The new columns are filled with zeros, + indicating no occurrence of those damage states in the + sample. + + Notes + ----- + - The method assumes that the damage model parameters + (`self.damage_params`) are available and contain the + necessary information to determine the total number of + damage states for each component. 
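+        - As an illustration (the component label is hypothetical), if
+          `dmg_sample` only has a column for damage state '1' of
+          component 'CMP.A' at a given loc-dir-uid, and the damage
+          parameters define two damage states for 'CMP.A', the returned
+          DataFrame also includes zero-filled columns for damage states
+          '0' and '2' of that cmp-loc-dir-uid set.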
+ + """ + # get a shortcut for the damage model parameters + DP = self.damage_params + + # Get the header for the results that we can use to identify + # cmp-loc-dir-uid sets + dmg_header = ( + dmg_sample.groupby(level=[0, 1, 2, 3], axis=1).first().iloc[:2, :] + ) + + # get the number of possible limit states + ls_list = [col for col in DP.columns.unique(level=0) if 'LS' in col] + + # initialize the result dataframe + res = pd.DataFrame() + + # walk through all components that have damage parameters provided + for cmp_id in DP.index: + + # get the component-specific parameters + cmp_data = DP.loc[cmp_id] + + # and initialize the damage state counter + ds_count = 0 + + # walk through all limit states for the component + for ls in ls_list: + + # check if the given limit state is defined + if not pd.isna(cmp_data[(ls, 'Theta_0')]): + + # check if there is only one damage state + if pd.isna(cmp_data[(ls, 'DamageStateWeights')]): + + ds_count += 1 + + else: + + # or if there are more than one, how many + ds_count += len( + cmp_data[(ls, 'DamageStateWeights')].split('|')) + + # get the list of valid cmp-loc-dir-uid sets + cmp_header = dmg_header.loc[:, [cmp_id, ]] + + # Create a dataframe where they are repeated ds_count times in the + # columns. The keys put the DS id in the first level of the + # multiindexed column + cmp_headers = pd.concat( + [cmp_header for ds_i in range(ds_count + 1)], + keys=[str(r) for r in range(0, ds_count + 1)], + axis=1) + cmp_headers.columns.names = ['ds', *cmp_headers.columns.names[1::]] + + # add these new columns to the result dataframe + res = pd.concat([res, cmp_headers], axis=1) + + # Fill the result dataframe with zeros and reorder its columns to have + # the damage states at the lowest like - matching the dmg_sample input + res = pd.DataFrame( + 0.0, + columns=res.columns.reorder_levels([1, 2, 3, 4, 0]), + index=dmg_sample.index, + ) + + # replace zeros wherever the dmg_sample has results + res.loc[:, dmg_sample.columns.to_list()] = dmg_sample + + return res + + def calculate( + self, dmg_process=None, block_batch_size=1000, scaling_specification=None + ): + """ + Calculate the damage state of each component block in the asset. + + """ + + self.log_div() + self.log_msg('Calculating damages...') + + sample_size = self._asmnt.demand.sample.shape[0] + + # Break up damage calculation and perform it by performance group. + # Compared to the simultaneous calculation of all PGs, this approach + # reduces demands on memory and increases the load on CPU. This leads + # to a more balanced workload on most machines for typical problems. + # It also allows for a straightforward extension with parallel + # computing. 
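+        # For each batch, the loop below (i) samples component
+        # capacities and the damage states assigned to each limit
+        # state, (ii) assembles the required demands, (iii) evaluates
+        # the damage state of every component block, and (iv) converts
+        # the damage states into damaged quantities.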
+ + # get the list of performance groups + qnt_samples = [] + + self.log_msg(f'Number of Performance Groups in Asset Model:' + f' {self._asmnt.asset.cmp_sample.shape[1]}', + prepend_timestamp=False) + + pg_batch = self._get_pg_batches(block_batch_size) + batches = pg_batch.index.get_level_values(0).unique() + + self.log_msg(f'Number of Component Blocks: {pg_batch["Blocks"].sum()}', + prepend_timestamp=False) + + self.log_msg(f"{len(batches)} batches of Performance Groups prepared " + "for damage assessment", + prepend_timestamp=False) + + # for PG_i in self._asmnt.asset.cmp_sample.columns: + for PGB_i in batches: + + PGB = pg_batch.loc[PGB_i] + + self.log_msg(f"Calculating damage for PG batch {PGB_i} with " + f"{int(PGB['Blocks'].sum())} blocks") + + # Generate an array with component capacities for each block and + # generate a second array that assigns a specific damage state to + # each component limit state. The latter is primarily needed to + # handle limit states with multiple, mutually exclusive DS options + capacity_sample, lsds_sample = self._generate_dmg_sample( + sample_size, PGB, scaling_specification) + + # Get the required demand types for the analysis + EDP_req = self._get_required_demand_type(PGB) + + # Create the demand vector + demand_dict = self._assemble_required_demand_data(EDP_req) + + # Evaluate the Damage State of each Component Block + ds_sample = self._evaluate_damage_state( + demand_dict, EDP_req, + capacity_sample, lsds_sample) + qnt_sample = self._prepare_dmg_quantities(PGB, ds_sample, dropzero=False) + + qnt_samples.append(qnt_sample) + + qnt_sample = pd.concat(qnt_samples, axis=1) + + # Create a comprehensive table with all possible DSs to have a robust + # input for the damage processes evaluation below + qnt_sample = self._complete_ds_cols(qnt_sample) + qnt_sample.sort_index(axis=1, inplace=True) + + self.log_msg("Raw damage calculation successful.", + prepend_timestamp=False) + + # Apply the prescribed damage process, if any + if dmg_process is not None: + self.log_msg("Applying damage processes...") + + # sort the processes + dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} + + for task in dmg_process.items(): + + self._perform_dmg_task(task, qnt_sample) + + self.log_msg("Damage processes successfully applied.", + prepend_timestamp=False) + + # If requested, remove columns with no damage from the sample + if self._asmnt.options.list_all_ds is False: + qnt_sample = qnt_sample.iloc[:, np.where(qnt_sample.sum(axis=0) != 0)[0]] + + self.sample = qnt_sample + + self.log_msg('Damage calculation successfully completed.') diff --git a/pelicun/model/demand_model.py b/pelicun/model/demand_model.py new file mode 100644 index 000000000..f1ff5637b --- /dev/null +++ b/pelicun/model/demand_model.py @@ -0,0 +1,851 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the DemandModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + DemandModel + +""" + +import numpy as np +import pandas as pd +from .pelicun_model import PelicunModel +from .. import base +from .. import uq +from .. import file_io + + +idx = base.idx + + +class DemandModel(PelicunModel): + """ + Manages demand information used in assessments. + + Parameters + ---------- + marginal_params: DataFrame + Available after the model has been calibrated or calibration data has + been imported. Defines the marginal distribution of each demand + variable. + correlation: DataFrame + Available after the model has been calibrated or calibration data has + been imported. Defines the correlation between the demand variables in + standard normal space. That is, the variables are sampled in standard + normal space and then transformed into the space of their respective + distributions and the correlation matrix corresponds to the space where + they are sampled. + empirical_data: DataFrame + Available after the model has been calibrated or calibration data has + been imported. It provides an empirical dataset for the demand + variables that are modeled with an empirical distribution. + sample: DataFrame + Available after a sample has been generated. Demand variables are + listed in columns and each row provides an independent realization of + the joint demand distribution. + units: Series + Available after any demand data has been loaded. The index identifies + the demand variables and the values provide the unit for each variable. 
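+
+    A minimal usage sketch (illustrative only; the ``assessment``
+    object, file name, and calibration settings below are hypothetical)
+    is to load a raw demand sample, calibrate the model, and generate
+    a new sample from it::
+
+        demand_model = assessment.demand
+        demand_model.load_sample('demand_data.csv')
+        demand_model.calibrate_model(
+            {'ALL': {'DistributionFamily': 'lognormal'}})
+        demand_model.generate_sample({'SampleSize': 1000})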
+ + """ + + def __init__(self, assessment): + + super().__init__(assessment) + + self.marginal_params = None + self.correlation = None + self.empirical_data = None + self.units = None + + self._RVs = None + self.sample = None + + def save_sample(self, filepath=None, save_units=False): + """ + Save demand sample to a csv file or return it in a DataFrame + + """ + + self.log_div() + if filepath is not None: + self.log_msg('Saving demand sample...') + + res = file_io.save_to_csv( + self.sample, filepath, units=self.units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log) + + if filepath is not None: + self.log_msg('Demand sample successfully saved.', + prepend_timestamp=False) + return None + + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + + if save_units: + return res.astype(float), units + + # else: + return res.astype(float) + + def load_sample(self, filepath): + """ + Load demand sample data and parse it. + + Besides parsing the sample, the method also reads and saves the units + specified for each demand variable. If no units are specified, Standard + Units are assumed. + + Parameters + ---------- + filepath: string or DataFrame + Location of the file with the demand sample. + + """ + + def parse_header(raw_header): + + old_MI = raw_header + + # The first number (event_ID) in the demand labels is optional and + # currently not used. We remove it if it was in the raw data. + if old_MI.nlevels == 4: + + if self._asmnt.log.verbose: + self.log_msg('Removing event_ID from header...', + prepend_timestamp=False) + + new_column_index_array = np.array( + [old_MI.get_level_values(i) for i in range(1, 4)]) + + else: + new_column_index_array = np.array( + [old_MI.get_level_values(i) for i in range(3)]) + + # Remove whitespace to avoid ambiguity + + if self._asmnt.log.verbose: + self.log_msg('Removing whitespace from header...', + prepend_timestamp=False) + + wspace_remove = np.vectorize(lambda name: str(name).replace(' ', '')) + + new_column_index = wspace_remove(new_column_index_array) + + # Creating new, cleaned-up header + + new_MI = pd.MultiIndex.from_arrays( + new_column_index, names=['type', 'loc', 'dir']) + + return new_MI + + self.log_div() + self.log_msg('Loading demand data...') + + demand_data, units = file_io.load_data( + filepath, self._asmnt.unit_conversion_factors, + return_units=True, log=self._asmnt.log) + + parsed_data = demand_data.copy() + + # start with cleaning up the header + + parsed_data.columns = parse_header(parsed_data.columns) + + # Remove errors, if needed + if 'ERROR' in parsed_data.columns.get_level_values(0): + + self.log_msg('Removing errors from the raw data...', + prepend_timestamp=False) + + error_list = parsed_data.loc[:, idx['ERROR', :, :]].values.astype(bool) + + parsed_data = parsed_data.loc[~error_list, :].copy() + parsed_data.drop('ERROR', level=0, axis=1, inplace=True) + + self.log_msg("\nBased on the values in the ERROR column, " + f"{np.sum(error_list)} demand samples were removed.\n", + prepend_timestamp=False) + + self.sample = parsed_data + + self.log_msg('Demand data successfully parsed.', prepend_timestamp=False) + + # parse the index for the units + units.index = parse_header(units.index) + + self.units = units + + self.log_msg('Demand units successfully parsed.', prepend_timestamp=False) + + def estimate_RID(self, demands, params, method='FEMA P58'): + """ + Estimate residual drift realizations based on other demands + + Parameters + ---------- + 
demands: DataFrame + Sample of demands required for the method to estimate the RID values + params: dict + Parameters required for the method to estimate the RID values + method: {'FEMA P58'}, default: 'FEMA P58' + Method to use for the estimation - currently, only one is available. + """ + + if method == 'FEMA P58': + + # method is described in FEMA P-58 Volume 1 Section 5.4 & Appendix C + + # the provided demands shall be PID values at various loc-dir pairs + PID = demands + + # there's only one parameter needed: the yield drift + yield_drift = params['yield_drift'] + + # three subdomains of demands are identified + small = PID < yield_drift + medium = PID < 4 * yield_drift + large = PID >= 4 * yield_drift + + # convert PID to RID in each subdomain + RID = PID.copy() + RID[large] = PID[large] - 3 * yield_drift + RID[medium] = 0.3 * (PID[medium] - yield_drift) + RID[small] = 0. + + # add extra uncertainty to nonzero values + rng = self._asmnt.options.rng + eps = rng.normal(scale=0.2, size=RID.shape) + RID[RID > 0] = np.exp(np.log(RID[RID > 0]) + eps) + + # finally, make sure the RID values are never larger than the PIDs + RID = pd.DataFrame( + np.minimum(PID.values, RID.values), + columns=pd.DataFrame( + 1, index=['RID', ], + columns=PID.columns).stack(level=[0, 1]).index, + index=PID.index) + + else: + RID = None + + # return the generated drift realizations + return RID + + def calibrate_model(self, config): + """ + Calibrate a demand model to describe the raw demand data + + The raw data shall be parsed first to ensure that it follows the + schema expected by this method. The calibration settings define the + characteristics of the multivariate distribution that is fit to the + raw data. + + Parameters + ---------- + config: dict + A dictionary, typically read from a json file, that specifies the + distribution family, truncation and censoring limits, and other + settings for the calibration. + + """ + + def parse_settings(settings, demand_type): + + def parse_str_to_float(in_str, context_string): + + try: + out_float = float(in_str) + + except ValueError: + + self.log_msg(f"WARNING: Could not parse {in_str} provided as " + f"{context_string}. 
Using NaN instead.", + prepend_timestamp=False) + + out_float = np.nan + + return out_float + + active_d_types = ( + demand_sample.columns.get_level_values('type').unique()) + + if demand_type == 'ALL': + cols = tuple(active_d_types) + + else: + cols_lst = [] + + for d_type in active_d_types: + if d_type.split('_')[0] == demand_type: + cols_lst.append(d_type) + + cols = tuple(cols_lst) + + # load the distribution family + cal_df.loc[idx[cols, :, :], 'Family'] = settings['DistributionFamily'] + + # load limits + for lim in ('CensorLower', 'CensorUpper', + 'TruncateLower', 'TruncateUpper'): + + if lim in settings.keys(): + val = parse_str_to_float(settings[lim], lim) + if not pd.isna(val): + cal_df.loc[idx[cols, :, :], lim] = val + + # scale the censor and truncation limits, if needed + scale_factor = self._asmnt.scale_factor(settings.get('Unit', None)) + + rows_to_scale = ['CensorLower', 'CensorUpper', + 'TruncateLower', 'TruncateUpper'] + cal_df.loc[idx[cols, :, :], rows_to_scale] *= scale_factor + + # load the prescribed additional uncertainty + if 'AddUncertainty' in settings.keys(): + + sig_increase = parse_str_to_float(settings['AddUncertainty'], + 'AddUncertainty') + + # scale the sig value if the target distribution family is normal + if settings['DistributionFamily'] == 'normal': + sig_increase *= scale_factor + + cal_df.loc[idx[cols, :, :], 'SigIncrease'] = sig_increase + + def get_filter_mask(lower_lims, upper_lims): + + demands_of_interest = demand_sample.iloc[:, pd.notna(upper_lims)] + limits_of_interest = upper_lims[pd.notna(upper_lims)] + upper_mask = np.all(demands_of_interest < limits_of_interest, + axis=1) + + demands_of_interest = demand_sample.iloc[:, pd.notna(lower_lims)] + limits_of_interest = lower_lims[pd.notna(lower_lims)] + lower_mask = np.all(demands_of_interest > limits_of_interest, + axis=1) + + return np.all([lower_mask, upper_mask], axis=0) + + self.log_div() + self.log_msg('Calibrating demand model...') + + demand_sample = self.sample + + # initialize a DataFrame that contains calibration information + cal_df = pd.DataFrame( + columns=['Family', + 'CensorLower', 'CensorUpper', + 'TruncateLower', 'TruncateUpper', + 'SigIncrease', 'Theta_0', 'Theta_1'], + index=demand_sample.columns, + dtype=float + ) + + cal_df['Family'] = cal_df['Family'].astype(str) + + # start by assigning the default option ('ALL') to every demand column + parse_settings(config['ALL'], 'ALL') + + # then parse the additional settings and make the necessary adjustments + for demand_type in config.keys(): + if demand_type != 'ALL': + parse_settings(config[demand_type], demand_type) + + if self._asmnt.log.verbose: + self.log_msg( + "\nCalibration settings successfully parsed:\n" + str(cal_df), + prepend_timestamp=False) + else: + self.log_msg( + "\nCalibration settings successfully parsed:\n", + prepend_timestamp=False) + + # save the settings + model_params = cal_df.copy() + + # Remove the samples outside of censoring limits + # Currently, non-empirical demands are assumed to have some level of + # correlation, hence, a censored value in any demand triggers the + # removal of the entire sample from the population. 
+ upper_lims = cal_df.loc[:, 'CensorUpper'].values + lower_lims = cal_df.loc[:, 'CensorLower'].values + + if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): + + censor_mask = get_filter_mask(lower_lims, upper_lims) + censored_count = np.sum(~censor_mask) + + demand_sample = demand_sample.loc[censor_mask, :] + + self.log_msg("\nBased on the provided censoring limits, " + f"{censored_count} samples were censored.", + prepend_timestamp=False) + else: + censored_count = 0 + + # Check if there is any sample outside of truncation limits + # If yes, that suggests an error either in the samples or the + # configuration. We handle such errors gracefully: the analysis is not + # terminated, but we show an error in the log file. + upper_lims = cal_df.loc[:, 'TruncateUpper'].values + lower_lims = cal_df.loc[:, 'TruncateLower'].values + + if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): + + truncate_mask = get_filter_mask(lower_lims, upper_lims) + truncated_count = np.sum(~truncate_mask) + + if truncated_count > 0: + + demand_sample = demand_sample.loc[truncate_mask, :] + + self.log_msg("\nBased on the provided truncation limits, " + f"{truncated_count} samples were removed before demand " + "calibration.", + prepend_timestamp=False) + + # Separate and save the demands that are kept empirical -> i.e., no + # fitting. Currently, empirical demands are decoupled from those that + # have a distribution fit to their samples. The correlation between + # empirical and other demands is not preserved in the demand model. + empirical_edps = [] + for edp in cal_df.index: + if cal_df.loc[edp, 'Family'] == 'empirical': + empirical_edps.append(edp) + + self.empirical_data = demand_sample.loc[:, empirical_edps].copy() + + # remove the empirical demands from the samples used for calibration + demand_sample = demand_sample.drop(empirical_edps, axis=1) + + # and the calibration settings + cal_df = cal_df.drop(empirical_edps, axis=0) + + if self._asmnt.log.verbose: + self.log_msg(f"\nDemand data used for calibration:\n{demand_sample}", + prepend_timestamp=False) + + # fit the joint distribution + self.log_msg("\nFitting the prescribed joint demand distribution...", + prepend_timestamp=False) + + demand_theta, demand_rho = uq.fit_distribution_to_sample( + raw_samples=demand_sample.values.T, + distribution=cal_df.loc[:, 'Family'].values, + censored_count=censored_count, + detection_limits=cal_df.loc[ + :, ['CensorLower', 'CensorUpper']].values, + truncation_limits=cal_df.loc[ + :, ['TruncateLower', 'TruncateUpper']].values, + multi_fit=False, + logger_object=self._asmnt.log + ) + # fit the joint distribution + self.log_msg("\nCalibration successful, processing results...", + prepend_timestamp=False) + + # save the calibration results + model_params.loc[cal_df.index, ['Theta_0', 'Theta_1']] = demand_theta + + # increase the variance of the marginal distributions, if needed + if ~np.all(pd.isna(model_params.loc[:, 'SigIncrease'].values)): + + self.log_msg("\nIncreasing demand variance...", + prepend_timestamp=False) + + sig_inc = np.nan_to_num(model_params.loc[:, 'SigIncrease'].values) + sig_0 = model_params.loc[:, 'Theta_1'].values + + model_params.loc[:, 'Theta_1'] = ( + np.sqrt(sig_0 ** 2. 
+ sig_inc ** 2.)) + + # remove unneeded fields from model_params + for col in ('SigIncrease', 'CensorLower', 'CensorUpper'): + model_params = model_params.drop(col, axis=1) + + # reorder the remaining fields for clarity + model_params = model_params[[ + 'Family', 'Theta_0', 'Theta_1', 'TruncateLower', 'TruncateUpper']] + + self.marginal_params = model_params + + self.log_msg("\nCalibrated demand model marginal distributions:\n" + + str(model_params), + prepend_timestamp=False) + + # save the correlation matrix + self.correlation = pd.DataFrame(demand_rho, + columns=cal_df.index, + index=cal_df.index) + + self.log_msg("\nCalibrated demand model correlation matrix:\n" + + str(self.correlation), + prepend_timestamp=False) + + def save_model(self, file_prefix): + """ + Save parameters of the demand model to a set of csv files + + """ + + self.log_div() + self.log_msg('Saving demand model...') + + # save the correlation and empirical data + file_io.save_to_csv(self.correlation, file_prefix + '_correlation.csv') + file_io.save_to_csv( + self.empirical_data, + file_prefix + '_empirical.csv', + units=self.units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + log=self._asmnt.log, + ) + + # the log standard deviations in the marginal parameters need to be + # scaled up before feeding to the saving method where they will be + # scaled back down and end up being saved unscaled to the target file + + marginal_params = self.marginal_params.copy() + + log_rows = marginal_params['Family'] == 'lognormal' + log_demands = marginal_params.loc[log_rows, :] + + for label in log_demands.index: + + if label in self.units.index: + + unit_factor = self._asmnt.calc_unit_scale_factor(self.units[label]) + + marginal_params.loc[label, 'Theta_1'] *= unit_factor + + file_io.save_to_csv( + marginal_params, + file_prefix + '_marginals.csv', + units=self.units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + orientation=1, + log=self._asmnt.log, + ) + + self.log_msg('Demand model successfully saved.', prepend_timestamp=False) + + def load_model(self, data_source): + """ + Load the model that describes demands on the asset. + + Parameters + ---------- + data_source: string or dict + If string, the data_source is a file prefix ( in the + following description) that identifies the following files: + _marginals.csv, _empirical.csv, + _correlation.csv. If dict, the data source is a dictionary + with the following optional keys: 'marginals', 'empirical', and + 'correlation'. The value under each key shall be a DataFrame. 
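+
+        For example (an illustrative sketch; the prefix and DataFrame
+        below are hypothetical)::
+
+            demand_model.load_model('calibrated_demand')
+            # or, with data that is already in memory:
+            demand_model.load_model({'marginals': marginals_df})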
+ """ + + self.log_div() + self.log_msg('Loading demand model...') + + # prepare the marginal data source variable to load the data + if isinstance(data_source, dict): + marginal_data_source = data_source.get('marginals') + empirical_data_source = data_source.get('empirical', None) + correlation_data_source = data_source.get('correlation', None) + else: + marginal_data_source = data_source + '_marginals.csv' + empirical_data_source = data_source + '_empirical.csv' + correlation_data_source = data_source + '_correlation.csv' + + if empirical_data_source is not None: + self.empirical_data = file_io.load_data( + empirical_data_source, + self._asmnt.unit_conversion_factors, + log=self._asmnt.log, + ) + if not self.empirical_data.empty: + self.empirical_data.columns.set_names( + ['type', 'loc', 'dir'], inplace=True + ) + else: + self.empirical_data = None + else: + self.empirical_data = None + + if correlation_data_source is not None: + self.correlation = file_io.load_data( + correlation_data_source, + self._asmnt.unit_conversion_factors, + reindex=False, log=self._asmnt.log) + self.correlation.index.set_names(['type', 'loc', 'dir'], inplace=True) + self.correlation.columns.set_names(['type', 'loc', 'dir'], inplace=True) + else: + self.correlation = None + + # the log standard deviations in the marginal parameters need to be + # adjusted after getting the data from the loading method where they + # were scaled according to the units of the corresponding variable + + # Note that a data source without marginal information is not valid + marginal_params, units = file_io.load_data( + marginal_data_source, + None, + orientation=1, + reindex=False, + return_units=True, + log=self._asmnt.log, + ) + marginal_params.index.set_names(['type', 'loc', 'dir'], inplace=True) + + marginal_params = self.convert_marginal_params(marginal_params.copy(), + units) + + self.marginal_params = marginal_params + self.units = units + + self.log_msg('Demand model successfully loaded.', prepend_timestamp=False) + + def _create_RVs(self, preserve_order=False): + """ + Create a random variable registry for the joint distribution of demands. 
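+
+        Parameters
+        ----------
+        preserve_order: bool, default: False
+            When True, demands modeled with an empirical distribution
+            are sampled with a 'coupled_empirical' random variable,
+            which reuses the raw data in their original order instead
+            of resampling from them.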
+ + """ + + # initialize the registry + RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + + # add a random variable for each demand variable + for rv_params in self.marginal_params.itertuples(): + + edp = rv_params.Index + rv_tag = f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' + family = getattr(rv_params, "Family", np.nan) + + if family == 'empirical': + + if preserve_order: + dist_family = 'coupled_empirical' + else: + dist_family = 'empirical' + + # empirical RVs need the data points + RV_reg.add_RV(uq.RandomVariable( + name=rv_tag, + distribution=dist_family, + raw_samples=self.empirical_data.loc[:, edp].values + )) + + else: + + # all other RVs need parameters of their distributions + RV_reg.add_RV(uq.RandomVariable( + name=rv_tag, + distribution=family, + theta=[getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3)], + truncation_limits=[ + getattr(rv_params, f"Truncate{side}", np.nan) + for side in ("Lower", "Upper")], + + + )) + + self.log_msg(f"\n{self.marginal_params.shape[0]} random variables created.", + prepend_timestamp=False) + + # add an RV set to consider the correlation between demands, if needed + if self.correlation is not None: + rv_set_tags = [f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' + for edp in self.correlation.index.values] + + RV_reg.add_RV_set(uq.RandomVariableSet( + 'EDP_set', list(RV_reg.RVs(rv_set_tags).values()), + self.correlation.values)) + + self.log_msg( + f"\nCorrelations between {len(rv_set_tags)} random variables " + "successfully defined.", + prepend_timestamp=False) + + self._RVs = RV_reg + + def clone_demands(self, demand_cloning): + """ + Clones demands. This means copying over columns of the + original demand sample and assigning given names to them. The + columns to be copied over and the names to assign to the + copies are defined as the keys and values of the + `demand_cloning` dictionary, respectively. + The method modifies `sample` inplace. + + Parameters + ---------- + demand_cloning: dict + Keys correspond to the columns of the original sample to + be copied over and the values correspond to the intended + names for the copies. Caution: It's possible to define a + dictionary with duplicate keys, and Python will just keep + the last entry without warning. Users need to be careful + enough to avoid duplicate keys, because we can't validate + them. + E.g.: x = {'1': 1.00, '1': 2.00} results in x={'1': 2.00}. + + Raises + ------ + ValueError + In multiple instances of invalid demand_cloning entries. + + """ + + # it's impossible to have duplicate keys, because + # demand_cloning is a dictionary. + new_columns_list = demand_cloning.values() + # The following prevents duplicate entries in the values + # corresponding to a single cloned demand (1), but + # also the same column being specified as the cloned + # entry of multiple demands (2). + # e.g. + # (1): {'PGV-0-1': ['PGV-1-1', 'PGV-1-1', ...]} + # (2): {'PGV-0-1': ['PGV-1-1', ...], 'PGV-0-2': ['PGV-1-1', ...]} + flat_list = [] + for new_columns in new_columns_list: + flat_list.extend(new_columns) + if len(set(flat_list)) != len(flat_list): + raise ValueError( + 'Duplicate entries in demand cloning ' + 'configuration.' 
+ ) + + # turn the config entries to tuples + def turn_to_tuples(demand_cloning): + demand_cloning_tuples = {} + for key, values in demand_cloning.items(): + demand_cloning_tuples[tuple(key.split('-'))] = [ + tuple(x.split('-')) for x in values + ] + return demand_cloning_tuples + + demand_cloning = turn_to_tuples(demand_cloning) + + # The demand cloning confuguration should not include + # columns that are not present in the orignal sample. + warn_columns = [] + for column in demand_cloning: + if column not in self.sample.columns: + warn_columns.append(column) + if warn_columns: + warn_columns = ['-'.join(x) for x in warn_columns] + self.log_msg( + "\nWARNING: The demand cloning configuration lists " + "columns that are not present in the original demand sample's " + f"columns: {warn_columns}.\n", + prepend_timestamp=False, + ) + + # we iterate over the existing columns of the sample and try + # to locate columns that need to be copied as required by the + # demand cloning configuration. If a column does not need + # to be cloned it is left as is. Otherwise, we keep track + # of its initial index location (in `column_index`) and the + # number of times it needs to be replicated, along with the + # new names of its copies (in `column_values`). + column_index = [] + column_values = [] + for i, column in enumerate(self.sample.columns): + if column not in demand_cloning: + column_index.append(i) + column_values.append(column) + else: + new_column_values = demand_cloning[column] + column_index.extend([i] * len(new_column_values)) + column_values.extend(new_column_values) + # copy the columns + self.sample = self.sample.iloc[:, column_index] + # update the column index + self.sample.columns = pd.MultiIndex.from_tuples(column_values) + + def generate_sample(self, config): + """ + Generates an RV sample with the specified configuration. + """ + + if self.marginal_params is None: + raise ValueError('Model parameters have not been specified. Either' + 'load parameters from a file or calibrate the ' + 'model using raw demand data.') + + self.log_div() + self.log_msg('Generating sample from demand variables...') + + self._create_RVs( + preserve_order=config.get('PreserveRawOrder', False)) + + sample_size = config['SampleSize'] + self._RVs.generate_sample( + sample_size=sample_size, + method=self._asmnt.options.sampling_method) + + # replace the potentially existing raw sample with the generated one + assert self._RVs is not None + assert self._RVs.RV_sample is not None + sample = pd.DataFrame(self._RVs.RV_sample) + sample.sort_index(axis=0, inplace=True) + sample.sort_index(axis=1, inplace=True) + + sample = base.convert_to_MultiIndex(sample, axis=1)['EDP'] + + sample.columns.names = ['type', 'loc', 'dir'] + self.sample = sample + + if config.get('DemandCloning', False): + self.clone_demands(config['DemandCloning']) + + self.log_msg(f"\nSuccessfully generated {sample_size} realizations.", + prepend_timestamp=False) diff --git a/pelicun/model/loss_model.py b/pelicun/model/loss_model.py new file mode 100644 index 000000000..5f762f41f --- /dev/null +++ b/pelicun/model/loss_model.py @@ -0,0 +1,1129 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines Loss model objects and their methods. + +.. rubric:: Contents + +.. autosummary:: + + prep_constant_median_DV + prep_bounded_multilinear_median_DV + + LossModel + BldgRepairModel + +""" + +import numpy as np +import pandas as pd +from .pelicun_model import PelicunModel +from .. import base +from .. import uq +from .. import file_io + + +idx = base.idx + + +class LossModel(PelicunModel): + """ + Parent object for loss models. + + All loss assessment methods should be children of this class. + + Parameters + ---------- + + """ + + def __init__(self, assessment): + + super().__init__(assessment) + + self._sample = None + self.loss_map = None + self.loss_params = None + self.loss_type = 'Generic' + + @property + def sample(self): + """ + sample property + """ + return self._sample + + def save_sample(self, filepath=None, save_units=False): + """ + Save loss sample to a csv file + + """ + self.log_div() + if filepath is not None: + self.log_msg('Saving loss sample...') + + cmp_units = self.loss_params[('DV', 'Unit')] + dv_units = pd.Series(index=self.sample.columns, name='Units', + dtype='object') + + for cmp_id, dv_type in cmp_units.index: + dv_units.loc[(dv_type, cmp_id)] = cmp_units.at[(cmp_id, dv_type)] + + res = file_io.save_to_csv( + self.sample, filepath, units=dv_units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log) + + if filepath is not None: + self.log_msg('Loss sample successfully saved.', + prepend_timestamp=False) + return None + + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + + if save_units: + return res.astype(float), units + + return res.astype(float) + + def load_sample(self, filepath): + """ + Load damage sample data. 
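+
+        Parameters
+        ----------
+        filepath: string or DataFrame
+            Location of the file with the loss sample.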
+ + """ + self.log_div() + self.log_msg('Loading loss sample...') + + self._sample = file_io.load_data( + filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log) + + self.log_msg('Loss sample successfully loaded.', prepend_timestamp=False) + + def load_model(self, data_paths, mapping_path, decision_variables=None): + """ + Load the list of prescribed consequence models and their parameters + + Parameters + ---------- + data_paths: list of string or DataFrame + List of paths to data files with consequence model + parameters. Default XY datasets can be accessed as + PelicunDefault/XY. The list can also contain DataFrame + objects, in which case that data is used directly. + mapping_path: string + Path to a csv file that maps drivers (i.e., damage or edp data) to + loss models. + decision_variables: list of string, optional + List of decision variables to include in the analysis. If None, + all variables provided in the consequence models are included. When + a list is provided, only variables in the list will be included. + """ + + self.log_div() + self.log_msg(f'Loading loss map for {self.loss_type}...') + + loss_map = file_io.load_data( + mapping_path, None, orientation=1, reindex=False, log=self._asmnt.log + ) + + loss_map['Driver'] = loss_map.index.values + loss_map['Consequence'] = loss_map[self.loss_type] + loss_map.index = np.arange(loss_map.shape[0]) + loss_map = loss_map.loc[:, ['Driver', 'Consequence']] + loss_map.dropna(inplace=True) + + self.loss_map = loss_map + + self.log_msg("Loss map successfully parsed.", prepend_timestamp=False) + + self.log_div() + self.log_msg(f'Loading loss parameters for {self.loss_type}...') + + # replace default flag with default data path + for d_i, data_path in enumerate(data_paths): + + if 'PelicunDefault/' in data_path: + data_paths[d_i] = data_path.replace( + 'PelicunDefault/', + f'{base.pelicun_path}/resources/SimCenterDBDL/') + + data_list = [] + # load the data files one by one + for data_path in data_paths: + data = file_io.load_data( + data_path, None, orientation=1, reindex=False, log=self._asmnt.log + ) + + data_list.append(data) + + loss_params = pd.concat(data_list, axis=0) + + # drop redefinitions of components + loss_params = loss_params.groupby( + level=[0, 1]).first().transform(lambda x: x.fillna(np.nan)) + # note: .groupby introduces None entries. We replace them with + # NaN for consistency. + + # keep only the relevant data + loss_cmp = np.unique(self.loss_map['Consequence'].values) + + available_cmp = loss_params.index.unique(level=0) + missing_cmp = [] + for cmp in loss_cmp: + if cmp not in available_cmp: + missing_cmp.append(cmp) + + if len(missing_cmp) > 0: + self.log_msg("\nWARNING: The loss model does not provide " + "consequence information for the following component(s) " + f"in the loss map: {missing_cmp}. 
They are removed from " + "further analysis\n", + prepend_timestamp=False) + + self.loss_map = self.loss_map.loc[ + ~loss_map['Consequence'].isin(missing_cmp)] + loss_cmp = np.unique(self.loss_map['Consequence'].values) + + loss_params = loss_params.loc[idx[loss_cmp, :], :] + + # drop unused damage states + DS_list = loss_params.columns.get_level_values(0).unique() + DS_to_drop = [] + for DS in DS_list: + if np.all(pd.isna(loss_params.loc[:, idx[DS, :]].values)) is True: + DS_to_drop.append(DS) + + loss_params.drop(columns=DS_to_drop, level=0, inplace=True) + + # convert values to internal base units + for DS in loss_params.columns.unique(level=0): + if DS.startswith('DS'): + loss_params.loc[:, DS] = self.convert_marginal_params( + loss_params.loc[:, DS].copy(), + loss_params[('DV', 'Unit')], + loss_params[('Quantity', 'Unit')] + ).values + + # check for components with incomplete loss information + cmp_incomplete_list = loss_params.loc[ + loss_params[('Incomplete', '')] == 1].index + + if len(cmp_incomplete_list) > 0: + loss_params.drop(cmp_incomplete_list, inplace=True) + + self.log_msg( + "\n" + "WARNING: Loss information is incomplete for the " + f"following component(s) {cmp_incomplete_list}. " + "They were removed from the analysis." + "\n", + prepend_timestamp=False) + + # filter decision variables, if needed + if decision_variables is not None: + + loss_params = loss_params.reorder_levels([1, 0]) + + available_DVs = loss_params.index.unique(level=0) + filtered_DVs = [] + + for DV_i in decision_variables: + + if DV_i in available_DVs: + filtered_DVs.append(DV_i) + + loss_params = loss_params.loc[filtered_DVs, :].reorder_levels([1, 0]) + + self.loss_params = loss_params.sort_index(axis=1) + + self.log_msg("Loss parameters successfully parsed.", + prepend_timestamp=False) + + def aggregate_losses(self): + """ + This is placeholder method. + + The method of aggregating the Decision Variable sample is specific to + each DV and needs to be implemented in every child of the LossModel + independently. + """ + raise NotImplementedError + + def _generate_DV_sample(self, dmg_quantities, sample_size): + """ + This is placeholder method. + + The method of sampling decision variables in Decision + Variable-specific and needs to be implemented in every child + of the LossModel independently. + """ + raise NotImplementedError + + def calculate(self): + """ + Calculate the consequences of each component block damage in + the asset. + + """ + + self.log_div() + self.log_msg("Calculating losses...") + + drivers = [d for d, _ in self.loss_map['Driver']] + + if 'DMG' in drivers: + sample_size = self._asmnt.damage.sample.shape[0] + elif 'DEM' in drivers: + sample_size = self._asmnt.demand.sample.shape[0] + else: + raise ValueError( + 'Invalid loss drivers. Check the specified loss map.') + + # First, get the damaged quantities in each damage state for + # each component of interest. + dmg_q = self._asmnt.damage.sample.copy() + + # Now sample random Decision Variables + # Note that this method is DV-specific and needs to be + # implemented in every child of the LossModel independently. + self._generate_DV_sample(dmg_q, sample_size) + + self.log_msg("Loss calculation successful.") + + +class BldgRepairModel(LossModel): + """ + Manages building repair consequence assessments. 
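+
+    A minimal usage sketch (illustrative only; the ``assessment``
+    object and file names below are hypothetical, and damages are
+    assumed to have been calculated already)::
+
+        repair_model = BldgRepairModel(assessment)
+        repair_model.load_model(
+            ['repair_consequences.csv'], 'loss_map.csv')
+        repair_model.calculate()
+        aggregated = repair_model.aggregate_losses()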
+ + Parameters + ---------- + + """ + + def __init__(self, assessment): + + super().__init__(assessment) + + self.loss_type = 'BldgRepair' + + # def load_model(self, data_paths, mapping_path): + + # super().load_model(data_paths, mapping_path) + + # def calculate(self): + + # super().calculate() + + def _create_DV_RVs(self, case_list): + """ + Prepare the random variables used for repair cost and time simulation. + + Parameters + ---------- + case_list: MultiIndex + Index with cmp-loc-dir-ds descriptions that identify the RVs + we need for the simulation. + + Raises + ------ + ValueError + When any Loss Driver is not recognized. + """ + + RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + LP = self.loss_params + + # make ds the second level in the MultiIndex + case_DF = pd.DataFrame( + index=case_list.reorder_levels([0, 4, 1, 2, 3]), columns=[0, ]) + case_DF.sort_index(axis=0, inplace=True) + driver_cmps = case_list.get_level_values(0).unique() + + rv_count = 0 + + # for each loss component + for loss_cmp_id in self.loss_map.index.values: + + # load the corresponding parameters + driver_type, driver_cmp_id = self.loss_map.loc[loss_cmp_id, 'Driver'] + conseq_cmp_id = self.loss_map.loc[loss_cmp_id, 'Consequence'] + + # currently, we only support DMG-based loss calculations + # but this will be extended in the very near future + if driver_type != 'DMG': + raise ValueError(f"Loss Driver type not recognized: " + f"{driver_type}") + + # load the parameters + # TODO: remove specific DV_type references and make the code below + # generate parameters for any DV_types provided + if (conseq_cmp_id, 'Cost') in LP.index: + cost_params = LP.loc[(conseq_cmp_id, 'Cost'), :] + else: + cost_params = None + + if (conseq_cmp_id, 'Time') in LP.index: + time_params = LP.loc[(conseq_cmp_id, 'Time'), :] + else: + time_params = None + + if (conseq_cmp_id, 'Carbon') in LP.index: + carbon_params = LP.loc[(conseq_cmp_id, 'Carbon'), :] + else: + carbon_params = None + + if (conseq_cmp_id, 'Energy') in LP.index: + energy_params = LP.loc[(conseq_cmp_id, 'Energy'), :] + else: + energy_params = None + + if driver_cmp_id not in driver_cmps: + continue + + for ds in case_DF.loc[driver_cmp_id, :].index.unique(level=0): + + if ds == '0': + continue + + if cost_params is not None: + + cost_params_DS = cost_params[f'DS{ds}'] + + cost_family = cost_params_DS.get('Family', np.nan) + cost_theta = [cost_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3)] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results in a later + # step + if '|' in str(cost_theta[0]): + # if isinstance(cost_theta[0], str): + cost_theta[0] = 1.0 + + else: + cost_family = np.nan + + if time_params is not None: + + time_params_DS = time_params[f'DS{ds}'] + + time_family = time_params_DS.get('Family', np.nan) + time_theta = [time_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3)] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results in a later + # step + if '|' in str(time_theta[0]): + # if isinstance(time_theta[0], str): + time_theta[0] = 1.0 + + else: + time_family = np.nan + + if carbon_params is not None: + + carbon_params_DS = carbon_params[f'DS{ds}'] + + carbon_family = carbon_params_DS.get('Family', np.nan) + carbon_theta = [ + carbon_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results 
in a later + # step + if '|' in str(carbon_theta[0]): + # if isinstance(carbon_theta[0], str): + carbon_theta[0] = 1.0 + + else: + carbon_family = np.nan + + if energy_params is not None: + + energy_params_DS = energy_params[f'DS{ds}'] + + energy_family = energy_params_DS.get('Family', np.nan) + energy_theta = [ + energy_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results in a later + # step + if '|' in str(energy_theta[0]): + # if isinstance(energy_theta[0], str): + energy_theta[0] = 1.0 + + else: + energy_family = np.nan + + # If neither of the DV_types has a stochastic model assigned, + # we do not need random variables for this DS + if ( + (pd.isna(cost_family)) + and (pd.isna(time_family)) + and (pd.isna(carbon_family)) + and (pd.isna(energy_family)) + ): + continue + + # Otherwise, load the loc-dir cases + loc_dir_uid = case_DF.loc[(driver_cmp_id, ds)].index.values + + for loc, direction, uid in loc_dir_uid: + + # assign cost RV + if pd.isna(cost_family) is False: + + cost_rv_tag = ( + f'Cost-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV( + uq.RandomVariable( + name=cost_rv_tag, + distribution=cost_family, + theta=cost_theta, + truncation_limits=[0., np.nan] + ) + ) + rv_count += 1 + + # assign time RV + if pd.isna(time_family) is False: + time_rv_tag = ( + f'Time-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV(uq.RandomVariable( + name=time_rv_tag, + distribution=time_family, + theta=time_theta, + truncation_limits=[0., np.nan] + )) + rv_count += 1 + + # assign time RV + if pd.isna(carbon_family) is False: + carbon_rv_tag = ( + f'Carbon-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV(uq.RandomVariable( + name=carbon_rv_tag, + distribution=carbon_family, + theta=carbon_theta, + truncation_limits=[0., np.nan] + )) + rv_count += 1 + + # assign time RV + if pd.isna(energy_family) is False: + energy_rv_tag = ( + f'Energy-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV(uq.RandomVariable( + name=energy_rv_tag, + distribution=energy_family, + theta=energy_theta, + truncation_limits=[0., np.nan] + )) + rv_count += 1 + + # assign correlation between RVs across DV_types + # TODO: add more DV_types and handle cases with only a + # subset of them being defined + if ((pd.isna(cost_family) is False) and ( + pd.isna(time_family) is False) and ( + self._asmnt.options.rho_cost_time != 0.0)): + + rho = self._asmnt.options.rho_cost_time + + RV_reg.add_RV_set(uq.RandomVariableSet( + f'DV-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}_set', + list(RV_reg.RVs([cost_rv_tag, time_rv_tag]).values()), + np.array([[1.0, rho], [rho, 1.0]]))) + + self.log_msg(f"\n{rv_count} random variables created.", + prepend_timestamp=False) + + if rv_count > 0: + return RV_reg + # else: + return None + + def _calc_median_consequence(self, eco_qnt): + """ + Calculate the median repair consequence for each loss component. 
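+
+        Parameters
+        ----------
+        eco_qnt: DataFrame
+            Aggregated damage quantities, grouped to reflect the
+            economies-of-scale settings of the assessment.
+
+        Returns
+        -------
+        dict
+            Maps each decision variable type (e.g., 'Cost', 'Time') to
+            a DataFrame of median consequences per component and damage
+            state (and location, unless economies of scale are
+            considered across floors).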
+ + """ + + medians = {} + + DV_types = self.loss_params.index.unique(level=1) + + # for DV_type, DV_type_scase in zip(['COST', 'TIME'], ['Cost', 'Time']): + for DV_type in DV_types: + + cmp_list = [] + median_list = [] + + for loss_cmp_id in self.loss_map.index: + + driver_type, driver_cmp = self.loss_map.loc[ + loss_cmp_id, 'Driver'] + loss_cmp_name = self.loss_map.loc[loss_cmp_id, 'Consequence'] + + # check if the given DV type is available as an output for the + # selected component + if (loss_cmp_name, DV_type) not in self.loss_params.index: + continue + + if driver_type != 'DMG': + raise ValueError(f"Loss Driver type not recognized: " + f"{driver_type}") + + if driver_cmp not in eco_qnt.columns.get_level_values( + 0).unique(): + continue + + ds_list = [] + sub_medians = [] + + for ds in self.loss_params.columns.get_level_values(0).unique(): + + if not ds.startswith('DS'): + continue + + ds_id = ds[2:] + + if ds_id == '0': + continue + + loss_params_DS = self.loss_params.loc[ + (loss_cmp_name, DV_type), + ds] + + # check if theta_0 is defined + theta_0 = loss_params_DS.get('Theta_0', np.nan) + + if pd.isna(theta_0): + continue + + # check if the distribution type is supported + family = loss_params_DS.get('Family', np.nan) + + if ((not pd.isna(family)) and ( + family not in [ + 'normal', 'lognormal', 'deterministic'])): + raise ValueError(f"Loss Distribution of type {family} " + f"not supported.") + + # If theta_0 is a scalar + try: + theta_0 = float(theta_0) + + if pd.isna(loss_params_DS.get('Family', np.nan)): + + # if theta_0 is constant, then use it directly + f_median = prep_constant_median_DV(theta_0) + + else: + + # otherwise use a constant 1.0 as the median + # The random variable will be generated as a + # variation from this 1.0 and added in a later step. 
+ f_median = prep_constant_median_DV(1.0) + + except ValueError: + + # otherwise, use the multilinear function + all_vals = np.array( + [val.split(',') for val in theta_0.split('|')], + dtype=float) + medns = all_vals[0] + qnts = all_vals[1] + f_median = prep_bounded_multilinear_median_DV( + medns, qnts) + + # get the corresponding aggregate damage quantities + # to consider economies of scale + if 'ds' in eco_qnt.columns.names: + + avail_ds = ( + eco_qnt.loc[:, driver_cmp].columns.unique(level=0)) + + if (ds_id not in avail_ds): + continue + + eco_qnt_i = eco_qnt.loc[:, (driver_cmp, ds_id)].copy() + + else: + eco_qnt_i = eco_qnt.loc[:, driver_cmp].copy() + + if isinstance(eco_qnt_i, pd.Series): + eco_qnt_i = eco_qnt_i.to_frame() + eco_qnt_i.columns = ['X'] + eco_qnt_i.columns.name = 'del' + + # generate the median values for each realization + eco_qnt_i.loc[:, :] = f_median(eco_qnt_i.values) + + sub_medians.append(eco_qnt_i) + ds_list.append(ds_id) + + if len(ds_list) > 0: + + # combine medians across damage states into one DF + median_list.append(pd.concat(sub_medians, axis=1, + keys=ds_list)) + cmp_list.append(loss_cmp_id) + + if len(cmp_list) > 0: + + # combine medians across components into one DF + result = pd.concat(median_list, axis=1, keys=cmp_list) + + # remove the extra column header level + if 'del' in result.columns.names: + result.columns = result.columns.droplevel('del') + + # name the remaining column header levels + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + result.columns.names = ['cmp', 'ds'] + + else: + result.columns.names = ['cmp', 'ds', 'loc'] + + # save the results to the returned dictionary + medians.update({DV_type: result}) + + return medians + + def aggregate_losses(self): + """ + Aggregates repair consequences across components. + + Repair costs are simply summed up for each realization while repair + times are aggregated to provide lower and upper limits of the total + repair time using the assumption of parallel and sequential repair of + floors, respectively. Repairs within each floor are assumed to occur + sequentially. 
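+
+        Returns
+        -------
+        DataFrame
+            A table with one row per realization that collects the
+            total repair cost, the parallel and sequential estimates of
+            the total repair time, and, when the corresponding decision
+            variables are available, the total embodied carbon and
+            energy. The values are converted to the user-specified
+            units and the header is arranged as a MultiIndex.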
+ """ + + self.log_div() + self.log_msg("Aggregating repair consequences...") + + DV = self.sample + + # group results by DV type and location + DVG = DV.groupby(level=[0, 4], axis=1).sum() + + # create the summary DF + df_agg = pd.DataFrame(index=DV.index, + columns=['repair_cost', + 'repair_time-parallel', + 'repair_time-sequential', + 'repair_carbon', + 'repair_energy']) + + if 'Cost' in DVG.columns: + df_agg['repair_cost'] = DVG['Cost'].sum(axis=1) + else: + df_agg = df_agg.drop('repair_cost', axis=1) + + if 'Time' in DVG.columns: + df_agg['repair_time-sequential'] = DVG['Time'].sum(axis=1) + + df_agg['repair_time-parallel'] = DVG['Time'].max(axis=1) + else: + df_agg = df_agg.drop(['repair_time-parallel', + 'repair_time-sequential'], + axis=1) + + if 'Carbon' in DVG.columns: + df_agg['repair_carbon'] = DVG['Carbon'].sum(axis=1) + else: + df_agg = df_agg.drop('repair_carbon', axis=1) + + if 'Energy' in DVG.columns: + df_agg['repair_energy'] = DVG['Energy'].sum(axis=1) + else: + df_agg = df_agg.drop('repair_energy', axis=1) + + # convert units + + cmp_units = self.loss_params[('DV', 'Unit')].groupby(level=[1, ]).agg( + lambda x: x.value_counts().index[0]) + + dv_units = pd.Series(index=df_agg.columns, name='Units', dtype='object') + + if 'Cost' in DVG.columns: + dv_units['repair_cost'] = cmp_units['Cost'] + + if 'Time' in DVG.columns: + dv_units['repair_time-parallel'] = cmp_units['Time'] + dv_units['repair_time-sequential'] = cmp_units['Time'] + + if 'Carbon' in DVG.columns: + dv_units['repair_carbon'] = cmp_units['Carbon'] + + if 'Energy' in DVG.columns: + dv_units['repair_energy'] = cmp_units['Energy'] + + df_agg = file_io.save_to_csv( + df_agg, None, units=dv_units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=False, + log=self._asmnt.log) + + df_agg.drop("Units", inplace=True) + + # convert header + + df_agg = base.convert_to_MultiIndex(df_agg, axis=1) + + self.log_msg("Repair consequences successfully aggregated.") + + return df_agg.astype(float) + + def _generate_DV_sample(self, dmg_quantities, sample_size): + """ + Generate a sample of repair costs and times. + + Parameters + ---------- + dmg_quantities: DataFrame + A table with the quantity of damage experienced in each damage state + of each performance group at each location and direction. You can use + the prepare_dmg_quantities method in the DamageModel to get such a + DF. + sample_size: integer + The number of realizations to generate. + + Raises + ------ + ValueError + When any Loss Driver is not recognized. 
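+
+        Notes
+        -----
+        The level at which damage quantities are aggregated for
+        economies of scale is controlled by the AcrossFloors and
+        AcrossDamageStates flags in the eco_scale options of the
+        assessment.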
+ """ + + # calculate the quantities for economies of scale + self.log_msg("\nAggregating damage quantities...", + prepend_timestamp=False) + + if self._asmnt.options.eco_scale["AcrossFloors"]: + + if self._asmnt.options.eco_scale["AcrossDamageStates"]: + + eco_levels = [0, ] + eco_columns = ['cmp', ] + + else: + + eco_levels = [0, 4] + eco_columns = ['cmp', 'ds'] + + elif self._asmnt.options.eco_scale["AcrossDamageStates"]: + + eco_levels = [0, 1] + eco_columns = ['cmp', 'loc'] + + else: + + eco_levels = [0, 1, 4] + eco_columns = ['cmp', 'loc', 'ds'] + + eco_group = dmg_quantities.groupby(level=eco_levels, axis=1) + eco_qnt = eco_group.sum().mask(eco_group.count() == 0, np.nan) + assert eco_qnt.columns.names == eco_columns + + self.log_msg("Successfully aggregated damage quantities.", + prepend_timestamp=False) + + # apply the median functions, if needed, to get median consequences for + # each realization + self.log_msg("\nCalculating the median repair consequences...", + prepend_timestamp=False) + + medians = self._calc_median_consequence(eco_qnt) + + self.log_msg("Successfully determined median repair consequences.", + prepend_timestamp=False) + + # combine the median consequences with the samples of deviation from the + # median to get the consequence realizations. + self.log_msg("\nConsidering deviations from the median values to obtain " + "random DV sample...") + + self.log_msg("Preparing random variables for repair cost and time...", + prepend_timestamp=False) + RV_reg = self._create_DV_RVs(dmg_quantities.columns) + + if RV_reg is not None: + RV_reg.generate_sample( + sample_size=sample_size, method=self._asmnt.options.sampling_method) + + std_sample = base.convert_to_MultiIndex( + pd.DataFrame(RV_reg.RV_sample), axis=1).sort_index(axis=1) + std_sample.columns.names = ['dv', 'cmp', 'ds', 'loc', 'dir', 'uid'] + + # convert column names to int + std_idx = std_sample.columns.levels + + std_sample.columns = std_sample.columns.set_levels( + [ + std_idx[0], + std_idx[1].astype(int), + std_idx[2], + std_idx[3], + std_idx[4], + std_idx[5], + ] + ) + + std_sample.sort_index(axis=1, inplace=True) + + else: + std_sample = None + + self.log_msg(f"\nSuccessfully generated {sample_size} realizations of " + "deviation from the median consequences.", + prepend_timestamp=False) + + res_list = [] + key_list = [] + + dmg_quantities.columns = dmg_quantities.columns.reorder_levels( + [0, 4, 1, 2, 3] + ) + dmg_quantities.sort_index(axis=1, inplace=True) + + DV_types = self.loss_params.index.unique(level=1) + + if isinstance(std_sample, pd.DataFrame): + std_DV_types = std_sample.columns.unique(level=0) + else: + std_DV_types = [] + + # for DV_type, _ in zip(['COST', 'TIME'], ['Cost', 'Time']): + for DV_type in DV_types: + + if DV_type in std_DV_types: + prob_cmp_list = std_sample[DV_type].columns.unique(level=0) + else: + prob_cmp_list = [] + + cmp_list = [] + + if DV_type not in medians: + continue + + for cmp_i in medians[DV_type].columns.unique(level=0): + + # check if there is damage in the component + driver_type, dmg_cmp_i = self.loss_map.loc[cmp_i, 'Driver'] + loss_cmp_i = self.loss_map.loc[cmp_i, 'Consequence'] + + if driver_type != 'DMG': + raise ValueError(f"Loss Driver type not " + f"recognized: {driver_type}") + + if not (dmg_cmp_i + in dmg_quantities.columns.unique(level=0)): + continue + + ds_list = [] + + for ds in medians[DV_type].loc[:, cmp_i].columns.unique(level=0): + + loc_list = [] + + for loc_id, loc in enumerate( + dmg_quantities.loc[ + :, (dmg_cmp_i, 
ds)].columns.unique(level=0)): + + if ((self._asmnt.options.eco_scale[ + "AcrossFloors"] is True) and ( + loc_id > 0)): + break + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + median_i = medians[DV_type].loc[:, (cmp_i, ds)] + dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds)] + + if cmp_i in prob_cmp_list: + std_i = std_sample.loc[:, (DV_type, cmp_i, ds)] + else: + std_i = None + + else: + median_i = medians[DV_type].loc[:, (cmp_i, ds, loc)] + dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds, loc)] + + if cmp_i in prob_cmp_list: + std_i = std_sample.loc[:, (DV_type, cmp_i, ds, loc)] + else: + std_i = None + + if std_i is not None: + res_list.append(dmg_i.mul(median_i, axis=0) * std_i) + else: + res_list.append(dmg_i.mul(median_i, axis=0)) + + loc_list.append(loc) + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + ds_list += [ds, ] + else: + ds_list += [(ds, loc) for loc in loc_list] + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + cmp_list += [(loss_cmp_i, dmg_cmp_i, ds) for ds in ds_list] + else: + cmp_list += [ + (loss_cmp_i, dmg_cmp_i, ds, loc) for ds, loc in ds_list] + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds) + for loss_cmp_i, dmg_cmp_i, ds in cmp_list] + else: + key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds, loc) + for loss_cmp_i, dmg_cmp_i, ds, loc in cmp_list] + + lvl_names = ['dv', 'loss', 'dmg', 'ds', 'loc', 'dir', 'uid'] + DV_sample = pd.concat(res_list, axis=1, keys=key_list, + names=lvl_names) + + DV_sample = DV_sample.fillna(0).convert_dtypes() + DV_sample.columns.names = lvl_names + + # Get the flags for replacement consequence trigger + DV_sum = DV_sample.groupby(level=[1, ], axis=1).sum() + if 'replacement' in DV_sum.columns: + + # When the 'replacement' consequence is triggered, all + # local repair consequences are discarded. Note that + # global consequences are assigned to location '0'. + + id_replacement = DV_sum['replacement'] > 0 + + # get the list of non-zero locations + locs = DV_sample.columns.get_level_values(4).unique().values + + locs = locs[locs != '0'] + + DV_sample.loc[id_replacement, idx[:, :, :, :, locs]] = 0.0 + + self._sample = DV_sample + + self.log_msg("Successfully obtained DV sample.", + prepend_timestamp=False) + + +def prep_constant_median_DV(median): + """ + Returns a constant median Decision Variable (DV) function. + + Parameters + ---------- + median: float + The median DV for a consequence function with fixed median. + + Returns + ------- + f: callable + A function that returns the constant median DV for all component + quantities. + """ + def f(*args): + # pylint: disable=unused-argument + return median + + return f + + +def prep_bounded_multilinear_median_DV(medians, quantities): + """ + Returns a bounded multilinear median Decision Variable (DV) function. + + The median DV equals the min and max values when the quantity is + outside of the prescribed quantity bounds. When the quantity is within the + bounds, the returned median is calculated by linear interpolation. + + Parameters + ---------- + medians: ndarray + Series of values that define the y coordinates of the multilinear DV + function. + quantities: ndarray + Series of values that define the component quantities corresponding to + the series of medians and serving as the x coordinates of the + multilinear DV function. + + Returns + ------- + f: callable + A function that returns the median DV given the quantity of damaged + components. 
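+
+    For example (a minimal sketch mirroring the accompanying unit tests)::
+
+        f = prep_bounded_multilinear_median_DV(
+            np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
+            np.array([0.0, 1.0, 2.0, 3.0, 4.0]))
+        f(2.5)   # -> 3.5, by linear interpolation
+        f(10.0)  # -> 5.0, clamped to the upper bound of the medians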
+ """ + def f(quantity): + if quantity is None: + raise ValueError( + 'A bounded linear median Decision Variable function called ' + 'without specifying the quantity of damaged components') + + q_array = np.asarray(quantity, dtype=np.float64) + + # calculate the median consequence given the quantity of damaged + # components + output = np.interp(q_array, quantities, medians) + + return output + + return f diff --git a/pelicun/model/pelicun_model.py b/pelicun/model/pelicun_model.py new file mode 100644 index 000000000..a90c009da --- /dev/null +++ b/pelicun/model/pelicun_model.py @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the PelicunModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + PelicunModel + +""" + +import numpy as np +import pandas as pd +from .. import base +from .. import uq + + +idx = base.idx + + +class PelicunModel: + """ + Generic model class to manage methods shared between all models in Pelicun. + + """ + + def __init__(self, assessment): + + # link the PelicunModel object to its Assessment object + self._asmnt = assessment + + # link logging methods as attributes enabling more + # concise syntax + self.log_msg = self._asmnt.log.msg + self.log_div = self._asmnt.log.div + + def convert_marginal_params(self, marginal_params, units, arg_units=None): + """ + Converts the parameters of marginal distributions in a model to SI units. + + Parameters + ---------- + marginal_params: DataFrame + Each row corresponds to a marginal distribution with Theta + parameters and TruncateLower, TruncateUpper truncation limits + identified in separate columns. + units: Series + Identifies the input units of each marginal. 
The index shall be + identical to the index of the marginal_params argument. The values + are strings that correspond to the units listed in base.py. + arg_units: Series + Identifies the size of a reference entity for the marginal + parameters. For example, when the parameters refer to a component + repair cost, the reference size is the component block size the + repair cost corresponds to. When the parameters refer to a capacity, + demand, or component quantity, the reference size can be omitted + and the default value will ensure that the corresponding scaling is + skipped. This Series provides the units of the reference entities + for each component. Use '1 EA' if you want to skip such scaling for + select components but provide arg units for others. + + Returns + ------- + marginal_params: DataFrame + Same structure as the input DataFrame but with values scaled to + represent internal Standard International units. + + """ + assert np.all(marginal_params.index == units.index) + if arg_units is not None: + assert np.all( + marginal_params.index == arg_units.index) + + # preserve the columns in the input marginal_params + original_cols = marginal_params.columns + + # add extra columns if they are not available in the marginals + for col_name in ('Family', + 'Theta_0', 'Theta_1', 'Theta_2', + 'TruncateLower', 'TruncateUpper'): + if col_name not in marginal_params.columns: + + marginal_params[col_name] = np.nan + + # get a list of unique units + unique_units = units.unique() + + # for each unit + for unit_name in unique_units: + + # get the scale factor for converting from the source unit + unit_factor = self._asmnt.calc_unit_scale_factor(unit_name) + + # get the variables that use the given unit + unit_ids = marginal_params.loc[units == unit_name].index + + # for each variable + for row_id in unit_ids: + + # pull the parameters of the marginal distribution + family = marginal_params.at[row_id, 'Family'] + + if family == 'empirical': + continue + + # load the theta values + theta = marginal_params.loc[ + row_id, ['Theta_0', 'Theta_1', 'Theta_2']].values + + # for each theta + args = [] + for t_i, theta_i in enumerate(theta): + + # if theta_i evaluates to NaN, it is considered undefined + if pd.isna(theta_i): + args.append([]) + continue + + try: + # if theta is a scalar, just store it + theta[t_i] = float(theta_i) + args.append([]) + + except ValueError: + + # otherwise, we assume it is a string using SimCenter + # array notation to identify coordinates of a + # multilinear function + values = [val.split(',') for val in theta_i.split('|')] + + # the first set of values defines the ordinates that + # need to be passed to the distribution scaling method + theta[t_i] = np.array(values[0], dtype=float) + + # the second set of values defines the abscissae that + # we will use after the distribution scaling + args.append(np.array(values[1], dtype=float)) + + # load the truncation limits + tr_limits = marginal_params.loc[ + row_id, ['TruncateLower', 'TruncateUpper']] + + arg_unit_factor = 1.0 + + # check if there is a need to scale due to argument units + if not (arg_units is None): + + # get the argument unit for the given marginal + arg_unit = arg_units.get(row_id) + + if arg_unit != '1 EA': + + # get the scale factor + arg_unit_factor = self._asmnt.calc_unit_scale_factor( + arg_unit + ) + + # scale arguments, if needed + for a_i, arg in enumerate(args): + + if isinstance(arg, np.ndarray): + args[a_i] = arg * arg_unit_factor + + # convert the distribution parameters to SI + theta, tr_limits = 
uq.scale_distribution( + unit_factor / arg_unit_factor, family, theta, tr_limits) + + # convert multilinear function parameters back into strings + for a_i, arg in enumerate(args): + + if len(arg) > 0: + + theta[a_i] = '|'.join( + [','.join([f'{val:g}' for val in vals]) + for vals in (theta[a_i], args[a_i])]) + + # and update the values in the DF + marginal_params.loc[ + row_id, ['Theta_0', 'Theta_1', 'Theta_2']] = theta + + marginal_params.loc[ + row_id, ['TruncateLower', 'TruncateUpper']] = tr_limits + + # remove the added columns + marginal_params = marginal_params[original_cols] + + return marginal_params diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 1834a8a2c..4fae4f23d 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -1948,7 +1948,7 @@ def test__generate_DV_sample(self, bldg_repair_model): class TestModelFunctions: def test_prep_constant_median_DV(self): median = 10.00 - constant_median_DV = model.prep_constant_median_DV(median) + constant_median_DV = model.loss_model.prep_constant_median_DV(median) assert constant_median_DV() == median values = (1.0, 2.0, 3.0, 4.0, 5.0) for value in values: @@ -1957,7 +1957,7 @@ def test_prep_constant_median_DV(self): def test_prep_bounded_multilinear_median_DV(self): medians = np.array((1.00, 2.00, 3.00, 4.00, 5.00)) quantities = np.array((0.00, 1.00, 2.00, 3.00, 4.00)) - f = model.prep_bounded_multilinear_median_DV(medians, quantities) + f = model.loss_model.prep_bounded_multilinear_median_DV(medians, quantities) result = f(2.5) expected = 3.5 From e211f2f0c7e2ee9a67dd99e985de84e0f25839be Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Fri, 1 Mar 2024 05:26:05 -0800 Subject: [PATCH 02/48] Code formatting This commit only contains automated formatting changes (using `black`). 
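
For reference, the hunks below only reflow existing calls into black's
style; a representative before/after from `asset_model.py`:

    # before
    res = file_io.save_to_csv(
        sample, filepath, units=units,
        unit_conversion_factors=self._asmnt.unit_conversion_factors,
        use_simpleindex=(filepath is not None),
        log=self._asmnt.log)

    # after: one argument per line, trailing comma, dedented closing paren
    res = file_io.save_to_csv(
        sample,
        filepath,
        units=units,
        unit_conversion_factors=self._asmnt.unit_conversion_factors,
        use_simpleindex=(filepath is not None),
        log=self._asmnt.log,
    )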
--- pelicun/model/asset_model.py | 196 ++++++++++------- pelicun/model/damage_model.py | 387 ++++++++++++++++++--------------- pelicun/model/demand_model.py | 320 +++++++++++++++------------ pelicun/model/loss_model.py | 374 +++++++++++++++++-------------- pelicun/model/pelicun_model.py | 47 ++-- 5 files changed, 741 insertions(+), 583 deletions(-) diff --git a/pelicun/model/asset_model.py b/pelicun/model/asset_model.py index 6c4ab94ea..12a8d220f 100644 --- a/pelicun/model/asset_model.py +++ b/pelicun/model/asset_model.py @@ -71,7 +71,6 @@ class AssetModel(PelicunModel): """ def __init__(self, assessment): - super().__init__(assessment) self.cmp_marginal_params = None @@ -88,7 +87,6 @@ def cmp_sample(self): """ if self._cmp_sample is None: - cmp_sample = pd.DataFrame(self._cmp_RVs.RV_sample) cmp_sample.sort_index(axis=0, inplace=True) cmp_sample.sort_index(axis=1, inplace=True) @@ -123,14 +121,19 @@ def save_cmp_sample(self, filepath=None, save_units=False): units.loc[cmp_id, :] = unit_name res = file_io.save_to_csv( - sample, filepath, units=units, + sample, + filepath, + units=units, unit_conversion_factors=self._asmnt.unit_conversion_factors, use_simpleindex=(filepath is not None), - log=self._asmnt.log) + log=self._asmnt.log, + ) if filepath is not None: - self.log_msg('Asset components sample successfully saved.', - prepend_timestamp=False) + self.log_msg( + 'Asset components sample successfully saved.', + prepend_timestamp=False, + ) return None # else: units = res.loc["Units"] @@ -151,8 +154,11 @@ def load_cmp_sample(self, filepath): self.log_msg('Loading asset components sample...') sample, units = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - return_units=True, log=self._asmnt.log) + filepath, + self._asmnt.unit_conversion_factors, + return_units=True, + log=self._asmnt.log, + ) sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] @@ -160,8 +166,9 @@ def load_cmp_sample(self, filepath): self.cmp_units = units.groupby(level=0).first() - self.log_msg('Asset components sample successfully loaded.', - prepend_timestamp=False) + self.log_msg( + 'Asset components sample successfully loaded.', prepend_timestamp=False + ) def load_cmp_model(self, data_source): """ @@ -179,13 +186,15 @@ def load_cmp_model(self, data_source): """ def get_locations(loc_str): - try: res = str(int(loc_str)) - return np.array([res, ]) + return np.array( + [ + res, + ] + ) except ValueError as exc: - stories = self._asmnt.stories if "--" in loc_str: @@ -201,26 +210,37 @@ def get_locations(loc_str): return np.arange(1, stories + 1).astype(str) if loc_str == "top": - return np.array([stories, ]).astype(str) + return np.array( + [ + stories, + ] + ).astype(str) if loc_str == "roof": - return np.array([stories + 1, ]).astype(str) + return np.array( + [ + stories + 1, + ] + ).astype(str) - raise ValueError(f"Cannot parse location string: " - f"{loc_str}") from exc + raise ValueError( + f"Cannot parse location string: " f"{loc_str}" + ) from exc def get_directions(dir_str): - if pd.isnull(dir_str): return np.ones(1).astype(str) # else: try: res = str(int(dir_str)) - return np.array([res, ]) + return np.array( + [ + res, + ] + ) except ValueError as exc: - if "," in dir_str: return np.array(dir_str.split(','), dtype=int).astype(str) @@ -228,27 +248,24 @@ def get_directions(dir_str): d_low, d_high = dir_str.split('--') d_low = get_directions(d_low) d_high = get_directions(d_high) - return np.arange( - int(d_low[0]), int(d_high[0]) + 1).astype(str) + return np.arange(int(d_low[0]), int(d_high[0]) + 
1).astype(str) # else: - raise ValueError(f"Cannot parse direction string: " - f"{dir_str}") from exc + raise ValueError( + f"Cannot parse direction string: " f"{dir_str}" + ) from exc def get_attribute(attribute_str, dtype=float, default=np.nan): - if pd.isnull(attribute_str): return default # else: try: - res = dtype(attribute_str) return res except ValueError as exc: - if "," in attribute_str: # a list of weights w = np.array(attribute_str.split(','), dtype=float) @@ -257,8 +274,9 @@ def get_attribute(attribute_str, dtype=float, default=np.nan): return w / np.sum(w) # else: - raise ValueError(f"Cannot parse Blocks string: " - f"{attribute_str}") from exc + raise ValueError( + f"Cannot parse Blocks string: {attribute_str}" + ) from exc self.log_div() self.log_msg('Loading component model...') @@ -288,15 +306,20 @@ def get_attribute(attribute_str, dtype=float, default=np.nan): marginal_params = pd.concat([marginal_params, units], axis=1) cmp_marginal_param_dct = { - 'Family': [], 'Theta_0': [], 'Theta_1': [], 'Theta_2': [], - 'TruncateLower': [], 'TruncateUpper': [], 'Blocks': [], - 'Units': [] + 'Family': [], + 'Theta_0': [], + 'Theta_1': [], + 'Theta_2': [], + 'TruncateLower': [], + 'TruncateUpper': [], + 'Blocks': [], + 'Units': [], } index_list = [] for row in marginal_params.itertuples(): locs = get_locations(row.Location) dirs = get_directions(row.Direction) - indices = list(product((row.Index, ), locs, dirs)) + indices = list(product((row.Index,), locs, dirs)) num_vals = len(indices) for col, cmp_marginal_param in cmp_marginal_param_dct.items(): if col == 'Blocks': @@ -311,13 +334,9 @@ def get_attribute(attribute_str, dtype=float, default=np.nan): * num_vals ) elif col == 'Units': - cmp_marginal_param.extend( - [self.cmp_units[row.Index]] * num_vals - ) + cmp_marginal_param.extend([self.cmp_units[row.Index]] * num_vals) elif col == 'Family': - cmp_marginal_param.extend( - [getattr(row, col, np.nan)] * num_vals - ) + cmp_marginal_param.extend([getattr(row, col, np.nan)] * num_vals) else: cmp_marginal_param.extend( [get_attribute(getattr(row, col, np.nan))] * num_vals @@ -325,32 +344,40 @@ def get_attribute(attribute_str, dtype=float, default=np.nan): index_list.extend(indices) index = pd.MultiIndex.from_tuples(index_list, names=['cmp', 'loc', 'dir']) dtypes = { - 'Family': object, 'Theta_0': float, 'Theta_1': float, - 'Theta_2': float, 'TruncateLower': float, - 'TruncateUpper': float, 'Blocks': int, 'Units': object + 'Family': object, + 'Theta_0': float, + 'Theta_1': float, + 'Theta_2': float, + 'TruncateLower': float, + 'TruncateUpper': float, + 'Blocks': int, + 'Units': object, } cmp_marginal_param_series = [] for col, cmp_marginal_param in cmp_marginal_param_dct.items(): cmp_marginal_param_series.append( pd.Series( - cmp_marginal_param, - dtype=dtypes[col], name=col, index=index)) + cmp_marginal_param, dtype=dtypes[col], name=col, index=index + ) + ) - cmp_marginal_params = pd.concat( - cmp_marginal_param_series, axis=1 - ) + cmp_marginal_params = pd.concat(cmp_marginal_param_series, axis=1) assert not cmp_marginal_params['Theta_0'].isnull().values.any() cmp_marginal_params.dropna(axis=1, how='all', inplace=True) - self.log_msg("Model parameters successfully parsed. " - f"{cmp_marginal_params.shape[0]} performance groups identified", - prepend_timestamp=False) + self.log_msg( + "Model parameters successfully parsed. 
" + f"{cmp_marginal_params.shape[0]} performance groups identified", + prepend_timestamp=False, + ) # Now we can take care of converting the values to base units - self.log_msg("Converting model parameters to internal units...", - prepend_timestamp=False) + self.log_msg( + "Converting model parameters to internal units...", + prepend_timestamp=False, + ) # ensure that the index has unique entries by introducing an # internal component uid @@ -362,12 +389,14 @@ def get_attribute(attribute_str, dtype=float, default=np.nan): self.cmp_marginal_params = cmp_marginal_params.drop('Units', axis=1) - self.log_msg("Model parameters successfully loaded.", - prepend_timestamp=False) + self.log_msg( + "Model parameters successfully loaded.", prepend_timestamp=False + ) - self.log_msg("\nComponent model marginal distributions:\n" - + str(cmp_marginal_params), - prepend_timestamp=False) + self.log_msg( + "\nComponent model marginal distributions:\n" + str(cmp_marginal_params), + prepend_timestamp=False, + ) # the empirical data and correlation files can be added later, if needed @@ -381,22 +410,28 @@ def _create_cmp_RVs(self): # add a random variable for each component quantity variable for rv_params in self.cmp_marginal_params.itertuples(): - cmp = rv_params.Index # create a random variable and add it to the registry - RV_reg.add_RV(uq.RandomVariable( - name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', - distribution=getattr(rv_params, "Family", np.nan), - theta=[getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3)], - truncation_limits=[getattr(rv_params, f"Truncate{side}", np.nan) - for side in ("Lower", "Upper")], - )) - - self.log_msg(f"\n{self.cmp_marginal_params.shape[0]} " - "random variables created.", - prepend_timestamp=False) + RV_reg.add_RV( + uq.RandomVariable( + name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', + distribution=getattr(rv_params, "Family", np.nan), + theta=[ + getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3) + ], + truncation_limits=[ + getattr(rv_params, f"Truncate{side}", np.nan) + for side in ("Lower", "Upper") + ], + ) + ) + + self.log_msg( + f"\n{self.cmp_marginal_params.shape[0]} random variables created.", + prepend_timestamp=False, + ) self._cmp_RVs = RV_reg @@ -408,9 +443,11 @@ def generate_cmp_sample(self, sample_size=None): """ if self.cmp_marginal_params is None: - raise ValueError('Model parameters have not been specified. Load' - 'parameters from a file before generating a ' - 'sample.') + raise ValueError( + 'Model parameters have not been specified. Load' + 'parameters from a file before generating a ' + 'sample.' + ) self.log_div() self.log_msg('Generating sample from component quantity variables...') @@ -420,17 +457,20 @@ def generate_cmp_sample(self, sample_size=None): raise ValueError( 'Sample size was not specified, ' 'and it cannot be determined from ' - 'the demand model.') + 'the demand model.' 
+ ) sample_size = self._asmnt.demand.sample.shape[0] self._create_cmp_RVs() self._cmp_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) # replace the potentially existing sample with the generated one self._cmp_sample = None - self.log_msg(f"\nSuccessfully generated {sample_size} realizations.", - prepend_timestamp=False) + self.log_msg( + f"\nSuccessfully generated {sample_size} realizations.", + prepend_timestamp=False, + ) diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py index 0035e3f3b..cc30bf78b 100644 --- a/pelicun/model/damage_model.py +++ b/pelicun/model/damage_model.py @@ -87,7 +87,6 @@ class DamageModel(PelicunModel): """ def __init__(self, assessment): - super().__init__(assessment) self.damage_params = None @@ -102,21 +101,25 @@ def save_sample(self, filepath=None, save_units=False): self.log_msg('Saving damage sample...') cmp_units = self._asmnt.asset.cmp_units - qnt_units = pd.Series(index=self.sample.columns, name='Units', - dtype='object') + qnt_units = pd.Series( + index=self.sample.columns, name='Units', dtype='object' + ) for cmp in cmp_units.index: qnt_units.loc[cmp] = cmp_units.loc[cmp] res = file_io.save_to_csv( - self.sample, filepath, + self.sample, + filepath, units=qnt_units, unit_conversion_factors=self._asmnt.unit_conversion_factors, use_simpleindex=(filepath is not None), - log=self._asmnt.log) + log=self._asmnt.log, + ) if filepath is not None: - self.log_msg('Damage sample successfully saved.', - prepend_timestamp=False) + self.log_msg( + 'Damage sample successfully saved.', prepend_timestamp=False + ) return None # else: @@ -138,14 +141,13 @@ def load_sample(self, filepath): self.log_msg('Loading damage sample...') self.sample = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - log=self._asmnt.log) + filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log + ) # set the names of the columns self.sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] - self.log_msg('Damage sample successfully loaded.', - prepend_timestamp=False) + self.log_msg('Damage sample successfully loaded.', prepend_timestamp=False) def load_damage_model(self, data_paths): """ @@ -163,7 +165,6 @@ def load_damage_model(self, data_paths): # replace default flag with default data path for d_i, data_path in enumerate(data_paths): - if 'PelicunDefault/' in data_path: data_paths[d_i] = data_path.replace( 'PelicunDefault/', @@ -173,7 +174,6 @@ def load_damage_model(self, data_paths): data_list = [] # load the data files one by one for data_path in data_paths: - data = file_io.load_data( data_path, None, orientation=1, reindex=False, log=self._asmnt.log ) @@ -195,22 +195,22 @@ def load_damage_model(self, data_paths): damage_params = damage_params.loc[cmp_mask, :] if np.sum(cmp_mask) != len(cmp_unique): - cmp_list = cmp_unique[ - np.isin(cmp_unique, damage_params.index.values, - invert=True)].to_list() + np.isin(cmp_unique, damage_params.index.values, invert=True) + ].to_list() - self.log_msg("\nWARNING: The damage model does not provide " - "vulnerability information for the following component(s) " - f"in the asset model: {cmp_list}.\n", - prepend_timestamp=False) + self.log_msg( + "\nWARNING: The damage model does not provide " + "vulnerability information for the following component(s) " + f"in the asset model: {cmp_list}.\n", + prepend_timestamp=False, + ) # TODO: load defaults for Demand-Offset and 
Demand-Directional # Now convert model parameters to base units for LS_i in damage_params.columns.unique(level=0): if LS_i.startswith('LS'): - damage_params.loc[:, LS_i] = self.convert_marginal_params( damage_params.loc[:, LS_i].copy(), damage_params[('Demand', 'Unit')], @@ -218,20 +218,24 @@ def load_damage_model(self, data_paths): # check for components with incomplete damage model information cmp_incomplete_list = damage_params.loc[ - damage_params[('Incomplete', '')] == 1].index + damage_params[('Incomplete', '')] == 1 + ].index damage_params.drop(cmp_incomplete_list, inplace=True) if len(cmp_incomplete_list) > 0: - self.log_msg(f"\nWARNING: Damage model information is incomplete for " - f"the following component(s) {cmp_incomplete_list}. They " - f"were removed from the analysis.\n", - prepend_timestamp=False) + self.log_msg( + f"\nWARNING: Damage model information is incomplete for " + f"the following component(s) {cmp_incomplete_list}. They " + f"were removed from the analysis.\n", + prepend_timestamp=False, + ) self.damage_params = damage_params - self.log_msg("Damage model parameters successfully parsed.", - prepend_timestamp=False) + self.log_msg( + "Damage model parameters successfully parsed.", prepend_timestamp=False + ) def _handle_operation(self, initial_value, operation, other_value): """ @@ -306,7 +310,6 @@ def assign_lsds(ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag): # If the limit state has a single damage state assigned # to it, we don't need random sampling if pd.isnull(ds_weights): - ds_id += 1 lsds_RV_reg.add_RV( @@ -319,7 +322,6 @@ def assign_lsds(ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag): # Otherwise, we create a multinomial random variable else: - # parse the DS weights ds_weights = np.array( ds_weights.replace(" ", "").split('|'), dtype=float @@ -382,7 +384,6 @@ def map_ds(values, offset=int(ds_id + 1)): # get the component sample and blocks from the asset model for PG in PGB.index: - # determine demand capacity adjustment operation, if required cmp_loc_dir = '-'.join(PG[0:3]) capacity_adjustment_operation = scaling_specification.get( @@ -401,7 +402,6 @@ def map_ds(values, offset=int(ds_id + 1)): assert self.damage_params is not None if cmp_id in self.damage_params.index: - frg_params = self.damage_params.loc[cmp_id, :] # get the list of limit states @@ -417,7 +417,6 @@ def map_ds(values, offset=int(ds_id + 1)): anchor_RVs = [] for ls_id in limit_states: - frg_params_LS = frg_params[f'LS{ls_id}'] theta_0 = frg_params_LS.get('Theta_0', np.nan) @@ -453,7 +452,6 @@ def map_ds(values, offset=int(ds_id + 1)): ] for block_i, _ in enumerate(blocks): - frg_rv_tag = ( 'FRG-' f'{PG[0]}-' # cmp_id @@ -578,53 +576,55 @@ def _generate_dmg_sample(self, sample_size, PGB, scaling_specification=None): # Check if damage model parameters have been specified if self.damage_params is None: - raise ValueError('Damage model parameters have not been specified. ' - 'Load parameters from the default damage model ' - 'databases or provide your own damage model ' - 'definitions before generating a sample.') + raise ValueError( + 'Damage model parameters have not been specified. ' + 'Load parameters from the default damage model ' + 'databases or provide your own damage model ' + 'definitions before generating a sample.' 
+ ) # Create capacity and LSD RVs for each performance group capacity_RVs, lsds_RVs = self._create_dmg_RVs(PGB, scaling_specification) if self._asmnt.log.verbose: - self.log_msg('Sampling capacities...', - prepend_timestamp=True) + self.log_msg('Sampling capacities...', prepend_timestamp=True) # Generate samples for capacity RVs capacity_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) # Generate samples for LSD RVs lsds_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) if self._asmnt.log.verbose: - self.log_msg("Raw samples are available", - prepend_timestamp=True) + self.log_msg("Raw samples are available", prepend_timestamp=True) # get the capacity and lsds samples - capacity_sample = pd.DataFrame( - capacity_RVs.RV_sample).sort_index( - axis=0).sort_index(axis=1) - capacity_sample = base.convert_to_MultiIndex( - capacity_sample, axis=1)['FRG'] - capacity_sample.columns.names = [ - 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] - - lsds_sample = pd.DataFrame( - lsds_RVs.RV_sample).sort_index( - axis=0).sort_index(axis=1).astype(int) - lsds_sample = base.convert_to_MultiIndex( - lsds_sample, axis=1)['LSDS'] - lsds_sample.columns.names = [ - 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + capacity_sample = ( + pd.DataFrame(capacity_RVs.RV_sample) + .sort_index(axis=0) + .sort_index(axis=1) + ) + capacity_sample = base.convert_to_MultiIndex(capacity_sample, axis=1)['FRG'] + capacity_sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + + lsds_sample = ( + pd.DataFrame(lsds_RVs.RV_sample) + .sort_index(axis=0) + .sort_index(axis=1) + .astype(int) + ) + lsds_sample = base.convert_to_MultiIndex(lsds_sample, axis=1)['LSDS'] + lsds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] if self._asmnt.log.verbose: self.log_msg( f"Successfully generated {sample_size} realizations.", - prepend_timestamp=True) + prepend_timestamp=True, + ) return capacity_sample, lsds_sample @@ -675,8 +675,9 @@ def _get_required_demand_type(self, PGB): if self._asmnt.log.verbose: # If verbose logging is enabled, log a message indicating # that we are collecting demand information - self.log_msg('Collecting required demand information...', - prepend_timestamp=True) + self.log_msg( + 'Collecting required demand information...', prepend_timestamp=True + ) # Initialize an empty dictionary to store the unique EDP # requirements @@ -691,9 +692,13 @@ def _get_required_demand_type(self, PGB): # Get the directional, offset, and demand_type parameters # from the `DP` DataFrame directional, offset, demand_type = DP.loc[ - cmp, [('Demand', 'Directional'), - ('Demand', 'Offset'), - ('Demand', 'Type')]] + cmp, + [ + ('Demand', 'Directional'), + ('Demand', 'Offset'), + ('Demand', 'Type'), + ], + ] # Parse the demand type @@ -785,48 +790,49 @@ def _assemble_required_demand_data(self, EDP_req): """ if self._asmnt.log.verbose: - self.log_msg('Assembling demand data for calculation...', - prepend_timestamp=True) + self.log_msg( + 'Assembling demand data for calculation...', prepend_timestamp=True + ) demand_source = self._asmnt.demand.sample demand_dict = {} for EDP in EDP_req.keys(): - EDP = EDP.split('-') # if non-directional demand is requested... 
if EDP[2] == '0': - # assume that the demand at the given location is available try: # take the maximum of all available directions and scale it # using the nondirectional multiplier specified in the # self._asmnt.options (the default value is 1.2) - demand = demand_source.loc[ - :, (EDP[0], EDP[1])].max(axis=1).values + demand = ( + demand_source.loc[:, (EDP[0], EDP[1])].max(axis=1).values + ) demand = demand * self._asmnt.options.nondir_multi(EDP[0]) except KeyError: - demand = None else: demand = demand_source[(EDP[0], EDP[1], EDP[2])].values if demand is None: - - self.log_msg(f'\nWARNING: Cannot find demand data for {EDP}. The ' - 'corresponding damages cannot be calculated.', - prepend_timestamp=False) + self.log_msg( + f'\nWARNING: Cannot find demand data for {EDP}. The ' + 'corresponding damages cannot be calculated.', + prepend_timestamp=False, + ) else: demand_dict.update({f'{EDP[0]}-{EDP[1]}-{EDP[2]}': demand}) return demand_dict def _evaluate_damage_state( - self, demand_dict, EDP_req, capacity_sample, lsds_sample): + self, demand_dict, EDP_req, capacity_sample, lsds_sample + ): """ Use the demand and LS capacity sample to evaluate damage states @@ -858,15 +864,15 @@ def _evaluate_damage_state( # Create an empty dataframe with columns and index taken from # the input capacity sample - dmg_eval = pd.DataFrame(columns=capacity_sample.columns, - index=capacity_sample.index) + dmg_eval = pd.DataFrame( + columns=capacity_sample.columns, index=capacity_sample.index + ) # Initialize an empty list to store demand data demand_df = [] # For each demand type in the demand dictionary for demand_name, demand_vals in demand_dict.items(): - # Get the list of PGs assigned to this demand type PG_list = EDP_req[demand_name] @@ -878,8 +884,11 @@ def _evaluate_damage_state( PG_cols.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] # Create a dataframe with demand values repeated for the # number of PGs and assign the columns as PG_cols - demand_df.append(pd.concat([pd.Series(demand_vals)] * len(PG_cols), - axis=1, keys=PG_cols)) + demand_df.append( + pd.concat( + [pd.Series(demand_vals)] * len(PG_cols), axis=1, keys=PG_cols + ) + ) # Concatenate all demand dataframes into a single dataframe demand_df = pd.concat(demand_df, axis=1) @@ -928,8 +937,9 @@ def _evaluate_damage_state( # entire damage model. If subsequent Limit States are also exceeded, # those cells in the result matrix will get overwritten by higher # damage states. 
- ds_sample.loc[:, dmg_e_ls.columns] = ( - ds_sample.loc[:, dmg_e_ls.columns].mask(dmg_e_ls, lsds)) + ds_sample.loc[:, dmg_e_ls.columns] = ds_sample.loc[ + :, dmg_e_ls.columns + ].mask(dmg_e_ls, lsds) return ds_sample @@ -972,8 +982,7 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): # Log a message indicating that the calculation of damage # quantities is starting if self._asmnt.log.verbose: - self.log_msg('Calculating damage quantities...', - prepend_timestamp=True) + self.log_msg('Calculating damage quantities...', prepend_timestamp=True) # Store the damage state sample as a local variable dmg_ds = ds_sample @@ -988,27 +997,25 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): # Combine the component quantity information for the columns # in the damage state sample dmg_qnt = pd.concat( - [cmp_qnt[PG[:4]] for PG in dmg_ds.columns], - axis=1, keys=dmg_ds.columns) + [cmp_qnt[PG[:4]] for PG in dmg_ds.columns], axis=1, keys=dmg_ds.columns + ) # Initialize a list to store the block weights block_weights = [] # For each component in the list of PG blocks for PG in PGB.index: - # Set the number of blocks to 1, unless specified # otherwise in the component marginal parameters blocks = 1 if cmp_params is not None: if 'Blocks' in cmp_params.columns: - blocks = cmp_params.loc[PG, 'Blocks'] # If the number of blocks is specified, calculate the # weights as the reciprocal of the number of blocks if np.atleast_1d(blocks).shape[0] == 1: - blocks_array = np.full(int(blocks), 1. / blocks) + blocks_array = np.full(int(blocks), 1.0 / blocks) # Otherwise, assume that the list contains the weights block_weights += blocks_array.tolist() @@ -1016,8 +1023,8 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): # Broadcast the block weights to match the shape of the damage # quantity DataFrame block_weights = np.broadcast_to( - block_weights, - (dmg_qnt.shape[0], len(block_weights))) + block_weights, (dmg_qnt.shape[0], len(block_weights)) + ) # Multiply the damage quantities by the block weights dmg_qnt *= block_weights @@ -1032,26 +1039,27 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): # If the dropzero option is True, remove the zero damage state # from the list of damage states if dropzero: - ds_list = ds_list[ds_list != 0] # Only proceed with the calculation if there is at least one # damage state in the list if len(ds_list) > 0: - # Create a list of DataFrames, where each DataFrame stores # the damage quantities for a specific damage state - res_list = [pd.DataFrame( - np.where(dmg_ds == ds_i, dmg_qnt, 0), - columns=dmg_ds.columns, - index=dmg_ds.index - ) for ds_i in ds_list] + res_list = [ + pd.DataFrame( + np.where(dmg_ds == ds_i, dmg_qnt, 0), + columns=dmg_ds.columns, + index=dmg_ds.index, + ) + for ds_i in ds_list + ] # Combine the damage quantity DataFrames into a single # DataFrame res_df = pd.concat( - res_list, axis=1, - keys=[f'{ds_i:g}' for ds_i in ds_list]) + res_list, axis=1, keys=[f'{ds_i:g}' for ds_i in ds_list] + ) res_df.columns.names = ['ds', *res_df.columns.names[1::]] # remove the block level from the columns res_df.columns = res_df.columns.reorder_levels([1, 2, 3, 4, 0, 5]) @@ -1120,8 +1128,7 @@ def _perform_dmg_task(self, task, qnt_sample): """ if self._asmnt.log.verbose: - self.log_msg('Applying task...', - prepend_timestamp=True) + self.log_msg('Applying task...', prepend_timestamp=True) # get the list of available components cmp_list = qnt_sample.columns.get_level_values(0).unique().tolist() @@ -1134,12 
+1141,13 @@ def _perform_dmg_task(self, task, qnt_sample): # check if it exists among the available ones if source_cmp not in cmp_list: - self.log_msg( f"WARNING: Source component {source_cmp} in the prescribed " "damage process not found among components in the damage " "sample. The corresponding part of the damage process is " - "skipped.", prepend_timestamp=False) + "skipped.", + prepend_timestamp=False, + ) return @@ -1148,28 +1156,25 @@ def _perform_dmg_task(self, task, qnt_sample): # execute the prescribed events for source_event, target_infos in task[1].items(): - # events triggered by limit state exceedance if source_event.startswith('LS'): - # ls_i = int(source_event[2:]) # TODO: implement source LS support raise ValueError('LS not supported yet.') # events triggered by damage state occurrence if source_event.startswith('DS'): - # get the ID of the damage state that triggers the event - ds_list = [source_event[2:], ] + ds_list = [ + source_event[2:], + ] # if we are only looking for a single DS if len(ds_list) == 1: - ds_target = ds_list[0] # get the realizations with non-zero quantity of the target DS - source_ds_vals = source_cmp_df.groupby( - level=[3], axis=1).max() + source_ds_vals = source_cmp_df.groupby(level=[3], axis=1).max() if ds_target in source_ds_vals.columns: source_ds_vals = source_ds_vals[ds_target] @@ -1183,21 +1188,21 @@ def _perform_dmg_task(self, task, qnt_sample): pass # TODO: implement multiple DS support else: - raise ValueError(f"Unable to parse source event in damage " - f"process: {source_event}") + raise ValueError( + f"Unable to parse source event in damage " + f"process: {source_event}" + ) # get the information about the events target_infos = np.atleast_1d(target_infos) # for each event for target_info in target_infos: - # get the target component and event type target_cmp, target_event = target_info.split('_') # ALL means all, but the source component if target_cmp == 'ALL': - # copy the list of available components target_cmp = deepcopy(cmp_list) @@ -1207,18 +1212,18 @@ def _perform_dmg_task(self, task, qnt_sample): # otherwise we target a specific component elif target_cmp in cmp_list: - target_cmp = [target_cmp, ] + target_cmp = [ + target_cmp, + ] # trigger a limit state if target_event.startswith('LS'): - # ls_i = int(target_event[2:]) # TODO: implement target LS support raise ValueError('LS not supported yet.') # trigger a damage state if target_event.startswith('DS'): - # get the target damage state ID ds_i = target_event[2:] @@ -1234,32 +1239,39 @@ def _perform_dmg_task(self, task, qnt_sample): # because we cannot be certain that ds_i had been # triggered earlier, we have to add this damage # state manually for each PG of each component, if needed - if ds_i not in qnt_sample[ - (target_cmp_i, loc, direction, uid)].columns: + if ( + ds_i + not in qnt_sample[ + (target_cmp_i, loc, direction, uid) + ].columns + ): qnt_sample[ - (target_cmp_i, loc, direction, uid, ds_i)] = 0.0 + (target_cmp_i, loc, direction, uid, ds_i) + ] = 0.0 qnt_sample.loc[ source_mask, - (target_cmp_i, loc, direction, uid, ds_i)] = ( - cmp_qnt.loc[ - source_mask, - (target_cmp_i, loc, direction, uid)].values) + (target_cmp_i, loc, direction, uid, ds_i), + ] = cmp_qnt.loc[ + source_mask, (target_cmp_i, loc, direction, uid) + ].values # clear all damage information elif target_event == 'NA': - # remove quantity information from the target components # in the pre-selected realizations qnt_sample.loc[source_mask, target_cmp] = np.nan else: - raise ValueError(f"Unable to parse 
target event in damage " - f"process: {target_event}") + raise ValueError( + f"Unable to parse target event in damage " + f"process: {target_event}" + ) if self._asmnt.log.verbose: - self.log_msg('Damage process task successfully applied.', - prepend_timestamp=False) + self.log_msg( + 'Damage process task successfully applied.', prepend_timestamp=False + ) def _get_pg_batches(self, block_batch_size): """ @@ -1299,7 +1311,6 @@ def _get_pg_batches(self, block_batch_size): # If marginal parameters are available, use the 'Blocks' # column to initialize the batch dataframe if cmp_marginals is not None: - # Check if the "Blocks" column exists in the component # marginal parameters if 'Blocks' in cmp_marginals.columns: @@ -1310,18 +1321,18 @@ def _get_pg_batches(self, block_batch_size): # sample as the index. if pg_batch is None: cmp_sample = self._asmnt.asset.cmp_sample - pg_batch = pd.DataFrame(np.ones(cmp_sample.shape[1]), - index=cmp_sample.columns, - columns=['Blocks']) + pg_batch = pd.DataFrame( + np.ones(cmp_sample.shape[1]), + index=cmp_sample.columns, + columns=['Blocks'], + ) # Check if the damage model information exists for each # performance group If not, remove the performance group from # the analysis and log a warning message. first_time = True for pg_i in pg_batch.index: - if np.any(np.isin(pg_i, self.damage_params.index)): - blocks_i = pg_batch.loc[pg_i, 'Blocks'] # If the "Blocks" column contains a list of block @@ -1336,10 +1347,12 @@ def _get_pg_batches(self, block_batch_size): pg_batch.drop(pg_i, inplace=True) if first_time: - self.log_msg("\nWARNING: Damage model information is " - "incomplete for some of the performance groups " - "and they had to be removed from the analysis:", - prepend_timestamp=False) + self.log_msg( + "\nWARNING: Damage model information is " + "incomplete for some of the performance groups " + "and they had to be removed from the analysis:", + prepend_timestamp=False, + ) first_time = False @@ -1358,13 +1371,17 @@ def _get_pg_batches(self, block_batch_size): # Group the performance groups into batches for batch_i in range(1, pg_batch.shape[0] + 1): - # Find the mask for blocks that are less than the batch # size and greater than 0 batch_mask = np.all( - np.array([pg_batch['CBlocks'] <= block_batch_size, - pg_batch['CBlocks'] > 0]), - axis=0) + np.array( + [ + pg_batch['CBlocks'] <= block_batch_size, + pg_batch['CBlocks'] > 0, + ] + ), + axis=0, + ) if np.sum(batch_mask) < 1: batch_mask = np.full(batch_mask.shape, False) @@ -1375,7 +1392,8 @@ def _get_pg_batches(self, block_batch_size): # Decrement the cumulative block count by the max count in # the current batch pg_batch['CBlocks'] -= pg_batch.loc[ - pg_batch['Batch'] == batch_i, 'CBlocks'].max() + pg_batch['Batch'] == batch_i, 'CBlocks' + ].max() # If the maximum cumulative block count is 0, exit the # loop @@ -1385,8 +1403,12 @@ def _get_pg_batches(self, block_batch_size): # Group the performance groups by batch, component, location, # and direction, and keep only the number of blocks for each # group - pg_batch = pg_batch.groupby( - ['Batch', 'cmp', 'loc', 'dir', 'uid']).sum().loc[:, 'Blocks'].to_frame() + pg_batch = ( + pg_batch.groupby(['Batch', 'cmp', 'loc', 'dir', 'uid']) + .sum() + .loc[:, 'Blocks'] + .to_frame() + ) return pg_batch @@ -1439,7 +1461,6 @@ def _complete_ds_cols(self, dmg_sample): # walk through all components that have damage parameters provided for cmp_id in DP.index: - # get the component-specific parameters cmp_data = DP.loc[cmp_id] @@ -1448,23 +1469,25 @@ def 
_complete_ds_cols(self, dmg_sample): # walk through all limit states for the component for ls in ls_list: - # check if the given limit state is defined if not pd.isna(cmp_data[(ls, 'Theta_0')]): - # check if there is only one damage state if pd.isna(cmp_data[(ls, 'DamageStateWeights')]): - ds_count += 1 else: - # or if there are more than one, how many ds_count += len( - cmp_data[(ls, 'DamageStateWeights')].split('|')) + cmp_data[(ls, 'DamageStateWeights')].split('|') + ) # get the list of valid cmp-loc-dir-uid sets - cmp_header = dmg_header.loc[:, [cmp_id, ]] + cmp_header = dmg_header.loc[ + :, + [ + cmp_id, + ], + ] # Create a dataframe where they are repeated ds_count times in the # columns. The keys put the DS id in the first level of the @@ -1472,7 +1495,8 @@ def _complete_ds_cols(self, dmg_sample): cmp_headers = pd.concat( [cmp_header for ds_i in range(ds_count + 1)], keys=[str(r) for r in range(0, ds_count + 1)], - axis=1) + axis=1, + ) cmp_headers.columns.names = ['ds', *cmp_headers.columns.names[1::]] # add these new columns to the result dataframe @@ -1514,34 +1538,42 @@ def calculate( # get the list of performance groups qnt_samples = [] - self.log_msg(f'Number of Performance Groups in Asset Model:' - f' {self._asmnt.asset.cmp_sample.shape[1]}', - prepend_timestamp=False) + self.log_msg( + f'Number of Performance Groups in Asset Model:' + f' {self._asmnt.asset.cmp_sample.shape[1]}', + prepend_timestamp=False, + ) pg_batch = self._get_pg_batches(block_batch_size) batches = pg_batch.index.get_level_values(0).unique() - self.log_msg(f'Number of Component Blocks: {pg_batch["Blocks"].sum()}', - prepend_timestamp=False) + self.log_msg( + f'Number of Component Blocks: {pg_batch["Blocks"].sum()}', + prepend_timestamp=False, + ) - self.log_msg(f"{len(batches)} batches of Performance Groups prepared " - "for damage assessment", - prepend_timestamp=False) + self.log_msg( + f"{len(batches)} batches of Performance Groups prepared " + "for damage assessment", + prepend_timestamp=False, + ) # for PG_i in self._asmnt.asset.cmp_sample.columns: for PGB_i in batches: - PGB = pg_batch.loc[PGB_i] - self.log_msg(f"Calculating damage for PG batch {PGB_i} with " - f"{int(PGB['Blocks'].sum())} blocks") + self.log_msg( + f"Calculating damage for PG batch {PGB_i} with " + f"{int(PGB['Blocks'].sum())} blocks" + ) # Generate an array with component capacities for each block and # generate a second array that assigns a specific damage state to # each component limit state. 
The latter is primarily needed to # handle limit states with multiple, mutually exclusive DS options capacity_sample, lsds_sample = self._generate_dmg_sample( - sample_size, PGB, scaling_specification) + sample_size, PGB, scaling_specification + ) # Get the required demand types for the analysis EDP_req = self._get_required_demand_type(PGB) @@ -1551,8 +1583,8 @@ def calculate( # Evaluate the Damage State of each Component Block ds_sample = self._evaluate_damage_state( - demand_dict, EDP_req, - capacity_sample, lsds_sample) + demand_dict, EDP_req, capacity_sample, lsds_sample + ) qnt_sample = self._prepare_dmg_quantities(PGB, ds_sample, dropzero=False) qnt_samples.append(qnt_sample) @@ -1564,8 +1596,7 @@ def calculate( qnt_sample = self._complete_ds_cols(qnt_sample) qnt_sample.sort_index(axis=1, inplace=True) - self.log_msg("Raw damage calculation successful.", - prepend_timestamp=False) + self.log_msg("Raw damage calculation successful.", prepend_timestamp=False) # Apply the prescribed damage process, if any if dmg_process is not None: @@ -1575,11 +1606,11 @@ def calculate( dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} for task in dmg_process.items(): - self._perform_dmg_task(task, qnt_sample) - self.log_msg("Damage processes successfully applied.", - prepend_timestamp=False) + self.log_msg( + "Damage processes successfully applied.", prepend_timestamp=False + ) # If requested, remove columns with no damage from the sample if self._asmnt.options.list_all_ds is False: diff --git a/pelicun/model/demand_model.py b/pelicun/model/demand_model.py index f1ff5637b..a89935e1a 100644 --- a/pelicun/model/demand_model.py +++ b/pelicun/model/demand_model.py @@ -92,7 +92,6 @@ class DemandModel(PelicunModel): """ def __init__(self, assessment): - super().__init__(assessment) self.marginal_params = None @@ -114,14 +113,18 @@ def save_sample(self, filepath=None, save_units=False): self.log_msg('Saving demand sample...') res = file_io.save_to_csv( - self.sample, filepath, units=self.units, + self.sample, + filepath, + units=self.units, unit_conversion_factors=self._asmnt.unit_conversion_factors, use_simpleindex=(filepath is not None), - log=self._asmnt.log) + log=self._asmnt.log, + ) if filepath is not None: - self.log_msg('Demand sample successfully saved.', - prepend_timestamp=False) + self.log_msg( + 'Demand sample successfully saved.', prepend_timestamp=False + ) return None # else: @@ -150,29 +153,31 @@ def load_sample(self, filepath): """ def parse_header(raw_header): - old_MI = raw_header # The first number (event_ID) in the demand labels is optional and # currently not used. We remove it if it was in the raw data. 
if old_MI.nlevels == 4: - if self._asmnt.log.verbose: - self.log_msg('Removing event_ID from header...', - prepend_timestamp=False) + self.log_msg( + 'Removing event_ID from header...', prepend_timestamp=False + ) new_column_index_array = np.array( - [old_MI.get_level_values(i) for i in range(1, 4)]) + [old_MI.get_level_values(i) for i in range(1, 4)] + ) else: new_column_index_array = np.array( - [old_MI.get_level_values(i) for i in range(3)]) + [old_MI.get_level_values(i) for i in range(3)] + ) # Remove whitespace to avoid ambiguity if self._asmnt.log.verbose: - self.log_msg('Removing whitespace from header...', - prepend_timestamp=False) + self.log_msg( + 'Removing whitespace from header...', prepend_timestamp=False + ) wspace_remove = np.vectorize(lambda name: str(name).replace(' ', '')) @@ -181,7 +186,8 @@ def parse_header(raw_header): # Creating new, cleaned-up header new_MI = pd.MultiIndex.from_arrays( - new_column_index, names=['type', 'loc', 'dir']) + new_column_index, names=['type', 'loc', 'dir'] + ) return new_MI @@ -189,8 +195,11 @@ def parse_header(raw_header): self.log_msg('Loading demand data...') demand_data, units = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - return_units=True, log=self._asmnt.log) + filepath, + self._asmnt.unit_conversion_factors, + return_units=True, + log=self._asmnt.log, + ) parsed_data = demand_data.copy() @@ -200,18 +209,20 @@ def parse_header(raw_header): # Remove errors, if needed if 'ERROR' in parsed_data.columns.get_level_values(0): - - self.log_msg('Removing errors from the raw data...', - prepend_timestamp=False) + self.log_msg( + 'Removing errors from the raw data...', prepend_timestamp=False + ) error_list = parsed_data.loc[:, idx['ERROR', :, :]].values.astype(bool) parsed_data = parsed_data.loc[~error_list, :].copy() parsed_data.drop('ERROR', level=0, axis=1, inplace=True) - self.log_msg("\nBased on the values in the ERROR column, " - f"{np.sum(error_list)} demand samples were removed.\n", - prepend_timestamp=False) + self.log_msg( + "\nBased on the values in the ERROR column, " + f"{np.sum(error_list)} demand samples were removed.\n", + prepend_timestamp=False, + ) self.sample = parsed_data @@ -239,7 +250,6 @@ def estimate_RID(self, demands, params, method='FEMA P58'): """ if method == 'FEMA P58': - # method is described in FEMA P-58 Volume 1 Section 5.4 & Appendix C # the provided demands shall be PID values at various loc-dir pairs @@ -257,7 +267,7 @@ def estimate_RID(self, demands, params, method='FEMA P58'): RID = PID.copy() RID[large] = PID[large] - 3 * yield_drift RID[medium] = 0.3 * (PID[medium] - yield_drift) - RID[small] = 0. + RID[small] = 0.0 # add extra uncertainty to nonzero values rng = self._asmnt.options.rng @@ -268,9 +278,16 @@ def estimate_RID(self, demands, params, method='FEMA P58'): RID = pd.DataFrame( np.minimum(PID.values, RID.values), columns=pd.DataFrame( - 1, index=['RID', ], - columns=PID.columns).stack(level=[0, 1]).index, - index=PID.index) + 1, + index=[ + 'RID', + ], + columns=PID.columns, + ) + .stack(level=[0, 1]) + .index, + index=PID.index, + ) else: RID = None @@ -297,24 +314,22 @@ def calibrate_model(self, config): """ def parse_settings(settings, demand_type): - def parse_str_to_float(in_str, context_string): - try: out_float = float(in_str) except ValueError: - - self.log_msg(f"WARNING: Could not parse {in_str} provided as " - f"{context_string}. 
Using NaN instead.", - prepend_timestamp=False) + self.log_msg( + f"WARNING: Could not parse {in_str} provided as " + f"{context_string}. Using NaN instead.", + prepend_timestamp=False, + ) out_float = np.nan return out_float - active_d_types = ( - demand_sample.columns.get_level_values('type').unique()) + active_d_types = demand_sample.columns.get_level_values('type').unique() if demand_type == 'ALL': cols = tuple(active_d_types) @@ -332,9 +347,12 @@ def parse_str_to_float(in_str, context_string): cal_df.loc[idx[cols, :, :], 'Family'] = settings['DistributionFamily'] # load limits - for lim in ('CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper'): - + for lim in ( + 'CensorLower', + 'CensorUpper', + 'TruncateLower', + 'TruncateUpper', + ): if lim in settings.keys(): val = parse_str_to_float(settings[lim], lim) if not pd.isna(val): @@ -343,15 +361,19 @@ def parse_str_to_float(in_str, context_string): # scale the censor and truncation limits, if needed scale_factor = self._asmnt.scale_factor(settings.get('Unit', None)) - rows_to_scale = ['CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper'] + rows_to_scale = [ + 'CensorLower', + 'CensorUpper', + 'TruncateLower', + 'TruncateUpper', + ] cal_df.loc[idx[cols, :, :], rows_to_scale] *= scale_factor # load the prescribed additional uncertainty if 'AddUncertainty' in settings.keys(): - - sig_increase = parse_str_to_float(settings['AddUncertainty'], - 'AddUncertainty') + sig_increase = parse_str_to_float( + settings['AddUncertainty'], 'AddUncertainty' + ) # scale the sig value if the target distribution family is normal if settings['DistributionFamily'] == 'normal': @@ -360,16 +382,13 @@ def parse_str_to_float(in_str, context_string): cal_df.loc[idx[cols, :, :], 'SigIncrease'] = sig_increase def get_filter_mask(lower_lims, upper_lims): - demands_of_interest = demand_sample.iloc[:, pd.notna(upper_lims)] limits_of_interest = upper_lims[pd.notna(upper_lims)] - upper_mask = np.all(demands_of_interest < limits_of_interest, - axis=1) + upper_mask = np.all(demands_of_interest < limits_of_interest, axis=1) demands_of_interest = demand_sample.iloc[:, pd.notna(lower_lims)] limits_of_interest = lower_lims[pd.notna(lower_lims)] - lower_mask = np.all(demands_of_interest > limits_of_interest, - axis=1) + lower_mask = np.all(demands_of_interest > limits_of_interest, axis=1) return np.all([lower_mask, upper_mask], axis=0) @@ -380,12 +399,18 @@ def get_filter_mask(lower_lims, upper_lims): # initialize a DataFrame that contains calibration information cal_df = pd.DataFrame( - columns=['Family', - 'CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper', - 'SigIncrease', 'Theta_0', 'Theta_1'], + columns=[ + 'Family', + 'CensorLower', + 'CensorUpper', + 'TruncateLower', + 'TruncateUpper', + 'SigIncrease', + 'Theta_0', + 'Theta_1', + ], index=demand_sample.columns, - dtype=float + dtype=float, ) cal_df['Family'] = cal_df['Family'].astype(str) @@ -401,11 +426,13 @@ def get_filter_mask(lower_lims, upper_lims): if self._asmnt.log.verbose: self.log_msg( "\nCalibration settings successfully parsed:\n" + str(cal_df), - prepend_timestamp=False) + prepend_timestamp=False, + ) else: self.log_msg( "\nCalibration settings successfully parsed:\n", - prepend_timestamp=False) + prepend_timestamp=False, + ) # save the settings model_params = cal_df.copy() @@ -418,15 +445,16 @@ def get_filter_mask(lower_lims, upper_lims): lower_lims = cal_df.loc[:, 'CensorLower'].values if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): - censor_mask = 
get_filter_mask(lower_lims, upper_lims) censored_count = np.sum(~censor_mask) demand_sample = demand_sample.loc[censor_mask, :] - self.log_msg("\nBased on the provided censoring limits, " - f"{censored_count} samples were censored.", - prepend_timestamp=False) + self.log_msg( + "\nBased on the provided censoring limits, " + f"{censored_count} samples were censored.", + prepend_timestamp=False, + ) else: censored_count = 0 @@ -438,18 +466,18 @@ def get_filter_mask(lower_lims, upper_lims): lower_lims = cal_df.loc[:, 'TruncateLower'].values if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): - truncate_mask = get_filter_mask(lower_lims, upper_lims) truncated_count = np.sum(~truncate_mask) if truncated_count > 0: - demand_sample = demand_sample.loc[truncate_mask, :] - self.log_msg("\nBased on the provided truncation limits, " - f"{truncated_count} samples were removed before demand " - "calibration.", - prepend_timestamp=False) + self.log_msg( + "\nBased on the provided truncation limits, " + f"{truncated_count} samples were removed before demand " + "calibration.", + prepend_timestamp=False, + ) # Separate and save the demands that are kept empirical -> i.e., no # fitting. Currently, empirical demands are decoupled from those that @@ -469,65 +497,73 @@ def get_filter_mask(lower_lims, upper_lims): cal_df = cal_df.drop(empirical_edps, axis=0) if self._asmnt.log.verbose: - self.log_msg(f"\nDemand data used for calibration:\n{demand_sample}", - prepend_timestamp=False) + self.log_msg( + f"\nDemand data used for calibration:\n{demand_sample}", + prepend_timestamp=False, + ) # fit the joint distribution - self.log_msg("\nFitting the prescribed joint demand distribution...", - prepend_timestamp=False) + self.log_msg( + "\nFitting the prescribed joint demand distribution...", + prepend_timestamp=False, + ) demand_theta, demand_rho = uq.fit_distribution_to_sample( raw_samples=demand_sample.values.T, distribution=cal_df.loc[:, 'Family'].values, censored_count=censored_count, - detection_limits=cal_df.loc[ - :, ['CensorLower', 'CensorUpper']].values, + detection_limits=cal_df.loc[:, ['CensorLower', 'CensorUpper']].values, truncation_limits=cal_df.loc[ - :, ['TruncateLower', 'TruncateUpper']].values, + :, ['TruncateLower', 'TruncateUpper'] + ].values, multi_fit=False, - logger_object=self._asmnt.log + logger_object=self._asmnt.log, ) # fit the joint distribution - self.log_msg("\nCalibration successful, processing results...", - prepend_timestamp=False) + self.log_msg( + "\nCalibration successful, processing results...", + prepend_timestamp=False, + ) # save the calibration results model_params.loc[cal_df.index, ['Theta_0', 'Theta_1']] = demand_theta # increase the variance of the marginal distributions, if needed if ~np.all(pd.isna(model_params.loc[:, 'SigIncrease'].values)): - - self.log_msg("\nIncreasing demand variance...", - prepend_timestamp=False) + self.log_msg("\nIncreasing demand variance...", prepend_timestamp=False) sig_inc = np.nan_to_num(model_params.loc[:, 'SigIncrease'].values) sig_0 = model_params.loc[:, 'Theta_1'].values - model_params.loc[:, 'Theta_1'] = ( - np.sqrt(sig_0 ** 2. 
+ sig_inc ** 2.)) + model_params.loc[:, 'Theta_1'] = np.sqrt(sig_0**2.0 + sig_inc**2.0) # remove unneeded fields from model_params for col in ('SigIncrease', 'CensorLower', 'CensorUpper'): model_params = model_params.drop(col, axis=1) # reorder the remaining fields for clarity - model_params = model_params[[ - 'Family', 'Theta_0', 'Theta_1', 'TruncateLower', 'TruncateUpper']] + model_params = model_params[ + ['Family', 'Theta_0', 'Theta_1', 'TruncateLower', 'TruncateUpper'] + ] self.marginal_params = model_params - self.log_msg("\nCalibrated demand model marginal distributions:\n" - + str(model_params), - prepend_timestamp=False) + self.log_msg( + "\nCalibrated demand model marginal distributions:\n" + + str(model_params), + prepend_timestamp=False, + ) # save the correlation matrix - self.correlation = pd.DataFrame(demand_rho, - columns=cal_df.index, - index=cal_df.index) + self.correlation = pd.DataFrame( + demand_rho, columns=cal_df.index, index=cal_df.index + ) - self.log_msg("\nCalibrated demand model correlation matrix:\n" - + str(self.correlation), - prepend_timestamp=False) + self.log_msg( + "\nCalibrated demand model correlation matrix:\n" + + str(self.correlation), + prepend_timestamp=False, + ) def save_model(self, file_prefix): """ @@ -558,9 +594,7 @@ def save_model(self, file_prefix): log_demands = marginal_params.loc[log_rows, :] for label in log_demands.index: - if label in self.units.index: - unit_factor = self._asmnt.calc_unit_scale_factor(self.units[label]) marginal_params.loc[label, 'Theta_1'] *= unit_factor @@ -623,7 +657,9 @@ def load_model(self, data_source): self.correlation = file_io.load_data( correlation_data_source, self._asmnt.unit_conversion_factors, - reindex=False, log=self._asmnt.log) + reindex=False, + log=self._asmnt.log, + ) self.correlation.index.set_names(['type', 'loc', 'dir'], inplace=True) self.correlation.columns.set_names(['type', 'loc', 'dir'], inplace=True) else: @@ -644,8 +680,7 @@ def load_model(self, data_source): ) marginal_params.index.set_names(['type', 'loc', 'dir'], inplace=True) - marginal_params = self.convert_marginal_params(marginal_params.copy(), - units) + marginal_params = self.convert_marginal_params(marginal_params.copy(), units) self.marginal_params = marginal_params self.units = units @@ -663,56 +698,67 @@ def _create_RVs(self, preserve_order=False): # add a random variable for each demand variable for rv_params in self.marginal_params.itertuples(): - edp = rv_params.Index rv_tag = f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' family = getattr(rv_params, "Family", np.nan) if family == 'empirical': - if preserve_order: dist_family = 'coupled_empirical' else: dist_family = 'empirical' # empirical RVs need the data points - RV_reg.add_RV(uq.RandomVariable( - name=rv_tag, - distribution=dist_family, - raw_samples=self.empirical_data.loc[:, edp].values - )) + RV_reg.add_RV( + uq.RandomVariable( + name=rv_tag, + distribution=dist_family, + raw_samples=self.empirical_data.loc[:, edp].values, + ) + ) else: - # all other RVs need parameters of their distributions - RV_reg.add_RV(uq.RandomVariable( - name=rv_tag, - distribution=family, - theta=[getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3)], - truncation_limits=[ - getattr(rv_params, f"Truncate{side}", np.nan) - for side in ("Lower", "Upper")], - - - )) + RV_reg.add_RV( + uq.RandomVariable( + name=rv_tag, + distribution=family, + theta=[ + getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3) + ], + truncation_limits=[ + getattr(rv_params, f"Truncate{side}", np.nan) + 
for side in ("Lower", "Upper") + ], + ) + ) - self.log_msg(f"\n{self.marginal_params.shape[0]} random variables created.", - prepend_timestamp=False) + self.log_msg( + f"\n{self.marginal_params.shape[0]} random variables created.", + prepend_timestamp=False, + ) # add an RV set to consider the correlation between demands, if needed if self.correlation is not None: - rv_set_tags = [f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' - for edp in self.correlation.index.values] - - RV_reg.add_RV_set(uq.RandomVariableSet( - 'EDP_set', list(RV_reg.RVs(rv_set_tags).values()), - self.correlation.values)) + rv_set_tags = [ + f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' + for edp in self.correlation.index.values + ] + + RV_reg.add_RV_set( + uq.RandomVariableSet( + 'EDP_set', + list(RV_reg.RVs(rv_set_tags).values()), + self.correlation.values, + ) + ) self.log_msg( f"\nCorrelations between {len(rv_set_tags)} random variables " "successfully defined.", - prepend_timestamp=False) + prepend_timestamp=False, + ) self._RVs = RV_reg @@ -758,10 +804,7 @@ def clone_demands(self, demand_cloning): for new_columns in new_columns_list: flat_list.extend(new_columns) if len(set(flat_list)) != len(flat_list): - raise ValueError( - 'Duplicate entries in demand cloning ' - 'configuration.' - ) + raise ValueError('Duplicate entries in demand cloning configuration.') # turn the config entries to tuples def turn_to_tuples(demand_cloning): @@ -817,20 +860,21 @@ def generate_sample(self, config): """ if self.marginal_params is None: - raise ValueError('Model parameters have not been specified. Either' - 'load parameters from a file or calibrate the ' - 'model using raw demand data.') + raise ValueError( + 'Model parameters have not been specified. Either' + 'load parameters from a file or calibrate the ' + 'model using raw demand data.' 
+ ) self.log_div() self.log_msg('Generating sample from demand variables...') - self._create_RVs( - preserve_order=config.get('PreserveRawOrder', False)) + self._create_RVs(preserve_order=config.get('PreserveRawOrder', False)) sample_size = config['SampleSize'] self._RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) # replace the potentially existing raw sample with the generated one assert self._RVs is not None @@ -847,5 +891,7 @@ def generate_sample(self, config): if config.get('DemandCloning', False): self.clone_demands(config['DemandCloning']) - self.log_msg(f"\nSuccessfully generated {sample_size} realizations.", - prepend_timestamp=False) + self.log_msg( + f"\nSuccessfully generated {sample_size} realizations.", + prepend_timestamp=False, + ) diff --git a/pelicun/model/loss_model.py b/pelicun/model/loss_model.py index 5f762f41f..69e111ba2 100644 --- a/pelicun/model/loss_model.py +++ b/pelicun/model/loss_model.py @@ -76,7 +76,6 @@ class LossModel(PelicunModel): """ def __init__(self, assessment): - super().__init__(assessment) self._sample = None @@ -101,21 +100,22 @@ def save_sample(self, filepath=None, save_units=False): self.log_msg('Saving loss sample...') cmp_units = self.loss_params[('DV', 'Unit')] - dv_units = pd.Series(index=self.sample.columns, name='Units', - dtype='object') + dv_units = pd.Series(index=self.sample.columns, name='Units', dtype='object') for cmp_id, dv_type in cmp_units.index: dv_units.loc[(dv_type, cmp_id)] = cmp_units.at[(cmp_id, dv_type)] res = file_io.save_to_csv( - self.sample, filepath, units=dv_units, + self.sample, + filepath, + units=dv_units, unit_conversion_factors=self._asmnt.unit_conversion_factors, use_simpleindex=(filepath is not None), - log=self._asmnt.log) + log=self._asmnt.log, + ) if filepath is not None: - self.log_msg('Loss sample successfully saved.', - prepend_timestamp=False) + self.log_msg('Loss sample successfully saved.', prepend_timestamp=False) return None # else: @@ -136,7 +136,8 @@ def load_sample(self, filepath): self.log_msg('Loading loss sample...') self._sample = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log) + filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log + ) self.log_msg('Loss sample successfully loaded.', prepend_timestamp=False) @@ -182,11 +183,11 @@ def load_model(self, data_paths, mapping_path, decision_variables=None): # replace default flag with default data path for d_i, data_path in enumerate(data_paths): - if 'PelicunDefault/' in data_path: data_paths[d_i] = data_path.replace( 'PelicunDefault/', - f'{base.pelicun_path}/resources/SimCenterDBDL/') + f'{base.pelicun_path}/resources/SimCenterDBDL/', + ) data_list = [] # load the data files one by one @@ -200,8 +201,11 @@ def load_model(self, data_paths, mapping_path, decision_variables=None): loss_params = pd.concat(data_list, axis=0) # drop redefinitions of components - loss_params = loss_params.groupby( - level=[0, 1]).first().transform(lambda x: x.fillna(np.nan)) + loss_params = ( + loss_params.groupby(level=[0, 1]) + .first() + .transform(lambda x: x.fillna(np.nan)) + ) # note: .groupby introduces None entries. We replace them with # NaN for consistency. 
@@ -215,14 +219,15 @@ def load_model(self, data_paths, mapping_path, decision_variables=None): missing_cmp.append(cmp) if len(missing_cmp) > 0: - self.log_msg("\nWARNING: The loss model does not provide " - "consequence information for the following component(s) " - f"in the loss map: {missing_cmp}. They are removed from " - "further analysis\n", - prepend_timestamp=False) - - self.loss_map = self.loss_map.loc[ - ~loss_map['Consequence'].isin(missing_cmp)] + self.log_msg( + "\nWARNING: The loss model does not provide " + "consequence information for the following component(s) " + f"in the loss map: {missing_cmp}. They are removed from " + "further analysis\n", + prepend_timestamp=False, + ) + + self.loss_map = self.loss_map.loc[~loss_map['Consequence'].isin(missing_cmp)] loss_cmp = np.unique(self.loss_map['Consequence'].values) loss_params = loss_params.loc[idx[loss_cmp, :], :] @@ -242,12 +247,13 @@ def load_model(self, data_paths, mapping_path, decision_variables=None): loss_params.loc[:, DS] = self.convert_marginal_params( loss_params.loc[:, DS].copy(), loss_params[('DV', 'Unit')], - loss_params[('Quantity', 'Unit')] + loss_params[('Quantity', 'Unit')], ).values # check for components with incomplete loss information cmp_incomplete_list = loss_params.loc[ - loss_params[('Incomplete', '')] == 1].index + loss_params[('Incomplete', '')] == 1 + ].index if len(cmp_incomplete_list) > 0: loss_params.drop(cmp_incomplete_list, inplace=True) @@ -258,18 +264,17 @@ def load_model(self, data_paths, mapping_path, decision_variables=None): f"following component(s) {cmp_incomplete_list}. " "They were removed from the analysis." "\n", - prepend_timestamp=False) + prepend_timestamp=False, + ) # filter decision variables, if needed if decision_variables is not None: - loss_params = loss_params.reorder_levels([1, 0]) available_DVs = loss_params.index.unique(level=0) filtered_DVs = [] for DV_i in decision_variables: - if DV_i in available_DVs: filtered_DVs.append(DV_i) @@ -277,8 +282,7 @@ def load_model(self, data_paths, mapping_path, decision_variables=None): self.loss_params = loss_params.sort_index(axis=1) - self.log_msg("Loss parameters successfully parsed.", - prepend_timestamp=False) + self.log_msg("Loss parameters successfully parsed.", prepend_timestamp=False) def aggregate_losses(self): """ @@ -317,8 +321,7 @@ def calculate(self): elif 'DEM' in drivers: sample_size = self._asmnt.demand.sample.shape[0] else: - raise ValueError( - 'Invalid loss drivers. Check the specified loss map.') + raise ValueError('Invalid loss drivers. Check the specified loss map.') # First, get the damaged quantities in each damage state for # each component of interest. 
@@ -342,7 +345,6 @@ class BldgRepairModel(LossModel): """ def __init__(self, assessment): - super().__init__(assessment) self.loss_type = 'BldgRepair' @@ -376,7 +378,11 @@ def _create_DV_RVs(self, case_list): # make ds the second level in the MultiIndex case_DF = pd.DataFrame( - index=case_list.reorder_levels([0, 4, 1, 2, 3]), columns=[0, ]) + index=case_list.reorder_levels([0, 4, 1, 2, 3]), + columns=[ + 0, + ], + ) case_DF.sort_index(axis=0, inplace=True) driver_cmps = case_list.get_level_values(0).unique() @@ -384,7 +390,6 @@ def _create_DV_RVs(self, case_list): # for each loss component for loss_cmp_id in self.loss_map.index.values: - # load the corresponding parameters driver_type, driver_cmp_id = self.loss_map.loc[loss_cmp_id, 'Driver'] conseq_cmp_id = self.loss_map.loc[loss_cmp_id, 'Consequence'] @@ -392,8 +397,9 @@ def _create_DV_RVs(self, case_list): # currently, we only support DMG-based loss calculations # but this will be extended in the very near future if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not recognized: " - f"{driver_type}") + raise ValueError( + f"Loss Driver type not recognized: " f"{driver_type}" + ) # load the parameters # TODO: remove specific DV_type references and make the code below @@ -422,17 +428,17 @@ def _create_DV_RVs(self, case_list): continue for ds in case_DF.loc[driver_cmp_id, :].index.unique(level=0): - if ds == '0': continue if cost_params is not None: - cost_params_DS = cost_params[f'DS{ds}'] cost_family = cost_params_DS.get('Family', np.nan) - cost_theta = [cost_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3)] + cost_theta = [ + cost_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] # If the first parameter is controlled by a function, we use # 1.0 in its place and will scale the results in a later @@ -445,12 +451,13 @@ def _create_DV_RVs(self, case_list): cost_family = np.nan if time_params is not None: - time_params_DS = time_params[f'DS{ds}'] time_family = time_params_DS.get('Family', np.nan) - time_theta = [time_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3)] + time_theta = [ + time_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] # If the first parameter is controlled by a function, we use # 1.0 in its place and will scale the results in a later @@ -463,7 +470,6 @@ def _create_DV_RVs(self, case_list): time_family = np.nan if carbon_params is not None: - carbon_params_DS = carbon_params[f'DS{ds}'] carbon_family = carbon_params_DS.get('Family', np.nan) @@ -483,7 +489,6 @@ def _create_DV_RVs(self, case_list): carbon_family = np.nan if energy_params is not None: - energy_params_DS = energy_params[f'DS{ds}'] energy_family = energy_params_DS.get('Family', np.nan) @@ -516,10 +521,8 @@ def _create_DV_RVs(self, case_list): loc_dir_uid = case_DF.loc[(driver_cmp_id, ds)].index.values for loc, direction, uid in loc_dir_uid: - # assign cost RV if pd.isna(cost_family) is False: - cost_rv_tag = ( f'Cost-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' ) @@ -529,7 +532,7 @@ def _create_DV_RVs(self, case_list): name=cost_rv_tag, distribution=cost_family, theta=cost_theta, - truncation_limits=[0., np.nan] + truncation_limits=[0.0, np.nan], ) ) rv_count += 1 @@ -540,12 +543,14 @@ def _create_DV_RVs(self, case_list): f'Time-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' ) - RV_reg.add_RV(uq.RandomVariable( - name=time_rv_tag, - distribution=time_family, - theta=time_theta, - truncation_limits=[0., np.nan] - )) + RV_reg.add_RV( + uq.RandomVariable( + name=time_rv_tag, + distribution=time_family, + 
theta=time_theta, + truncation_limits=[0.0, np.nan], + ) + ) rv_count += 1 # assign time RV @@ -554,12 +559,14 @@ def _create_DV_RVs(self, case_list): f'Carbon-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' ) - RV_reg.add_RV(uq.RandomVariable( - name=carbon_rv_tag, - distribution=carbon_family, - theta=carbon_theta, - truncation_limits=[0., np.nan] - )) + RV_reg.add_RV( + uq.RandomVariable( + name=carbon_rv_tag, + distribution=carbon_family, + theta=carbon_theta, + truncation_limits=[0.0, np.nan], + ) + ) rv_count += 1 # assign time RV @@ -568,30 +575,39 @@ def _create_DV_RVs(self, case_list): f'Energy-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' ) - RV_reg.add_RV(uq.RandomVariable( - name=energy_rv_tag, - distribution=energy_family, - theta=energy_theta, - truncation_limits=[0., np.nan] - )) + RV_reg.add_RV( + uq.RandomVariable( + name=energy_rv_tag, + distribution=energy_family, + theta=energy_theta, + truncation_limits=[0.0, np.nan], + ) + ) rv_count += 1 # assign correlation between RVs across DV_types # TODO: add more DV_types and handle cases with only a # subset of them being defined - if ((pd.isna(cost_family) is False) and ( - pd.isna(time_family) is False) and ( - self._asmnt.options.rho_cost_time != 0.0)): - + if ( + (pd.isna(cost_family) is False) + and (pd.isna(time_family) is False) + and (self._asmnt.options.rho_cost_time != 0.0) + ): rho = self._asmnt.options.rho_cost_time - RV_reg.add_RV_set(uq.RandomVariableSet( - f'DV-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}_set', - list(RV_reg.RVs([cost_rv_tag, time_rv_tag]).values()), - np.array([[1.0, rho], [rho, 1.0]]))) + RV_reg.add_RV_set( + uq.RandomVariableSet( + f'DV-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}_set', + list( + RV_reg.RVs([cost_rv_tag, time_rv_tag]).values() + ), + np.array([[1.0, rho], [rho, 1.0]]), + ) + ) - self.log_msg(f"\n{rv_count} random variables created.", - prepend_timestamp=False) + self.log_msg( + f"\n{rv_count} random variables created.", prepend_timestamp=False + ) if rv_count > 0: return RV_reg @@ -610,14 +626,11 @@ def _calc_median_consequence(self, eco_qnt): # for DV_type, DV_type_scase in zip(['COST', 'TIME'], ['Cost', 'Time']): for DV_type in DV_types: - cmp_list = [] median_list = [] for loss_cmp_id in self.loss_map.index: - - driver_type, driver_cmp = self.loss_map.loc[ - loss_cmp_id, 'Driver'] + driver_type, driver_cmp = self.loss_map.loc[loss_cmp_id, 'Driver'] loss_cmp_name = self.loss_map.loc[loss_cmp_id, 'Consequence'] # check if the given DV type is available as an output for the @@ -626,18 +639,17 @@ def _calc_median_consequence(self, eco_qnt): continue if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not recognized: " - f"{driver_type}") + raise ValueError( + f"Loss Driver type not recognized: " f"{driver_type}" + ) - if driver_cmp not in eco_qnt.columns.get_level_values( - 0).unique(): + if driver_cmp not in eco_qnt.columns.get_level_values(0).unique(): continue ds_list = [] sub_medians = [] for ds in self.loss_params.columns.get_level_values(0).unique(): - if not ds.startswith('DS'): continue @@ -647,8 +659,8 @@ def _calc_median_consequence(self, eco_qnt): continue loss_params_DS = self.loss_params.loc[ - (loss_cmp_name, DV_type), - ds] + (loss_cmp_name, DV_type), ds + ] # check if theta_0 is defined theta_0 = loss_params_DS.get('Theta_0', np.nan) @@ -659,47 +671,43 @@ def _calc_median_consequence(self, eco_qnt): # check if the distribution type is supported family = loss_params_DS.get('Family', np.nan) - if ((not pd.isna(family)) and ( - family not in [ - 'normal', 
'lognormal', 'deterministic'])): - raise ValueError(f"Loss Distribution of type {family} " - f"not supported.") + if (not pd.isna(family)) and ( + family not in ['normal', 'lognormal', 'deterministic'] + ): + raise ValueError( + f"Loss Distribution of type {family} " f"not supported." + ) # If theta_0 is a scalar try: theta_0 = float(theta_0) if pd.isna(loss_params_DS.get('Family', np.nan)): - # if theta_0 is constant, then use it directly f_median = prep_constant_median_DV(theta_0) else: - # otherwise use a constant 1.0 as the median # The random variable will be generated as a # variation from this 1.0 and added in a later step. f_median = prep_constant_median_DV(1.0) except ValueError: - # otherwise, use the multilinear function all_vals = np.array( [val.split(',') for val in theta_0.split('|')], - dtype=float) + dtype=float, + ) medns = all_vals[0] qnts = all_vals[1] - f_median = prep_bounded_multilinear_median_DV( - medns, qnts) + f_median = prep_bounded_multilinear_median_DV(medns, qnts) # get the corresponding aggregate damage quantities # to consider economies of scale if 'ds' in eco_qnt.columns.names: + avail_ds = eco_qnt.loc[:, driver_cmp].columns.unique(level=0) - avail_ds = ( - eco_qnt.loc[:, driver_cmp].columns.unique(level=0)) - - if (ds_id not in avail_ds): + if ds_id not in avail_ds: continue eco_qnt_i = eco_qnt.loc[:, (driver_cmp, ds_id)].copy() @@ -719,14 +727,11 @@ def _calc_median_consequence(self, eco_qnt): ds_list.append(ds_id) if len(ds_list) > 0: - # combine medians across damage states into one DF - median_list.append(pd.concat(sub_medians, axis=1, - keys=ds_list)) + median_list.append(pd.concat(sub_medians, axis=1, keys=ds_list)) cmp_list.append(loss_cmp_id) if len(cmp_list) > 0: - # combine medians across components into one DF result = pd.concat(median_list, axis=1, keys=cmp_list) @@ -766,12 +771,16 @@ def aggregate_losses(self): DVG = DV.groupby(level=[0, 4], axis=1).sum() # create the summary DF - df_agg = pd.DataFrame(index=DV.index, - columns=['repair_cost', - 'repair_time-parallel', - 'repair_time-sequential', - 'repair_carbon', - 'repair_energy']) + df_agg = pd.DataFrame( + index=DV.index, + columns=[ + 'repair_cost', + 'repair_time-parallel', + 'repair_time-sequential', + 'repair_carbon', + 'repair_energy', + ], + ) if 'Cost' in DVG.columns: df_agg['repair_cost'] = DVG['Cost'].sum(axis=1) @@ -783,9 +792,9 @@ def aggregate_losses(self): df_agg['repair_time-parallel'] = DVG['Time'].max(axis=1) else: - df_agg = df_agg.drop(['repair_time-parallel', - 'repair_time-sequential'], - axis=1) + df_agg = df_agg.drop( + ['repair_time-parallel', 'repair_time-sequential'], axis=1 + ) if 'Carbon' in DVG.columns: df_agg['repair_carbon'] = DVG['Carbon'].sum(axis=1) @@ -799,8 +808,15 @@ def aggregate_losses(self): # convert units - cmp_units = self.loss_params[('DV', 'Unit')].groupby(level=[1, ]).agg( - lambda x: x.value_counts().index[0]) + cmp_units = ( + self.loss_params[('DV', 'Unit')] + .groupby( + level=[ + 1, + ] + ) + .agg(lambda x: x.value_counts().index[0]) + ) dv_units = pd.Series(index=df_agg.columns, name='Units', dtype='object') @@ -818,10 +834,13 @@ def aggregate_losses(self): dv_units['repair_energy'] = cmp_units['Energy'] df_agg = file_io.save_to_csv( - df_agg, None, units=dv_units, + df_agg, + None, + units=dv_units, unit_conversion_factors=self._asmnt.unit_conversion_factors, use_simpleindex=False, - log=self._asmnt.log) + log=self._asmnt.log, + ) df_agg.drop("Units", inplace=True) @@ -854,28 +873,26 @@ def _generate_DV_sample(self, dmg_quantities, 
sample_size): """ # calculate the quantities for economies of scale - self.log_msg("\nAggregating damage quantities...", - prepend_timestamp=False) + self.log_msg("\nAggregating damage quantities...", prepend_timestamp=False) if self._asmnt.options.eco_scale["AcrossFloors"]: - if self._asmnt.options.eco_scale["AcrossDamageStates"]: - - eco_levels = [0, ] - eco_columns = ['cmp', ] + eco_levels = [ + 0, + ] + eco_columns = [ + 'cmp', + ] else: - eco_levels = [0, 4] eco_columns = ['cmp', 'ds'] elif self._asmnt.options.eco_scale["AcrossDamageStates"]: - eco_levels = [0, 1] eco_columns = ['cmp', 'loc'] else: - eco_levels = [0, 1, 4] eco_columns = ['cmp', 'loc', 'ds'] @@ -883,34 +900,45 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): eco_qnt = eco_group.sum().mask(eco_group.count() == 0, np.nan) assert eco_qnt.columns.names == eco_columns - self.log_msg("Successfully aggregated damage quantities.", - prepend_timestamp=False) + self.log_msg( + "Successfully aggregated damage quantities.", prepend_timestamp=False + ) # apply the median functions, if needed, to get median consequences for # each realization - self.log_msg("\nCalculating the median repair consequences...", - prepend_timestamp=False) + self.log_msg( + "\nCalculating the median repair consequences...", + prepend_timestamp=False, + ) medians = self._calc_median_consequence(eco_qnt) - self.log_msg("Successfully determined median repair consequences.", - prepend_timestamp=False) + self.log_msg( + "Successfully determined median repair consequences.", + prepend_timestamp=False, + ) # combine the median consequences with the samples of deviation from the # median to get the consequence realizations. - self.log_msg("\nConsidering deviations from the median values to obtain " - "random DV sample...") + self.log_msg( + "\nConsidering deviations from the median values to obtain " + "random DV sample..." 
+ ) - self.log_msg("Preparing random variables for repair cost and time...", - prepend_timestamp=False) + self.log_msg( + "Preparing random variables for repair cost and time...", + prepend_timestamp=False, + ) RV_reg = self._create_DV_RVs(dmg_quantities.columns) if RV_reg is not None: RV_reg.generate_sample( - sample_size=sample_size, method=self._asmnt.options.sampling_method) + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) std_sample = base.convert_to_MultiIndex( - pd.DataFrame(RV_reg.RV_sample), axis=1).sort_index(axis=1) + pd.DataFrame(RV_reg.RV_sample), axis=1 + ).sort_index(axis=1) std_sample.columns.names = ['dv', 'cmp', 'ds', 'loc', 'dir', 'uid'] # convert column names to int @@ -932,9 +960,11 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): else: std_sample = None - self.log_msg(f"\nSuccessfully generated {sample_size} realizations of " - "deviation from the median consequences.", - prepend_timestamp=False) + self.log_msg( + f"\nSuccessfully generated {sample_size} realizations of " + "deviation from the median consequences.", + prepend_timestamp=False, + ) res_list = [] key_list = [] @@ -953,7 +983,6 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): # for DV_type, _ in zip(['COST', 'TIME'], ['Cost', 'Time']): for DV_type in DV_types: - if DV_type in std_DV_types: prob_cmp_list = std_sample[DV_type].columns.unique(level=0) else: @@ -965,32 +994,31 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): continue for cmp_i in medians[DV_type].columns.unique(level=0): - # check if there is damage in the component driver_type, dmg_cmp_i = self.loss_map.loc[cmp_i, 'Driver'] loss_cmp_i = self.loss_map.loc[cmp_i, 'Consequence'] if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not " - f"recognized: {driver_type}") + raise ValueError( + f"Loss Driver type not " f"recognized: {driver_type}" + ) - if not (dmg_cmp_i - in dmg_quantities.columns.unique(level=0)): + if not (dmg_cmp_i in dmg_quantities.columns.unique(level=0)): continue ds_list = [] for ds in medians[DV_type].loc[:, cmp_i].columns.unique(level=0): - loc_list = [] for loc_id, loc in enumerate( - dmg_quantities.loc[ - :, (dmg_cmp_i, ds)].columns.unique(level=0)): - - if ((self._asmnt.options.eco_scale[ - "AcrossFloors"] is True) and ( - loc_id > 0)): + dmg_quantities.loc[:, (dmg_cmp_i, ds)].columns.unique( + level=0 + ) + ): + if ( + self._asmnt.options.eco_scale["AcrossFloors"] is True + ) and (loc_id > 0): break if self._asmnt.options.eco_scale["AcrossFloors"] is True: @@ -1019,7 +1047,9 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): loc_list.append(loc) if self._asmnt.options.eco_scale["AcrossFloors"] is True: - ds_list += [ds, ] + ds_list += [ + ds, + ] else: ds_list += [(ds, loc) for loc in loc_list] @@ -1027,26 +1057,34 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): cmp_list += [(loss_cmp_i, dmg_cmp_i, ds) for ds in ds_list] else: cmp_list += [ - (loss_cmp_i, dmg_cmp_i, ds, loc) for ds, loc in ds_list] + (loss_cmp_i, dmg_cmp_i, ds, loc) for ds, loc in ds_list + ] if self._asmnt.options.eco_scale["AcrossFloors"] is True: - key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds) - for loss_cmp_i, dmg_cmp_i, ds in cmp_list] + key_list += [ + (DV_type, loss_cmp_i, dmg_cmp_i, ds) + for loss_cmp_i, dmg_cmp_i, ds in cmp_list + ] else: - key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds, loc) - for loss_cmp_i, dmg_cmp_i, ds, loc in cmp_list] + key_list += [ + (DV_type, loss_cmp_i, dmg_cmp_i, ds, loc) + for loss_cmp_i, dmg_cmp_i, ds, 
loc in cmp_list + ] lvl_names = ['dv', 'loss', 'dmg', 'ds', 'loc', 'dir', 'uid'] - DV_sample = pd.concat(res_list, axis=1, keys=key_list, - names=lvl_names) + DV_sample = pd.concat(res_list, axis=1, keys=key_list, names=lvl_names) DV_sample = DV_sample.fillna(0).convert_dtypes() DV_sample.columns.names = lvl_names # Get the flags for replacement consequence trigger - DV_sum = DV_sample.groupby(level=[1, ], axis=1).sum() + DV_sum = DV_sample.groupby( + level=[ + 1, + ], + axis=1, + ).sum() if 'replacement' in DV_sum.columns: - # When the 'replacement' consequence is triggered, all # local repair consequences are discarded. Note that # global consequences are assigned to location '0'. @@ -1062,8 +1100,7 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): self._sample = DV_sample - self.log_msg("Successfully obtained DV sample.", - prepend_timestamp=False) + self.log_msg("Successfully obtained DV sample.", prepend_timestamp=False) def prep_constant_median_DV(median): @@ -1081,6 +1118,7 @@ def prep_constant_median_DV(median): A function that returns the constant median DV for all component quantities. """ + def f(*args): # pylint: disable=unused-argument return median @@ -1112,11 +1150,13 @@ def prep_bounded_multilinear_median_DV(medians, quantities): A function that returns the median DV given the quantity of damaged components. """ + def f(quantity): if quantity is None: raise ValueError( 'A bounded linear median Decision Variable function called ' - 'without specifying the quantity of damaged components') + 'without specifying the quantity of damaged components' + ) q_array = np.asarray(quantity, dtype=np.float64) diff --git a/pelicun/model/pelicun_model.py b/pelicun/model/pelicun_model.py index a90c009da..bcf9a2e8d 100644 --- a/pelicun/model/pelicun_model.py +++ b/pelicun/model/pelicun_model.py @@ -65,7 +65,6 @@ class PelicunModel: """ def __init__(self, assessment): - # link the PelicunModel object to its Assessment object self._asmnt = assessment @@ -108,18 +107,21 @@ def convert_marginal_params(self, marginal_params, units, arg_units=None): """ assert np.all(marginal_params.index == units.index) if arg_units is not None: - assert np.all( - marginal_params.index == arg_units.index) + assert np.all(marginal_params.index == arg_units.index) # preserve the columns in the input marginal_params original_cols = marginal_params.columns # add extra columns if they are not available in the marginals - for col_name in ('Family', - 'Theta_0', 'Theta_1', 'Theta_2', - 'TruncateLower', 'TruncateUpper'): + for col_name in ( + 'Family', + 'Theta_0', + 'Theta_1', + 'Theta_2', + 'TruncateLower', + 'TruncateUpper', + ): if col_name not in marginal_params.columns: - marginal_params[col_name] = np.nan # get a list of unique units @@ -127,7 +129,6 @@ def convert_marginal_params(self, marginal_params, units, arg_units=None): # for each unit for unit_name in unique_units: - # get the scale factor for converting from the source unit unit_factor = self._asmnt.calc_unit_scale_factor(unit_name) @@ -136,7 +137,6 @@ def convert_marginal_params(self, marginal_params, units, arg_units=None): # for each variable for row_id in unit_ids: - # pull the parameters of the marginal distribution family = marginal_params.at[row_id, 'Family'] @@ -145,12 +145,12 @@ def convert_marginal_params(self, marginal_params, units, arg_units=None): # load the theta values theta = marginal_params.loc[ - row_id, ['Theta_0', 'Theta_1', 'Theta_2']].values + row_id, ['Theta_0', 'Theta_1', 'Theta_2'] + ].values # for each theta args = 
[] for t_i, theta_i in enumerate(theta): - # if theta_i evaluates to NaN, it is considered undefined if pd.isna(theta_i): args.append([]) @@ -162,7 +162,6 @@ def convert_marginal_params(self, marginal_params, units, arg_units=None): args.append([]) except ValueError: - # otherwise, we assume it is a string using SimCenter # array notation to identify coordinates of a # multilinear function @@ -178,18 +177,17 @@ def convert_marginal_params(self, marginal_params, units, arg_units=None): # load the truncation limits tr_limits = marginal_params.loc[ - row_id, ['TruncateLower', 'TruncateUpper']] + row_id, ['TruncateLower', 'TruncateUpper'] + ] arg_unit_factor = 1.0 # check if there is a need to scale due to argument units if not (arg_units is None): - # get the argument unit for the given marginal arg_unit = arg_units.get(row_id) if arg_unit != '1 EA': - # get the scale factor arg_unit_factor = self._asmnt.calc_unit_scale_factor( arg_unit @@ -197,29 +195,32 @@ def convert_marginal_params(self, marginal_params, units, arg_units=None): # scale arguments, if needed for a_i, arg in enumerate(args): - if isinstance(arg, np.ndarray): args[a_i] = arg * arg_unit_factor # convert the distribution parameters to SI theta, tr_limits = uq.scale_distribution( - unit_factor / arg_unit_factor, family, theta, tr_limits) + unit_factor / arg_unit_factor, family, theta, tr_limits + ) # convert multilinear function parameters back into strings for a_i, arg in enumerate(args): - if len(arg) > 0: - theta[a_i] = '|'.join( - [','.join([f'{val:g}' for val in vals]) - for vals in (theta[a_i], args[a_i])]) + [ + ','.join([f'{val:g}' for val in vals]) + for vals in (theta[a_i], args[a_i]) + ] + ) # and update the values in the DF marginal_params.loc[ - row_id, ['Theta_0', 'Theta_1', 'Theta_2']] = theta + row_id, ['Theta_0', 'Theta_1', 'Theta_2'] + ] = theta marginal_params.loc[ - row_id, ['TruncateLower', 'TruncateUpper']] = tr_limits + row_id, ['TruncateLower', 'TruncateUpper'] + ] = tr_limits # remove the added columns marginal_params = marginal_params[original_cols] From a5e49b6ebf0fb051ea40801572e47ddace50522a Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Fri, 1 Mar 2024 05:59:06 -0800 Subject: [PATCH 03/48] Abstract default path substitution Eliminates repetition in resolving `PelicunDefault/` --- pelicun/file_io.py | 58 +++++++++++++++++++++++++++++++++++ pelicun/model/damage_model.py | 7 +---- pelicun/model/loss_model.py | 7 +---- pelicun/tests/test_file_io.py | 13 ++++++++ 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/pelicun/file_io.py b/pelicun/file_io.py index 27f0174e8..6ee2ca86b 100644 --- a/pelicun/file_io.py +++ b/pelicun/file_io.py @@ -262,6 +262,64 @@ def save_to_csv(data, filepath, units=None, unit_conversion_factors=None, return None +def substitute_default_path(data_paths): + """ + Substitutes the default directory path in a list of data paths + with a specified path. + + This function iterates over a list of data paths and replaces + occurrences of the 'PelicunDefault/' substring with the path + specified by `base.pelicun_path` concatenated with + '/resources/SimCenterDBDL/'. This operation is performed to update + paths that are using a default location to a user-defined location + within the pelicun framework. The updated list of paths is then + returned. + + Parameters + ---------- + data_paths : list of str + A list containing the paths to data files. 
These paths may + include a placeholder directory 'PelicunDefault/' that needs + to be substituted with the actual path specified in + `base.pelicun_path`. + + Returns + ------- + list of str + The list with updated paths where 'PelicunDefault/' has been + replaced with the specified path in `base.pelicun_path` + concatenated with '/resources/SimCenterDBDL/'. + + Notes + ----- + - The function assumes that `base.pelicun_path` is properly + initialized and points to the correct directory where resources + are located. + - If a path in the input list does not contain 'PelicunDefault/', + it is added to the output list unchanged. + + Example + ------- + >>> data_paths = ['PelicunDefault/data/file1.txt', + 'data/file2.txt'] + >>> substitute_default_path(data_paths) + ['{base.pelicun_path}/resources/SimCenterDBDL/data/file1.txt', + 'data/file2.txt'] + + """ + updated_paths = [] + for data_path in data_paths: + if 'PelicunDefault/' in data_path: + path = data_path.replace( + 'PelicunDefault/', + f'{base.pelicun_path}/resources/SimCenterDBDL/', + ) + updated_paths.append(path) + else: + updated_paths.append(data_path) + return updated_paths + + def load_data( data_source, unit_conversion_factors, diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py index cc30bf78b..3e583a7f9 100644 --- a/pelicun/model/damage_model.py +++ b/pelicun/model/damage_model.py @@ -164,12 +164,7 @@ def load_damage_model(self, data_paths): self.log_msg('Loading damage model...') # replace default flag with default data path - for d_i, data_path in enumerate(data_paths): - if 'PelicunDefault/' in data_path: - data_paths[d_i] = data_path.replace( - 'PelicunDefault/', - f'{base.pelicun_path}/resources/SimCenterDBDL/', - ) + data_paths = file_io.substitute_default_path(data_paths) data_list = [] # load the data files one by one diff --git a/pelicun/model/loss_model.py b/pelicun/model/loss_model.py index 69e111ba2..ce548b907 100644 --- a/pelicun/model/loss_model.py +++ b/pelicun/model/loss_model.py @@ -182,12 +182,7 @@ def load_model(self, data_paths, mapping_path, decision_variables=None): self.log_msg(f'Loading loss parameters for {self.loss_type}...') # replace default flag with default data path - for d_i, data_path in enumerate(data_paths): - if 'PelicunDefault/' in data_path: - data_paths[d_i] = data_path.replace( - 'PelicunDefault/', - f'{base.pelicun_path}/resources/SimCenterDBDL/', - ) + data_paths = file_io.substitute_default_path(data_paths) data_list = [] # load the data files one by one diff --git a/pelicun/tests/test_file_io.py b/pelicun/tests/test_file_io.py index 40617ec68..1e85433e3 100644 --- a/pelicun/tests/test_file_io.py +++ b/pelicun/tests/test_file_io.py @@ -273,6 +273,19 @@ def msg(self, text, **kwargs): assert mylogger.logs[-1][0] == 'WARNING: Data was empty, no file saved.' 
+def test_substitute_default_path():
+    prior_path = file_io.base.pelicun_path
+    file_io.base.pelicun_path = 'some_path'
+    input_paths = ['PelicunDefault/data/file1.txt', '/data/file2.txt']
+    expected_paths = [
+        'some_path/resources/SimCenterDBDL/data/file1.txt',
+        '/data/file2.txt',
+    ]
+    result_paths = file_io.substitute_default_path(input_paths)
+    assert result_paths == expected_paths
+    file_io.base.pelicun_path = prior_path
+
+
 def test_load_data():
     # test loading data with orientation 0

From 19010982ca3a990d124bde9a11e5f83c1456a3c0 Mon Sep 17 00:00:00 2001
From: John Vouvakis Manousakis
Date: Sat, 2 Mar 2024 07:45:07 -0800
Subject: [PATCH 04/48] Refactor random variable handling in pelicun

This commit breaks down the single `RandomVariable` object into
individual dedicated random variable objects, utilizing class
inheritance. This reduces if/else logic and optional arguments,
making the code much easier to understand and work with. It attempts
to minimize the extent of changes outside of the UQ module.

Key changes include:

- Introduction of a base `BaseRandomVariable` class and specialized
  RV classes for different distribution types (e.g.,
  `NormalRandomVariable`, `LogNormalRandomVariable`).
- Implementation of a `rv_class_map` function, allowing for the
  previously used distribution names to be mapped to the new objects.
- Modifications to the `RandomVariableSet` and
  `RandomVariableRegistry` classes to accommodate the new RV
  structure.
- Numerous updates to unit tests (`test_uq.py`) to reflect the
  changes in RV handling.
- A limited number of code formatting changes to other areas of the
  affected files.
- [X] Linter checks and unit tests pass.
---
 pelicun/model/asset_model.py  |   38 +-
 pelicun/model/damage_model.py |   28 +-
 pelicun/model/demand_model.py |   20 +-
 pelicun/model/loss_model.py   |   12 +-
 pelicun/tests/test_uq.py      |  591 ++++++--------
 pelicun/uq.py                 | 1388 ++++++++++++++++++++++-----------
 6 files changed, 1233 insertions(+), 844 deletions(-)

diff --git a/pelicun/model/asset_model.py b/pelicun/model/asset_model.py
index 12a8d220f..d7a7dfe71 100644
--- a/pelicun/model/asset_model.py
+++ b/pelicun/model/asset_model.py
@@ -413,20 +413,32 @@ def _create_cmp_RVs(self):
             cmp = rv_params.Index

             # create a random variable and add it to the registry
-            RV_reg.add_RV(
-                uq.RandomVariable(
-                    name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}',
-                    distribution=getattr(rv_params, "Family", np.nan),
-                    theta=[
-                        getattr(rv_params, f"Theta_{t_i}", np.nan)
-                        for t_i in range(3)
-                    ],
-                    truncation_limits=[
-                        getattr(rv_params, f"Truncate{side}", np.nan)
-                        for side in ("Lower", "Upper")
-                    ],
+            family = getattr(rv_params, "Family", 'deterministic')
+            if family == 'deterministic':
+                # no truncation limits
+                RV_reg.add_RV(
+                    uq.rv_class_map(family)(
+                        name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}',
+                        theta=[
+                            getattr(rv_params, f"Theta_{t_i}", np.nan)
+                            for t_i in range(3)
+                        ],
+                    )
+                )
+            else:
+                RV_reg.add_RV(
+                    uq.rv_class_map(family)(
+                        name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}',
+                        theta=[
+                            getattr(rv_params, f"Theta_{t_i}", np.nan)
+                            for t_i in range(3)
+                        ],
+                        truncation_limits=[
+                            getattr(rv_params, f"Truncate{side}", np.nan)
+                            for side in ("Lower", "Upper")
+                        ],
+                    )
                 )
-            )

         self.log_msg(
             f"\n{self.cmp_marginal_params.shape[0]} random variables created.",
diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py
index 3e583a7f9..c9ea9e023 100644
--- a/pelicun/model/damage_model.py
+++ b/pelicun/model/damage_model.py
@@ -308,9 +308,8 @@ def assign_lsds(ds_weights, ds_id, lsds_RV_reg,
lsds_rv_tag): ds_id += 1 lsds_RV_reg.add_RV( - uq.RandomVariable( + uq.DeterministicRandomVariable( name=lsds_rv_tag, - distribution='deterministic', theta=ds_id, ) ) @@ -326,9 +325,8 @@ def map_ds(values, offset=int(ds_id + 1)): return values + offset lsds_RV_reg.add_RV( - uq.RandomVariable( + uq.MultinomialRandomVariable( name=lsds_rv_tag, - distribution='multinomial', theta=ds_weights, f_map=map_ds, ) @@ -415,7 +413,7 @@ def map_ds(values, offset=int(ds_id + 1)): frg_params_LS = frg_params[f'LS{ls_id}'] theta_0 = frg_params_LS.get('Theta_0', np.nan) - family = frg_params_LS.get('Family', np.nan) + family = frg_params_LS.get('Family', 'deterministic') ds_weights = frg_params_LS.get('DamageStateWeights', np.nan) # check if the limit state is defined for the component @@ -490,13 +488,19 @@ def map_ds(values, offset=int(ds_id + 1)): ) ) - RV = uq.RandomVariable( - name=frg_rv_tag, - distribution=family, - theta=theta, - truncation_limits=tr_lims, - anchor=anchor, - ) + if family != 'deterministic': + RV = uq.rv_class_map(family)( + name=frg_rv_tag, + theta=theta, + anchor=anchor, + ) + else: + RV = uq.rv_class_map(family)( + name=frg_rv_tag, + theta=theta, + truncation_limits=tr_lims, + anchor=anchor, + ) capacity_RV_reg.add_RV(RV) diff --git a/pelicun/model/demand_model.py b/pelicun/model/demand_model.py index a89935e1a..d01c7b00e 100644 --- a/pelicun/model/demand_model.py +++ b/pelicun/model/demand_model.py @@ -700,7 +700,7 @@ def _create_RVs(self, preserve_order=False): for rv_params in self.marginal_params.itertuples(): edp = rv_params.Index rv_tag = f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' - family = getattr(rv_params, "Family", np.nan) + family = getattr(rv_params, "Family", 'deterministic') if family == 'empirical': if preserve_order: @@ -710,19 +710,29 @@ def _create_RVs(self, preserve_order=False): # empirical RVs need the data points RV_reg.add_RV( - uq.RandomVariable( + uq.rv_class_map(dist_family)( name=rv_tag, - distribution=dist_family, raw_samples=self.empirical_data.loc[:, edp].values, ) ) + elif family == 'deterministic': + # all other RVs need parameters of their distributions + RV_reg.add_RV( + uq.DeterministicRandomVariable( + name=rv_tag, + theta=[ + getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3) + ], + ) + ) + else: # all other RVs need parameters of their distributions RV_reg.add_RV( - uq.RandomVariable( + uq.rv_class_map(family)( name=rv_tag, - distribution=family, theta=[ getattr(rv_params, f"Theta_{t_i}", np.nan) for t_i in range(3) diff --git a/pelicun/model/loss_model.py b/pelicun/model/loss_model.py index ce548b907..d61d55f7d 100644 --- a/pelicun/model/loss_model.py +++ b/pelicun/model/loss_model.py @@ -523,9 +523,8 @@ def _create_DV_RVs(self, case_list): ) RV_reg.add_RV( - uq.RandomVariable( + uq.rv_class_map(cost_family)( name=cost_rv_tag, - distribution=cost_family, theta=cost_theta, truncation_limits=[0.0, np.nan], ) @@ -539,9 +538,8 @@ def _create_DV_RVs(self, case_list): ) RV_reg.add_RV( - uq.RandomVariable( + uq.rv_class_map(time_family)( name=time_rv_tag, - distribution=time_family, theta=time_theta, truncation_limits=[0.0, np.nan], ) @@ -555,9 +553,8 @@ def _create_DV_RVs(self, case_list): ) RV_reg.add_RV( - uq.RandomVariable( + uq.rv_class_map(carbon_family)( name=carbon_rv_tag, - distribution=carbon_family, theta=carbon_theta, truncation_limits=[0.0, np.nan], ) @@ -571,9 +568,8 @@ def _create_DV_RVs(self, case_list): ) RV_reg.add_RV( - uq.RandomVariable( + uq.rv_class_map(energy_family)( name=energy_rv_tag, - distribution=energy_family, 
theta=energy_theta, truncation_limits=[0.0, np.nan], ) diff --git a/pelicun/tests/test_uq.py b/pelicun/tests/test_uq.py index c620312d4..eeed09545 100644 --- a/pelicun/tests/test_uq.py +++ b/pelicun/tests/test_uq.py @@ -799,132 +799,19 @@ def test__OLS_percentiles(): # The following tests verify the methods of the objects of the module. -def test_RandomVariable(): - # instantiate a random variable with default attributes - rv_1 = uq.RandomVariable('rv_1', 'empirical') - # verify that the attributes have been assigned as expected - assert rv_1.name == 'rv_1' - assert rv_1._distribution == 'empirical' - assert np.isnan(rv_1._theta[0]) - - # instantiate a random variable with default attributes - rv_2 = uq.RandomVariable('rv_2', 'coupled_empirical') - # verify that the attributes have been assigned as expected - assert rv_2.name == 'rv_2' - assert rv_2._distribution == 'coupled_empirical' - assert np.isnan(rv_2._theta[0]) - - # verify that other distributions require theta - distributions = ( - 'normal', - 'lognormal', - 'multinomial', - 'custom', - 'uniform', - 'deterministic', - ) - for distribution in distributions: - with pytest.raises(ValueError): - uq.RandomVariable("won't see the light of day", distribution) - - # define a distribution with a given theta - rv_3 = uq.RandomVariable('rv_3', 'normal', np.array((1.00, 0.20))) - # redefine the theta attribute - rv_3.theta = np.array((2.00, 0.20)) - # retrieve other attributes - assert np.allclose(rv_3.theta, np.array((2.00, 0.20))) - assert rv_3.custom_expr is None - assert rv_3.RV_set is None - assert rv_3.sample_DF is None - # assign an anchor value - rv_3.anchor = 2.00 - - # multinomial with invalid p values provided in the theta vector - with pytest.raises(ValueError): - uq.RandomVariable( - 'rv_invalid', 'multinomial', np.array((0.20, 0.70, 0.10, 42.00)) - ) - - # multilinear CDF: cases that should fail - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (100.00, 0.20, 0.20, 0.80, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.20, 0.20, 0.80, 0.80) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 3.00, 1.00, 2.00, 4.00) - y_values = (0.00, 0.25, 0.50, 0.75, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.75, 0.50, 0.25, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.50, 0.50, 0.50, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 2.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.20, 0.40, 0.50, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - # truncation limits not supported - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.25, 0.50, 0.75, 1.00) - values = np.column_stack((x_values, y_values)) - 
with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values, - truncation_limits=np.array((0.20, 0.80)) - ) - - -def test_RandomVariable_cdf(): - # create a normal random variable - rv = uq.RandomVariable( +def test_NormalRandomVariable(): + rv = uq.NormalRandomVariable('rv_name', theta=np.array((0.00, 1.00))) + assert rv.name == 'rv_name' + np.testing.assert_allclose(rv.theta, np.array((0.00, 1.00))) + assert np.all(np.isnan(rv.truncation_limits)) + assert rv.RV_set is None + assert rv.sample_DF is None + + +def test_NormalRandomVariable_cdf(): + # test CDF method + rv = uq.NormalRandomVariable( 'test_rv', - 'normal', theta=(1.0, 1.0), truncation_limits=np.array((0.00, np.nan)), ) @@ -937,7 +824,7 @@ def test_RandomVariable_cdf(): assert np.allclose(cdf, (0.0, 0.0, 0.1781461, 0.40571329, 0.81142658), rtol=1e-5) # repeat without truncation limits - rv = uq.RandomVariable('test_rv', 'normal', theta=(1.0, 1.0)) + rv = uq.NormalRandomVariable('test_rv', theta=(1.0, 1.0)) # evaluate CDF at different points x = (-1.0, 0.0, 0.5, 1.0, 2.0) @@ -948,137 +835,196 @@ def test_RandomVariable_cdf(): cdf, (0.02275013, 0.15865525, 0.30853754, 0.5, 0.84134475), rtol=1e-5 ) - # lognormal, lower truncation - rv = uq.RandomVariable( + +def test_NormalRandomVariable_inverse_transform(): + samples = np.array((0.10, 0.20, 0.30)) + + rv = uq.NormalRandomVariable('test_rv', theta=(1.0, 0.5)) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.35922422, 0.57918938, 0.73779974)), rtol=1e-5 + ) + + rv = uq.NormalRandomVariable('test_rv', theta=(1.0, 0.5)) + with pytest.raises(ValueError): + rv.inverse_transform_sampling() + + # with truncation limits + + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(np.nan, 1.20) + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.24508018, 0.43936, 0.57313359)), rtol=1e-5 + ) + + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(0.80, np.nan) + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.8863824, 0.96947866, 1.0517347)), rtol=1e-5 + ) + + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(0.80, 1.20) + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.84155378, 0.88203946, 0.92176503)), rtol=1e-5 + ) + + # + # edge cases + # + + # normal with problematic truncation limits + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(1e8, 2e8) + ) + rv.uni_sample = samples + with pytest.raises(ValueError): + rv.inverse_transform_sampling() + + +def test_LogNormalRandomVariable_cdf(): + # lower truncation + rv = uq.LogNormalRandomVariable( 'test_rv', - 'lognormal', theta=(1.0, 1.0), truncation_limits=np.array((0.10, np.nan)), ) - x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose( cdf, (0.0, 0.0, 0.23597085, 0.49461712, 0.75326339), rtol=1e-5 ) - # lognormal, upper truncation - rv = uq.RandomVariable( + # upper truncation + rv = uq.LogNormalRandomVariable( 'test_rv', - 'lognormal', theta=(1.0, 1.0), truncation_limits=np.array((np.nan, 5.00)), ) - x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose( cdf, 
(0.00, 0.00, 0.25797755, 0.52840734, 0.79883714), rtol=1e-5 ) - # lognormal, no truncation - rv = uq.RandomVariable('test_rv', 'lognormal', theta=(1.0, 1.0)) - + # no truncation + rv = uq.LogNormalRandomVariable('test_rv', theta=(1.0, 1.0)) x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.2441086, 0.5, 0.7558914), rtol=1e-5) - # uniform, both theta values - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.0, 1.0)) +def test_LogNormalRandomVariable_inverse_transform(): + samples = np.array((0.10, 0.20, 0.30)) + rv = uq.LogNormalRandomVariable('test_rv', theta=(1.0, 0.5)) + + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + + assert np.allclose( + inverse_transform, np.array((0.52688352, 0.65651442, 0.76935694)), rtol=1e-5 + ) + + # + # lognormal with truncation limits + # + + rv = uq.LogNormalRandomVariable( + 'test_rv', + theta=(1.0, 0.5), + truncation_limits=np.array((0.50, np.nan)), + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.62614292, 0.73192471, 0.83365823)), rtol=1e-5 + ) + + # + # edge cases + # + + # lognormal without values to sample from + rv = uq.LogNormalRandomVariable('test_rv', theta=(1.0, 0.5)) + with pytest.raises(ValueError): + rv.inverse_transform_sampling() + + +def test_UniformRandomVariable_cdf(): + # uniform, both theta values + rv = uq.UniformRandomVariable('test_rv', theta=(0.0, 1.0)) x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.5, 1.0, 1.0), rtol=1e-5) with warnings.catch_warnings(): warnings.simplefilter('ignore') # uniform, only upper theta value ( -inf implied ) - rv = uq.RandomVariable('test_rv', 'uniform', theta=(np.nan, 100.00)) - + rv = uq.UniformRandomVariable('test_rv', theta=(np.nan, 100.00)) x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.all(np.isnan(cdf)) # uniform, only lower theta value ( +inf implied ) - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.00, np.nan)) - + rv = uq.UniformRandomVariable('test_rv', theta=(0.00, np.nan)) x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.0, 0.0, 0.0), rtol=1e-5) # uniform, with truncation limits - rv = uq.RandomVariable( + rv = uq.UniformRandomVariable( 'test_rv', - 'uniform', theta=(0.0, 10.0), truncation_limits=np.array((0.00, 1.00)), ) - x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.5, 1.0, 1.0), rtol=1e-5) - # multilinear CDF - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.20, 0.30, 0.80, 1.00) - values = np.column_stack((x_values, y_values)) - rv = uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - x = (-100.00, 0.00, 0.50, 1.00, 1.50, 2.00, 2.50, 3.00, 3.50, 4.00, 100.00) - cdf = rv.cdf(x) - - assert np.allclose( - cdf, - (0.00, 0.00, 0.10, 0.20, 0.25, 0.30, 0.55, 0.80, 0.90, 1.00, 1.0), - rtol=1e-5, - ) - - -def test_RandomVariable_inverse_transform(): - # - # uniform - # - - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.0, 1.0)) +def test_UniformRandomVariable_inverse_transform(): + rv = uq.UniformRandomVariable('test_rv', theta=(0.0, 1.0)) samples = np.array((0.10, 0.20, 0.30)) - rv.uni_sample = samples rv.inverse_transform_sampling() inverse_transform = rv.sample - assert np.allclose(inverse_transform, samples, rtol=1e-5) # # uniform with unspecified bounds # - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - rv 
= uq.RandomVariable('test_rv', 'uniform', theta=(np.nan, 1.0)) - samples = np.array((0.10, 0.20, 0.30)) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.all(np.isnan(inverse_transform)) - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.00, np.nan)) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.all(np.isinf(inverse_transform)) - rv = uq.RandomVariable( + rv = uq.UniformRandomVariable('test_rv', theta=(np.nan, 1.0)) + samples = np.array((0.10, 0.20, 0.30)) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.all(np.isnan(inverse_transform)) + + rv = uq.UniformRandomVariable('test_rv', theta=(0.00, np.nan)) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.all(np.isinf(inverse_transform)) + + rv = uq.UniformRandomVariable( 'test_rv', - 'uniform', theta=(0.00, 1.00), truncation_limits=np.array((0.20, 0.80)), ) @@ -1088,7 +1034,7 @@ def test_RandomVariable_inverse_transform(): assert np.allclose(inverse_transform, np.array((0.26, 0.32, 0.38)), rtol=1e-5) # sample as a pandas series, with a log() map - rv._f_map = np.log + rv.f_map = np.log assert rv.sample_DF.to_dict() == { 0: -1.3470736479666092, 1: -1.1394342831883646, @@ -1096,177 +1042,132 @@ def test_RandomVariable_inverse_transform(): } # - # lognormal + # edge cases # - rv = uq.RandomVariable('test_rv', 'lognormal', theta=(1.0, 0.5)) + # uniform without values to sample from + rv = uq.UniformRandomVariable('test_rv', theta=(0.0, 1.0)) + with pytest.raises(ValueError): + rv.inverse_transform_sampling() - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.allclose( - inverse_transform, np.array((0.52688352, 0.65651442, 0.76935694)), rtol=1e-5 - ) +def test_MultinomialRandomVariable(): + # multinomial with invalid p values provided in the theta vector + with pytest.raises(ValueError): + uq.MultinomialRandomVariable( + 'rv_invalid', np.array((0.20, 0.70, 0.10, 42.00)) + ) - # - # lognormal with truncation limits - # - rv = uq.RandomVariable( - 'test_rv', - 'lognormal', - theta=(1.0, 0.5), - truncation_limits=np.array((0.50, np.nan)), - ) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.allclose( - inverse_transform, np.array((0.62614292, 0.73192471, 0.83365823)), rtol=1e-5 - ) +def test_MultilinearCDFRandomVariable(): + # multilinear CDF: cases that should fail - # - # normal - # + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (100.00, 0.20, 0.20, 0.80, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - rv = uq.RandomVariable('test_rv', 'normal', theta=(1.0, 0.5)) + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.20, 0.80, 0.80) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample + x_values = (0.00, 3.00, 1.00, 2.00, 4.00) + y_values = (0.00, 0.25, 0.50, 0.75, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - assert np.allclose( - inverse_transform, np.array((0.35922422, 0.57918938, 0.73779974)), rtol=1e-5 - ) + 
x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.75, 0.50, 0.25, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - rv = uq.RandomVariable('test_rv', 'normal', theta=(1.0, 0.5)) + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.50, 0.50, 0.50, 1.00) + values = np.column_stack((x_values, y_values)) with pytest.raises(ValueError): - rv.inverse_transform_sampling() + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - # - # normal with truncation limits - # + x_values = (0.00, 2.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.40, 0.50, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) + + +def test_MultilinearCDFRandomVariable_cdf(): + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.30, 0.80, 1.00) + values = np.column_stack((x_values, y_values)) + rv = uq.MultilinearCDFRandomVariable('test_rv', theta=values) + x = (-100.00, 0.00, 0.50, 1.00, 1.50, 2.00, 2.50, 3.00, 3.50, 4.00, 100.00) + cdf = rv.cdf(x) - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(np.nan, 1.20) - ) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.allclose( - inverse_transform, np.array((0.24508018, 0.43936, 0.57313359)), rtol=1e-5 - ) - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(0.80, np.nan) - ) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample assert np.allclose( - inverse_transform, np.array((0.8863824, 0.96947866, 1.0517347)), rtol=1e-5 - ) - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(0.80, 1.20) + cdf, + (0.00, 0.00, 0.10, 0.20, 0.25, 0.30, 0.55, 0.80, 0.90, 1.00, 1.0), + rtol=1e-5, ) - rv.uni_sample = samples + + +def test_MultilinearCDFRandomVariable_inverse_transform(): + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.30, 0.80, 1.00) + values = np.column_stack((x_values, y_values)) + rv = uq.MultilinearCDFRandomVariable('test_rv', theta=values) + + rv.uni_sample = np.array((0.00, 0.1, 0.2, 0.5, 0.8, 0.9, 1.00)) rv.inverse_transform_sampling() inverse_transform = rv.sample assert np.allclose( - inverse_transform, np.array((0.84155378, 0.88203946, 0.92176503)), rtol=1e-5 + inverse_transform, + np.array((0.00, 0.50, 1.00, 2.40, 3.00, 3.50, 4.00)), + rtol=1e-5, ) - # - # empirical - # - rv = uq.RandomVariable( - 'test_rv', 'empirical', raw_samples=(1.00, 2.00, 3.00, 4.00) - ) +def test_EmpiricalRandomVariable_inverse_transform(): + samples = np.array((0.10, 0.20, 0.30)) + + rv = uq.EmpiricalRandomVariable('test_rv', raw_samples=(1.00, 2.00, 3.00, 4.00)) samples = np.array((0.10, 0.50, 0.90)) rv.uni_sample = samples - rv.inverse_transform_sampling(len(samples)) + rv.inverse_transform_sampling() inverse_transform = rv.sample assert np.allclose(inverse_transform, np.array((1.00, 3.00, 4.00)), rtol=1e-5) - rv = uq.RandomVariable( + rv = uq.CoupledEmpiricalRandomVariable( 'test_rv', - 'coupled_empirical', raw_samples=np.array((1.00, 2.00, 3.00, 4.00)), ) - rv.inverse_transform_sampling(6) + rv.inverse_transform_sampling(sample_size=6) inverse_transform = rv.sample assert np.allclose( inverse_transform, np.array((1.00, 2.00, 3.00, 4.00, 1.00, 2.00)), rtol=1e-5 ) - # multilinear CDF - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values 
= (0.00, 0.20, 0.30, 0.80, 1.00) - values = np.column_stack((x_values, y_values)) - rv = uq.RandomVariable('test_rv', 'multilinear_CDF', theta=values) - - rv.uni_sample = np.array( - (0.00, 0.1, 0.2, 0.5, 0.8, 0.9, 1.00) - ) - rv.inverse_transform_sampling() +def test_DeterministicRandomVariable_inverse_transform(): + rv = uq.DeterministicRandomVariable('test_rv', theta=np.array((0.00,))) + rv.inverse_transform_sampling(4) inverse_transform = rv.sample assert np.allclose( - inverse_transform, - np.array((0.00, 0.50, 1.00, 2.40, 3.00, 3.50, 4.00)), - rtol=1e-5, - ) - - # - # edge cases - # - - # normal with problematic truncation limits - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(1e8, 2e8) - ) - rv.uni_sample = samples - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # lognormal without values to sample from - rv = uq.RandomVariable('test_rv', 'lognormal', theta=(1.0, 0.5)) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # uniform without values to sample from - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.0, 1.0)) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # empirical, coupled_empirical without values to sample from - for distr in ('empirical', 'coupled_empirical'): - rv = uq.RandomVariable('test_rv', distr) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # deterministic - rv = uq.RandomVariable('test_rv', 'deterministic', theta=np.array((0.00, 1.00))) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # multinomial - rv = uq.RandomVariable( - 'test_rv', 'multinomial', theta=np.array((0.20, 0.30, 0.50)) + inverse_transform, np.array((0.00, 0.00, 0.00, 0.00)), rtol=1e-5 ) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() def test_RandomVariable_Set(): # a set of two random variables - rv_1 = uq.RandomVariable('rv1', 'normal', theta=(1.0, 1.0)) - rv_2 = uq.RandomVariable('rv2', 'normal', theta=(1.0, 1.0)) + rv_1 = uq.NormalRandomVariable('rv1', theta=(1.0, 1.0)) + rv_2 = uq.NormalRandomVariable('rv2', theta=(1.0, 1.0)) rv_set = uq.RandomVariableSet( # noqa: F841 'test_set', (rv_1, rv_2), np.array(((1.0, 0.50), (0.50, 1.0))) ) @@ -1275,7 +1176,7 @@ def test_RandomVariable_Set(): assert rv_set.size == 2 # a set with only one random variable - rv_1 = uq.RandomVariable('rv1', 'normal', theta=(1.0, 1.0)) + rv_1 = uq.NormalRandomVariable('rv1', theta=(1.0, 1.0)) rv_set = uq.RandomVariableSet( # noqa: F841 'test_set', (rv_1,), np.array(((1.0, 0.50),)) ) @@ -1287,8 +1188,8 @@ def test_RandomVariable_Set_apply_correlation(reset=False): # correlated, uniform np.random.seed(40) - rv_1 = uq.RandomVariable(name='rv1', distribution='uniform', theta=(-5.0, 5.0)) - rv_2 = uq.RandomVariable(name='rv2', distribution='uniform', theta=(-5.0, 5.0)) + rv_1 = uq.UniformRandomVariable(name='rv1', theta=(-5.0, 5.0)) + rv_2 = uq.UniformRandomVariable(name='rv2', theta=(-5.0, 5.0)) rv_1.uni_sample = np.random.random(size=100) rv_2.uni_sample = np.random.random(size=100) @@ -1309,8 +1210,8 @@ def test_RandomVariable_Set_apply_correlation(reset=False): # we also test .sample here - rv_1.inverse_transform_sampling(10) - rv_2.inverse_transform_sampling(10) + rv_1.inverse_transform_sampling() + rv_2.inverse_transform_sampling() rvset_sample = rvs.sample assert set(rvset_sample.keys()) == set(('rv1', 'rv2')) vals = list(rvset_sample.values()) @@ -1334,8 +1235,8 @@ def test_RandomVariable_Set_apply_correlation_special(): # non positive 
semidefinite correlation matrix rho = np.array(((1.00, 0.50), (0.50, -1.00))) - rv_1 = uq.RandomVariable('rv1', 'normal', theta=[5.0, 0.1]) - rv_2 = uq.RandomVariable('rv2', 'normal', theta=[5.0, 0.1]) + rv_1 = uq.NormalRandomVariable('rv1', theta=[5.0, 0.1]) + rv_2 = uq.NormalRandomVariable('rv2', theta=[5.0, 0.1]) rv_1.uni_sample = np.random.random(size=100) rv_2.uni_sample = np.random.random(size=100) rv_set = uq.RandomVariableSet('rv_set', [rv_1, rv_2], rho) @@ -1343,8 +1244,8 @@ def test_RandomVariable_Set_apply_correlation_special(): # non full rank matrix rho = np.array(((0.00, 0.00), (0.0, 0.0))) - rv_1 = uq.RandomVariable('rv1', 'normal', theta=[5.0, 0.1]) - rv_2 = uq.RandomVariable('rv2', 'normal', theta=[5.0, 0.1]) + rv_1 = uq.NormalRandomVariable('rv1', theta=[5.0, 0.1]) + rv_2 = uq.NormalRandomVariable('rv2', theta=[5.0, 0.1]) rv_1.uni_sample = np.random.random(size=100) rv_2.uni_sample = np.random.random(size=100) rv_set = uq.RandomVariableSet('rv_set', [rv_1, rv_2], rho) @@ -1358,13 +1259,13 @@ def test_RandomVariable_Set_orthotope_density(reset=False): data_dir = 'pelicun/tests/data/uq/test_random_variable_set_orthotope_density' # create some random variables - rv_1 = uq.RandomVariable( - 'rv1', 'normal', theta=[5.0, 0.1], truncation_limits=np.array((np.nan, 10.0)) + rv_1 = uq.NormalRandomVariable( + 'rv1', theta=[5.0, 0.1], truncation_limits=np.array((np.nan, 10.0)) ) - rv_2 = uq.RandomVariable('rv2', 'lognormal', theta=[10.0, 0.2]) - rv_3 = uq.RandomVariable('rv3', 'uniform', theta=[13.0, 17.0]) - rv_4 = uq.RandomVariable('rv4', 'uniform', theta=[0.0, 1.0]) - rv_5 = uq.RandomVariable('rv5', 'uniform', theta=[0.0, 1.0]) + rv_2 = uq.LogNormalRandomVariable('rv2', theta=[10.0, 0.2]) + rv_3 = uq.UniformRandomVariable('rv3', theta=[13.0, 17.0]) + rv_4 = uq.UniformRandomVariable('rv4', theta=[0.0, 1.0]) + rv_5 = uq.UniformRandomVariable('rv5', theta=[0.0, 1.0]) # create a random variable set rv_set = uq.RandomVariableSet( @@ -1425,7 +1326,7 @@ def test_RandomVariableRegistry_generate_sample(reset=False): rng = np.random.default_rng(0) rv_registry_single = uq.RandomVariableRegistry(rng) # create the random variable and add it to the registry - RV = uq.RandomVariable('x', distribution='normal', theta=[1.0, 1.0]) + RV = uq.NormalRandomVariable('x', theta=[1.0, 1.0]) rv_registry_single.add_RV(RV) # Generate a sample @@ -1454,9 +1355,9 @@ def test_RandomVariableRegistry_generate_sample(reset=False): # create a random variable registry and add some random variables to it rng = np.random.default_rng(4) rv_registry = uq.RandomVariableRegistry(rng) - rv_1 = uq.RandomVariable('rv1', 'normal', theta=[5.0, 0.1]) - rv_2 = uq.RandomVariable('rv2', 'lognormal', theta=[10.0, 0.2]) - rv_3 = uq.RandomVariable('rv3', 'uniform', theta=[13.0, 17.0]) + rv_1 = uq.NormalRandomVariable('rv1', theta=[5.0, 0.1]) + rv_2 = uq.LogNormalRandomVariable('rv2', theta=[10.0, 0.2]) + rv_3 = uq.UniformRandomVariable('rv3', theta=[13.0, 17.0]) rv_registry.add_RV(rv_1) rv_registry.add_RV(rv_2) rv_registry.add_RV(rv_3) @@ -1470,8 +1371,8 @@ def test_RandomVariableRegistry_generate_sample(reset=False): rv_registry.add_RV_set(rv_set) # add some more random variables that are not part of the set - rv_4 = uq.RandomVariable('rv4', 'normal', theta=[14.0, 0.30]) - rv_5 = uq.RandomVariable('rv5', 'normal', theta=[15.0, 0.50]) + rv_4 = uq.NormalRandomVariable('rv4', theta=[14.0, 0.30]) + rv_5 = uq.NormalRandomVariable('rv5', theta=[15.0, 0.50]) rv_registry.add_RV(rv_4) rv_registry.add_RV(rv_5) @@ -1494,5 +1395,13 @@ def 
test_RandomVariableRegistry_generate_sample(reset=False): assert 'rv3' not in rv_dictionary +def test_rv_class_map(): + rv_class = uq.rv_class_map('normal') + assert rv_class.__name__ == 'NormalRandomVariable' + + with pytest.raises(ValueError): + uq.rv_class_map('') + + if __name__ == '__main__': pass diff --git a/pelicun/uq.py b/pelicun/uq.py index 55cc4de73..fefdaf283 100644 --- a/pelicun/uq.py +++ b/pelicun/uq.py @@ -58,6 +58,7 @@ """ +from abc import ABC, abstractmethod from scipy.stats import uniform, norm from scipy.stats import multivariate_normal as mvn from scipy.stats._mvn import mvndst # pylint: disable=no-name-in-module @@ -247,9 +248,7 @@ def _get_theta(params, inits, dist_list): theta = np.zeros(inits.shape) for i, (params_i, inits_i, dist_i) in enumerate(zip(params, inits, dist_list)): - if dist_i in {'normal', 'lognormal'}: - # Note that the standard deviation is fit in log space, hence the # unusual-looking transformation here sig = np.exp(np.log(inits_i[1]) + params_i[1]) @@ -291,7 +290,6 @@ def _get_limit_probs(limits, distribution, theta): """ if distribution in {'normal', 'normal-stdev', 'lognormal'}: - a, b = limits mu = theta[0] sig = theta[1] @@ -344,18 +342,20 @@ def _get_std_samples(samples, theta, tr_limits, dist_list): std_samples = np.zeros(samples.shape) for i, (samples_i, theta_i, tr_lim_i, dist_i) in enumerate( - zip(samples, theta, tr_limits, dist_list)): - + zip(samples, theta, tr_limits, dist_list) + ): if dist_i in {'normal', 'normal-stdev', 'lognormal'}: - lim_low = tr_lim_i[0] lim_high = tr_lim_i[1] - if (True in (samples_i > lim_high).tolist() - or True in (samples_i < lim_low).tolist()): + if ( + True in (samples_i > lim_high).tolist() + or True in (samples_i < lim_low).tolist() + ): raise ValueError( 'One or more sample values lie outside ' - 'of the specified truncation limits.') + 'of the specified truncation limits.' + ) # first transform from normal to uniform uni_samples = norm.cdf(samples_i, loc=theta_i[0], scale=theta_i[1]) @@ -369,7 +369,7 @@ def _get_std_samples(samples, theta, tr_limits, dist_list): uni_samples = (uni_samples - p_a) / (p_b - p_a) # then transform from uniform to standard normal - std_samples[i] = norm.ppf(uni_samples, loc=0., scale=1.) + std_samples[i] = norm.ppf(uni_samples, loc=0.0, scale=1.0) else: raise ValueError(f'Unsupported distribution: {dist_i}') @@ -400,10 +400,8 @@ def _get_std_corr_matrix(std_samples): Correlation matrix. 
""" - if (True in np.isinf(std_samples) - or True in np.isnan(std_samples)): - raise ValueError( - 'std_samples array must not contain inf or NaN values') + if True in np.isinf(std_samples) or True in np.isnan(std_samples): + raise ValueError('std_samples array must not contain inf or NaN values') n_dims, n_samples = std_samples.shape @@ -415,23 +413,22 @@ def _get_std_corr_matrix(std_samples): for dim_i in range(n_dims): for dim_j in np.arange(dim_i + 1, n_dims): rho_hat[dim_i, dim_j] = ( - np.sum(std_samples[dim_i] * std_samples[dim_j]) / n_samples) + np.sum(std_samples[dim_i] * std_samples[dim_j]) / n_samples + ) rho_hat[dim_j, dim_i] = rho_hat[dim_i, dim_j] # make sure rho_hat is positive semidefinite try: - cholesky(rho_hat, lower=True) # if this works, we're good # otherwise, we can try to fix the matrix using SVD except np.linalg.LinAlgError: - try: - - U, s, _ = svd(rho_hat, ) + U, s, _ = svd( + rho_hat, + ) except np.linalg.LinAlgError: - # if this also fails, we give up return None @@ -441,7 +438,7 @@ def _get_std_corr_matrix(std_samples): np.fill_diagonal(rho_hat, 1.0) # check if we introduced any unreasonable values - if ((np.max(rho_hat) > 1.01) or (np.min(rho_hat) < -1.01)): + if (np.max(rho_hat) > 1.01) or (np.min(rho_hat) < -1.01): return None # round values to 1.0 and -1.0, if needed @@ -473,9 +470,18 @@ def _mvn_scale(x, rho): return b / a -def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, - dist_list, tr_limits, det_limits, censored_count, - enforce_bounds=False): +def _neg_log_likelihood( + params, + inits, + bnd_lower, + bnd_upper, + samples, + dist_list, + tr_limits, + det_limits, + censored_count, + enforce_bounds=False, +): """ Calculate the negative log likelihood of the given data samples given the parameter values and distribution information. 
@@ -540,8 +546,8 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, # calculate the marginal likelihoods for i, (theta_i, samples_i, tr_lim_i, dist_i) in enumerate( - zip(theta, samples, tr_limits, dist_list)): - + zip(theta, samples, tr_limits, dist_list) + ): # consider truncation if needed p_a, p_b = _get_limit_probs(tr_lim_i, dist_i, theta_i) # this is the probability mass within the @@ -552,8 +558,9 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, # Note that we are performing this without any transformation to be able # to respect truncation limits if dist_i in {'normal', 'lognormal'}: - likelihoods[i] = norm.pdf( - samples_i, loc=theta_i[0], scale=theta_i[1]) / tr_alpha + likelihoods[i] = ( + norm.pdf(samples_i, loc=theta_i[0], scale=theta_i[1]) / tr_alpha + ) # transform every sample into standard normal space std_samples = _get_std_samples(samples, theta, tr_limits, dist_list) @@ -568,13 +575,12 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, # likelihoods related to censoring need to be handled together if censored_count > 0: - det_lower = np.zeros(n_dims) det_upper = np.zeros(n_dims) for i, (theta_i, tr_lim_i, det_lim_i, dist_i) in enumerate( - zip(theta, tr_limits, det_limits, dist_list)): - + zip(theta, tr_limits, det_limits, dist_list) + ): # prepare the standardized truncation and detection limits p_a, p_b = _get_limit_probs(tr_lim_i, dist_i, theta_i) p_l, p_u = _get_limit_probs(det_lim_i, dist_i, theta_i) @@ -584,15 +590,16 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, p_l, p_u = [(lim - p_a) / (p_b - p_a) for lim in (p_l, p_u)] # transform limits to standard normal space - det_lower[i], det_upper[i] = norm.ppf([p_l, p_u], loc=0., scale=1.) + det_lower[i], det_upper[i] = norm.ppf([p_l, p_u], loc=0.0, scale=1.0) # get the likelihood of getting a non-censored sample given the # detection limits and the correlation matrix det_alpha, eps_alpha = mvn_orthotope_density( - np.zeros(n_dims), rho_hat, det_lower, det_upper) + np.zeros(n_dims), rho_hat, det_lower, det_upper + ) # Make sure det_alpha is estimated with sufficient accuracy - if det_alpha <= 100. * eps_alpha: + if det_alpha <= 100.0 * eps_alpha: return 1e10 # make sure that the likelihood of censoring a sample is positive @@ -618,8 +625,10 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, likelihoods = np.clip(likelihoods, a_min=np.nextafter(0, 1), a_max=None) # calculate the total negative log likelihood - NLL = -(np.sum(np.log(likelihoods)) # from samples - + censored_count * np.log(cen_likelihood)) # censoring influence + NLL = -( + np.sum(np.log(likelihoods)) # from samples + + censored_count * np.log(cen_likelihood) + ) # censoring influence # normalize the NLL with the sample count NLL = NLL / samples.size @@ -629,11 +638,15 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, return NLL -def fit_distribution_to_sample(raw_samples, distribution, - truncation_limits=(np.nan, np.nan), - censored_count=0, detection_limits=(np.nan, np.nan), - multi_fit=False, - logger_object=None): +def fit_distribution_to_sample( + raw_samples, + distribution, + truncation_limits=(np.nan, np.nan), + censored_count=0, + detection_limits=(np.nan, np.nan), + multi_fit=False, + logger_object=None, +): """ Fit a distribution to sample using maximum likelihood estimation. 
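For orientation, a minimal call sketch for the keyword-style `fit_distribution_to_sample` signature shown above. The sample values are illustrative, and it is assumed here that a single distribution name can be paired with a one-dimensional sample:

import numpy as np
from pelicun import uq

# illustrative positive-valued sample for a lognormal fit
sample = np.array([0.8, 0.9, 1.0, 1.1, 1.2, 1.3])

# keyword arguments mirror the reformatted signature above; with no
# truncation or detection limits the fit reduces to the method-of-moments
# estimate discussed further down in this function. The result is expected
# to hold the fitted marginal parameters (plus a correlation estimate for
# multivariate input).
result = uq.fit_distribution_to_sample(
    sample,
    'lognormal',
    truncation_limits=(np.nan, np.nan),
    censored_count=0,
    detection_limits=(np.nan, np.nan),
    multi_fit=False,
)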
@@ -720,9 +733,7 @@ def fit_distribution_to_sample(raw_samples, distribution, # Convert samples and limits to log space if the distribution is lognormal for d_i, distr in enumerate(dist_list): - if distr == 'lognormal': - samples[d_i] = np.log(samples[d_i]) for lim in range(2): @@ -739,7 +750,6 @@ def fit_distribution_to_sample(raw_samples, distribution, sig_init = np.ones_like(mu_init) * np.nan for d_i, distr in enumerate(dist_list): - if distr in {'normal', 'normal-stdev', 'lognormal'}: # use the first two moments mu_init[d_i] = np.mean(samples[d_i]) @@ -751,8 +761,7 @@ def fit_distribution_to_sample(raw_samples, distribution, # replace zero standard dev with negligible standard dev sig_zero_id = np.where(sig_init == 0.0)[0] - sig_init[sig_zero_id] = (1e-6 * np.abs(mu_init[sig_zero_id]) - + np.nextafter(0, 1)) + sig_init[sig_zero_id] = 1e-6 * np.abs(mu_init[sig_zero_id]) + np.nextafter(0, 1) # prepare a vector of initial values # Note: The actual optimization uses zeros as initial parameters to @@ -773,10 +782,11 @@ def fit_distribution_to_sample(raw_samples, distribution, # There is nothing to gain from a time-consuming optimization if.. # the number of samples is too small - if ((n_samples < 3) or ( - # there are no truncation or detection limits involved - np.all(np.isnan(tr_limits)) and np.all(np.isnan(det_limits)))): - + if (n_samples < 3) or ( + # there are no truncation or detection limits involved + np.all(np.isnan(tr_limits)) + and np.all(np.isnan(det_limits)) + ): # In this case, it is typically hard to improve on the method of # moments estimates for the parameters of the marginal distributions theta = inits @@ -784,64 +794,87 @@ def fit_distribution_to_sample(raw_samples, distribution, # Otherwise, we run the optimization that aims to find the parameters that # maximize the likelihood of observing the samples else: - # First, optimize for each marginal independently for dim in range(n_dims): - - inits_i = inits[dim:dim + 1] + inits_i = inits[dim : dim + 1] # Censored samples are only considered in the following step, but # we fit a truncated distribution if there are censored samples to # make it easier to fit the censored distribution later. 
tr_limits_i = [np.nan, np.nan] for lim in range(2): - if ((np.isnan(tr_limits[dim][lim])) and ( - not np.isnan(det_limits[dim][lim]))): + if (np.isnan(tr_limits[dim][lim])) and ( + not np.isnan(det_limits[dim][lim]) + ): tr_limits_i[lim] = det_limits[dim][lim] elif not np.isnan(det_limits[dim][lim]): if lim == 0: - tr_limits_i[lim] = np.min([tr_limits[dim][lim], - det_limits[dim][lim]]) + tr_limits_i[lim] = np.min( + [tr_limits[dim][lim], det_limits[dim][lim]] + ) elif lim == 1: - tr_limits_i[lim] = np.max([tr_limits[dim][lim], - det_limits[dim][lim]]) + tr_limits_i[lim] = np.max( + [tr_limits[dim][lim], det_limits[dim][lim]] + ) else: tr_limits_i[lim] = tr_limits[dim][lim] - out_m_i = minimize(_neg_log_likelihood, - np.zeros(inits[dim].size), - args=(inits_i, - bnd_lower[dim], - bnd_upper[dim], - samples[dim:dim + 1], - [dist_list[dim], ], - [tr_limits_i, ], - [np.nan, np.nan], - 0, True,), - method='BFGS', - options={'maxiter': 50} - ) + out_m_i = minimize( + _neg_log_likelihood, + np.zeros(inits[dim].size), + args=( + inits_i, + bnd_lower[dim], + bnd_upper[dim], + samples[dim : dim + 1], + [ + dist_list[dim], + ], + [ + tr_limits_i, + ], + [np.nan, np.nan], + 0, + True, + ), + method='BFGS', + options={'maxiter': 50}, + ) out = out_m_i.x.reshape(inits_i.shape) - theta = _get_theta(out, inits_i, [dist_list[dim], ]) + theta = _get_theta( + out, + inits_i, + [ + dist_list[dim], + ], + ) inits[dim] = theta[0] # Second, if multi_fit is requested or there are censored samples, # we attempt the multivariate fitting using the marginal results as # initial parameters. if multi_fit or (censored_count > 0): - bnd_lower = bnd_lower.flatten() bnd_upper = bnd_upper.flatten() - out_m = minimize(_neg_log_likelihood, - np.zeros(inits.size), - args=(inits, bnd_lower, bnd_upper, samples, - dist_list, tr_limits, det_limits, - censored_count, True,), - method='BFGS', - options={'maxiter': 50} - ) + out_m = minimize( + _neg_log_likelihood, + np.zeros(inits.size), + args=( + inits, + bnd_lower, + bnd_upper, + samples, + dist_list, + tr_limits, + det_limits, + censored_count, + True, + ), + method='BFGS', + options={'maxiter': 50}, + ) out = out_m.x.reshape(inits.shape) theta = _get_theta(out, inits, dist_list) @@ -852,8 +885,7 @@ def fit_distribution_to_sample(raw_samples, distribution, # Calculate rho in the standard normal space because we will generate new # samples using that type of correlation (i.e., Gaussian copula) std_samples = _get_std_samples(samples, theta, tr_limits, dist_list) - if True in np.isnan(std_samples) or \ - True in np.isinf(std_samples): + if True in np.isnan(std_samples) or True in np.isinf(std_samples): raise ValueError( 'Something went wrong.' '\n' @@ -872,11 +904,14 @@ def fit_distribution_to_sample(raw_samples, distribution, logger_object.msg( "\nWARNING: Demand sample size too small to reliably estimate " "the correlation matrix. Assuming uncorrelated demands.", - prepend_timestamp=False, prepend_blank_space=False) + prepend_timestamp=False, + prepend_blank_space=False, + ) else: print( "\nWARNING: Demand sample size too small to reliably estimate " - "the correlation matrix. Assuming uncorrelated demands.") + "the correlation matrix. Assuming uncorrelated demands." 
+ ) for d_i, distr in enumerate(dist_list): # Convert mean back to linear space if the distribution is lognormal @@ -925,7 +960,6 @@ def _OLS_percentiles(params, values, perc, family): """ if family == 'normal': - theta_0 = params[0] theta_1 = params[1] @@ -935,7 +969,6 @@ def _OLS_percentiles(params, values, perc, family): val_hat = norm.ppf(perc, loc=theta_0, scale=theta_1) elif family == 'lognormal': - theta_0 = params[0] theta_1 = params[1] @@ -984,43 +1017,48 @@ def fit_distribution_to_percentiles(values, percentiles, families): extreme_id = np.argmax(percentiles - 0.5) for family in families: - - inits = [values[median_id], ] + inits = [ + values[median_id], + ] if family == 'normal': inits.append( - (np.abs(values[extreme_id] - inits[0]) - / np.abs(norm.ppf(percentiles[extreme_id], - loc=0, scale=1)))) + ( + np.abs(values[extreme_id] - inits[0]) + / np.abs(norm.ppf(percentiles[extreme_id], loc=0, scale=1)) + ) + ) elif family == 'lognormal': inits.append( - (np.abs(np.log(values[extreme_id] / inits[0])) - / np.abs(norm.ppf(percentiles[extreme_id], - loc=0, scale=1)))) + ( + np.abs(np.log(values[extreme_id] / inits[0])) + / np.abs(norm.ppf(percentiles[extreme_id], loc=0, scale=1)) + ) + ) - out_list.append(minimize(_OLS_percentiles, inits, - args=(values, percentiles, family), - method='BFGS')) + out_list.append( + minimize( + _OLS_percentiles, + inits, + args=(values, percentiles, family), + method='BFGS', + ) + ) best_out_id = np.argmin([out.fun for out in out_list]) return families[best_out_id], out_list[best_out_id].x -class RandomVariable: +class BaseRandomVariable(ABC): """ - Description + Base abstract class for different types of random variables. Parameters ---------- name: string A unique string that identifies the random variable. - distribution: {'normal', 'lognormal', 'multinomial', 'custom', - 'empirical', 'coupled_empirical', 'uniform', 'deterministic', - 'multilinear_CDF'}, optional - Defines the type of probability distribution for the random - variable. theta: float scalar or ndarray, optional Set of parameters that define the Cumulative Distribution Function (CDF) of the variable given its distribution @@ -1029,26 +1067,11 @@ class RandomVariable: normal - mean, standard deviation; lognormal - median, log standard deviation; uniform - a, b, the lower and upper bounds of the distribution; - multinomial - likelihood of each unique event (the last event's - likelihood is adjusted automatically to ensure the likelihoods sum up - to one); + multinomial - ; custom - according to the custom expression provided; empirical and coupled_empirical - N/A; deterministic - the deterministic value assigned to the variable. - multilinear_CDF - a Nx2 numpy array defining the - vertices of a multilinear CDF curve in the form ((X_0, 0.00), - (X_1, Y_1), ..., (X_n, 1.00)). The first Y value has to be - 0.00 and the last 1.00 for a valid CDF, and the X_i's as well - as the Y_i's should be in increasing order, otherwise an error - is raised. - truncation_limits: float ndarray, optional - Defines the np.array((a, b)) truncation limits for the - distribution. Use np.nan to assign no limit in one direction, - like so: np.array((a, np.nan)), or np.array((np.nan, b)). - custom_expr: string, optional - Provide an expression that is a Python syntax for a custom CDF. The - controlling variable shall be "x" and the parameters shall be "p1", - "p2", etc. + multilinear_CDF - f_map: function, optional A user-defined function that is applied on the realizations before returning a sample. 
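The distribution-specific classes introduced in this patch replace the single `RandomVariable` constructor. A brief usage sketch mirroring the updated tests (the variable names and values here are illustrative):

import numpy as np
from pelicun import uq

# previously: uq.RandomVariable('demo_rv', 'normal', theta=..., truncation_limits=...)
rv = uq.NormalRandomVariable(
    'demo_rv',
    theta=np.array((1.0, 0.5)),
    truncation_limits=np.array((np.nan, 1.20)),
)

# assign the controlling uniform sample, then sample via the inverse CDF;
# for these classes inverse_transform_sampling no longer takes a sample_size
rv.uni_sample = np.array((0.10, 0.20, 0.30))
rv.inverse_transform_sampling()
print(rv.sample)

# string-to-class lookups go through the new helper
rv_class = uq.rv_class_map('normal')  # NormalRandomVariable, per the new test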
@@ -1057,16 +1080,12 @@ class RandomVariable: variable will be perfectly correlated with its anchor. Note that the attributes of this variable and its anchor do not have to be identical. + """ def __init__( self, name, - distribution, - theta=np.nan, - truncation_limits=np.nan, - custom_expr=None, - raw_samples=None, f_map=None, anchor=None, ): @@ -1086,464 +1105,852 @@ def __init__( """ self.name = name - - if pd.isna(distribution): - distribution = 'deterministic' - - if ( - distribution not in ['empirical', 'coupled_empirical'] - ) and (np.all(np.isnan(theta))): - - raise ValueError( - f"A random variable that follows a {distribution} distribution " - f"is characterized by a set of parameters (theta). The " - f"parameters need to be provided when the RV is created." - ) - - if distribution == 'multinomial': - if np.sum(theta) > 1: - raise ValueError( - f"The set of p values provided for a multinomial " - f"distribution shall sum up to less than or equal to 1.0. " - f"The provided values sum up to {np.sum(theta)}. p = " - f"{theta} ." - ) - - if distribution == 'multilinear_CDF': - y_1 = theta[0, 1] - if y_1 != 0.00: - raise ValueError( - "For multilinear CDF random variables, " - "y_1 should be set to 0.00" - ) - y_n = theta[-1, 1] - if y_n != 1.00: - raise ValueError( - "For multilinear CDF random variables, " - "y_n should be set to 1.00" - ) - - x_s = theta[:, 0] - if not np.array_equal(np.sort(x_s), x_s): - raise ValueError( - "For multilinear CDF random variables, " - "Xs should be specified in ascending order" - ) - if np.any(np.isclose(np.diff(x_s), 0.00)): - raise ValueError( - "For multilinear CDF random variables, " - "Xs should be specified in strictly ascending order" - ) - - y_s = theta[:, 1] - if not np.array_equal(np.sort(y_s), y_s): - raise ValueError( - "For multilinear CDF random variables, " - "Ys should be specified in ascending order" - ) - - if np.any(np.isclose(np.diff(y_s), 0.00)): - raise ValueError( - "For multilinear CDF random variables, " - "Ys should be specified in strictly ascending order" - ) - if np.any(~np.isnan(truncation_limits)): - raise ValueError( - "Truncation limits not supported " - "for multilinear CDF random variables." - ) - - # save the other parameters internally - self._distribution = distribution - self._theta = np.atleast_1d(theta) - self._truncation_limits = truncation_limits - self._custom_expr = custom_expr - self._f_map = f_map - self._raw_samples = np.atleast_1d(raw_samples) + self.distribution = None + # self.theta = np.atleast_1d(theta) + # self.truncation_limits = truncation_limits + # self._raw_samples = np.atleast_1d(raw_samples) + self.f_map = f_map self._uni_samples = None - self._RV_set = None + self.RV_set = None self._sample_DF = None - + self._sample = None if anchor is None: - self._anchor = self + self.anchor = self else: - self._anchor = anchor + self.anchor = anchor @property - def distribution(self): + def sample(self): """ - Return the assigned probability distribution type. + Return the empirical or generated sample. """ - return self._distribution + if self.f_map is not None: + return self.f_map(self._sample) + return self._sample - @property - def theta(self): + @sample.setter + def sample(self, value): """ - Return the assigned probability distribution parameters. 
+ Assign a sample to the random variable """ - return self._theta + self._sample = value + self._sample_DF = pd.Series(value) - @theta.setter - def theta(self, value): + @property + def sample_DF(self): """ - Define the parameters of the distribution of the random variable + Return the empirical or generated sample in a pandas Series. """ - self._theta = value + if self.f_map is not None: + return self._sample_DF.apply(self.f_map) + + return self._sample_DF @property - def truncation_limits(self): + def uni_sample(self): """ - Return the assigned truncation limits. + Return the sample from the controlling uniform distribution. """ - return self._truncation_limits + return self.anchor._uni_samples - @property - def custom_expr(self): + @uni_sample.setter + def uni_sample(self, value): """ - Return the assigned custom expression for CDF. + Assign the controlling sample to the random variable + + Parameters + ---------- + value: float ndarray + An array of floating point values in the [0, 1] domain. """ - return self._custom_expr + self._uni_samples = value - @property - def RV_set(self): + +class CommonRandomVariable(BaseRandomVariable): + """ + Random variable that needs `values` in `inverse_transform` + """ + + @abstractmethod + def inverse_transform(self, values): """ - Return the RV_set this RV is a member of + Uses inverse probability integral transformation on the + provided values. + """ - return self._RV_set - @RV_set.setter - def RV_set(self, value): + def inverse_transform_sampling(self): """ - Assign an RV_set to this RV + Creates a sample using inverse probability integral + transformation. """ - self._RV_set = value + if self.uni_sample is None: + raise ValueError('No available uniform sample.') + self.sample = self.inverse_transform(self.uni_sample) - @property - def sample(self): + +class SampleSizeRandomVariable(BaseRandomVariable): + """ + Random variable that needs `sample_size` in `inverse_transform` + """ + + @abstractmethod + def inverse_transform(self, sample_size): """ - Return the empirical or generated sample. + Uses inverse probability integral transformation on the + provided values. + """ - if self._f_map is not None: - return self._f_map(self._sample) + def inverse_transform_sampling(self, sample_size): + """ + Creates a sample using inverse probability integral + transformation. + """ + self.sample = self.inverse_transform(sample_size) - # else: - return self._sample - @property - def sample_DF(self): +class NormalRandomVariable(CommonRandomVariable): + """ + Normal random variable. + + """ + + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): """ - Return the empirical or generated sample in a pandas Series. + Instantiates a normal random variable. + + Parameters + ---------- + theta: 2-element float ndarray + Set of parameters that define the Cumulative Distribution + Function (CDF) of the variable: Mean, coefficient of + variation. + truncation_limits: float ndarray, optional + Defines the np.array((a, b)) truncation limits for the + distribution. Use np.nan to assign no limit in one direction, + like so: np.array((a, np.nan)), or np.array((np.nan, b)). + + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'normal' + self.theta = np.atleast_1d(theta) + self.truncation_limits = truncation_limits + + def cdf(self, values): """ - if self._f_map is not None: + Returns the Cumulative Density Function (CDF) at the specified + values. 
- return self._sample_DF.apply(self._f_map) + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF - # else: - return self._sample_DF + Returns + ------- + + 1D float ndarray + CDF values - @sample.setter - def sample(self, value): - """ - Assign a sample to the random variable """ - self._sample = value - self._sample_DF = pd.Series(value) + mu, cov = self.theta[:2] + sig = np.abs(mu) * cov - @property - def uni_sample(self): + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits + + if np.isnan(a): + a = -np.inf + if np.isnan(b): + b = np.inf + + p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] + + # cap the values at the truncation limits + values = np.minimum(np.maximum(values, a), b) + + # get the cdf from a non-truncated normal + p_vals = norm.cdf(values, loc=mu, scale=sig) + + # adjust for truncation + result = (p_vals - p_a) / (p_b - p_a) + + else: + result = norm.cdf(values, loc=mu, scale=sig) + + return result + + def inverse_transform(self, values): """ - Return the sample from the controlling uniform distribution. + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF + + Returns + ------- + 1D float ndarray + Inverse CDF values + + Raises + ------ + ValueError + If the probability massss within the truncation limits is + too small + """ - return self._anchor._uni_samples - @uni_sample.setter - def uni_sample(self, value): + mu, cov = self.theta[:2] + sig = np.abs(mu) * cov + + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits + + if np.isnan(a): + a = -np.inf + if np.isnan(b): + b = np.inf + + p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] + + if p_b - p_a == 0: + raise ValueError( + "The probability mass within the truncation limits is " + "too small and the truncated distribution cannot be " + "sampled with sufficiently high accuracy. This is most " + "probably due to incorrect truncation limits set for " + "the distribution." + ) + + result = norm.ppf(values * (p_b - p_a) + p_a, loc=mu, scale=sig) + + else: + result = norm.ppf(values, loc=mu, scale=sig) + + return result + + +class LogNormalRandomVariable(CommonRandomVariable): + """ + Lognormal random variable. + + """ + + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): """ - Assign the controlling sample to the random variable + Instantiates a lognormal random variable. Parameters ---------- - value: float ndarray - An array of floating point values in the [0, 1] domain. + theta: 2-element float ndarray + Set of parameters that define the Cumulative Distribution + Function (CDF) of the variable: Median, dispersion. + truncation_limits: float ndarray, optional + Defines the np.array((a, b)) truncation limits for the + distribution. Use np.nan to assign no limit in one direction, + like so: np.array((a, np.nan)), or np.array((np.nan, b)). + + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'lognormal' + self.theta = np.atleast_1d(theta) + self.truncation_limits = truncation_limits + + def cdf(self, values): """ - self._uni_samples = value + Returns the Cumulative Density Function (CDF) at the specified + values. 
+ + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF + + Returns + ------- + + 1D float ndarray + CDF values - @property - def anchor(self): """ - Return the anchor of the variable (if any). + theta, beta = self.theta[:2] + + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits + + if np.isnan(a): + a = np.nextafter(0, 1) + if np.isnan(b): + b = np.inf + + p_a, p_b = [ + norm.cdf((np.log(lim) - np.log(theta)) / beta) for lim in (a, b) + ] + + # cap the values at the truncation limits + values = np.minimum(np.maximum(values, a), b) + + # get the cdf from a non-truncated lognormal + p_vals = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) + + # adjust for truncation + result = (p_vals - p_a) / (p_b - p_a) + + else: + values = np.maximum(values, np.nextafter(0, 1)) + + result = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) + + return result + + def inverse_transform(self, values): """ - return self._anchor + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF + + Returns + ------- + 1D float ndarray + Inverse CDF values - @anchor.setter - def anchor(self, value): """ - Assign an anchor to the random variable + + theta, beta = self.theta[:2] + + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits + + if np.isnan(a): + a = np.nextafter(0, 1) + else: + a = np.maximum(np.nextafter(0, 1), a) + + if np.isnan(b): + b = np.inf + + p_a, p_b = [ + norm.cdf((np.log(lim) - np.log(theta)) / beta) for lim in (a, b) + ] + + result = np.exp( + norm.ppf(values * (p_b - p_a) + p_a, loc=np.log(theta), scale=beta) + ) + + else: + result = np.exp(norm.ppf(values, loc=np.log(theta), scale=beta)) + + return result + + +class UniformRandomVariable(CommonRandomVariable): + """ + Uniform random variable. + + """ + + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): """ - self._anchor = value + Instantiates a uniform random variable. + + Parameters + ---------- + theta: 2-element float ndarray + Set of parameters that define the Cumulative Distribution + Function (CDF) of the variable: min, max. + truncation_limits: float ndarray, optional + Defines the np.array((a, b)) truncation limits for the + distribution. Use np.nan to assign no limit in one direction, + like so: np.array((a, np.nan)), or np.array((np.nan, b)). + + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'uniform' + self.theta = np.atleast_1d(theta) + self.truncation_limits = truncation_limits def cdf(self, values): """ - Returns the cdf at the given values + Returns the Cumulative Density Function (CDF) at the specified + values. 
+ + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF + + Returns + ------- + + 1D float ndarray + CDF values + """ - result = None + a, b = self.theta[:2] - if self.distribution == 'normal': - mu, cov = self.theta[:2] - sig = np.abs(mu) * cov + if np.isnan(a): + a = -np.inf + if np.isnan(b): + b = np.inf + + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + result = uniform.cdf(values, loc=a, scale=(b - a)) + + return result - if np.isnan(a): - a = -np.inf - if np.isnan(b): - b = np.inf + def inverse_transform(self, values): + """ + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF - p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] + Returns + ------- + 1D float ndarray + Inverse CDF values - # cap the values at the truncation limits - values = np.minimum(np.maximum(values, a), b) + """ + a, b = self.theta[:2] - # get the cdf from a non-truncated normal - p_vals = norm.cdf(values, loc=mu, scale=sig) + if np.isnan(a): + a = -np.inf + if np.isnan(b): + b = np.inf - # adjust for truncation - result = (p_vals - p_a) / (p_b - p_a) + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits - else: - result = norm.cdf(values, loc=mu, scale=sig) + result = uniform.ppf(values, loc=a, scale=(b - a)) - elif self.distribution == 'lognormal': - theta, beta = self.theta[:2] + return result - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits - if np.isnan(a): - a = np.nextafter(0, 1) - if np.isnan(b): - b = np.inf +class MultilinearCDFRandomVariable(CommonRandomVariable): + """ + Multilinear CDF random variable. This RV is defined by specifying + the points that define its Cumulative Density Function (CDF), and + linear interpolation between them. - p_a, p_b = [norm.cdf((np.log(lim) - np.log(theta)) / beta) - for lim in (a, b)] + """ - # cap the values at the truncation limits - values = np.minimum(np.maximum(values, a), b) + def __init__( + self, + name, + theta, + f_map=None, + anchor=None, + ): + """ + Instantiates a "multilinear CDF" random variable. - # get the cdf from a non-truncated lognormal - p_vals = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) + Parameters + ---------- + theta: 2D float ndarray + A Nx2 numpy array defining the vertices of a multilinear CDF + curve in the form ((X_0, 0.00), (X_1, Y_1), ..., (X_n, + 1.00)). The first Y value has to be 0.00 and the last 1.00 + for a valid CDF, and the X_i's as well as the Y_i's should + be in increasing order, otherwise an error is raised. 
+ + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'multilinear_CDF' - # adjust for truncation - result = (p_vals - p_a) / (p_b - p_a) + y_1 = theta[0, 1] + if y_1 != 0.00: + raise ValueError( + "For multilinear CDF random variables, y_1 should be set to 0.00" + ) + y_n = theta[-1, 1] + if y_n != 1.00: + raise ValueError( + "For multilinear CDF random variables, y_n should be set to 1.00" + ) - else: - values = np.maximum(values, np.nextafter(0, 1)) + x_s = theta[:, 0] + if not np.array_equal(np.sort(x_s), x_s): + raise ValueError( + "For multilinear CDF random variables, " + "Xs should be specified in ascending order" + ) + if np.any(np.isclose(np.diff(x_s), 0.00)): + raise ValueError( + "For multilinear CDF random variables, " + "Xs should be specified in strictly ascending order" + ) - result = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) + y_s = theta[:, 1] + if not np.array_equal(np.sort(y_s), y_s): + raise ValueError( + "For multilinear CDF random variables, " + "Ys should be specified in ascending order" + ) - elif self.distribution == 'uniform': - a, b = self.theta[:2] + if np.any(np.isclose(np.diff(y_s), 0.00)): + raise ValueError( + "For multilinear CDF random variables, " + "Ys should be specified in strictly ascending order" + ) - if np.isnan(a): - a = -np.inf - if np.isnan(b): - b = np.inf + self.theta = np.atleast_1d(theta) - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + def cdf(self, values): + """ + Returns the Cumulative Density Function (CDF) at the specified + values. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF - result = uniform.cdf(values, loc=a, scale=(b - a)) + Returns + ------- - elif self.distribution == 'multilinear_CDF': + 1D float ndarray + CDF values - x_i = [-np.inf] + [x[0] for x in self.theta] + [np.inf] - y_i = [0.00] + [x[1] for x in self.theta] + [1.00] + """ + x_i = [-np.inf] + [x[0] for x in self.theta] + [np.inf] + y_i = [0.00] + [x[1] for x in self.theta] + [1.00] - ifun = interp1d(x_i, y_i, kind='linear') + ifun = interp1d(x_i, y_i, kind='linear') - result = ifun(values) + result = ifun(values) return result - def inverse_transform(self, values=None, sample_size=None): + def inverse_transform(self, values): """ - Uses inverse probability integral transformation on the provided values. + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF + + Returns + ------- + 1D float ndarray + Inverse CDF values Raises ------ ValueError - If no values are specified. - ValueError - If problematic truncation limits are assigned. + If the probability massss within the truncation limits is + too small + """ - result = None - if self.distribution == 'normal': + x_i = [x[0] for x in self.theta] + y_i = [x[1] for x in self.theta] - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling a " - "normal random variable.") + # define the inverse CDF + ifun = interp1d(y_i, x_i, kind='linear') + # note: by definition, y_i /has/ to include the values + # 0.00 and 1.00, and `values` have to be in the range + # [0.00, 1.00], so there is no need to handle edge cases + # here (i.e., extrapolate). 
- # else: + result = ifun(values) - mu, cov = self.theta[:2] - sig = np.abs(mu) * cov + return result - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits - if np.isnan(a): - a = -np.inf - if np.isnan(b): - b = np.inf +class EmpiricalRandomVariable(CommonRandomVariable): + """ + Empirical random variable. - p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] + """ - if p_b - p_a == 0: - raise ValueError( - "The probability mass within the truncation limits is " - "too small and the truncated distribution cannot be " - "sampled with sufficiently high accuracy. This is most " - "probably due to incorrect truncation limits set for " - "the distribution." - ) + def __init__( + self, + name, + raw_samples, + f_map=None, + anchor=None, + ): + """ + Instantiates an empirical random variable. - result = norm.ppf(values * (p_b - p_a) + p_a, - loc=mu, scale=sig) + Parameters + ---------- + raw_samples: 1D float ndarray + Samples from which to draw empirical realizations. - else: - result = norm.ppf(values, loc=mu, scale=sig) + """ - elif self.distribution == 'lognormal': + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'empirical' + self._raw_samples = np.atleast_1d(raw_samples) - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling a " - "lognormal random variable.") + def inverse_transform(self, values): + """ + Maps given values to their corresponding positions within the + empirical data array, simulating an inverse transformation + based on the empirical distribution. This can be seen as a + simple form of inverse CDF where values represent normalized + positions within the empirical data set. - # else: + Parameters + ---------- + values: 1D float ndarray + Normalized values between 0 and 1, representing positions + within the empirical data distribution. - theta, beta = self.theta[:2] + Returns + ------- + 1D float ndarray + The empirical data points corresponding to the given + normalized positions. - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + """ + s_ids = (values * len(self._raw_samples)).astype(int) + result = self._raw_samples[s_ids] + return result - if np.isnan(a): - a = np.nextafter(0, 1) - else: - a = np.maximum(np.nextafter(0, 1), a) - if np.isnan(b): - b = np.inf +class CoupledEmpiricalRandomVariable(SampleSizeRandomVariable): + """ + Coupled empirical random variable. - p_a, p_b = [norm.cdf((np.log(lim) - np.log(theta)) / beta) - for lim in (a, b)] + """ - result = np.exp( - norm.ppf(values * (p_b - p_a) + p_a, - loc=np.log(theta), scale=beta)) + def __init__( + self, + name, + raw_samples, + f_map=None, + anchor=None, + ): + """ + Instantiates a coupled empirical random variable. - else: - result = np.exp(norm.ppf(values, loc=np.log(theta), scale=beta)) + Parameters + ---------- + raw_samples: 1D float ndarray + Samples from which to draw empirical realizations. - elif self.distribution == 'uniform': + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'coupled_empirical' + self._raw_samples = np.atleast_1d(raw_samples) - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling a " - "uniform random variable.") + def inverse_transform(self, sample_size): + """ + Generates a new sample array from the existing empirical data + by repeating the dataset until it matches the requested sample + size. 
- # else: + Parameters + ---------- + sample_size: int + The desired size of the sample array to be generated. It + dictates how many times the original dataset will be + repeated to match or exceed this size, after which the array + is trimmed to precisely match the requested size. - a, b = self.theta[:2] + Returns + ------- + 1D float ndarray + A new sample array derived from repeating the original + dataset. - if np.isnan(a): - a = -np.inf - if np.isnan(b): - b = np.inf + """ - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + raw_sample_count = len(self._raw_samples) + new_sample = np.tile( + self._raw_samples, int(sample_size / raw_sample_count) + 1 + ) + result = new_sample[:sample_size] + return result - result = uniform.ppf(values, loc=a, scale=(b - a)) - elif self.distribution == 'empirical': +class DeterministicRandomVariable(SampleSizeRandomVariable): + """ + Deterministic random variable. - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling an " - "empirical random variable.") + """ - # else: + def __init__( + self, + name, + theta, + f_map=None, + anchor=None, + ): + """ + Instantiates a deterministic random variable. This behaves + like a RandomVariable object but represents a specific, + deterministic value. - s_ids = (values * len(self._raw_samples)).astype(int) - result = self._raw_samples[s_ids] + Parameters + ---------- + theta: 1-element float ndarray + The value. - elif self.distribution == 'coupled_empirical': + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'deterministic' + self.theta = np.atleast_1d(theta) - if sample_size is None: - raise ValueError( - "Missing sample size information for sampling a coupled " - "empirical random variable.") - # else: - raw_sample_count = len(self._raw_samples) - new_sample = np.tile(self._raw_samples, - int(sample_size / raw_sample_count) + 1) - result = new_sample[:sample_size] + def inverse_transform(self, sample_size): + """ + Generates samples that correspond to the value. - elif self.distribution == 'deterministic': + Parameters + ---------- + sample_size: int + The desired size of the sample array to be generated. - if sample_size is None: - raise ValueError( - "Missing sample size information for sampling a " - "deterministic random variable.") - # else: - result = np.full(sample_size, self.theta[0]) + Returns + ------- + 1D float ndarray + Sample array containing the deterministic value. - elif self.distribution == 'multinomial': + """ - if values is None: - raise ValueError( - "Missing uniform sample for sampling a multinomial random " - "variable.") + result = np.full(sample_size, self.theta[0]) + return result - # else: - p_cum = np.cumsum(self.theta)[:-1] +class MultinomialRandomVariable(CommonRandomVariable): + """ + Multinomial random variable. - samples = values + """ - for i, p_i in enumerate(p_cum): - samples[samples < p_i] = 10 + i - samples[samples <= 1.0] = 10 + len(p_cum) + def __init__( + self, + name, + theta, + f_map=None, + anchor=None, + ): + """ + Instantiates a multinomial random variable. 
- result = samples - 10 + Parameters + ---------- + theta: 2-element float ndarray + Likelihood of each unique event (the last event's likelihood + is adjusted automatically to ensure the likelihoods sum up + to one) - elif self.distribution == 'multilinear_CDF': + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'multinomial' - x_i = [x[0] for x in self.theta] - y_i = [x[1] for x in self.theta] + if np.sum(theta) > 1.00: + raise ValueError( + f"The set of p values provided for a multinomial " + f"distribution shall sum up to less than or equal to 1.0. " + f"The provided values sum up to {np.sum(theta)}. p = " + f"{theta} ." + ) - # define the inverse CDF - ifun = interp1d(y_i, x_i, kind='linear') - # note: by definition, y_i /has/ to include the values - # 0.00 and 1.00, and `values` have to be in the range - # [0.00, 1.00], so there is no need to handle edge cases - # here (i.e., extrapolate). + self.theta = np.atleast_1d(theta) - result = ifun(values) + def inverse_transform(self, values): + """ + Transforms continuous values into discrete events based + on the cumulative probabilities of the multinomial + distribution derived by `theta`. - return result + Parameters + ---------- + values: 1D float ndarray + Continuous values to be transformed into discrete events + according to the multinomial distribution's cumulative + probabilities. + + Returns + ------- + 1D int ndarray + Discrete events corresponding to the input values. - def inverse_transform_sampling(self, sample_size=None): - """ - Creates a sample using inverse probability integral transformation. """ + p_cum = np.cumsum(self.theta)[:-1] + + for i, p_i in enumerate(p_cum): + values[values < p_i] = 10 + i + values[values <= 1.0] = 10 + len(p_cum) + + result = values - 10 - self.sample = self.inverse_transform(self.uni_sample, sample_size) + return result class RandomVariableSet: @@ -1568,11 +1975,9 @@ class RandomVariableSet: """ def __init__(self, name, RV_list, Rho): - self.name = name if len(RV_list) > 1: - # put the RVs in a dictionary for more efficient access reorder = np.argsort([RV.name for RV in RV_list]) self._variables = {RV_list[i].name: RV_list[i] for i in reorder} @@ -1616,9 +2021,7 @@ def Rho(self, var_subset=None): """ if var_subset is None: return self._Rho - # else: - var_ids = [list(self._variables.keys()).index(var_i) - for var_i in var_subset] + var_ids = [list(self._variables.keys()).index(var_i) for var_i in var_subset] return (self._Rho[var_ids]).T[var_ids] def apply_correlation(self): @@ -1645,12 +2048,13 @@ def apply_correlation(self): UC_RV = norm.cdf(NC_RV) except np.linalg.LinAlgError: - # if the Cholesky doesn't work, we need to use the more # time-consuming but more robust approach based on SVD N_RV = norm.ppf(U_RV) - U, s, _ = svd(self._Rho, ) + U, s, _ = svd( + self._Rho, + ) S = np.diagflat(np.sqrt(s)) NC_RV = (N_RV.T @ S @ U.T).T @@ -1714,7 +2118,6 @@ def orthotope_density(self, lower=np.nan, upper=np.nan, var_subset=None): # first, convert limits to standard normal values for var_i, var_name in enumerate(variables): - var = self._variables[var_name] if (np.any(~np.isnan(lower))) and (~np.isnan(lower[var_i])): @@ -1727,10 +2130,15 @@ def orthotope_density(self, lower=np.nan, upper=np.nan, var_subset=None): lower_std = lower_std.T upper_std = upper_std.T - OD = [mvn_orthotope_density(mu=np.zeros(len(variables)), - COV=self.Rho(var_subset), - lower=l_i, upper=u_i)[0] - for l_i, u_i in zip(lower_std, upper_std)] + OD = [ + mvn_orthotope_density( + 
mu=np.zeros(len(variables)), + COV=self.Rho(var_subset), + lower=l_i, + upper=u_i, + )[0] + for l_i, u_i in zip(lower_std, upper_std) + ] return np.asarray(OD) @@ -1772,8 +2180,7 @@ def add_RV(self, RV): Add a new random variable to the registry. """ if RV.name in self._variables: - raise ValueError( - f'RV {RV.name} already exists in the registry.') + raise ValueError(f'RV {RV.name} already exists in the registry.') self._variables.update({RV.name: RV}) @property @@ -1816,17 +2223,22 @@ def generate_sample(self, sample_size, method): # Generate a dictionary with IDs of the free (non-anchored and # non-deterministic) variables - RV_list = [RV_name for RV_name, RV in self.RV.items() if - ((RV.anchor == RV) or ( - RV.distribution in { - 'deterministic', 'coupled_empirical'}))] + RV_list = [ + RV_name + for RV_name, RV in self.RV.items() + if ( + (RV.anchor == RV) + or (RV.distribution in {'deterministic', 'coupled_empirical'}) + ) + ] RV_ID = {RV_name: ID for ID, RV_name in enumerate(RV_list)} RV_count = len(RV_ID) # Generate controlling samples from a uniform distribution for free RVs if 'LHS' in method: - bin_low = np.array([self._rng.permutation(sample_size) - for i in range(RV_count)]) + bin_low = np.array( + [self._rng.permutation(sample_size) for i in range(RV_count)] + ) if method == 'LHS_midpoint': U_RV = np.ones([RV_count, sample_size]) * 0.5 @@ -1850,4 +2262,50 @@ def generate_sample(self, sample_size, method): # Convert from uniform to the target distribution for every RV for RV in self.RV.values(): - RV.inverse_transform_sampling(sample_size) + if RV.__class__.__mro__[1] is CommonRandomVariable: + # no sample size needed, since that information is + # available in the uniform sample + RV.inverse_transform_sampling() + elif RV.__class__.__mro__[1] is SampleSizeRandomVariable: + RV.inverse_transform_sampling(sample_size) + else: + raise NotImplementedError('Unknown RV parent class.') + + +def rv_class_map(distribution_name): + """ + Maps convenient distribution names to their corresponding random + variable class. + + Parameters + ---------- + distribution_name: str + The name of a distribution. + + Returns + ------- + RandomVariable ojbect. + + Raises + ------ + ValueError: + If the given distribution name does not correspond to a + distribution class. 
+ + + """ + if pd.isna(distribution_name): + distribution_name = 'deterministic' + distribution_map = { + 'normal': NormalRandomVariable, + 'lognormal': LogNormalRandomVariable, + 'uniform': UniformRandomVariable, + 'multilinear_CDF': MultilinearCDFRandomVariable, + 'empirical': EmpiricalRandomVariable, + 'coupled_empirical': CoupledEmpiricalRandomVariable, + 'deterministic': DeterministicRandomVariable, + 'multinomial': MultinomialRandomVariable, + } + if distribution_name not in distribution_map: + raise ValueError(f'Unsupported distribution: {distribution_name}') + return distribution_map[distribution_name] From 2f6f7dd8f5aa1855caf1a45de958a0bbe0f4ad4e Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 10:47:15 -0800 Subject: [PATCH 05/48] Remove `bldg` term from repair models --- pelicun/assessment.py | 14 +-- pelicun/db.py | 8 +- pelicun/model/__init__.py | 2 +- pelicun/model/loss_model.py | 6 +- pelicun/resources/auto/Hazus_Earthquake_IM.py | 10 +- .../resources/auto/Hazus_Earthquake_Story.py | 6 +- pelicun/tests/test_assessment.py | 4 +- pelicun/tests/test_model.py | 52 ++++----- pelicun/tools/DL_calculation.py | 110 +++++++++--------- 9 files changed, 106 insertions(+), 106 deletions(-) diff --git a/pelicun/assessment.py b/pelicun/assessment.py index de4f1fd38..2b2752e34 100644 --- a/pelicun/assessment.py +++ b/pelicun/assessment.py @@ -68,7 +68,7 @@ class Assessment: ... damage: DamageModel ... - bldg_repair: BldgRepairModel + repair: RepairModel ... stories: int Number of stories. @@ -146,18 +146,18 @@ def damage(self): return self.damage @property - def bldg_repair(self): + def repair(self): """ - Return an BldgRepairModel object that manages the repair information. + Return a RepairModel object that manages the repair information. """ # pylint: disable = access-member-before-definition - if hasattr(self, '_bldg_repair'): - return self._bldg_repair + if hasattr(self, '_repair'): + return self._repair - self._bldg_repair = model.BldgRepairModel(self) - return self.bldg_repair + self._repair = model.RepairModel(self) + return self.repair def get_default_data(self, data_name): """ diff --git a/pelicun/db.py b/pelicun/db.py index 999cdde81..dd0b99ed6 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -45,12 +45,12 @@ .. 
autosummary:: create_FEMA_P58_fragility_db - create_FEMA_P58_bldg_repair_db + create_FEMA_P58_repair_db create_FEMA_P58_bldg_injury_db create_FEMA_P58_bldg_redtag_db create_Hazus_EQ_fragility_db - create_Hazus_EQ_bldg_repair_db + create_Hazus_EQ_repair_db create_Hazus_EQ_bldg_injury_db """ @@ -515,7 +515,7 @@ def create_FEMA_P58_fragility_db(source_file, print("Successfully parsed and saved the fragility data from FEMA P58") -def create_FEMA_P58_bldg_repair_db( +def create_FEMA_P58_repair_db( source_file, meta_file='', target_data_file='loss_repair_DB_FEMA_P58_2nd.csv', @@ -2209,7 +2209,7 @@ def create_Hazus_EQ_fragility_db(source_file, print("Successfully parsed and saved the fragility data from Hazus EQ") -def create_Hazus_EQ_bldg_repair_db(source_file, +def create_Hazus_EQ_repair_db(source_file, meta_file='', target_data_file='loss_repair_DB_Hazus_EQ_bldg.csv', target_meta_file='loss_repair_DB_Hazus_EQ_bldg.json', diff --git a/pelicun/model/__init__.py b/pelicun/model/__init__.py index f30053fdd..fdb212f1d 100644 --- a/pelicun/model/__init__.py +++ b/pelicun/model/__init__.py @@ -46,4 +46,4 @@ from .asset_model import AssetModel from .damage_model import DamageModel from .loss_model import LossModel -from .loss_model import BldgRepairModel +from .loss_model import RepairModel diff --git a/pelicun/model/loss_model.py b/pelicun/model/loss_model.py index d61d55f7d..16cbce25b 100644 --- a/pelicun/model/loss_model.py +++ b/pelicun/model/loss_model.py @@ -49,7 +49,7 @@ prep_bounded_multilinear_median_DV LossModel - BldgRepairModel + RepairModel """ @@ -330,7 +330,7 @@ def calculate(self): self.log_msg("Loss calculation successful.") -class BldgRepairModel(LossModel): +class RepairModel(LossModel): """ Manages building repair consequence assessments. @@ -342,7 +342,7 @@ class BldgRepairModel(LossModel): def __init__(self, assessment): super().__init__(assessment) - self.loss_type = 'BldgRepair' + self.loss_type = 'Repair' # def load_model(self, data_paths, mapping_path): diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 62e23b0d1..1c80c2abc 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -427,7 +427,7 @@ def auto_populate(AIM): "Demands": { }, "Losses": { - "BldgRepair": { + "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Buildings", "MapApproach": "Automatic" } @@ -463,7 +463,7 @@ def auto_populate(AIM): "Demands": { }, "Losses": { - "BldgRepair": { + "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Transportation", "MapApproach": "Automatic" } @@ -495,7 +495,7 @@ def auto_populate(AIM): "Demands": { }, "Losses": { - "BldgRepair": { + "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Transportation", "MapApproach": "Automatic" } @@ -525,7 +525,7 @@ def auto_populate(AIM): "Demands": { }, "Losses": { - "BldgRepair": { + "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Transportation", "MapApproach": "Automatic" } @@ -536,4 +536,4 @@ def auto_populate(AIM): else: print(f"AssetType: {assetType} is not supported in Hazus Earthquake IM DL method") - return GI_ap, DL_ap, CMP \ No newline at end of file + return GI_ap, DL_ap, CMP diff --git a/pelicun/resources/auto/Hazus_Earthquake_Story.py b/pelicun/resources/auto/Hazus_Earthquake_Story.py index 5d14d1988..d44f59f41 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_Story.py +++ b/pelicun/resources/auto/Hazus_Earthquake_Story.py @@ -235,7 +235,7 @@ def auto_populate(AIM): plan_area = 
GI.get('PlanArea', 1.0) - bldg_repair_config = { + repair_config = { "ConsequenceDatabase": "Hazus Earthquake - Stories", "MapApproach": "Automatic", "DecisionVariables": { @@ -260,11 +260,11 @@ def auto_populate(AIM): "Demands": { }, "Losses": { - "BldgRepair": bldg_repair_config + "Repair": repair_config } } else: print(f"AssetType: {assetType} is not supported in Hazus Earthquake Story-based DL method") - return GI_ap, DL_ap, CMP \ No newline at end of file + return GI_ap, DL_ap, CMP diff --git a/pelicun/tests/test_assessment.py b/pelicun/tests/test_assessment.py index 6fa50e3e2..cd7b95484 100644 --- a/pelicun/tests/test_assessment.py +++ b/pelicun/tests/test_assessment.py @@ -79,8 +79,8 @@ def test_Assessment_init(): assert isinstance(asmt.asset, model.AssetModel) assert asmt.damage assert isinstance(asmt.damage, model.DamageModel) - assert asmt.bldg_repair - assert isinstance(asmt.bldg_repair, model.BldgRepairModel) + assert asmt.repair + assert isinstance(asmt.repair, model.RepairModel) def test_assessment_get_default_metadata(): diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 4fae4f23d..2d66ec96a 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -1626,10 +1626,10 @@ def test__generate_DV_sample(self, loss_model): loss_model._generate_DV_sample(None, None) -class TestBldgRepairModel(TestPelicunModel): +class TestRepairModel(TestPelicunModel): @pytest.fixture - def bldg_repair_model(self, assessment_instance): - return assessment_instance.bldg_repair + def repair_model(self, assessment_instance): + return assessment_instance.repair @pytest.fixture def loss_params_A(self): @@ -1670,17 +1670,17 @@ def loss_params_A(self): ), ) - def test_init(self, bldg_repair_model): - assert bldg_repair_model.log_msg - assert bldg_repair_model.log_div + def test_init(self, repair_model): + assert repair_model.log_msg + assert repair_model.log_div - assert bldg_repair_model._sample is None - assert bldg_repair_model.loss_type == 'BldgRepair' + assert repair_model._sample is None + assert repair_model.loss_type == 'Repair' - def test__create_DV_RVs(self, bldg_repair_model, loss_params_A): - bldg_repair_model.loss_params = loss_params_A + def test__create_DV_RVs(self, repair_model, loss_params_A): + repair_model.loss_params = loss_params_A - bldg_repair_model.loss_map = pd.DataFrame( + repair_model.loss_map = pd.DataFrame( ((("DMG", "some.test.component"), "some.test.component"),), columns=("Driver", "Consequence"), ) @@ -1694,7 +1694,7 @@ def test__create_DV_RVs(self, bldg_repair_model, loss_params_A): names=("cmp", "loc", "dir", "uid", "ds"), ) - rv_reg = bldg_repair_model._create_DV_RVs(case_list) + rv_reg = repair_model._create_DV_RVs(case_list) assert list(rv_reg.RV.keys()) == [ 'Cost-0-1-2-2-0', 'Time-0-1-2-2-0', @@ -1718,10 +1718,10 @@ def test__create_DV_RVs(self, bldg_repair_model, loss_params_A): rvs[3].theta, np.array((1.00, 0.464027, np.nan)) ) - def test__calc_median_consequence(self, bldg_repair_model, loss_params_A): - bldg_repair_model.loss_params = loss_params_A + def test__calc_median_consequence(self, repair_model, loss_params_A): + repair_model.loss_params = loss_params_A - bldg_repair_model.loss_map = pd.DataFrame( + repair_model.loss_map = pd.DataFrame( ((("DMG", "some.test.component"), "some.test.component"),), columns=("Driver", "Consequence"), ) @@ -1737,12 +1737,12 @@ def test__calc_median_consequence(self, bldg_repair_model, loss_params_A): ), ) - medians = bldg_repair_model._calc_median_consequence(eco_qnt) + medians = 
repair_model._calc_median_consequence(eco_qnt) assert medians['Cost'].to_dict() == {(0, '1'): {0: 25704.0, 1: 22848.0}} assert medians['Time'].to_dict() == {(0, '1'): {0: 22.68, 1: 20.16}} - def test_aggregate_losses(self, bldg_repair_model, loss_params_A): - bldg_repair_model._sample = pd.DataFrame( + def test_aggregate_losses(self, repair_model, loss_params_A): + repair_model._sample = pd.DataFrame( ((100.00, 1.00),), columns=pd.MultiIndex.from_tuples( ( @@ -1767,9 +1767,9 @@ def test_aggregate_losses(self, bldg_repair_model, loss_params_A): ), ) - bldg_repair_model.loss_params = loss_params_A + repair_model.loss_params = loss_params_A - df_agg = bldg_repair_model.aggregate_losses() + df_agg = repair_model.aggregate_losses() assert df_agg.to_dict() == { ('repair_cost', ''): {0: 100.0}, @@ -1777,7 +1777,7 @@ def test_aggregate_losses(self, bldg_repair_model, loss_params_A): ('repair_time', 'sequential'): {0: 1.0}, } - def test__generate_DV_sample(self, bldg_repair_model): + def test__generate_DV_sample(self, repair_model): expected_sample = { (True, True): { ( @@ -1861,7 +1861,7 @@ def test__generate_DV_sample(self, bldg_repair_model): (True, True), (True, False), ): # todo: (False, True), (False, False) fails - assessment_instance = bldg_repair_model._asmnt + assessment_instance = repair_model._asmnt assessment_instance.options.eco_scale["AcrossFloors"] = ecofl assessment_instance.options.eco_scale["AcrossDamageStates"] = ecods @@ -1883,12 +1883,12 @@ def test__generate_DV_sample(self, bldg_repair_model): ), ) - bldg_repair_model.loss_map = pd.DataFrame( + repair_model.loss_map = pd.DataFrame( ((("DMG", "some.test.component"), "some.test.component"),), columns=("Driver", "Consequence"), ) - bldg_repair_model.loss_params = pd.DataFrame( + repair_model.loss_params = pd.DataFrame( ( ( None, @@ -1928,10 +1928,10 @@ def test__generate_DV_sample(self, bldg_repair_model): ), ) - bldg_repair_model._generate_DV_sample(dmg_quantities, 4) + repair_model._generate_DV_sample(dmg_quantities, 4) assert ( - bldg_repair_model._sample.to_dict() + repair_model._sample.to_dict() == expected_sample[(ecods, ecofl)] ) diff --git a/pelicun/tools/DL_calculation.py b/pelicun/tools/DL_calculation.py index da08862dc..8cf99d6b5 100644 --- a/pelicun/tools/DL_calculation.py +++ b/pelicun/tools/DL_calculation.py @@ -125,12 +125,12 @@ def log_msg(msg): "DMG_stats.csv", "DMG_grp.zip", "DMG_grp_stats.csv", - "DV_bldg_repair_sample.zip", - "DV_bldg_repair_stats.csv", - "DV_bldg_repair_grp.zip", - "DV_bldg_repair_grp_stats.csv", - "DV_bldg_repair_agg.zip", - "DV_bldg_repair_agg_stats.csv", + "DV_repair_sample.zip", + "DV_repair_stats.csv", + "DV_repair_grp.zip", + "DV_repair_grp_stats.csv", + "DV_repair_agg.zip", + "DV_repair_agg_stats.csv", "DL_summary.csv", "DL_summary_stats.csv", ] @@ -145,7 +145,7 @@ def log_msg(msg): 'GroupedStatistics': True, }, 'Loss': { - 'BldgRepair': { + 'Repair': { 'Sample': True, 'Statistics': True, 'GroupedSample': True, @@ -167,7 +167,7 @@ def log_msg(msg): 'GroupedStatistics': False, }, 'Loss': { - 'BldgRepair': { + 'Repair': { 'Sample': True, 'Statistics': True, 'GroupedSample': True, @@ -1243,23 +1243,23 @@ def run_pelicun( out_config_loss = out_config.get('Loss', {}) # if requested, calculate repair consequences - if loss_config.get('BldgRepair', False): - bldg_repair_config = loss_config['BldgRepair'] + if loss_config.get('Repair', False): + repair_config = loss_config['Repair'] # load the fragility information if ( - bldg_repair_config['ConsequenceDatabase'] + 
repair_config['ConsequenceDatabase'] in default_DBs['repair'].keys() ): consequence_db = [ 'PelicunDefault/' + default_DBs['repair'][ - bldg_repair_config['ConsequenceDatabase'] + repair_config['ConsequenceDatabase'] ], ] conseq_df = PAL.get_default_data( - default_DBs['repair'][bldg_repair_config['ConsequenceDatabase']][ + default_DBs['repair'][repair_config['ConsequenceDatabase']][ :-4 ] ) @@ -1268,15 +1268,15 @@ def run_pelicun( conseq_df = pd.DataFrame() - if bldg_repair_config.get('ConsequenceDatabasePath', False) is not False: - extra_comps = bldg_repair_config['ConsequenceDatabasePath'] + if repair_config.get('ConsequenceDatabasePath', False) is not False: + extra_comps = repair_config['ConsequenceDatabasePath'] consequence_db += [ extra_comps, ] extra_conseq_df = load_data( - bldg_repair_config['ConsequenceDatabasePath'], + repair_config['ConsequenceDatabasePath'], unit_conversion_factors=None, orientation=1, reindex=False, @@ -1305,12 +1305,12 @@ def run_pelicun( ), ) - # DL_method = bldg_repair_config['ConsequenceDatabase'] + # DL_method = repair_config['ConsequenceDatabase'] DL_method = damage_config.get('DamageProcess', 'User Defined') rc = ('replacement', 'Cost') - if 'ReplacementCost' in bldg_repair_config.keys(): - rCost_config = bldg_repair_config['ReplacementCost'] + if 'ReplacementCost' in repair_config.keys(): + rCost_config = repair_config['ReplacementCost'] adf.loc[rc, ('Quantity', 'Unit')] = "1 EA" @@ -1347,8 +1347,8 @@ def run_pelicun( adf.loc[rc, ('DS1', 'Theta_0')] = 1 rt = ('replacement', 'Time') - if 'ReplacementTime' in bldg_repair_config.keys(): - rTime_config = bldg_repair_config['ReplacementTime'] + if 'ReplacementTime' in repair_config.keys(): + rTime_config = repair_config['ReplacementTime'] rt = ('replacement', 'Time') adf.loc[rt, ('Quantity', 'Unit')] = "1 EA" @@ -1388,8 +1388,8 @@ def run_pelicun( adf.loc[rt, ('DS1', 'Theta_0')] = 1 rcarb = ('replacement', 'Carbon') - if 'ReplacementCarbon' in bldg_repair_config.keys(): - rCarbon_config = bldg_repair_config['ReplacementCarbon'] + if 'ReplacementCarbon' in repair_config.keys(): + rCarbon_config = repair_config['ReplacementCarbon'] rcarb = ('replacement', 'Carbon') adf.loc[rcarb, ('Quantity', 'Unit')] = "1 EA" @@ -1418,8 +1418,8 @@ def run_pelicun( adf.drop(rcarb, inplace=True) ren = ('replacement', 'Energy') - if 'ReplacementEnergy' in bldg_repair_config.keys(): - rEnergy_config = bldg_repair_config['ReplacementEnergy'] + if 'ReplacementEnergy' in repair_config.keys(): + rEnergy_config = repair_config['ReplacementEnergy'] ren = ('replacement', 'Energy') adf.loc[ren, ('Quantity', 'Unit')] = "1 EA" @@ -1447,7 +1447,7 @@ def run_pelicun( # prepare the loss map loss_map = None - if bldg_repair_config['MapApproach'] == "Automatic": + if repair_config['MapApproach'] == "Automatic": # get the damage sample dmg_sample = PAL.damage.save_sample() @@ -1494,23 +1494,23 @@ def run_pelicun( loss_models.append(loss_cmp) loss_map = pd.DataFrame( - loss_models, columns=['BldgRepair'], index=drivers + loss_models, columns=['Repair'], index=drivers ) - elif bldg_repair_config['MapApproach'] == "User Defined": + elif repair_config['MapApproach'] == "User Defined": loss_map = pd.read_csv( - bldg_repair_config['MapFilePath'], index_col=0 + repair_config['MapFilePath'], index_col=0 ) # prepare additional loss map entries, if needed if 'DMG-collapse' not in loss_map.index: - loss_map.loc['DMG-collapse', 'BldgRepair'] = 'replacement' - loss_map.loc['DMG-irreparable', 'BldgRepair'] = 'replacement' + loss_map.loc['DMG-collapse', 
'Repair'] = 'replacement' + loss_map.loc['DMG-irreparable', 'Repair'] = 'replacement' # assemble the list of requested decision variables DV_list = [] - if bldg_repair_config.get('DecisionVariables', False) is not False: - for DV_i, DV_status in bldg_repair_config[ + if repair_config.get('DecisionVariables', False) is not False: + for DV_i, DV_status in repair_config[ 'DecisionVariables' ].items(): if DV_status is True: @@ -1519,7 +1519,7 @@ def run_pelicun( else: DV_list = None - PAL.bldg_repair.load_model( + PAL.repair.load_model( consequence_db + [ adf, @@ -1528,13 +1528,13 @@ def run_pelicun( decision_variables=DV_list, ) - PAL.bldg_repair.calculate() + PAL.repair.calculate() - agg_repair = PAL.bldg_repair.aggregate_losses() + agg_repair = PAL.repair.aggregate_losses() # if requested, save results - if out_config_loss.get('BldgRepair', False): - repair_sample, repair_units = PAL.bldg_repair.save_sample( + if out_config_loss.get('Repair', False): + repair_sample, repair_units = PAL.repair.save_sample( save_units=True ) repair_units = repair_units.to_frame().T @@ -1559,7 +1559,7 @@ def run_pelicun( out_reqs = [ out if val else "" - for out, val in out_config_loss['BldgRepair'].items() + for out, val in out_config_loss['Repair'].items() ] if np.any( @@ -1583,14 +1583,14 @@ def run_pelicun( repair_sample_s, axis=1 ) repair_sample_s.to_csv( - output_path / "DV_bldg_repair_sample.zip", + output_path / "DV_repair_sample.zip", index_label=repair_sample_s.columns.name, compression=dict( method='zip', - archive_name='DV_bldg_repair_sample.csv', + archive_name='DV_repair_sample.csv', ), ) - output_files.append('DV_bldg_repair_sample.zip') + output_files.append('DV_repair_sample.zip') if 'Statistics' in out_reqs: repair_stats = describe(repair_sample) @@ -1598,10 +1598,10 @@ def run_pelicun( repair_stats = convert_to_SimpleIndex(repair_stats, axis=1) repair_stats.to_csv( - output_path / "DV_bldg_repair_stats.csv", + output_path / "DV_repair_stats.csv", index_label=repair_stats.columns.name, ) - output_files.append('DV_bldg_repair_stats.csv') + output_files.append('DV_repair_stats.csv') if np.any( np.isin(['GroupedSample', 'GroupedStatistics'], out_reqs) @@ -1625,14 +1625,14 @@ def run_pelicun( grp_repair_s, axis=1 ) grp_repair_s.to_csv( - output_path / "DV_bldg_repair_grp.zip", + output_path / "DV_repair_grp.zip", index_label=grp_repair_s.columns.name, compression=dict( method='zip', - archive_name='DV_bldg_repair_grp.csv', + archive_name='DV_repair_grp.csv', ), ) - output_files.append('DV_bldg_repair_grp.zip') + output_files.append('DV_repair_grp.zip') if 'GroupedStatistics' in out_reqs: grp_stats = describe(grp_repair) @@ -1640,10 +1640,10 @@ def run_pelicun( grp_stats = convert_to_SimpleIndex(grp_stats, axis=1) grp_stats.to_csv( - output_path / "DV_bldg_repair_grp_stats.csv", + output_path / "DV_repair_grp_stats.csv", index_label=grp_stats.columns.name, ) - output_files.append('DV_bldg_repair_grp_stats.csv') + output_files.append('DV_repair_grp_stats.csv') if np.any( np.isin(['AggregateSample', 'AggregateStatistics'], out_reqs) @@ -1651,24 +1651,24 @@ def run_pelicun( if 'AggregateSample' in out_reqs: agg_repair_s = convert_to_SimpleIndex(agg_repair, axis=1) agg_repair_s.to_csv( - output_path / "DV_bldg_repair_agg.zip", + output_path / "DV_repair_agg.zip", index_label=agg_repair_s.columns.name, compression=dict( method='zip', - archive_name='DV_bldg_repair_agg.csv', + archive_name='DV_repair_agg.csv', ), ) - output_files.append('DV_bldg_repair_agg.zip') + 
output_files.append('DV_repair_agg.zip') if 'AggregateStatistics' in out_reqs: agg_stats = convert_to_SimpleIndex( describe(agg_repair), axis=1 ) agg_stats.to_csv( - output_path / "DV_bldg_repair_agg_stats.csv", + output_path / "DV_repair_agg_stats.csv", index_label=agg_stats.columns.name, ) - output_files.append('DV_bldg_repair_agg_stats.csv') + output_files.append('DV_repair_agg_stats.csv') # Result Summary ----------------------------------------------------------- @@ -1676,7 +1676,7 @@ def run_pelicun( damage_sample = PAL.damage.save_sample() if 'agg_repair' not in locals(): - agg_repair = PAL.bldg_repair.aggregate_losses() + agg_repair = PAL.repair.aggregate_losses() damage_sample = damage_sample.groupby(level=[0, 3], axis=1).sum() damage_sample_s = convert_to_SimpleIndex(damage_sample, axis=1) From b66257a3a25a0643a6799a58f0761f63567885f1 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 13:27:49 -0800 Subject: [PATCH 06/48] Remove block weights --- pelicun/model/asset_model.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/pelicun/model/asset_model.py b/pelicun/model/asset_model.py index d7a7dfe71..02206d819 100644 --- a/pelicun/model/asset_model.py +++ b/pelicun/model/asset_model.py @@ -258,25 +258,7 @@ def get_directions(dir_str): def get_attribute(attribute_str, dtype=float, default=np.nan): if pd.isnull(attribute_str): return default - - # else: - - try: - res = dtype(attribute_str) - return res - - except ValueError as exc: - if "," in attribute_str: - # a list of weights - w = np.array(attribute_str.split(','), dtype=float) - - # return a normalized vector - return w / np.sum(w) - - # else: - raise ValueError( - f"Cannot parse Blocks string: {attribute_str}" - ) from exc + return dtype(attribute_str) self.log_div() self.log_msg('Loading component model...') From 1600b7c8bf9e410e9db52c08fce9f06d285f58a4 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 15:47:32 -0800 Subject: [PATCH 07/48] Use `deepcopy` to avoid fixture mutation --- pelicun/tests/test_model.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 2d66ec96a..e6492803c 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -44,6 +44,7 @@ import os import tempfile +from copy import deepcopy import pytest import numpy as np import pandas as pd @@ -75,13 +76,13 @@ def create_instance(verbose): @pytest.fixture(params=[True, False]) def assessment_instance(self, request, assessment_factory): - return assessment_factory(request.param) + return deepcopy(assessment_factory(request.param)) class TestDemandModel(TestModelModule): @pytest.fixture def demand_model(self, assessment_instance): - return assessment_instance.demand + return deepcopy(assessment_instance.demand) @pytest.fixture def demand_model_with_sample(self, assessment_instance): @@ -90,7 +91,7 @@ def demand_model_with_sample(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_A.csv' ) - return mdl + return deepcopy(mdl) @pytest.fixture def calibrated_demand_model(self, demand_model_with_sample): @@ -108,7 +109,7 @@ def calibrated_demand_model(self, demand_model_with_sample): }, } demand_model_with_sample.calibrate_model(config) - return demand_model_with_sample + return deepcopy(demand_model_with_sample) @pytest.fixture def demand_model_with_sample_B(self, assessment_instance): @@ -117,7 
+118,7 @@ def demand_model_with_sample_B(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_B.csv' ) - return mdl + return deepcopy(mdl) @pytest.fixture def demand_model_with_sample_C(self, assessment_instance): @@ -126,7 +127,7 @@ def demand_model_with_sample_C(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_C.csv' ) - return mdl + return deepcopy(mdl) @pytest.fixture def demand_model_with_sample_D(self, assessment_instance): @@ -135,7 +136,7 @@ def demand_model_with_sample_D(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_D.csv' ) - return mdl + return deepcopy(mdl) def test_init(self, demand_model): assert demand_model.log_msg @@ -454,7 +455,7 @@ def test_generate_sample_with_demand_cloning(self, assessment_instance): class TestPelicunModel(TestModelModule): @pytest.fixture def pelicun_model(self, assessment_instance): - return model.PelicunModel(assessment_instance) + return deepcopy(model.PelicunModel(assessment_instance)) def test_init(self, pelicun_model): assert pelicun_model.log_msg @@ -574,7 +575,7 @@ def test_convert_marginal_params(self, pelicun_model): class TestAssetModel(TestPelicunModel): @pytest.fixture def asset_model(self, assessment_instance): - return assessment_instance.asset + return deepcopy(assessment_instance.asset) def test_init(self, asset_model): assert asset_model.log_msg @@ -849,7 +850,7 @@ def calibration_config_A(self): @pytest.fixture def damage_model(self, assessment_instance): - return assessment_instance.damage + return deepcopy(assessment_instance.damage) @pytest.fixture def damage_model_model_loaded(self, damage_model, cmp_sample_A): @@ -857,7 +858,7 @@ def damage_model_model_loaded(self, damage_model, cmp_sample_A): asmt.get_default_data('damage_DB_FEMA_P58_2nd') asmt.asset._cmp_sample = cmp_sample_A damage_model.load_damage_model(['PelicunDefault/damage_DB_FEMA_P58_2nd.csv']) - return damage_model + return deepcopy(damage_model) @pytest.fixture def damage_model_with_sample(self, assessment_instance): @@ -981,7 +982,7 @@ def damage_model_with_sample(self, assessment_instance): name='Units', dtype='object', ) - return assessment_instance.damage + return deepcopy(assessment_instance.damage) def test_init(self, damage_model): assert damage_model.log_msg @@ -1507,7 +1508,7 @@ def test_calculate_multilinear_CDF(self, damage_model): class TestLossModel(TestPelicunModel): @pytest.fixture def loss_model(self, assessment_instance): - return model.LossModel(assessment_instance) + return deepcopy(model.LossModel(assessment_instance)) def test_init(self, loss_model): assert loss_model.log_msg @@ -1629,7 +1630,7 @@ def test__generate_DV_sample(self, loss_model): class TestRepairModel(TestPelicunModel): @pytest.fixture def repair_model(self, assessment_instance): - return assessment_instance.repair + return deepcopy(assessment_instance.repair) @pytest.fixture def loss_params_A(self): From 92bcadbf1e8bc3fac7c9b47c6475ad8f00434008 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 15:48:39 -0800 Subject: [PATCH 08/48] Improve `empirical_data` handling Improve `empirical_data` handling for saving/loading DemandModel objects. 
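
For reference, a minimal sketch of the intended behavior after this change
(the file path, the Assessment construction options, and the calibration
config below are illustrative only, not part of this patch): when calibration
assigns no demand variable the 'empirical' family, `empirical_data` now stays
None, `save_model` skips the `<prefix>_empirical.csv` file instead of writing
an empty one, and `load_model` tolerates its absence.

    from pelicun import assessment

    # construction options omitted for brevity
    asmt = assessment.Assessment()
    demand = asmt.demand

    # illustrative path to a demand sample CSV (with a Units row)
    demand.load_sample('demand_sample.csv')

    # no EDP is assigned the 'empirical' family in this config, ...
    demand.calibrate_model({"ALL": {"DistributionFamily": "lognormal"}})
    assert demand.empirical_data is None

    # ... so saving the model writes the marginal and correlation files
    # but no 'demand_model_empirical.csv'
    demand.save_model('demand_model')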
--- pelicun/model/demand_model.py | 33 +++++++++++++++++++-------------- pelicun/tests/test_model.py | 26 +++++++++++++++++++------- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/pelicun/model/demand_model.py b/pelicun/model/demand_model.py index d01c7b00e..ddc01b2d7 100644 --- a/pelicun/model/demand_model.py +++ b/pelicun/model/demand_model.py @@ -88,6 +88,8 @@ class DemandModel(PelicunModel): units: Series Available after any demand data has been loaded. The index identifies the demand variables and the values provide the unit for each variable. + calibrated: bool + Signifies whether the DemandModel object has been calibrated. """ @@ -98,6 +100,7 @@ def __init__(self, assessment): self.correlation = None self.empirical_data = None self.units = None + self.calibrated = False self._RVs = None self.sample = None @@ -313,6 +316,9 @@ def calibrate_model(self, config): """ + if self.calibrated: + raise ValueError('DemandModel has been previously calibrated.') + def parse_settings(settings, demand_type): def parse_str_to_float(in_str, context_string): try: @@ -488,7 +494,8 @@ def get_filter_mask(lower_lims, upper_lims): if cal_df.loc[edp, 'Family'] == 'empirical': empirical_edps.append(edp) - self.empirical_data = demand_sample.loc[:, empirical_edps].copy() + if empirical_edps: + self.empirical_data = demand_sample.loc[:, empirical_edps].copy() # remove the empirical demands from the samples used for calibration demand_sample = demand_sample.drop(empirical_edps, axis=1) @@ -565,6 +572,8 @@ def get_filter_mask(lower_lims, upper_lims): prepend_timestamp=False, ) + self.calibrated = True + def save_model(self, file_prefix): """ Save parameters of the demand model to a set of csv files @@ -576,13 +585,14 @@ def save_model(self, file_prefix): # save the correlation and empirical data file_io.save_to_csv(self.correlation, file_prefix + '_correlation.csv') - file_io.save_to_csv( - self.empirical_data, - file_prefix + '_empirical.csv', - units=self.units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - log=self._asmnt.log, - ) + if self.empirical_data is not None: + file_io.save_to_csv( + self.empirical_data, + file_prefix + '_empirical.csv', + units=self.units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + log=self._asmnt.log, + ) # the log standard deviations in the marginal parameters need to be # scaled up before feeding to the saving method where they will be @@ -644,12 +654,7 @@ def load_model(self, data_source): self._asmnt.unit_conversion_factors, log=self._asmnt.log, ) - if not self.empirical_data.empty: - self.empirical_data.columns.set_names( - ['type', 'loc', 'dir'], inplace=True - ) - else: - self.empirical_data = None + self.empirical_data.columns.names = ('type', 'loc', 'dir') else: self.empirical_data = None diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index e6492803c..ace45e0ab 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -245,6 +245,9 @@ def test_calibrate_model( == 0.06 ) + def test_calibrate_model_censoring( + self, calibrated_demand_model, demand_model_with_sample_C + ): # with a config featuring censoring the RIDs config = { "ALL": { @@ -258,6 +261,9 @@ def test_calibrate_model( } demand_model_with_sample_C.calibrate_model(config) + def test_calibrate_model_truncation( + self, calibrated_demand_model, demand_model_with_sample_C + ): # with a config that specifies a truncation limit smaller than # the samples config = { @@ -271,6 +277,9 @@ def test_calibrate_model( }, } 
demand_model_with_sample_C.calibrate_model(config) + # calibrating again should raise an error + with pytest.raises(ValueError): + demand_model_with_sample_C.calibrate_model(config) def test_save_load_model_with_empirical( self, calibrated_demand_model, assessment_instance @@ -286,13 +295,19 @@ def test_save_load_model_with_empirical( new_demand_model = assessment_instance.demand new_demand_model.load_model(f'{temp_dir}/temp') pd.testing.assert_frame_equal( - calibrated_demand_model.marginal_params, new_demand_model.marginal_params + calibrated_demand_model.marginal_params, + new_demand_model.marginal_params, + atol=1e-4, ) pd.testing.assert_frame_equal( - calibrated_demand_model.correlation, new_demand_model.correlation + calibrated_demand_model.correlation, + new_demand_model.correlation, + atol=1e-4, ) pd.testing.assert_frame_equal( - calibrated_demand_model.empirical_data, new_demand_model.empirical_data + calibrated_demand_model.empirical_data, + new_demand_model.empirical_data, + atol=1e-4, ) # # todo: this currently fails @@ -1931,10 +1946,7 @@ def test__generate_DV_sample(self, repair_model): repair_model._generate_DV_sample(dmg_quantities, 4) - assert ( - repair_model._sample.to_dict() - == expected_sample[(ecods, ecofl)] - ) + assert repair_model._sample.to_dict() == expected_sample[(ecods, ecofl)] # _____ _ _ From d62352d2340858c5af09891fe0b5d922dca011de Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 16:01:40 -0800 Subject: [PATCH 09/48] Replace Scipy's `interp1d` with Numpy's `interp` Using `interp` for these operations is more straightforward and consistent with the rest of the linear interpolation operations in the code base. --- pelicun/uq.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pelicun/uq.py b/pelicun/uq.py index fefdaf283..4064ca75a 100644 --- a/pelicun/uq.py +++ b/pelicun/uq.py @@ -64,7 +64,6 @@ from scipy.stats._mvn import mvndst # pylint: disable=no-name-in-module from scipy.linalg import cholesky, svd from scipy.optimize import minimize -from scipy.interpolate import interp1d import numpy as np import pandas as pd @@ -1668,9 +1667,8 @@ def cdf(self, values): x_i = [-np.inf] + [x[0] for x in self.theta] + [np.inf] y_i = [0.00] + [x[1] for x in self.theta] + [1.00] - ifun = interp1d(x_i, y_i, kind='linear') - - result = ifun(values) + # Using Numpy's interp for linear interpolation + result = np.interp(values, x_i, y_i, left=0.00, right=1.00) return result @@ -1701,14 +1699,14 @@ def inverse_transform(self, values): x_i = [x[0] for x in self.theta] y_i = [x[1] for x in self.theta] - # define the inverse CDF - ifun = interp1d(y_i, x_i, kind='linear') - # note: by definition, y_i /has/ to include the values - # 0.00 and 1.00, and `values` have to be in the range - # [0.00, 1.00], so there is no need to handle edge cases - # here (i.e., extrapolate). - - result = ifun(values) + # using Numpy's interp for the inverse CDF + # note: by definition, y_i /has/ to include the values 0.00 + # and 1.00, and `values` have to be in the range [0.00, 1.00], + # so there is no need to handle edge cases here (i.e., + # extrapolate). 
+ # note: swapping the roles of x_i and y_i for inverse + # interpolation + result = np.interp(values, y_i, x_i) return result From 22a9e1d10b3e42242f582d611cc86358c6b4611f Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 16:29:13 -0800 Subject: [PATCH 10/48] Consider `auto.py` when linting - [X] Linter checks pass --- .flake8 | 2 +- .pylintrc | 2 +- pelicun/auto.py | 27 ++++++++++++++++----------- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/.flake8 b/.flake8 index 3d2dd9047..c8f6175c9 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] max-line-length = 85 ignore = E203, E241, E701, W503 -exclude = db.py,auto.py,flycheck*,Hazus_Earthquake_IM.py,Hazus_Earthquake_Story.py,export_DB.py \ No newline at end of file +exclude = db.py,flycheck*,Hazus_Earthquake_IM.py,Hazus_Earthquake_Story.py,export_DB.py \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index bc8ac23d4..511dfb8fa 100644 --- a/.pylintrc +++ b/.pylintrc @@ -6,7 +6,7 @@ init-hook='import sys; sys.path.append("."); sys.path.append("../"); sys.path.ap # Files or directories to be skipped. They should be base names, not # paths. -ignore=auto.py,db.py,flycheck_*.py +ignore=db.py,flycheck_*.py # Add files or directories matching the regex patterns to the ignore-list. The # regex matches against paths and can be in Posix or Windows format. diff --git a/pelicun/auto.py b/pelicun/auto.py index d1fadc03b..df0a75e82 100644 --- a/pelicun/auto.py +++ b/pelicun/auto.py @@ -50,29 +50,35 @@ import sys import importlib -import json from pathlib import Path from . import base -def auto_populate(config, auto_script_path, **kwargs): + +def auto_populate( + config, + auto_script_path, + **kwargs # pylint: disable=unused-argument +): """ - Automatically prepares the DL configuration for a Pelicun calculation. + Automatically populates the DL configuration for a Pelicun + calculation. Parameters ---------- config: dict - Configuration dictionary with a GeneralInformation key that holds - another dictionary with attributes of the asset of interest. + Configuration dictionary with a GeneralInformation key that + holds another dictionary with attributes of the asset of + interest. auto_script_path: string - Path pointing to a python script with the auto-population rules. - Built-in scripts can be referenced using the PelicunDefault/XY format - where XY is the name of the script. + Path pointing to a python script with the auto-population + rules. Built-in scripts can be referenced using the + PelicunDefault/XY format where XY is the name of the script. """ # try to get the AIM attributes AIM = config.get('GeneralInformation', None) - if AIM == None: + if AIM is None: raise ValueError( "No Asset Information provided for the auto-population routine." 
) @@ -84,7 +90,7 @@ def auto_populate(config, auto_script_path, **kwargs): # load the auto population module ASP = Path(auto_script_path).resolve() - sys.path.insert(0, str(ASP.parent)+'/') + sys.path.insert(0, str(ASP.parent) + '/') auto_script = importlib.__import__(ASP.name[:-3], globals(), locals(), [], 0) auto_populate_ext = auto_script.auto_populate @@ -97,4 +103,3 @@ def auto_populate(config, auto_script_path, **kwargs): # return the extended config data and the component quantities return config, CMP - From e51e8acaf27f1f7e8bbfda916f7ae24f10c45329 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 16:31:30 -0800 Subject: [PATCH 11/48] Minor formatting --- pelicun/auto.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pelicun/auto.py b/pelicun/auto.py index df0a75e82..e7a1ea864 100644 --- a/pelicun/auto.py +++ b/pelicun/auto.py @@ -56,9 +56,7 @@ def auto_populate( - config, - auto_script_path, - **kwargs # pylint: disable=unused-argument + config, auto_script_path, **kwargs # pylint: disable=unused-argument ): """ Automatically populates the DL configuration for a Pelicun @@ -86,7 +84,8 @@ def auto_populate( # replace default keyword with actual path in auto_script location if 'PelicunDefault/' in auto_script_path: auto_script_path = auto_script_path.replace( - 'PelicunDefault/', f'{base.pelicun_path}/resources/auto/') + 'PelicunDefault/', f'{base.pelicun_path}/resources/auto/' + ) # load the auto population module ASP = Path(auto_script_path).resolve() From dca077a1293c8788306d022c0a3a3e2fea70d44e Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 2 Mar 2024 16:52:40 -0800 Subject: [PATCH 12/48] Add unit tests for `auto.py` --- .coveragerc | 3 - pelicun/tests/test_auto.py | 126 +++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 pelicun/tests/test_auto.py diff --git a/.coveragerc b/.coveragerc index 9aabb0e68..fd87b56d3 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,8 +1,5 @@ [run] omit = db.py - auto.py */tests/* [report] -exclude_lines = - def get_required_resources diff --git a/pelicun/tests/test_auto.py b/pelicun/tests/test_auto.py new file mode 100644 index 000000000..b1b50fa58 --- /dev/null +++ b/pelicun/tests/test_auto.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +These are unit and integration tests on the auto module of pelicun. + +""" + +import pytest +from unittest.mock import patch +from unittest.mock import MagicMock +from pelicun.auto import auto_populate + + +# pylint: disable=missing-function-docstring + +# The tests maintain the order of definitions of the `auto.py` file. + +# _____ _ _ +# | ___| _ _ __ ___| |_(_) ___ _ __ ___ +# | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __| +# | _|| |_| | | | | (__| |_| | (_) | | | \__ \ +# |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/ +# +# The following tests verify the functions of the module. + + +@pytest.fixture +def setup_valid_config(): + return {'GeneralInformation': {'someKey': 'someValue'}} + + +@pytest.fixture +def setup_auto_script_path(): + return 'PelicunDefault/test_script' + + +@pytest.fixture +def setup_expected_base_path(): + return '/expected/path/resources/auto/' + + +def test_valid_inputs(setup_valid_config, setup_auto_script_path): + with patch('pelicun.base.pelicun_path', '/expected/path'), patch( + 'os.path.exists', return_value=True + ), patch('importlib.__import__') as mock_import: + mock_auto_populate_ext = MagicMock( + return_value=({'AIM_ap': 'value'}, {'DL_ap': 'value'}, 'CMP') + ) + mock_import.return_value.auto_populate = mock_auto_populate_ext + + config, cmp = auto_populate(setup_valid_config, setup_auto_script_path) + + assert 'DL' in config + assert cmp == 'CMP' + + +def test_missing_general_information(): + with pytest.raises(ValueError) as excinfo: + auto_populate({}, 'some/path') + assert "No Asset Information provided for the auto-population routine." 
in str( + excinfo.value + ) + + +def test_pelicun_default_path_replacement( + setup_auto_script_path, setup_expected_base_path +): + modified_path = setup_auto_script_path.replace( + 'PelicunDefault/', setup_expected_base_path + ) + assert modified_path.startswith( + setup_expected_base_path + ) + + +def test_auto_population_script_execution( + setup_valid_config, setup_auto_script_path +): + with patch('pelicun.base.pelicun_path', '/expected/path'), patch( + 'os.path.exists', return_value=True + ), patch('importlib.__import__') as mock_import: + mock_auto_populate_ext = MagicMock( + return_value=({'AIM_ap': 'value'}, {'DL_ap': 'value'}, 'CMP') + ) + mock_import.return_value.auto_populate = mock_auto_populate_ext + + auto_populate(setup_valid_config, setup_auto_script_path) + mock_import.assert_called_once() From 5edf5cea45bd41462a953971f089e04c7118de43 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Wed, 6 Mar 2024 04:43:34 -0800 Subject: [PATCH 13/48] Remove block weights (2) --- pelicun/model/damage_model.py | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py index c9ea9e023..11e75aa88 100644 --- a/pelicun/model/damage_model.py +++ b/pelicun/model/damage_model.py @@ -386,13 +386,10 @@ def map_ds(values, offset=int(ds_id + 1)): cmp_id = PG[0] blocks = PGB.loc[PG, 'Blocks'] - # if the number of blocks is provided, calculate the weights - if np.atleast_1d(blocks).shape[0] == 1: - blocks = np.full(int(blocks), 1.0 / blocks) - # otherwise, assume that the list contains the weights + # Calculate the block weights + blocks = np.full(int(blocks), 1.0 / blocks) # initialize the damaged quantity sample variable - assert self.damage_params is not None if cmp_id in self.damage_params.index: frg_params = self.damage_params.loc[cmp_id, :] @@ -969,13 +966,6 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): A DataFrame that combines the component quantity and damage state information. - Raises - ------ - ValueError - If the number of blocks is not provided or if the list of - weights does not contain the same number of elements as - the number of blocks. - """ # Log a message indicating that the calculation of damage @@ -1011,12 +1001,9 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): if 'Blocks' in cmp_params.columns: blocks = cmp_params.loc[PG, 'Blocks'] - # If the number of blocks is specified, calculate the - # weights as the reciprocal of the number of blocks - if np.atleast_1d(blocks).shape[0] == 1: - blocks_array = np.full(int(blocks), 1.0 / blocks) - - # Otherwise, assume that the list contains the weights + # Calculate the weights as the reciprocal of the number of + # blocks + blocks_array = np.full(int(blocks), 1.0 / blocks) block_weights += blocks_array.tolist() # Broadcast the block weights to match the shape of the damage @@ -1333,13 +1320,6 @@ def _get_pg_batches(self, block_batch_size): for pg_i in pg_batch.index: if np.any(np.isin(pg_i, self.damage_params.index)): blocks_i = pg_batch.loc[pg_i, 'Blocks'] - - # If the "Blocks" column contains a list of block - # weights, get the number of blocks from the shape of - # the list. 
- if np.atleast_1d(blocks_i).shape[0] != 1: - blocks_i = np.atleast_1d(blocks_i).shape[0] - pg_batch.loc[pg_i, 'Blocks'] = blocks_i else: From fbe69c9be3774ceeb23b99dffdbc31a4bdfe4724 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Wed, 6 Mar 2024 05:25:14 -0800 Subject: [PATCH 14/48] remove a simple named function --- pelicun/file_io.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/pelicun/file_io.py b/pelicun/file_io.py index 6ee2ca86b..a171405f0 100644 --- a/pelicun/file_io.py +++ b/pelicun/file_io.py @@ -401,20 +401,11 @@ def load_data( if log: log.msg('Converting units...', prepend_timestamp=False) - # todo lambda - def get_conversion_factor(unit): - """ - Utility function to be used in `map`, handling the case - where unit is NaN and otherwise pulling values from the - `unit_conversion_factors` dictionary. - """ - return ( - 1.00 - if pd.isna(unit) - else unit_conversion_factors.get(unit, 1.00) - ) - - conversion_factors = units.map(get_conversion_factor) + conversion_factors = units.map( + lambda unit: 1.00 + if pd.isna(unit) + else unit_conversion_factors.get(unit, 1.00) + ) if orientation == 1: data.loc[:, numeric_elements] = data.loc[ From 2c7d3caca270b0a1143df595a81967eb8dbc0ea7 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 03:30:43 -0800 Subject: [PATCH 15/48] Default `truncation_limits` for all RV objects This commit adds a default `truncation_limits` argument to all RV objects, even for those that don't support it yet. The default is `nan` in both directions, and for the RVs that don't support truncation, a `NotImplementedError` is raised if the attribute is set to a different value while instantiating the object. This helps eliminate the if/else logic in the `model` module code, where we had to check if the distribution family is one for which we support truncation and not add that argument if it doesn't. Now we rely on the users to avoid specifying truncation limits for families that don't support it, or they will be getting the `NotImplementedError` at runtime. 
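
For illustration, a minimal sketch of the now-uniform construction path
(the RV names and parameter values below are made up; the `rv_class_map`
helper and the `truncation_limits` argument are the ones touched by this
patch):

    import numpy as np
    from pelicun import uq

    # Families that support truncation use the limits as before.
    rv_normal = uq.rv_class_map('normal')(
        name='example-normal',
        theta=[1.0, 0.3],
        truncation_limits=np.array((0.0, np.nan)),
    )

    # Families without truncation support now accept the argument with its
    # all-NaN default, so calling code no longer needs to branch on the
    # family before passing it ...
    rv_det = uq.rv_class_map('deterministic')(
        name='example-deterministic',
        theta=np.array((1.0,)),
        truncation_limits=np.array((np.nan, np.nan)),  # the default
    )

    # ... but they raise NotImplementedError if actual limits are requested.
    try:
        uq.rv_class_map('deterministic')(
            name='example-invalid',
            theta=np.array((1.0,)),
            truncation_limits=np.array((0.0, 2.0)),
        )
    except NotImplementedError:
        pass  # truncation is not supported for deterministic RVs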
--- pelicun/model/asset_model.py | 36 ++++++++++------------------ pelicun/model/damage_model.py | 19 +++++---------- pelicun/model/demand_model.py | 12 ---------- pelicun/uq.py | 45 ++++++++++++++++++++++++++++++++++- 4 files changed, 62 insertions(+), 50 deletions(-) diff --git a/pelicun/model/asset_model.py b/pelicun/model/asset_model.py index 02206d819..3907ca692 100644 --- a/pelicun/model/asset_model.py +++ b/pelicun/model/asset_model.py @@ -396,31 +396,19 @@ def _create_cmp_RVs(self): # create a random variable and add it to the registry family = getattr(rv_params, "Family", 'deterministic') - if family == 'deterministic': - # no truncation limits - RV_reg.add_RV( - uq.rv_class_map(family)( - name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', - theta=[ - getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3) - ], - ) - ) - else: - RV_reg.add_RV( - uq.rv_class_map(family)( - name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', - theta=[ - getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3) - ], - truncation_limits=[ - getattr(rv_params, f"Truncate{side}", np.nan) - for side in ("Lower", "Upper") - ], - ) + RV_reg.add_RV( + uq.rv_class_map(family)( + name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', + theta=[ + getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3) + ], + truncation_limits=[ + getattr(rv_params, f"Truncate{side}", np.nan) + for side in ("Lower", "Upper") + ], ) + ) self.log_msg( f"\n{self.cmp_marginal_params.shape[0]} random variables created.", diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py index 11e75aa88..9c3d73b61 100644 --- a/pelicun/model/damage_model.py +++ b/pelicun/model/damage_model.py @@ -485,19 +485,12 @@ def map_ds(values, offset=int(ds_id + 1)): ) ) - if family != 'deterministic': - RV = uq.rv_class_map(family)( - name=frg_rv_tag, - theta=theta, - anchor=anchor, - ) - else: - RV = uq.rv_class_map(family)( - name=frg_rv_tag, - theta=theta, - truncation_limits=tr_lims, - anchor=anchor, - ) + RV = uq.rv_class_map(family)( + name=frg_rv_tag, + theta=theta, + truncation_limits=tr_lims, + anchor=anchor, + ) capacity_RV_reg.add_RV(RV) diff --git a/pelicun/model/demand_model.py b/pelicun/model/demand_model.py index ddc01b2d7..9c50472b4 100644 --- a/pelicun/model/demand_model.py +++ b/pelicun/model/demand_model.py @@ -721,18 +721,6 @@ def _create_RVs(self, preserve_order=False): ) ) - elif family == 'deterministic': - # all other RVs need parameters of their distributions - RV_reg.add_RV( - uq.DeterministicRandomVariable( - name=rv_tag, - theta=[ - getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3) - ], - ) - ) - else: # all other RVs need parameters of their distributions RV_reg.add_RV( diff --git a/pelicun/uq.py b/pelicun/uq.py index 4064ca75a..90026cf54 100644 --- a/pelicun/uq.py +++ b/pelicun/uq.py @@ -1586,6 +1586,7 @@ def __init__( self, name, theta, + truncation_limits=np.array((np.nan, np.nan)), f_map=None, anchor=None, ): @@ -1600,6 +1601,9 @@ def __init__( 1.00)). The first Y value has to be 0.00 and the last 1.00 for a valid CDF, and the X_i's as well as the Y_i's should be in increasing order, otherwise an error is raised. + truncation_limits: 2D float ndarray + Not supported for multilinear CDF. 
+ Should be np.array((np.nan, np.nan)) """ super().__init__( @@ -1609,6 +1613,11 @@ def __init__( ) self.distribution = 'multilinear_CDF' + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + y_1 = theta[0, 1] if y_1 != 0.00: raise ValueError( @@ -1721,6 +1730,7 @@ def __init__( self, name, raw_samples, + truncation_limits=np.array((np.nan, np.nan)), f_map=None, anchor=None, ): @@ -1731,6 +1741,9 @@ def __init__( ---------- raw_samples: 1D float ndarray Samples from which to draw empirical realizations. + truncation_limits: 2D float ndarray + Not supported for Empirical RVs. + Should be np.array((np.nan, np.nan)) """ @@ -1740,6 +1753,11 @@ def __init__( anchor, ) self.distribution = 'empirical' + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + self._raw_samples = np.atleast_1d(raw_samples) def inverse_transform(self, values): @@ -1778,6 +1796,7 @@ def __init__( self, name, raw_samples, + truncation_limits=np.array((np.nan, np.nan)), f_map=None, anchor=None, ): @@ -1788,6 +1807,9 @@ def __init__( ---------- raw_samples: 1D float ndarray Samples from which to draw empirical realizations. + truncation_limits: 2D float ndarray + Not supported for CoupledEmpirical RVs. + Should be np.array((np.nan, np.nan)) """ super().__init__( @@ -1796,6 +1818,11 @@ def __init__( anchor, ) self.distribution = 'coupled_empirical' + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + self._raw_samples = np.atleast_1d(raw_samples) def inverse_transform(self, sample_size): @@ -1838,6 +1865,7 @@ def __init__( self, name, theta, + truncation_limits=np.array((np.nan, np.nan)), f_map=None, anchor=None, ): @@ -1850,6 +1878,9 @@ def __init__( ---------- theta: 1-element float ndarray The value. + truncation_limits: 2D float ndarray + Not supported for Deterministic RVs. + Should be np.array((np.nan, np.nan)) """ super().__init__( @@ -1858,6 +1889,11 @@ def __init__( anchor, ) self.distribution = 'deterministic' + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + self.theta = np.atleast_1d(theta) def inverse_transform(self, sample_size): @@ -1890,6 +1926,7 @@ def __init__( self, name, theta, + truncation_limits=np.array((np.nan, np.nan)), f_map=None, anchor=None, ): @@ -1902,6 +1939,9 @@ def __init__( Likelihood of each unique event (the last event's likelihood is adjusted automatically to ensure the likelihoods sum up to one) + truncation_limits: 2D float ndarray + Not supported for Multinomial RVs. + Should be np.array((np.nan, np.nan)) """ super().__init__( @@ -1909,8 +1949,11 @@ def __init__( f_map, anchor, ) + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) self.distribution = 'multinomial' - if np.sum(theta) > 1.00: raise ValueError( f"The set of p values provided for a multinomial " From 0f740c2a1ccf4a271e34c4b6071d4bbe3de65cca Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 04:26:06 -0800 Subject: [PATCH 16/48] Bump pandas version Pandas 2 is out. https://pandas.pydata.org/docs/whatsnew/v2.0.0.html Unit tests and additional trial runs pass with Pandas 2 installed. 
This commit raises the ceiling for the Pandas version to eliminate environment dependency incompatibilities for the users. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7ae66d053..516d89dba 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ def read(*filenames, **kwargs): install_requires=[ 'numpy>=1.22.0, <2.0', 'scipy>=1.7.0, <2.0', - 'pandas>=1.4.0, <2.0', + 'pandas>=1.4.0, <3.0', 'tables>=3.7.0', ], classifiers=[ From 6aa7166aa451b3bfa5cd84baeb3b79d0ef3adad3 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 05:02:15 -0800 Subject: [PATCH 17/48] Give up checking index/column dtypes in tests Windows sometimes uses different dtypes for ints there, but the values are the same. Instead of explicitly checking the OS, it's simpler to just not expect the dtypes to always match. The represented values should still be equal. --- pelicun/tests/test_base.py | 31 +++++---- pelicun/tests/test_model.py | 121 ++++++++++++++++++++++++++---------- 2 files changed, 107 insertions(+), 45 deletions(-) diff --git a/pelicun/tests/test_base.py b/pelicun/tests/test_base.py index 8a5fc8a9c..0f5a4e5e8 100644 --- a/pelicun/tests/test_base.py +++ b/pelicun/tests/test_base.py @@ -311,14 +311,9 @@ def test_convert_dtypes(): # Convert data types df_result = base.convert_dtypes(df_input) - # Verify dtypes - - if os.name == 'nt': - # Windows sometimes uses int32 and sometimes int64, breaking - # our tests. - df_expected['a'] = df_expected['a'].astype('int64') - - pd.testing.assert_frame_equal(df_result, df_expected) + pd.testing.assert_frame_equal( + df_result, df_expected, check_index_type=False, check_column_type=False + ) # No columns that can be converted @@ -327,7 +322,9 @@ def test_convert_dtypes(): ) df_expected = df_input.copy() df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_expected) + pd.testing.assert_frame_equal( + df_result, df_expected, check_index_type=False, check_column_type=False + ) # Columns with mixed types @@ -339,21 +336,31 @@ def test_convert_dtypes(): } ) df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_input) + pd.testing.assert_frame_equal( + df_result, df_input, check_index_type=False, check_column_type=False + ) # None values present df_input = pd.DataFrame({'a': [None, '2', '3'], 'b': ['4.0', None, '6.75']}) df_expected = pd.DataFrame({'a': [np.nan, 2, 3], 'b': [4.0, np.nan, 6.75]}) df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_expected, check_dtype=False) + pd.testing.assert_frame_equal( + df_result, + df_expected, + check_dtype=False, + check_index_type=False, + check_column_type=False, + ) # Empty dataframe df_input = pd.DataFrame({}) df_expected = pd.DataFrame({}) df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_expected) + pd.testing.assert_frame_equal( + df_result, df_expected, check_index_type=False, check_column_type=False + ) def test_convert_to_SimpleIndex(): diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index ace45e0ab..158c9d925 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -182,8 +182,17 @@ def test_load_sample(self, demand_model_with_sample, demand_model_with_sample_B) # level. 
Therefore, the two files are expected to result to the # same `obtained_sample` - pd.testing.assert_frame_equal(obtained_sample, obtained_sample_2) - pd.testing.assert_series_equal(obtained_units, obtained_units_2) + pd.testing.assert_frame_equal( + obtained_sample, + obtained_sample_2, + check_index_type=False, + check_column_type=False, + ) + pd.testing.assert_series_equal( + obtained_units, + obtained_units_2, + check_index_type=False, + ) # compare against the expected values for the sample expected_sample = pd.DataFrame( @@ -201,7 +210,12 @@ def test_load_sample(self, demand_model_with_sample, demand_model_with_sample_B) ), index=[0], ) - pd.testing.assert_frame_equal(expected_sample, obtained_sample) + pd.testing.assert_frame_equal( + expected_sample, + obtained_sample, + check_index_type=False, + check_column_type=False, + ) # compare against the expected values for the units expected_units = pd.Series( @@ -217,7 +231,11 @@ def test_load_sample(self, demand_model_with_sample, demand_model_with_sample_B) ), name='Units', ) - pd.testing.assert_series_equal(expected_units, obtained_units) + pd.testing.assert_series_equal( + expected_units, + obtained_units, + check_index_type=False, + ) def test_estimate_RID(self, demand_model_with_sample): demands = demand_model_with_sample.sample['PID'] @@ -298,16 +316,22 @@ def test_save_load_model_with_empirical( calibrated_demand_model.marginal_params, new_demand_model.marginal_params, atol=1e-4, + check_index_type=False, + check_column_type=False, ) pd.testing.assert_frame_equal( calibrated_demand_model.correlation, new_demand_model.correlation, atol=1e-4, + check_index_type=False, + check_column_type=False, ) pd.testing.assert_frame_equal( calibrated_demand_model.empirical_data, new_demand_model.empirical_data, atol=1e-4, + check_index_type=False, + check_column_type=False, ) # # todo: this currently fails @@ -385,7 +409,12 @@ def test_generate_sample(self, calibrated_demand_model): index=pd.Index((0, 1, 2), dtype='object'), ) pd.testing.assert_frame_equal( - expected_sample, obtained_sample, check_exact=False, atol=1e-4 + expected_sample, + obtained_sample, + check_exact=False, + atol=1e-4, + check_index_type=False, + check_column_type=False, ) # compare against the expected values for the units @@ -402,7 +431,11 @@ def test_generate_sample(self, calibrated_demand_model): ), name='Units', ) - pd.testing.assert_series_equal(expected_units, obtained_units) + pd.testing.assert_series_equal( + expected_units, + obtained_units, + check_index_type=False, + ) def test_generate_sample_with_demand_cloning(self, assessment_instance): # # used for debugging @@ -551,7 +584,9 @@ def test_convert_marginal_params(self, pelicun_model): ), ) - pd.testing.assert_frame_equal(expected_df, res) + pd.testing.assert_frame_equal( + expected_df, res, check_index_type=False, check_column_type=False + ) # a case with arg_units marginal_params = pd.DataFrame( @@ -584,7 +619,9 @@ def test_convert_marginal_params(self, pelicun_model): names=('cmp', 'loc', 'dir'), ), ) - pd.testing.assert_frame_equal(expected_df, res) + pd.testing.assert_frame_equal( + expected_df, res, check_index_type=False, check_column_type=False + ) class TestAssetModel(TestPelicunModel): @@ -664,23 +701,22 @@ def test_load_cmp_model_1(self, asset_model): ), ) - if os.name == 'nt': - expected_cmp_marginal_params['Blocks'] = expected_cmp_marginal_params[ - 'Blocks' - ].astype('int32') - pd.testing.assert_frame_equal( - expected_cmp_marginal_params, asset_model.cmp_marginal_params - ) - else: - 
pd.testing.assert_frame_equal( - expected_cmp_marginal_params, asset_model.cmp_marginal_params - ) + pd.testing.assert_frame_equal( + expected_cmp_marginal_params, + asset_model.cmp_marginal_params, + check_index_type=False, + check_column_type=False, + ) expected_cmp_units = pd.Series( data=['ea'], index=['component_a'], name='Units' ) - pd.testing.assert_series_equal(expected_cmp_units, asset_model.cmp_units) + pd.testing.assert_series_equal( + expected_cmp_units, + asset_model.cmp_units, + check_index_type=False, + ) def test_load_cmp_model_2(self, asset_model): # component marginals utilizing the keywords '--', 'all', 'top', 'roof' @@ -738,7 +774,11 @@ def test_load_cmp_model_2(self, asset_model): name='Units', ) - pd.testing.assert_series_equal(expected_cmp_units, asset_model.cmp_units) + pd.testing.assert_series_equal( + expected_cmp_units, + asset_model.cmp_units, + check_index_type=False, + ) def test_load_cmp_model_csv(self, asset_model): # load by directly specifying the csv file @@ -801,7 +841,12 @@ def test_generate_cmp_sample(self, asset_model): ), ) - pd.testing.assert_frame_equal(expected_cmp_sample, asset_model.cmp_sample) + pd.testing.assert_frame_equal( + expected_cmp_sample, + asset_model.cmp_sample, + check_index_type=False, + check_column_type=False, + ) # currently this is not working # def test_load_cmp_model_block_weights(self, asset_model): @@ -1022,7 +1067,12 @@ def test_save_load_sample(self, damage_model_with_sample, assessment_instance): # saving to a variable sample_from_variable = damage_model_with_sample.save_sample(save_units=False) - pd.testing.assert_frame_equal(sample_from_file, sample_from_variable) + pd.testing.assert_frame_equal( + sample_from_file, + sample_from_variable, + check_index_type=False, + check_column_type=False, + ) _, units_from_variable = damage_model_with_sample.save_sample( save_units=True ) @@ -1235,12 +1285,6 @@ def test__generate_dmg_sample(self, damage_model_model_loaded): assert list(res.index) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - assert capacity_sample.to_numpy().dtype == np.dtype('float64') - if os.name == 'nt': - assert lsds_sample.to_numpy().dtype == np.dtype('int32') - else: - assert lsds_sample.to_numpy().dtype == np.dtype('int64') - def test__get_required_demand_type(self, damage_model_model_loaded): pg_batch = damage_model_model_loaded._get_pg_batches(block_batch_size=1) batches = pg_batch.index.get_level_values(0).unique() @@ -1444,7 +1488,9 @@ def test__get_pg_batches_2(self, damage_model_model_loaded): columns=('Blocks',), ).astype('Int64') - pd.testing.assert_frame_equal(expected_res, res) + pd.testing.assert_frame_equal( + expected_res, res, check_index_type=False, check_column_type=False + ) res = damage_model_model_loaded._get_pg_batches(block_batch_size=1000) expected_res = pd.DataFrame( @@ -1461,7 +1507,9 @@ def test__get_pg_batches_2(self, damage_model_model_loaded): columns=('Blocks',), ).astype('Int64') - pd.testing.assert_frame_equal(expected_res, res) + pd.testing.assert_frame_equal( + expected_res, res, check_index_type=False, check_column_type=False + ) def test_calculate(self, damage_model_with_sample): # note: Due to inherent randomness, we can't assert the actual @@ -1587,12 +1635,19 @@ def test_load_sample_save_sample(self, loss_model): loss_model.load_sample(sample) - pd.testing.assert_frame_equal(sample, loss_model._sample) + pd.testing.assert_frame_equal( + sample, + loss_model._sample, + check_index_type=False, + check_column_type=False, + ) output = loss_model.save_sample(None) output.index = 
output.index.astype('int64') - pd.testing.assert_frame_equal(sample, output) + pd.testing.assert_frame_equal( + sample, output, check_index_type=False, check_column_type=False + ) def test_load_model(self, loss_model): data_path_1 = pd.DataFrame( From fc17b873a5983bf50c9f60ce1f73b0ef800c5b99 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 05:05:55 -0800 Subject: [PATCH 18/48] Add a few more keywords --- .pylintrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index 511dfb8fa..db32e7392 100644 --- a/.pylintrc +++ b/.pylintrc @@ -143,7 +143,7 @@ logging-format-style=old [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO +notes=FIXME,XXX,TODO,todo,debug # Regular expression of note tags to take in consideration. #notes-rgx= From 5cf0ede9f00c390f7557b5093ba231550142cfa6 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 05:13:27 -0800 Subject: [PATCH 19/48] Explicit dtype assignment in test --- pelicun/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pelicun/tests/test_base.py b/pelicun/tests/test_base.py index 0f5a4e5e8..7a921198b 100644 --- a/pelicun/tests/test_base.py +++ b/pelicun/tests/test_base.py @@ -305,7 +305,7 @@ def test_convert_dtypes(): # Expected DataFrame df_expected = pd.DataFrame({'a': [1, 2, 3], 'b': [4.0, 5.5, 6.75]}).astype( - {'a': int, 'b': float} + {'a': 'int64', 'b': 'float64'} ) # Convert data types From 361096bf80039fcec1c3693da1dccb36272e9d99 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 05:20:56 -0800 Subject: [PATCH 20/48] Explicit dtype assignment (2) --- pelicun/tests/test_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 158c9d925..2b9244ac3 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -699,7 +699,7 @@ def test_load_cmp_model_1(self, asset_model): ), names=('cmp', 'loc', 'dir', 'uid'), ), - ) + ).astype({'Theta_0': 'float64', 'Blocks': 'int64'}) pd.testing.assert_frame_equal( expected_cmp_marginal_params, From 3f94ac93cd5a82a43a54f95d3f84435d091914e0 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 05:38:49 -0800 Subject: [PATCH 21/48] Explicit dtype assignment (3) --- pelicun/tests/test_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 2b9244ac3..8cddf29ba 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -706,6 +706,7 @@ def test_load_cmp_model_1(self, asset_model): asset_model.cmp_marginal_params, check_index_type=False, check_column_type=False, + check_dtype=False ) expected_cmp_units = pd.Series( From fdb5eb22a3402b2836bfb56f5913affeb84a7602 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 05:39:14 -0800 Subject: [PATCH 22/48] Reformatting & Linting Begin including `Hazus_Earthquake_IM.py`, `Hazus_Earthquake_Story.py`, and `export_DB.py` in flake8 checks. 
--- .flake8 | 2 +- pelicun/resources/auto/Hazus_Earthquake_IM.py | 311 +++++++----------- .../resources/auto/Hazus_Earthquake_Story.py | 146 ++++---- pelicun/tools/export_DB.py | 2 +- 4 files changed, 203 insertions(+), 258 deletions(-) diff --git a/.flake8 b/.flake8 index c8f6175c9..06c55845f 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] max-line-length = 85 ignore = E203, E241, E701, W503 -exclude = db.py,flycheck*,Hazus_Earthquake_IM.py,Hazus_Earthquake_Story.py,export_DB.py \ No newline at end of file +exclude = db.py,flycheck* \ No newline at end of file diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 1c80c2abc..75f2ba7e0 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -39,19 +39,16 @@ import pandas as pd -ap_DesignLevel = { - 1940: 'PC', - 1940: 'LC', - 1975: 'MC', - 2100: 'HC' -} +ap_DesignLevel = {1940: 'LC', 1975: 'MC', 2100: 'HC'} +# original: +# ap_DesignLevel = {1940: 'PC', 1940: 'LC', 1975: 'MC', 2100: 'HC'} +# Note that the duplicated key is ignored, and Python keeps the last +# entry. -ap_DesignLevel_W1 = { - 0: 'PC', - 0: 'LC', - 1975: 'MC', - 2100: 'HC' -} +ap_DesignLevel_W1 = {0: 'LC', 1975: 'MC', 2100: 'HC'} +# original: +# ap_DesignLevel_W1 = {0: 'PC', 0: 'LC', 1975: 'MC', 2100: 'HC'} +# same thing applies ap_Occupancy = { 'Other/Unknown': 'RES3', @@ -66,26 +63,33 @@ 'Industrial - Warehouse': 'IND2', 'Industrial - Heavy': 'IND1', 'Retail': 'COM1', - 'Parking' : 'COM10' + 'Parking': 'COM10', } -def convertBridgeToHAZUSclass(AIM): - #TODO: replace labels in AIM with standard CamelCase versions +def convertBridgeToHAZUSclass(AIM): + # TODO: replace labels in AIM with standard CamelCase versions structureType = AIM["BridgeClass"] - # if type(structureType)== str and len(structureType)>3 and structureType[:3] == "HWB" and 0 < int(structureType[3:]) and 29 > int(structureType[3:]): + # if ( + # type(structureType) == str + # and len(structureType) > 3 + # and structureType[:3] == "HWB" + # and 0 < int(structureType[3:]) + # and 29 > int(structureType[3:]) + # ): # return AIM["bridge_class"] state = AIM["StateCode"] - yr_built = AIM["YearBuilt"] + yr_built = AIM["YearBuilt"] num_span = AIM["NumOfSpans"] - len_max_span = AIM["MaxSpanLength"] + len_max_span = AIM["MaxSpanLength"] - seismic = ((int(state)==6 and int(yr_built)>=1975) or - (int(state)!=6 and int(yr_built)>=1990)) + seismic = (int(state) == 6 and int(yr_built) >= 1975) or ( + int(state) != 6 and int(yr_built) >= 1990 + ) # Use a catch-all, other class by default bridge_class = "HWB28" - + if len_max_span > 150: if not seismic: bridge_class = "HWB1" @@ -98,7 +102,7 @@ def convertBridgeToHAZUSclass(AIM): else: bridge_class = "HWB4" - elif structureType in list(range(101,107)): + elif structureType in list(range(101, 107)): if not seismic: if state != 6: bridge_class = "HWB5" @@ -107,21 +111,21 @@ def convertBridgeToHAZUSclass(AIM): else: bridge_class = "HWB7" - elif structureType in [205,206]: + elif structureType in [205, 206]: if not seismic: bridge_class = "HWB8" else: bridge_class = "HWB9" - elif structureType in list(range(201,207)): + elif structureType in list(range(201, 207)): if not seismic: bridge_class = "HWB10" else: bridge_class = "HWB11" - elif structureType in list(range(301,307)): + elif structureType in list(range(301, 307)): if not seismic: - if len_max_span>=20: + if len_max_span >= 20: if state != 6: bridge_class = "HWB12" else: @@ -134,9 +138,9 @@ def 
convertBridgeToHAZUSclass(AIM): else: bridge_class = "HWB14" - elif structureType in list(range(402,411)): + elif structureType in list(range(402, 411)): if not seismic: - if len_max_span>=20: + if len_max_span >= 20: bridge_class = "HWB15" elif state != 6: bridge_class = "HWB26" @@ -145,7 +149,7 @@ def convertBridgeToHAZUSclass(AIM): else: bridge_class = "HWB16" - elif structureType in list(range(501,507)): + elif structureType in list(range(501, 507)): if not seismic: if state != 6: bridge_class = "HWB17" @@ -154,130 +158,63 @@ def convertBridgeToHAZUSclass(AIM): else: bridge_class = "HWB19" - elif structureType in [605,606]: + elif structureType in [605, 606]: if not seismic: bridge_class = "HWB20" else: bridge_class = "HWB21" - elif structureType in list(range(601,608)): + elif structureType in list(range(601, 608)): if not seismic: bridge_class = "HWB22" else: bridge_class = "HWB23" - - - #TODO: review and add HWB24-27 rules - #TODO: also double check rules for HWB10-11 and HWB22-23 + + # TODO: review and add HWB24-27 rules + # TODO: also double check rules for HWB10-11 and HWB22-23 return bridge_class - # original code by JZ - """ - if not seismic and len_max_span > 150: - return "HWB1" - elif seismic and len_max_span > 150: - return "HWB2" - elif not seismic and num_span == 1: - return "HWB3" - elif seismic and num_span == 1: - return "HWB4" - elif not seismic and 101 <= structureType and structureType <= 106 and state != 6: - return "HWB5" - elif not seismic and 101 <= structureType and structureType <= 106 and state ==6: - return "HWB6" - elif seismic and 101 <= structureType and structureType <= 106: - return "HWB7" - elif not seismic and 205 <= structureType and structureType <= 206: - return "HWB8" - elif seismic and 205 <= structureType and structureType <= 206: - return "HWB9" - elif not seismic and 201 <= structureType and structureType <= 206: - return "HWB10" - elif seismic and 201 <= structureType and structureType <= 206: - return "HWB11" - elif not seismic and 301 <= structureType and structureType <= 306 and state != 6: - return "HWB12" - elif not seismic and 301 <= structureType and structureType <= 306 and state == 6: - return "HWB13" - elif seismic and 301 <= structureType and structureType <= 306: - return "HWB14" - elif not seismic and 402 <= structureType and structureType <= 410: - return "HWB15" - elif seismic and 402 <= structureType and structureType <= 410: - return "HWB16" - elif not seismic and 501 <= structureType and structureType <= 506 and state != 6: - return "HWB17" - elif not seismic and 501 <= structureType and structureType <= 506 and state == 6: - return "HWB18" - elif seismic and 501 <= structureType and structureType <= 506: - return "HWB19" - elif not seismic and 605 <= structureType and structureType <= 606: - return "HWB20" - elif seismic and 605 <= structureType and structureType <= 606: - return "HWB21" - elif not seismic and 601 <= structureType and structureType <= 607: - return "HWB22" - elif seismic and 601 <= structureType and structureType <= 607: - return "HWB23" - - elif not seismic and 301 <= structureType and structureType <= 306 and state != 6: - return "HWB24" - elif not seismic and 301 <= structureType and structureType <= 306 and state == 6: - return "HWB25" - elif not seismic and 402 <= structureType and structureType <= 410 and state != 6: - return "HWB26" - elif not seismic and 402 <= structureType and structureType <= 410 and state == 6: - return "HWB27" - else: - return "HWB28" - """ - def convertTunnelToHAZUSclass(AIM): 
- if ("Bored" in AIM["ConstructType"]) or ("Drilled" in AIM["ConstructType"]): return "HTU1" elif ("Cut" in AIM["ConstructType"]) or ("Cover" in AIM["ConstructType"]): return "HTU2" else: - # Select HTU2 for unclassfied tunnels because it is more conservative. - return "HTU2" + # Select HTU2 for unclassfied tunnels because it is more conservative. + return "HTU2" -def convertRoadToHAZUSclass(AIM): +def convertRoadToHAZUSclass(AIM): if AIM["RoadType"] in ["Primary", "Secondary"]: return "HRD1" - elif AIM["RoadType"]=="Residential": + elif AIM["RoadType"] == "Residential": return "HRD2" else: # many unclassified roads are urban roads - return "HRD2" - -def convert_story_rise(structureType, stories): + return "HRD2" +def convert_story_rise(structureType, stories): if structureType in ['W1', 'W2', 'S3', 'PC1', 'MH']: - # These archetypes have no rise information in their IDs rise = None else: - # First, check if we have valid story information try: - stories = int(stories) - except: - - raise ValueError('Missing "NumberOfStories" information, ' - 'cannot infer rise attribute of archetype') + except (ValueError, TypeError): + raise ValueError( + 'Missing "NumberOfStories" information, ' + 'cannot infer `rise` attribute of archetype' + ) if structureType == 'RM1': - if stories <= 3: rise = "L" @@ -291,9 +228,18 @@ def convert_story_rise(structureType, stories): else: rise = "M" - elif structureType in ['S1', 'S2', 'S4', 'S5', 'C1', 'C2', 'C3', \ - 'PC2', 'RM2']: - if stories <=3: + elif structureType in [ + 'S1', + 'S2', + 'S4', + 'S5', + 'C1', + 'C2', + 'C3', + 'PC2', + 'RM2', + ]: + if stories <= 3: rise = "L" elif stories <= 7: @@ -301,9 +247,10 @@ def convert_story_rise(structureType, stories): else: rise = "H" - + return rise + def auto_populate(AIM): """ Automatically creates a performance model for PGA-based Hazus EQ analysis. @@ -311,30 +258,30 @@ def auto_populate(AIM): Parameters ---------- AIM: dict - Asset Information Model - provides features of the asset that can be + Asset Information Model - provides features of the asset that can be used to infer attributes of the performance model. Returns ------- GI_ap: dict - Extended General Information - extends the GI from the input AIM with - additional inferred features. These features are typically used in - intermediate steps during the auto-population and are not required - for the performance assessment. They are returned to allow reviewing + Extended General Information - extends the GI from the input AIM with + additional inferred features. These features are typically used in + intermediate steps during the auto-population and are not required + for the performance assessment. They are returned to allow reviewing how these latent variables affect the final results. DL_ap: dict - Damage and Loss parameters - these define the performance model and + Damage and Loss parameters - these define the performance model and details of the calculation. CMP: DataFrame - Component assignment - Defines the components (in rows) and their + Component assignment - Defines the components (in rows) and their location, direction, and quantity (in columns). 
""" # extract the General Information GI = AIM.get('GeneralInformation', None) - if GI==None: - #TODO: show an error message + if GI is None: + # TODO: show an error message pass # initialize the auto-populated GI @@ -343,15 +290,14 @@ def auto_populate(AIM): assetType = AIM["assetType"] ground_failure = AIM["Applications"]["DL"]["ApplicationData"]["ground_failure"] - if assetType=="Buildings": - + if assetType == "Buildings": # get the building parameters - bt = GI['StructureType'] #building type + bt = GI['StructureType'] # building type # get the design level dl = GI.get('DesignLevel', None) - if dl == None: + if dl is None: # If there is no DesignLevel provided, we assume that the YearBuilt is # available year_built = GI['YearBuilt'] @@ -360,10 +306,10 @@ def auto_populate(AIM): DesignL = ap_DesignLevel_W1 else: DesignL = ap_DesignLevel - + for year in sorted(DesignL.keys()): if year_built <= year: - dl = DesignL[year] + dl = DesignL[year] break GI_ap['DesignLevel'] = dl @@ -381,24 +327,24 @@ def auto_populate(AIM): else: LF = f'LF.{bt}.{dl}' - CMP = pd.DataFrame( - {f'{LF}': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T + {f'{LF}': ['ea', 1, 1, 1, 'N/A']}, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + ).T # if needed, add components to simulate damage from ground failure if ground_failure: - foundation_type = 'S' FG_GF_H = f'GF.H.{foundation_type}' FG_GF_V = f'GF.V.{foundation_type}' - + CMP_GF = pd.DataFrame( - {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], - f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] + { + f'{FG_GF_H}': ['ea', 1, 1, 1, 'N/A'], + f'{FG_GF_V}': ['ea', 1, 3, 1, 'N/A'], + }, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], ).T CMP = pd.concat([CMP, CMP_GF], axis=0) @@ -412,42 +358,39 @@ def auto_populate(AIM): ot = ap_Occupancy[GI['OccupancyClass']] else: ot = GI['OccupancyClass'] - + DL_ap = { "Asset": { "ComponentAssignmentFile": "CMP_QNT.csv", "ComponentDatabase": "Hazus Earthquake - Buildings", "NumberOfStories": f"{stories}", "OccupancyType": f"{ot}", - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { + "PlanArea": "1", }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, "Losses": { "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Buildings", - "MapApproach": "Automatic" + "MapApproach": "Automatic", } - } + }, } elif assetType == "TransportationNetwork": - inf_type = GI["assetSubtype"] - - if inf_type == "HwyBridge": + if inf_type == "HwyBridge": # get the bridge class bt = convertBridgeToHAZUSclass(GI) GI_ap['BridgeHazusClass'] = bt CMP = pd.DataFrame( - {f'HWB.GS.{bt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], - f'HWB.GF': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] + { + f'HWB.GS.{bt[3:]}': ['ea', 1, 1, 1, 'N/A'], + 'HWB.GF': ['ea', 1, 1, 1, 'N/A'], + }, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], ).T DL_ap = { @@ -455,31 +398,29 @@ def auto_populate(AIM): "ComponentAssignmentFile": "CMP_QNT.csv", "ComponentDatabase": "Hazus Earthquake - Transportation", "BridgeHazusClass": bt, - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { + "PlanArea": "1", }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, "Losses": { "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Transportation", - "MapApproach": "Automatic" + "MapApproach": "Automatic", } - } 
+ }, } elif inf_type == "HwyTunnel": - # get the tunnel class tt = convertTunnelToHAZUSclass(GI) GI_ap['TunnelHazusClass'] = tt CMP = pd.DataFrame( - {f'HTU.GS.{tt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], - f'HTU.GF': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] + { + f'HTU.GS.{tt[3:]}': ['ea', 1, 1, 1, 'N/A'], + 'HTU.GF': ['ea', 1, 1, 1, 'N/A'], + }, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], ).T DL_ap = { @@ -487,29 +428,25 @@ def auto_populate(AIM): "ComponentAssignmentFile": "CMP_QNT.csv", "ComponentDatabase": "Hazus Earthquake - Transportation", "TunnelHazusClass": tt, - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { + "PlanArea": "1", }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, "Losses": { "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Transportation", - "MapApproach": "Automatic" + "MapApproach": "Automatic", } - } + }, } elif inf_type == "Roadway": - # get the road class rt = convertRoadToHAZUSclass(GI) GI_ap['RoadHazusClass'] = rt CMP = pd.DataFrame( - {f'HRD.GF.{rt[3:]}':[ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] + {f'HRD.GF.{rt[3:]}': ['ea', 1, 1, 1, 'N/A']}, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], ).T DL_ap = { @@ -517,23 +454,23 @@ def auto_populate(AIM): "ComponentAssignmentFile": "CMP_QNT.csv", "ComponentDatabase": "Hazus Earthquake - Transportation", "RoadHazusClass": rt, - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { + "PlanArea": "1", }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, "Losses": { "Repair": { "ConsequenceDatabase": "Hazus Earthquake - Transportation", - "MapApproach": "Automatic" + "MapApproach": "Automatic", } - } + }, } else: print("subtype not supported in HWY") else: - print(f"AssetType: {assetType} is not supported in Hazus Earthquake IM DL method") + print( + f"AssetType: {assetType} is not supported " + f"in Hazus Earthquake IM DL method" + ) return GI_ap, DL_ap, CMP diff --git a/pelicun/resources/auto/Hazus_Earthquake_Story.py b/pelicun/resources/auto/Hazus_Earthquake_Story.py index d44f59f41..ef72836ea 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_Story.py +++ b/pelicun/resources/auto/Hazus_Earthquake_Story.py @@ -39,19 +39,11 @@ import pandas as pd -ap_DesignLevel = { - 1940: 'PC', - 1940: 'LC', - 1975: 'MC', - 2100: 'HC' -} +ap_DesignLevel = {1940: 'LC', 1975: 'MC', 2100: 'HC'} +# ap_DesignLevel = {1940: 'PC', 1940: 'LC', 1975: 'MC', 2100: 'HC'} -ap_DesignLevel_W1 = { - 0: 'PC', - 0: 'LC', - 1975: 'MC', - 2100: 'HC' -} +ap_DesignLevel_W1 = {0: 'LC', 1975: 'MC', 2100: 'HC'} +# ap_DesignLevel_W1 = {0: 'PC', 0: 'LC', 1975: 'MC', 2100: 'HC'} ap_Occupancy = { 'Other/Unknown': 'RES3', @@ -66,15 +58,16 @@ 'Industrial - Warehouse': 'IND2', 'Industrial - Heavy': 'IND1', 'Retail': 'COM1', - 'Parking' : 'COM10' + 'Parking': 'COM10', } convert_design_level = { - 'High-Code' : 'HC', - 'Moderate-Code': 'MC', - 'Low-Code' : 'LC', - 'Pre-Code' : 'PC' - } + 'High-Code': 'HC', + 'Moderate-Code': 'MC', + 'Low-Code': 'LC', + 'Pre-Code': 'PC', +} + def story_scale(stories, comp_type): if comp_type == 'NSA': @@ -97,7 +90,7 @@ def story_scale(stories, comp_type): elif stories == 9: return 2.20 elif (stories >= 10) and (stories < 30): - return 2.30 + (stories-10)*0.04 + return 2.30 + (stories - 10) * 0.04 elif stories >= 30: return 3.10 else: @@ -123,7 +116,7 @@ def story_scale(stories, 
comp_type): elif stories == 9: return 4.50 elif (stories >= 10) and (stories < 50): - return 4.50 + (stories-10)*0.07 + return 4.50 + (stories - 10) * 0.07 elif stories >= 50: return 7.30 else: @@ -137,30 +130,30 @@ def auto_populate(AIM): Parameters ---------- AIM: dict - Asset Information Model - provides features of the asset that can be + Asset Information Model - provides features of the asset that can be used to infer attributes of the performance model. Returns ------- GI_ap: dict - Extended General Information - extends the GI from the input AIM with - additional inferred features. These features are typically used in - intermediate steps during the auto-population and are not required - for the performance assessment. They are returned to allow reviewing + Extended General Information - extends the GI from the input AIM with + additional inferred features. These features are typically used in + intermediate steps during the auto-population and are not required + for the performance assessment. They are returned to allow reviewing how these latent variables affect the final results. DL_ap: dict - Damage and Loss parameters - these define the performance model and + Damage and Loss parameters - these define the performance model and details of the calculation. CMP: DataFrame - Component assignment - Defines the components (in rows) and their + Component assignment - Defines the components (in rows) and their location, direction, and quantity (in columns). """ # extract the General Information GI = AIM.get('GeneralInformation', None) - if GI==None: - #TODO: show an error message + if GI is None: + # TODO: show an error message pass # initialize the auto-populated GI @@ -169,15 +162,14 @@ def auto_populate(AIM): assetType = AIM["assetType"] ground_failure = AIM["Applications"]["DL"]["ApplicationData"]["ground_failure"] - if assetType=="Buildings": - + if assetType == "Buildings": # get the building parameters - bt = GI['StructureType'] #building type + bt = GI['StructureType'] # building type # get the design level dl = GI.get('DesignLevel', None) - if dl == None: + if dl is None: # If there is no DesignLevel provided, we assume that the YearBuilt is # available year_built = GI['YearBuilt'] @@ -186,10 +178,10 @@ def auto_populate(AIM): DesignL = ap_DesignLevel_W1 else: DesignL = ap_DesignLevel - + for year in sorted(DesignL.keys()): if year_built <= year: - dl = DesignL[year] + dl = DesignL[year] break GI_ap['DesignLevel'] = dl @@ -198,35 +190,53 @@ def auto_populate(AIM): stories = GI.get('NumberOfStories', None) FG_S = f'STR.{bt}.{dl}' - FG_NSD = f'NSD' + FG_NSD = 'NSD' FG_NSA = f'NSA.{dl}' CMP = pd.DataFrame( - {f'{FG_S}': ['ea', 'all', '1, 2', f"{story_scale(stories, 'S')/stories/2.}", 'N/A'], - f'{FG_NSA}': ['ea', 'all', 0, f"{story_scale(stories, 'NSA')/stories}", 'N/A'], - f'{FG_NSD}': ['ea', 'all', '1, 2', f"{story_scale(stories, 'NSD')/stories/2.}", 'N/A']}, - index = ['Units','Location','Direction', - 'Theta_0','Family'] - - ).T + { + f'{FG_S}': [ + 'ea', + 'all', + '1, 2', + f"{story_scale(stories, 'S')/stories/2.}", + 'N/A', + ], + f'{FG_NSA}': [ + 'ea', + 'all', + 0, + f"{story_scale(stories, 'NSA')/stories}", + 'N/A', + ], + f'{FG_NSD}': [ + 'ea', + 'all', + '1, 2', + f"{story_scale(stories, 'NSD')/stories/2.}", + 'N/A', + ], + }, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + ).T # if needed, add components to simulate damage from ground failure if ground_failure: - foundation_type = 'S' FG_GF_H = f'GF.H.{foundation_type}' FG_GF_V = 
f'GF.V.{foundation_type}' - + CMP_GF = pd.DataFrame( - {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], - f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] + { + f'{FG_GF_H}': ['ea', 1, 1, 1, 'N/A'], + f'{FG_GF_V}': ['ea', 1, 3, 1, 'N/A'], + }, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], ).T CMP = pd.concat([CMP, CMP_GF], axis=0) - # get the occupancy class if GI['OccupancyClass'] in ap_Occupancy.keys(): ot = ap_Occupancy[GI['OccupancyClass']] @@ -236,15 +246,15 @@ def auto_populate(AIM): plan_area = GI.get('PlanArea', 1.0) repair_config = { - "ConsequenceDatabase": "Hazus Earthquake - Stories", - "MapApproach": "Automatic", - "DecisionVariables": { - "Cost": True, - "Carbon": False, - "Energy": False, - "Time": False - } - } + "ConsequenceDatabase": "Hazus Earthquake - Stories", + "MapApproach": "Automatic", + "DecisionVariables": { + "Cost": True, + "Carbon": False, + "Energy": False, + "Time": False, + }, + } DL_ap = { "Asset": { @@ -252,19 +262,17 @@ def auto_populate(AIM): "ComponentDatabase": "Hazus Earthquake - Stories", "NumberOfStories": f"{stories}", "OccupancyType": f"{ot}", - "PlanArea": str(plan_area) - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" + "PlanArea": str(plan_area), }, - "Demands": { - }, - "Losses": { - "Repair": repair_config - } + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + "Losses": {"Repair": repair_config}, } - + else: - print(f"AssetType: {assetType} is not supported in Hazus Earthquake Story-based DL method") + print( + f"AssetType: {assetType} is not supported " + f"in Hazus Earthquake Story-based DL method" + ) return GI_ap, DL_ap, CMP diff --git a/pelicun/tools/export_DB.py b/pelicun/tools/export_DB.py index 1ec0647ff..d1f87f4d1 100644 --- a/pelicun/tools/export_DB.py +++ b/pelicun/tools/export_DB.py @@ -82,7 +82,7 @@ def export_DB(data_path, target_dir): encoding='utf-8') as f: json.dump(pop_dict, f, indent=2) - except: + except (ValueError, NotImplementedError, FileNotFoundError): pass From bd42efa3c1f65496261c41c5777d304512012a96 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 06:13:00 -0800 Subject: [PATCH 23/48] Reformating & Linting Getting ready to work on `db.py` --- .flake8 | 2 +- .pylintrc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.flake8 b/.flake8 index 06c55845f..54d60194d 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] max-line-length = 85 ignore = E203, E241, E701, W503 -exclude = db.py,flycheck* \ No newline at end of file +exclude = flycheck* \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index db32e7392..01bccf6ed 100644 --- a/.pylintrc +++ b/.pylintrc @@ -6,7 +6,7 @@ init-hook='import sys; sys.path.append("."); sys.path.append("../"); sys.path.ap # Files or directories to be skipped. They should be base names, not # paths. -ignore=db.py,flycheck_*.py +ignore=flycheck_* # Add files or directories matching the regex patterns to the ignore-list. The # regex matches against paths and can be in Posix or Windows format. 
From 0e9fff8f75eb71422c6e799bb42b617bd2d5e7ef Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 06:13:53 -0800 Subject: [PATCH 24/48] Refactoring & Linting Reorder imports, silence `too-many...` warnings --- pelicun/db.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pelicun/db.py b/pelicun/db.py index dd0b99ed6..9f527f9f0 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -57,11 +57,11 @@ import re import json +from pathlib import Path +from copy import deepcopy import numpy as np from scipy.stats import norm import pandas as pd -from pathlib import Path -from copy import deepcopy from . import base from .uq import fit_distribution_to_percentiles @@ -69,6 +69,9 @@ idx = base.idx +# pylint: disable=too-many-statements +# pylint: disable=too-many-locals + def parse_DS_Hierarchy(DSH): """ Parses the FEMA P58 DS hierarchy into a set of arrays. From 40fc5fc915bdb328a9090b4e49fef9c2b8f0c463 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 06:15:44 -0800 Subject: [PATCH 25/48] Refactoring & Linting Specify encoding --- pelicun/db.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pelicun/db.py b/pelicun/db.py index 9f527f9f0..808b889a6 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -132,7 +132,7 @@ def create_FEMA_P58_fragility_db(source_file, # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} @@ -1780,7 +1780,7 @@ def create_Hazus_EQ_fragility_db(source_file, # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} @@ -2206,7 +2206,7 @@ def create_Hazus_EQ_fragility_db(source_file, df_db.to_csv(target_data_file) # save the metadata - with open(target_meta_file, 'w+') as f: + with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) print("Successfully parsed and saved the fragility data from Hazus EQ") @@ -2254,7 +2254,7 @@ def create_Hazus_EQ_repair_db(source_file, # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} @@ -2498,7 +2498,7 @@ def create_Hazus_EQ_repair_db(source_file, df_db.to_csv(target_data_file) # save the metadata - later - with open(target_meta_file, 'w+') as f: + with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) print("Successfully parsed and saved the repair consequence data from Hazus " From fbbdae6bc074c480afda0c72bef83607d5132805 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 06:17:45 -0800 Subject: [PATCH 26/48] Refactoring & Linting Remove `== True` after `pd.isna(...)` --- pelicun/db.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pelicun/db.py b/pelicun/db.py index 808b889a6..d4b7a3c42 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -386,7 +386,7 @@ def create_FEMA_P58_fragility_db(source_file, ds_id = ds[2] repair_action = cmp_meta[f"DS_{ds_id}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" ls_meta.update({f"DS{ds_id}": { @@ -420,8 +420,10 @@ def create_FEMA_P58_fragility_db(source_file, ds_pure_id = ds_map[::-1].find('1') + 1 - 
repair_action = cmp_meta[f"DS_{ds_pure_id}_Repair_Description"] - if pd.isna(repair_action) == True: + repair_action = cmp_meta[ + f"DS_{ds_pure_id}_Repair_Description" + ] + if pd.isna(repair_action): repair_action = "" ls_meta.update({f"DS{ds_id}": { @@ -465,7 +467,7 @@ def create_FEMA_P58_fragility_db(source_file, theta_1 = getattr(cmp, f"DS_{ds_id}_Total_Dispersion_Beta") repair_action = cmp_meta[f"DS_{ds_id}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" ls_meta.update({f"DS{ds_id}": { @@ -925,7 +927,7 @@ def create_FEMA_P58_repair_db( ds_pure_id = ds_map[::-1].find('1') + 1 repair_action = cmp_meta[f"DS_{ds_pure_id}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" meta_data['DamageStates'].update({f"DS{DS_i}": { @@ -979,7 +981,7 @@ def create_FEMA_P58_repair_db( incomplete_cost = True repair_action = cmp_meta[f"DS_{DS_i}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" meta_data['DamageStates'].update({ From 00815c560f726634011ff37cb0aff2aac5eed6a6 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 06:19:05 -0800 Subject: [PATCH 27/48] Refactoring & Linting Using `is` for `None` checks --- pelicun/db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pelicun/db.py b/pelicun/db.py index d4b7a3c42..1a6f02e73 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -1892,7 +1892,7 @@ def create_Hazus_EQ_fragility_db(source_file, df_db.loc[counter, 'Demand-Offset'] = 0 # add metadata - if hc != None: + if hc is not None: cmp_meta = { "Description": ( frag_meta['Meta']['Collections']['STR']['Description']+", "+ @@ -2074,7 +2074,7 @@ def create_Hazus_EQ_fragility_db(source_file, df_db.loc[counter, 'Demand-Offset'] = 0 # add metadata - if hc != None: + if hc is not None: cmp_meta = { "Description": ( frag_meta['Meta']['Collections']['LF']['Description']+", "+ From c061ac46a7079973fae5dda3a0dd462fb60a0349 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 9 Mar 2024 06:22:16 -0800 Subject: [PATCH 28/48] Refactoring & Linting - Code reformatting - Remove unused `target_meta_file` from `create_Hazus_EQ_bldg_injury_db` --- pelicun/db.py | 1498 +++++++++++++++++++++++++++++-------------------- 1 file changed, 874 insertions(+), 624 deletions(-) diff --git a/pelicun/db.py b/pelicun/db.py index 1a6f02e73..2cad7faaf 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -72,6 +72,7 @@ # pylint: disable=too-many-statements # pylint: disable=too-many-locals + def parse_DS_Hierarchy(DSH): """ Parses the FEMA P58 DS hierarchy into a set of arrays. 
@@ -87,18 +88,20 @@ def parse_DS_Hierarchy(DSH): DSH = DSH[4:] elif DSH[:5] in {'MutEx', 'Simul'}: closing_pos = DSH.find(')') - subDSH = DSH[:closing_pos + 1] - DSH = DSH[closing_pos + 2:] + subDSH = DSH[: closing_pos + 1] + DSH = DSH[closing_pos + 2 :] DS_setup.append([subDSH[:5]] + subDSH[6:-1].split(',')) return DS_setup -def create_FEMA_P58_fragility_db(source_file, - meta_file='', - target_data_file='damage_DB_FEMA_P58_2nd.csv', - target_meta_file='damage_DB_FEMA_P58_2nd.json'): +def create_FEMA_P58_fragility_db( + source_file, + meta_file='', + target_data_file='damage_DB_FEMA_P58_2nd.csv', + target_meta_file='damage_DB_FEMA_P58_2nd.json', +): """ Create a fragility parameter database based on the FEMA P58 data @@ -126,9 +129,14 @@ def create_FEMA_P58_fragility_db(source_file, """ # parse the source file - df = pd.read_excel(source_file, sheet_name='Summary', header=2, index_col=1, - true_values=["YES", "Yes", "yes"], - false_values=["NO", "No", "no"]) + df = pd.read_excel( + source_file, + sheet_name='Summary', + header=2, + index_col=1, + true_values=["YES", "Yes", "yes"], + false_values=["NO", "No", "no"], + ) # parse the extra metadata file if Path(meta_file).is_file(): @@ -233,10 +241,10 @@ def create_FEMA_P58_fragility_db(source_file, "LS4-Family", "LS4-Theta_0", "LS4-Theta_1", - "LS4-DamageStateWeights" + "LS4-DamageStateWeights", ], index=df_db_source.index, - dtype=float + dtype=float, ) # initialize the dictionary that stores the fragility metadata @@ -244,7 +252,6 @@ def create_FEMA_P58_fragility_db(source_file, # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - frag_meta = frag_meta["_GeneralInformation"] # remove the decision variable part from the general info @@ -252,9 +259,6 @@ def create_FEMA_P58_fragility_db(source_file, meta_dict.update({"_GeneralInformation": frag_meta}) - - - # conversion dictionary for demand types convert_demand_type = { 'Story Drift Ratio': "Peak Interstory Drift Ratio", @@ -262,7 +266,7 @@ def create_FEMA_P58_fragility_db(source_file, 'Effective Drift': "Peak Effective Drift Ratio", 'Link Beam Chord Rotation': "Peak Link Beam Chord Rotation", 'Peak Floor Acceleration': "Peak Floor Acceleration", - 'Peak Floor Velocity': "Peak Floor Velocity" + 'Peak Floor Velocity': "Peak Floor Velocity", } # conversion dictionary for demand unit names @@ -270,14 +274,13 @@ def create_FEMA_P58_fragility_db(source_file, 'Unit less': 'unitless', 'Radians': 'rad', 'g': 'g', - 'meter/sec': 'mps' + 'meter/sec': 'mps', } # for each component... # (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) 
for cmp in df_db_source.itertuples(): - # create a dotted component index ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -289,14 +292,16 @@ def create_FEMA_P58_fragility_db(source_file, incomplete = False # store demand specifications - df_db.loc[cmp.Index, 'Demand-Type'] = ( - convert_demand_type[cmp.Demand_Parameter_value]) - df_db.loc[cmp.Index, 'Demand-Unit'] = ( - convert_demand_unit[cmp.Demand_Parameter_unit]) - df_db.loc[cmp.Index, 'Demand-Offset'] = ( - int(cmp.Demand_Location_use_floor_above_YesNo)) - df_db.loc[cmp.Index, 'Demand-Directional'] = ( - int(cmp.Directional)) + df_db.loc[cmp.Index, 'Demand-Type'] = convert_demand_type[ + cmp.Demand_Parameter_value + ] + df_db.loc[cmp.Index, 'Demand-Unit'] = convert_demand_unit[ + cmp.Demand_Parameter_unit + ] + df_db.loc[cmp.Index, 'Demand-Offset'] = int( + cmp.Demand_Location_use_floor_above_YesNo + ) + df_db.loc[cmp.Index, 'Demand-Directional'] = int(cmp.Directional) # parse the damage state hierarchy DS_setup = parse_DS_Hierarchy(cmp.DS_Hierarchy) @@ -315,13 +320,20 @@ def create_FEMA_P58_fragility_db(source_file, # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: {cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -337,12 +349,11 @@ def create_FEMA_P58_fragility_db(source_file, "Comments": comments, "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], - "LimitStates": {} + "LimitStates": {}, } # now look at each Limit State for LS_i, LS_contents in enumerate(DS_setup): - LS_i = LS_i + 1 LS_contents = np.atleast_1d(LS_contents) @@ -350,36 +361,39 @@ def create_FEMA_P58_fragility_db(source_file, # start with the special cases with multiple DSs in an LS if LS_contents[0] in {'MutEx', 'Simul'}: - # collect the fragility data for the member DSs median_demands = [] dispersions = [] weights = [] for ds in LS_contents[1:]: - median_demands.append( - getattr(cmp, f"DS_{ds[2]}_Median_Demand")) + median_demands.append(getattr(cmp, f"DS_{ds[2]}_Median_Demand")) dispersions.append( - getattr(cmp, f"DS_{ds[2]}_Total_Dispersion_Beta")) + getattr(cmp, f"DS_{ds[2]}_Total_Dispersion_Beta") + ) weights.append(getattr(cmp, f"DS_{ds[2]}_Probability")) # make sure the specified distribution parameters are appropriate - if ((np.unique(median_demands).size != 1) or ( - np.unique(dispersions).size != 1)): - raise ValueError(f"Incorrect mutually exclusive DS " - f"definition in component {cmp.Index} at " - f"Limit State {LS_i}") + if (np.unique(median_demands).size != 1) or ( + np.unique(dispersions).size != 1 + ): + raise ValueError( + f"Incorrect mutually exclusive DS " + f"definition in component {cmp.Index} at " + f"Limit State {LS_i}" + ) if LS_contents[0] == 'MutEx': - # in mutually exclusive cases, make sure the specified DS # weights sum up to one np.testing.assert_allclose( - 
np.sum(np.array(weights, dtype=float)), 1.0, + np.sum(np.array(weights, dtype=float)), + 1.0, err_msg=f"Mutually exclusive Damage State weights do " - f"not sum to 1.0 in component {cmp.Index} at " - f"Limit State {LS_i}") + f"not sum to 1.0 in component {cmp.Index} at " + f"Limit State {LS_i}", + ) # and save all DS metadata under this Limit State for ds in LS_contents[1:]: @@ -389,10 +403,16 @@ def create_FEMA_P58_fragility_db(source_file, if pd.isna(repair_action): repair_action = "" - ls_meta.update({f"DS{ds_id}": { - "Description": cmp_meta[f"DS_{ds_id}_Description"], - "RepairAction": repair_action - }}) + ls_meta.update( + { + f"DS{ds_id}": { + "Description": cmp_meta[ + f"DS_{ds_id}_Description" + ], + "RepairAction": repair_action, + } + } + ) else: # in simultaneous cases, convert simultaneous weights into @@ -405,10 +425,16 @@ def create_FEMA_P58_fragility_db(source_file, for ds_id in range(1, ds_count + 1): ds_map = format(ds_id, f'0{sim_ds_count}b') - sim_weights.append(np.product( - [weights[ds_i] - if ds_map[-ds_i - 1] == '1' else 1.0-weights[ds_i] - for ds_i in range(sim_ds_count)])) + sim_weights.append( + np.product( + [ + weights[ds_i] + if ds_map[-ds_i - 1] == '1' + else 1.0 - weights[ds_i] + for ds_i in range(sim_ds_count) + ] + ) + ) # save ds metadata - we need to be clever here # the original metadata is saved for the pure cases @@ -417,7 +443,6 @@ def create_FEMA_P58_fragility_db(source_file, # combination of pure DSs they represent if ds_map.count('1') == 1: - ds_pure_id = ds_map[::-1].find('1') + 1 repair_action = cmp_meta[ @@ -426,23 +451,33 @@ def create_FEMA_P58_fragility_db(source_file, if pd.isna(repair_action): repair_action = "" - ls_meta.update({f"DS{ds_id}": { - "Description": f"Pure DS{ds_pure_id}. " + - cmp_meta[f"DS_{ds_pure_id}_Description"], - "RepairAction": repair_action - }}) + ls_meta.update( + { + f"DS{ds_id}": { + "Description": f"Pure DS{ds_pure_id}. " + + cmp_meta[f"DS_{ds_pure_id}_Description"], + "RepairAction": repair_action, + } + } + ) else: + ds_combo = [ + f'DS{_.start() + 1}' + for _ in re.finditer('1', ds_map[::-1]) + ] - ds_combo = [f'DS{_.start() + 1}' - for _ in re.finditer('1', ds_map[::-1])] - - ls_meta.update({f"DS{ds_id}": { - "Description": 'Combination of ' + - ' & '.join(ds_combo), - "RepairAction": 'Combination of pure DS repair ' - 'actions.' - }}) + ls_meta.update( + { + f"DS{ds_id}": { + "Description": 'Combination of ' + + ' & '.join(ds_combo), + "RepairAction": ( + 'Combination of pure DS repair actions.' 
+ ), + } + } + ) # adjust weights to respect the assumption that at least # one DS will occur (i.e., the case with all DSs returning @@ -459,7 +494,6 @@ def create_FEMA_P58_fragility_db(source_file, # then look at the sequential DS cases elif LS_contents[0].startswith('DS'): - # this is straightforward, store the data in the table and dict ds_id = LS_contents[0][2] @@ -470,10 +504,14 @@ def create_FEMA_P58_fragility_db(source_file, if pd.isna(repair_action): repair_action = "" - ls_meta.update({f"DS{ds_id}": { - "Description": cmp_meta[f"DS_{ds_id}_Description"], - "RepairAction": repair_action - }}) + ls_meta.update( + { + f"DS{ds_id}": { + "Description": cmp_meta[f"DS_{ds_id}_Description"], + "RepairAction": repair_action, + } + } + ) # FEMA P58 assumes lognormal distribution for every fragility df_db.loc[cmp.Index, f'LS{LS_i}-Family'] = 'lognormal' @@ -521,10 +559,11 @@ def create_FEMA_P58_fragility_db(source_file, def create_FEMA_P58_repair_db( - source_file, - meta_file='', - target_data_file='loss_repair_DB_FEMA_P58_2nd.csv', - target_meta_file='loss_repair_DB_FEMA_P58_2nd.json'): + source_file, + meta_file='', + target_data_file='loss_repair_DB_FEMA_P58_2nd.csv', + target_meta_file='loss_repair_DB_FEMA_P58_2nd.json', +): """ Create a repair consequence parameter database based on the FEMA P58 data @@ -548,12 +587,16 @@ def create_FEMA_P58_repair_db( # parse the source file df = pd.concat( - [pd.read_excel(source_file, sheet_name=sheet, header=2, index_col=1) - for sheet in ('Summary', 'Cost Summary', 'Env Summary')], axis=1) + [ + pd.read_excel(source_file, sheet_name=sheet, header=2, index_col=1) + for sheet in ('Summary', 'Cost Summary', 'Env Summary') + ], + axis=1, + ) # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} @@ -579,7 +622,6 @@ def create_FEMA_P58_repair_db( f"Lower Qty Cutoff, DS{DS_i}", f"Upper Qty Cutoff, DS{DS_i}", f"CV / Dispersion, DS{DS_i}", - f"Best Fit, DS{DS_i}.1", f"Lower Qty Mean, DS{DS_i}.1", f"Upper Qty Mean, DS{DS_i}.1", @@ -587,7 +629,6 @@ def create_FEMA_P58_repair_db( f"Upper Qty Cutoff, DS{DS_i}.1", f"CV / Dispersion, DS{DS_i}.2", f"DS {DS_i}, Long Lead Time", - f'Repair Cost, p10, DS{DS_i}', f'Repair Cost, p50, DS{DS_i}', f'Repair Cost, p90, DS{DS_i}', @@ -596,14 +637,11 @@ def create_FEMA_P58_repair_db( f'Time, p90, DS{DS_i}', f'Mean Value, DS{DS_i}', f'Mean Value, DS{DS_i}.1', - # Columns added for the Environmental loss f"DS{DS_i} Best Fit", f"DS{DS_i} CV or Beta", - f"DS{DS_i} Best Fit.1", f"DS{DS_i} CV or Beta.1", - f"DS{DS_i} Embodied Carbon (kg CO2eq)", f"DS{DS_i} Embodied Energy (MJ)", ] @@ -672,32 +710,23 @@ def create_FEMA_P58_repair_db( DVs = ['Cost', 'Time', 'Carbon', 'Energy'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'DV']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = {} # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - frag_meta = frag_meta["_GeneralInformation"] meta_dict.update({"_GeneralInformation": frag_meta}) - convert_family = { - 'LogNormal': 'lognormal', - 'Normal': 'normal' - } + convert_family = {'LogNormal': 'lognormal', 'Normal': 'normal'} # for each component... 
# (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) for cmp in df_db_source.itertuples(): - ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -712,8 +741,9 @@ def create_FEMA_P58_repair_db( # store units - df_db.loc[cmp.Index, 'Quantity-Unit'] = ( - ' '.join(cmp.Fragility_Unit_of_Measure.split(' ')[::-1]).strip()) + df_db.loc[cmp.Index, 'Quantity-Unit'] = ' '.join( + cmp.Fragility_Unit_of_Measure.split(' ')[::-1] + ).strip() df_db.loc[(cmp.Index, 'Cost'), 'DV-Unit'] = "USD_2011" df_db.loc[(cmp.Index, 'Time'), 'DV-Unit'] = "worker_day" df_db.loc[(cmp.Index, 'Carbon'), 'DV-Unit'] = "kg" @@ -732,13 +762,20 @@ def create_FEMA_P58_repair_db( # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: ' f'{cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -755,12 +792,11 @@ def create_FEMA_P58_repair_db( "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], "ControllingDemand": "Damage Quantity", - "DamageStates": {} + "DamageStates": {}, } # Handle components with simultaneous damage states separately if 'Simul' in cmp.DS_Hierarchy: - # Note that we are assuming that all damage states are triggered by # a single limit state in these components. 
# This assumption holds for the second edition of FEMA P58, but it @@ -773,53 +809,76 @@ def create_FEMA_P58_repair_db( # get the p10, p50, and p90 estimates for all damage states for DS_i in range(1, 6): - if not pd.isna(getattr(cmp, f'Repair_Cost_p10_DS{DS_i}')): - - cost_est.update({f'DS{DS_i}': np.array([ - getattr(cmp, f'Repair_Cost_p10_DS{DS_i}'), - getattr(cmp, f'Repair_Cost_p50_DS{DS_i}'), - getattr(cmp, f'Repair_Cost_p90_DS{DS_i}'), - getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}'), - getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}') - ])}) - - time_est.update({f'DS{DS_i}': np.array([ - getattr(cmp, f'Time_p10_DS{DS_i}'), - getattr(cmp, f'Time_p50_DS{DS_i}'), - getattr(cmp, f'Time_p90_DS{DS_i}'), - getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1'), - getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}_1'), - int(getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') == 'YES') - ])}) + cost_est.update( + { + f'DS{DS_i}': np.array( + [ + getattr(cmp, f'Repair_Cost_p10_DS{DS_i}'), + getattr(cmp, f'Repair_Cost_p50_DS{DS_i}'), + getattr(cmp, f'Repair_Cost_p90_DS{DS_i}'), + getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}'), + getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}'), + ] + ) + } + ) + + time_est.update( + { + f'DS{DS_i}': np.array( + [ + getattr(cmp, f'Time_p10_DS{DS_i}'), + getattr(cmp, f'Time_p50_DS{DS_i}'), + getattr(cmp, f'Time_p90_DS{DS_i}'), + getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1'), + getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}_1'), + int( + getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') + == 'YES' + ), + ] + ) + } + ) if not pd.isna(getattr(cmp, f'DS{DS_i}_Embodied_Carbon_kg_CO2eq')): - theta_0, theta_1, family = [ getattr(cmp, f'DS{DS_i}_Embodied_Carbon_kg_CO2eq'), getattr(cmp, f'DS{DS_i}_CV_or_Beta'), - getattr(cmp, f'DS{DS_i}_Best_Fit') + getattr(cmp, f'DS{DS_i}_Best_Fit'), ] if family == 'Normal': - p10, p50, p90 = norm.ppf([0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1) + p10, p50, p90 = norm.ppf( + [0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1 + ) elif family == 'LogNormal': - p10, p50, p90 = np.exp(norm.ppf([0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1)) + p10, p50, p90 = np.exp( + norm.ppf( + [0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1 + ) + ) carbon_est.update({f'DS{DS_i}': np.array([p10, p50, p90])}) if not pd.isna(getattr(cmp, f'DS{DS_i}_Embodied_Energy_MJ')): - theta_0, theta_1, family = [ getattr(cmp, f'DS{DS_i}_Embodied_Energy_MJ'), getattr(cmp, f'DS{DS_i}_CV_or_Beta_1'), - getattr(cmp, f'DS{DS_i}_Best_Fit_1') + getattr(cmp, f'DS{DS_i}_Best_Fit_1'), ] if family == 'Normal': - p10, p50, p90 = norm.ppf([0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1) + p10, p50, p90 = norm.ppf( + [0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1 + ) elif family == 'LogNormal': - p10, p50, p90 = np.exp(norm.ppf([0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1)) + p10, p50, p90 = np.exp( + norm.ppf( + [0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1 + ) + ) energy_est.update({f'DS{DS_i}': np.array([p10, p50, p90])}) @@ -830,47 +889,78 @@ def create_FEMA_P58_repair_db( for DS_i in range(1, ds_count + 1): ds_map = format(DS_i, f'0{sim_ds_count}b') - cost_vals = np.sum([cost_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(5) - for ds_i in range(sim_ds_count)], - axis=0) - - time_vals = np.sum([time_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(6) - for ds_i in range(sim_ds_count)], - axis=0) - - carbon_vals = np.sum([carbon_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(3) - for ds_i in range(sim_ds_count)], - axis=0) - - energy_vals = 
np.sum([energy_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(3) - for ds_i in range(sim_ds_count)], - axis=0) + cost_vals = np.sum( + [ + cost_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(5) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) + + time_vals = np.sum( + [ + time_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(6) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) + + carbon_vals = np.sum( + [ + carbon_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(3) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) + + energy_vals = np.sum( + [ + energy_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(3) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) # fit a distribution family_hat, theta_hat = fit_distribution_to_percentiles( - cost_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal']) + cost_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal'] + ) cost_theta = theta_hat if family_hat == 'normal': cost_theta[1] = cost_theta[1] / cost_theta[0] - time_theta = [time_vals[1], - np.sqrt(cost_theta[1] ** 2.0 + 0.25 ** 2.0)] + time_theta = [ + time_vals[1], + np.sqrt(cost_theta[1] ** 2.0 + 0.25**2.0), + ] # fit distributions to environmental impact consequences - family_hat_carbon, theta_hat_carbon = fit_distribution_to_percentiles( - carbon_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal']) + ( + family_hat_carbon, + theta_hat_carbon, + ) = fit_distribution_to_percentiles( + carbon_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal'] + ) carbon_theta = theta_hat_carbon if family_hat_carbon == 'normal': carbon_theta[1] = carbon_theta[1] / carbon_theta[0] - family_hat_energy, theta_hat_energy = fit_distribution_to_percentiles( - energy_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal']) + ( + family_hat_energy, + theta_hat_energy, + ) = fit_distribution_to_percentiles( + energy_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal'] + ) energy_theta = theta_hat_energy if family_hat_energy == 'normal': @@ -890,92 +980,115 @@ def create_FEMA_P58_repair_db( df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Theta_0'] = ( f"{cost_vals[3]:g},{cost_vals[4]:g}|" - f"{cost_qnt_low:g},{cost_qnt_up:g}") + f"{cost_qnt_low:g},{cost_qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Cost'), - f'DS{DS_i}-Theta_1'] = f"{cost_theta[1]:g}" + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Theta_1' + ] = f"{cost_theta[1]:g}" df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Family'] = family_hat df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Theta_0'] = ( f"{time_vals[3]:g},{time_vals[4]:g}|" - f"{time_qnt_low:g},{time_qnt_up:g}") + f"{time_qnt_low:g},{time_qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Time'), - f'DS{DS_i}-Theta_1'] = f"{time_theta[1]:g}" - - df_db.loc[(cmp.Index, 'Time'), - f'DS{DS_i}-LongLeadTime'] = int(time_vals[5] > 0) + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Theta_1' + ] = f"{time_theta[1]:g}" + df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-LongLeadTime'] = int( + time_vals[5] > 0 + ) - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Family'] = family_hat_carbon + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Family' + ] = family_hat_carbon - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0'] = f"{carbon_theta[0]:g}" + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0' + ] = f"{carbon_theta[0]:g}" - df_db.loc[(cmp.Index, 'Carbon'), - f'DS{DS_i}-Theta_1'] = f"{carbon_theta[1]:g}" + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_1' + ] = f"{carbon_theta[1]:g}" - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Family'] = 
family_hat_energy + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Family' + ] = family_hat_energy - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_0'] = f"{energy_theta[0]:g}" + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Theta_0' + ] = f"{energy_theta[0]:g}" - df_db.loc[(cmp.Index, 'Energy'), - f'DS{DS_i}-Theta_1'] = f"{energy_theta[1]:g}" + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Theta_1' + ] = f"{energy_theta[1]:g}" if ds_map.count('1') == 1: - ds_pure_id = ds_map[::-1].find('1') + 1 repair_action = cmp_meta[f"DS_{ds_pure_id}_Repair_Description"] if pd.isna(repair_action): repair_action = "" - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": f"Pure DS{ds_pure_id}. " + - cmp_meta[f"DS_{ds_pure_id}_Description"], - "RepairAction": repair_action - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": f"Pure DS{ds_pure_id}. " + + cmp_meta[f"DS_{ds_pure_id}_Description"], + "RepairAction": repair_action, + } + } + ) else: + ds_combo = [ + f'DS{_.start() + 1}' for _ in re.finditer('1', ds_map[::-1]) + ] - ds_combo = [f'DS{_.start() + 1}' - for _ in re.finditer('1', ds_map[::-1])] - - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": 'Combination of ' + - ' & '.join(ds_combo), - "RepairAction": 'Combination of pure DS repair ' - 'actions.' - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": 'Combination of ' + + ' & '.join(ds_combo), + "RepairAction": 'Combination of pure DS repair ' + 'actions.', + } + } + ) # for every other component... else: # now look at each Damage State for DS_i in range(1, 6): - # cost if not pd.isna(getattr(cmp, f'Best_Fit_DS{DS_i}')): - df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}')]) + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}')] if not pd.isna(getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}')): - theta_0_low = getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}') theta_0_up = getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}') qnt_low = getattr(cmp, f'Lower_Qty_Cutoff_DS{DS_i}') qnt_up = getattr(cmp, f'Upper_Qty_Cutoff_DS{DS_i}') - if theta_0_low == 0. 
and theta_0_up == 0.: - df_db.loc[(cmp.Index, 'Cost'), - f'DS{DS_i}-Family'] = np.nan + if theta_0_low == 0.0 and theta_0_up == 0.0: + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Family' + ] = np.nan else: df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Theta_0'] = ( f"{theta_0_low:g},{theta_0_up:g}|" - f"{qnt_low:g},{qnt_up:g}") + f"{qnt_low:g},{qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Theta_1'] = ( - f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}'):g}") + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Theta_1' + ] = f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}'):g}" else: incomplete_cost = True @@ -984,60 +1097,76 @@ def create_FEMA_P58_repair_db( if pd.isna(repair_action): repair_action = "" - meta_data['DamageStates'].update({ - f"DS{DS_i}": { - "Description": cmp_meta[f"DS_{DS_i}_Description"], - "RepairAction": repair_action}}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": cmp_meta[f"DS_{DS_i}_Description"], + "RepairAction": repair_action, + } + } + ) # time if not pd.isna(getattr(cmp, f'Best_Fit_DS{DS_i}_1')): - - df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}_1')]) + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}_1')] if not pd.isna(getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1')): - theta_0_low = getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1') theta_0_up = getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}_1') qnt_low = getattr(cmp, f'Lower_Qty_Cutoff_DS{DS_i}_1') qnt_up = getattr(cmp, f'Upper_Qty_Cutoff_DS{DS_i}_1') - if theta_0_low == 0. and theta_0_up == 0.: - df_db.loc[(cmp.Index, 'Time'), - f'DS{DS_i}-Family'] = np.nan + if theta_0_low == 0.0 and theta_0_up == 0.0: + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Family' + ] = np.nan else: df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Theta_0'] = ( f"{theta_0_low:g},{theta_0_up:g}|" - f"{qnt_low:g},{qnt_up:g}") + f"{qnt_low:g},{qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Theta_1'] = ( - f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}_2'):g}") + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Theta_1' + ] = f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}_2'):g}" - df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-LongLeadTime'] = ( - int(getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') == 'YES')) + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-LongLeadTime' + ] = int(getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') == 'YES') else: incomplete_time = True # Carbon if not pd.isna(getattr(cmp, f'DS{DS_i}_Best_Fit')): - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit')]) + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit')] - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0'] = getattr(cmp, - f'DS{DS_i}_Embodied_Carbon_kg_CO2eq') + df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0'] = getattr( + cmp, f'DS{DS_i}_Embodied_Carbon_kg_CO2eq' + ) - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_1'] = getattr(cmp, f'DS{DS_i}_CV_or_Beta') + df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_1'] = getattr( + cmp, f'DS{DS_i}_CV_or_Beta' + ) # Energy if not pd.isna(getattr(cmp, f'DS{DS_i}_Best_Fit_1')): - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit_1')]) + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit_1')] - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_0'] = getattr(cmp, 
f'DS{DS_i}_Embodied_Energy_MJ') + df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_0'] = getattr( + cmp, f'DS{DS_i}_Embodied_Energy_MJ' + ) - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_1'] = getattr(cmp, f'DS{DS_i}_CV_or_Beta_1') + df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_1'] = getattr( + cmp, f'DS{DS_i}_CV_or_Beta_1' + ) df_db.loc[(cmp.Index, 'Cost'), 'Incomplete'] = int(incomplete_cost) df_db.loc[(cmp.Index, 'Time'), 'Incomplete'] = int(incomplete_time) @@ -1048,14 +1177,14 @@ def create_FEMA_P58_repair_db( # assign the Index column as the new ID df_db.index = pd.MultiIndex.from_arrays( - [df_db['Index'].values, df_db.index.get_level_values(1)]) + [df_db['Index'].values, df_db.index.get_level_values(1)] + ) df_db.drop('Index', axis=1, inplace=True) # review the database and drop rows with no information cmp_to_drop = [] for cmp in df_db.index: - empty = True for DS_i in range(1, 6): @@ -1086,14 +1215,16 @@ def create_FEMA_P58_repair_db( with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the repair consequence data from FEMA " - "P58") + print( + "Successfully parsed and saved the repair consequence data from FEMA P58" + ) def create_FEMA_P58_bldg_injury_db( - source_file, - target_data_file='bldg_injury_DB_FEMA_P58_2nd.csv', - target_meta_file='bldg_injury_DB_FEMA_P58_2nd.json'): + source_file, + target_data_file='bldg_injury_DB_FEMA_P58_2nd.csv', + target_meta_file='bldg_injury_DB_FEMA_P58_2nd.json', +): """ Create an injury consequence parameter database based on the FEMA P58 data @@ -1114,9 +1245,14 @@ def create_FEMA_P58_bldg_injury_db( """ # parse the source file - df = pd.read_excel(source_file, sheet_name='Summary', header=2, index_col=1, - true_values=["YES", "Yes", "yes"], - false_values=["NO", "No", "no"]) + df = pd.read_excel( + source_file, + sheet_name='Summary', + header=2, + index_col=1, + true_values=["YES", "Yes", "yes"], + false_values=["NO", "No", "no"], + ) # remove empty rows and columns df.dropna(axis=0, how='all', inplace=True) @@ -1129,7 +1265,6 @@ def create_FEMA_P58_bldg_injury_db( ] for DS_i in range(1, 6): cols_to_db += [ - f'DS {DS_i}, Potential non-collapse casualty?', f'DS {DS_i} - Casualty Affected Area', f'DS {DS_i} Serious Injury Rate - Median', @@ -1199,11 +1334,7 @@ def create_FEMA_P58_bldg_injury_db( DVs = ['S1', 'S2'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'Severity']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = {} @@ -1212,7 +1343,6 @@ def create_FEMA_P58_bldg_injury_db( # (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) 
for cmp in df_db_source.itertuples(): - ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -1225,8 +1355,9 @@ def create_FEMA_P58_bldg_injury_db( # store units - df_db.loc[cmp.Index, 'Quantity-Unit'] = ( - ' '.join(cmp.Fragility_Unit_of_Measure.split(' ')[::-1]).strip()) + df_db.loc[cmp.Index, 'Quantity-Unit'] = ' '.join( + cmp.Fragility_Unit_of_Measure.split(' ')[::-1] + ).strip() df_db.loc[(cmp.Index, 'S1'), 'DV-Unit'] = "persons" df_db.loc[(cmp.Index, 'S2'), 'DV-Unit'] = "persons" @@ -1243,13 +1374,20 @@ def create_FEMA_P58_bldg_injury_db( # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: ' f'{cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -1266,12 +1404,11 @@ def create_FEMA_P58_bldg_injury_db( "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], "ControllingDemand": "Damage Quantity", - "DamageStates": {} + "DamageStates": {}, } # Handle components with simultaneous damage states separately if 'Simul' in cmp.DS_Hierarchy: - # Note that we are assuming that all damage states are triggered by # a single limit state in these components. 
# This assumption holds for the second edition of FEMA P58, but it @@ -1282,19 +1419,41 @@ def create_FEMA_P58_bldg_injury_db( # get the p10, p50, and p90 estimates for all damage states for DS_i in range(1, 6): - casualty_model = getattr( - cmp, f'DS_{DS_i}_Potential_non_collapse_casualty') + cmp, f'DS_{DS_i}_Potential_non_collapse_casualty' + ) if casualty_model is True: - - inj_data.update({f'DS{DS_i}': np.array([ - getattr(cmp, f'DS_{DS_i}___Casualty_Affected_Area'), - getattr(cmp, f'DS_{DS_i}_Serious_Injury_Rate___Median'), - getattr(cmp, f'DS_{DS_i}_Serious_Injury_Rate___Dispersion'), - getattr(cmp, f'DS_{DS_i}_Loss_of_Life_Rate___Median'), - getattr(cmp, f'DS_{DS_i}_Loss_of_Life_Rate___Dispersion') - ])}) + inj_data.update( + { + f'DS{DS_i}': np.array( + [ + getattr( + cmp, f'DS_{DS_i}___Casualty_Affected_Area' + ), + getattr( + cmp, + f'DS_{DS_i}_Serious_Injury_Rate' + f'___Median', + ), + getattr( + cmp, + f'DS_{DS_i}_Serious_Injury_Rate' + f'___Dispersion', + ), + getattr( + cmp, + f'DS_{DS_i}_Loss_of_Life_Rate' f'___Median', + ), + getattr( + cmp, + f'DS_{DS_i}_Loss_of_Life_Rate' + f'___Dispersion', + ), + ] + ) + } + ) ds_tot += 1 elif casualty_model is False: @@ -1322,10 +1481,8 @@ def create_FEMA_P58_bldg_injury_db( ds_map = format(DS_i, f'0{sim_ds_count}b') if ds_map[-ds_trig] == '1': - # store the consequence data for severity in ('S1', 'S2'): - A_affected = inj_data[0] if severity == 'S1': @@ -1336,93 +1493,112 @@ def create_FEMA_P58_bldg_injury_db( theta_1 = inj_data[4] if theta_0 != 0.0: + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Family' + ] = 'lognormal' - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Family'] = 'lognormal' - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_0'] = theta_0 + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_0' + ] = theta_0 - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_1'] = theta_1 + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_1' + ] = theta_1 - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-AffectedArea'] = A_affected + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-AffectedArea' + ] = A_affected # store the metadata if ds_map.count('1') == 1: - ds_pure_id = ds_map[::-1].find('1') + 1 - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": f"Pure DS{ds_pure_id}. " + - cmp_meta[ - f"DS_{ds_pure_id}_Description"] - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": f"Pure DS{ds_pure_id}. " + + cmp_meta[f"DS_{ds_pure_id}_Description"] + } + } + ) else: + ds_combo = [ + f'DS{_.start() + 1}' for _ in re.finditer('1', ds_map[::-1]) + ] - ds_combo = [f'DS{_.start() + 1}' - for _ in re.finditer('1', ds_map[::-1])] - - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": 'Combination of ' + - ' & '.join(ds_combo) - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": 'Combination of ' + + ' & '.join(ds_combo) + } + } + ) # for every other component... 
else: # now look at each Damage State for DS_i in range(1, 6): - casualty_flag = getattr( - cmp, f'DS_{DS_i}_Potential_non_collapse_casualty') + cmp, f'DS_{DS_i}_Potential_non_collapse_casualty' + ) if casualty_flag is True: - - A_affected = getattr(cmp, - f'DS_{DS_i}___Casualty_Affected_Area') + A_affected = getattr(cmp, f'DS_{DS_i}___Casualty_Affected_Area') for severity in ('S1', 'S2'): - if severity == 'S1': - theta_0 = getattr(cmp, f'DS_{DS_i}_Serious_Injury_' - f'Rate___Median') - theta_1 = getattr(cmp, f'DS_{DS_i}_Serious_Injury_' - f'Rate___Dispersion') + theta_0 = getattr( + cmp, f'DS_{DS_i}_Serious_Injury_' f'Rate___Median' + ) + theta_1 = getattr( + cmp, + f'DS_{DS_i}_Serious_Injury_' f'Rate___Dispersion', + ) elif severity == 'S2': - theta_0 = getattr(cmp, f'DS_{DS_i}_Loss_of_Life_' - f'Rate___Median') - theta_1 = getattr(cmp, f'DS_{DS_i}_Loss_of_Life_' - f'Rate___Dispersion') + theta_0 = getattr( + cmp, f'DS_{DS_i}_Loss_of_Life_' f'Rate___Median' + ) + theta_1 = getattr( + cmp, f'DS_{DS_i}_Loss_of_Life_' f'Rate___Dispersion' + ) if theta_0 != 0.0: - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Family'] = 'lognormal' - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_0'] = theta_0 - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_1'] = theta_1 - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-AffectedArea'] = A_affected - - if (pd.isna(theta_0) or pd.isna( - theta_1) or pd.isna(A_affected)): - + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Family' + ] = 'lognormal' + + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_0' + ] = theta_0 + + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_1' + ] = theta_1 + + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-AffectedArea' + ] = A_affected + + if ( + pd.isna(theta_0) + or pd.isna(theta_1) + or pd.isna(A_affected) + ): if severity == 'S1': incomplete_S1 = True else: incomplete_S2 = True if ~np.isnan(casualty_flag): - - meta_data['DamageStates'].update({ - f"DS{DS_i}": {"Description": - cmp_meta[f"DS_{DS_i}_Description"]}}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": cmp_meta[f"DS_{DS_i}_Description"] + } + } + ) df_db.loc[(cmp.Index, 'S1'), 'Incomplete'] = int(incomplete_S1) df_db.loc[(cmp.Index, 'S2'), 'Incomplete'] = int(incomplete_S2) @@ -1432,14 +1608,14 @@ def create_FEMA_P58_bldg_injury_db( # assign the Index column as the new ID df_db.index = pd.MultiIndex.from_arrays( - [df_db['Index'].values, df_db.index.get_level_values(1)]) + [df_db['Index'].values, df_db.index.get_level_values(1)] + ) df_db.drop('Index', axis=1, inplace=True) # review the database and drop rows with no information cmp_to_drop = [] for cmp in df_db.index: - empty = True for DS_i in range(1, 16): @@ -1476,14 +1652,16 @@ def create_FEMA_P58_bldg_injury_db( with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the injury consequence data from FEMA " - "P58") + print( + "Successfully parsed and saved the injury consequence data from FEMA P58" + ) def create_FEMA_P58_bldg_redtag_db( - source_file, - target_data_file='bldg_redtag_DB_FEMA_P58_2nd.csv', - target_meta_file='bldg_redtag_DB_FEMA_P58_2nd.json'): + source_file, + target_data_file='bldg_redtag_DB_FEMA_P58_2nd.csv', + target_meta_file='bldg_redtag_DB_FEMA_P58_2nd.json', +): """ Create an red tag consequence parameter database based on the FEMA P58 data @@ -1504,9 +1682,14 @@ def create_FEMA_P58_bldg_redtag_db( """ # parse the source file - df = pd.read_excel(source_file, 
sheet_name='Summary', header=2, index_col=1, - true_values=["YES", "Yes", "yes"], - false_values=["NO", "No", "no"]) + df = pd.read_excel( + source_file, + sheet_name='Summary', + header=2, + index_col=1, + true_values=["YES", "Yes", "yes"], + false_values=["NO", "No", "no"], + ) # take another pass with booleans because the first does not always work for true_str in ("YES", "Yes", "yes"): @@ -1527,7 +1710,7 @@ def create_FEMA_P58_bldg_redtag_db( cols_to_db += [ f'DS {DS_i}, Unsafe Placard Trigger Flag', f'DS {DS_i}, Unsafe Placard Damage Median', - f'DS {DS_i}, Unsafe Placard Damage Dispersion' + f'DS {DS_i}, Unsafe Placard Damage Dispersion', ] # filter the columns that we need for the metadata @@ -1577,20 +1760,12 @@ def create_FEMA_P58_bldg_redtag_db( "Incomplete", ] for DS_i in range(1, 6): - out_cols += [ - f"DS{DS_i}-Family", - f"DS{DS_i}-Theta_0", - f"DS{DS_i}-Theta_1" - ] + out_cols += [f"DS{DS_i}-Family", f"DS{DS_i}-Theta_0", f"DS{DS_i}-Theta_1"] # create the database index comps = df_db_source.index.values - df_db = pd.DataFrame( - columns=out_cols, - index=comps, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=comps, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = {} @@ -1599,7 +1774,6 @@ def create_FEMA_P58_bldg_redtag_db( # (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) for cmp in df_db_source.itertuples(): - ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -1622,13 +1796,20 @@ def create_FEMA_P58_bldg_redtag_db( # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: ' f'{cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -1645,12 +1826,11 @@ def create_FEMA_P58_bldg_redtag_db( "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], "ControllingDemand": "Damage Quantity", - "DamageStates": {} + "DamageStates": {}, } # Handle components with simultaneous damage states separately if 'Simul' in cmp.DS_Hierarchy: - pass # Note that we are assuming that components with simultaneous # damage states do not have damage that would trigger a red tag. 
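# --- A minimal standalone sketch (not part of the patch or of the FEMA P58
# source data): it illustrates, with hypothetical weights and values, the
# binary ds_map bookkeeping that the database builders above use to expand
# simultaneous damage states into mutually exclusive combinations, and a
# hypothetical call to fit_distribution_to_percentiles (assumed importable
# from pelicun.uq) as used to turn p10/p50/p90 estimates into distribution
# parameters. Names and numbers below are illustrative assumptions only.
import numpy as np
from pelicun.uq import fit_distribution_to_percentiles  # assumed import path

weights = [0.2, 0.5, 0.3]          # hypothetical P(DS_i occurs | LS triggered)
sim_ds_count = len(weights)
ds_count = 2 ** sim_ds_count - 1   # every non-empty combination of the DSs

sim_weights = []
for ds_id in range(1, ds_count + 1):
    # e.g. '011' means DS1 and DS2 occur together; the last bit maps to DS1
    ds_map = format(ds_id, f'0{sim_ds_count}b')
    sim_weights.append(
        np.prod(
            [
                weights[ds_i] if ds_map[-ds_i - 1] == '1' else 1.0 - weights[ds_i]
                for ds_i in range(sim_ds_count)
            ]
        )
    )

# normalize so the combined weights sum to 1.0, reflecting the stated
# assumption that at least one DS occurs once the limit state is triggered
sim_weights = np.array(sim_weights) / np.sum(sim_weights)

# fitting a distribution to percentile estimates (p10, p50, p90), analogous
# to how the simultaneous-DS repair consequences are handled above; the
# values here are made up
family, theta = fit_distribution_to_percentiles(
    [10.0, 20.0, 40.0], [0.1, 0.5, 0.9], ['normal', 'lognormal']
)
# these percentiles are equally spaced in log space, so 'lognormal' should be
# the selected family, with theta holding its median and log standard deviation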
@@ -1661,34 +1841,34 @@ def create_FEMA_P58_bldg_redtag_db( else: # now look at each Damage State for DS_i in range(1, 6): - - redtag_flag = getattr( - cmp, f'DS_{DS_i}_Unsafe_Placard_Trigger_Flag') + redtag_flag = getattr(cmp, f'DS_{DS_i}_Unsafe_Placard_Trigger_Flag') if redtag_flag is True: - - theta_0 = getattr(cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' - f'Median') - theta_1 = getattr(cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' - f'Dispersion') + theta_0 = getattr( + cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' f'Median' + ) + theta_1 = getattr( + cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' f'Dispersion' + ) if theta_0 != 0.0: - df_db.loc[cmp.Index, f'DS{DS_i}-Family'] = 'lognormal' df_db.loc[cmp.Index, f'DS{DS_i}-Theta_0'] = theta_0 df_db.loc[cmp.Index, f'DS{DS_i}-Theta_1'] = theta_1 - if (pd.isna(theta_0) or pd.isna(theta_1)): - + if pd.isna(theta_0) or pd.isna(theta_1): incomplete = True if ~np.isnan(redtag_flag): - - meta_data['DamageStates'].update({ - f"DS{DS_i}": {"Description": - cmp_meta[f"DS_{DS_i}_Description"]}}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": cmp_meta[f"DS_{DS_i}_Description"] + } + } + ) df_db.loc[cmp.Index, 'Incomplete'] = int(incomplete) @@ -1701,7 +1881,6 @@ def create_FEMA_P58_bldg_redtag_db( # review the database and drop rows with no information cmp_to_drop = [] for cmp in df_db.index: - empty = True for DS_i in range(1, 6): @@ -1736,15 +1915,18 @@ def create_FEMA_P58_bldg_redtag_db( with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the red tag consequence data from FEMA " - "P58") + print( + "Successfully parsed and saved the red tag consequence data from FEMA P58" + ) -def create_Hazus_EQ_fragility_db(source_file, - meta_file='', - target_data_file='damage_DB_Hazus_EQ_bldg.csv', - target_meta_file='damage_DB_Hazus_EQ_bldg.json', - resolution='building'): +def create_Hazus_EQ_fragility_db( + source_file, + meta_file='', + target_data_file='damage_DB_Hazus_EQ_bldg.csv', + target_meta_file='damage_DB_Hazus_EQ_bldg.json', + resolution='building', +): """ Create a database file based on the HAZUS EQ Technical Manual @@ -1764,9 +1946,9 @@ def create_Hazus_EQ_fragility_db(source_file, target_meta_file: string Path where the fragility metadata should be saved. A json file is expected. - resoltuion: string - If building, the function produces the conventional Hazus - fragilities. If story, the function produces story-level + resolution: string + If building, the function produces the conventional Hazus + fragilities. If story, the function produces story-level fragilities. 
""" @@ -1789,16 +1971,18 @@ def create_Hazus_EQ_fragility_db(source_file, # prepare lists of labels for various building features design_levels = list( - raw_data['Structural_Fragility_Groups']['EDP_limits'].keys()) + raw_data['Structural_Fragility_Groups']['EDP_limits'].keys() + ) building_types = list( - raw_data['Structural_Fragility_Groups']['P_collapse'].keys()) + raw_data['Structural_Fragility_Groups']['P_collapse'].keys() + ) convert_design_level = { 'High_code': 'HC', 'Moderate_code': 'MC', 'Low_code': 'LC', - 'Pre_code': 'PC' + 'Pre_code': 'PC', } # initialize the fragility table @@ -1825,10 +2009,10 @@ def create_Hazus_EQ_fragility_db(source_file, "LS4-Family", "LS4-Theta_0", "LS4-Theta_1", - "LS4-DamageStateWeights" + "LS4-DamageStateWeights", ], index=np.arange(len(building_types) * len(design_levels) * 5), - dtype=float + dtype=float, ) # initialize the dictionary that stores the fragility metadata @@ -1836,14 +2020,12 @@ def create_Hazus_EQ_fragility_db(source_file, # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - GI = frag_meta["_GeneralInformation"] # remove the decision variable part from the general info GI.pop("DecisionVariables", None) for key, item in deepcopy(GI).items(): - if key == 'ComponentGroups_Damage': GI.update({'ComponentGroups': item}) @@ -1860,9 +2042,8 @@ def create_Hazus_EQ_fragility_db(source_file, for bt in building_types: for dl in design_levels: if bt in S_data['EDP_limits'][dl].keys(): - # add a dot in bt between structure and height labels, if needed - if ((len(bt)>2) and (bt[-1] in ['L','M','H'])): + if (len(bt) > 2) and (bt[-1] in {'L', 'M', 'H'}): bt_exp = f'{bt[:-1]}.{bt[-1]}' st = bt[:-1] hc = bt[-1] @@ -1873,9 +2054,9 @@ def create_Hazus_EQ_fragility_db(source_file, # story-level fragilities are based only on the low rise archetypes if resolution == 'story': - if hc in ['M', 'H']: + if hc in {'M', 'H'}: continue - elif hc == 'L': + if hc == 'L': bt_exp = st # create the component id @@ -1895,65 +2076,99 @@ def create_Hazus_EQ_fragility_db(source_file, if hc is not None: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['STR']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - frag_meta['Meta']['HeightClasses'][hc]['Description'] + ", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['STR']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['HeightClasses'][hc]['Description'] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['STR']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['HeightClasses'][hc]['Comment'] + "\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['STR']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st]['Comment'] + + "\n" + + frag_meta['Meta']['HeightClasses'][hc]['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } else: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['STR']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - 
frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['STR']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['STR']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['STR']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st]['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['StructuralSystems'][st]['DamageStates'] for LS_i in range(1, 5): - df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = \ - S_data['EDP_limits'][dl][bt][LS_i - 1] - df_db.loc[counter, f'LS{LS_i}-Theta_1'] = \ - S_data['Fragility_beta'][dl] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = S_data['EDP_limits'][ + dl + ][bt][LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_1'] = S_data[ + 'Fragility_beta' + ][dl] if LS_i == 4: p_coll = S_data['P_collapse'][bt] - df_db.loc[counter, f'LS{LS_i}-DamageStateWeights'] = ( - f'{1.0 - p_coll} | {p_coll}') - - cmp_meta["LimitStates"].update({"LS4": { - "DS4": {"Description": ds_meta['DS4']}, - "DS5": {"Description": ds_meta['DS5']} - }}) + df_db.loc[ + counter, f'LS{LS_i}-DamageStateWeights' + ] = f'{1.0 - p_coll} | {p_coll}' + + cmp_meta["LimitStates"].update( + { + "LS4": { + "DS4": {"Description": ds_meta['DS4']}, + "DS5": {"Description": ds_meta['DS5']}, + } + } + ) else: - cmp_meta["LimitStates"].update({f"LS{LS_i}": { - f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + { + f"LS{LS_i}": { + f"DS{LS_i}": { + "Description": ds_meta[f"DS{LS_i}"] + } + } + } + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -1978,24 +2193,23 @@ def create_Hazus_EQ_fragility_db(source_file, "Comments": frag_meta['Meta']['Collections']['NSD']['Comment'], "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['Collections']['NSD']['DamageStates'] for LS_i in range(1, 5): df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = NSD_data['EDP_limits'][ - LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = NSD_data['EDP_limits'][LS_i - 1] df_db.loc[counter, f'LS{LS_i}-Theta_1'] = NSD_data['Fragility_beta'] # add limit state metadata - cmp_meta["LimitStates"].update({f"LS{LS_i}": - {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + {f"LS{LS_i}": {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]}}} + ) # store metadata - meta_dict.update({'NSD':cmp_meta}) + meta_dict.update({'NSD': cmp_meta}) counter += 1 @@ -2003,7 +2217,6 @@ def create_Hazus_EQ_fragility_db(source_file, NSA_data = raw_data['NonStructural_Acceleration_Sensitive_Fragility_Groups'] for dl in design_levels: - # create the component id cmp_id = f'NSA.{convert_design_level[dl]}' df_db.loc[counter, 'ID'] = cmp_id @@ -2016,33 +2229,40 @@ def 
create_Hazus_EQ_fragility_db(source_file, # add metadata cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['NSA']['Description']+", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['NSA']['Description'] + + ", " + + frag_meta['Meta']['DesignLevels'][convert_design_level[dl]][ + 'Description' + ] + ), "Comments": ( - frag_meta['Meta']['Collections']['NSA']['Comment']+"\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['NSA']['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][convert_design_level[dl]][ + 'Comment' + ] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['Collections']['NSA']['DamageStates'] for LS_i in range(1, 5): df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = \ - NSA_data['EDP_limits'][dl][LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = NSA_data['EDP_limits'][dl][ + LS_i - 1 + ] df_db.loc[counter, f'LS{LS_i}-Theta_1'] = NSA_data['Fragility_beta'] # add limit state metadata - cmp_meta["LimitStates"].update({f"LS{LS_i}": - {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + {f"LS{LS_i}": {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]}}} + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -2053,9 +2273,8 @@ def create_Hazus_EQ_fragility_db(source_file, for bt in building_types: for dl in design_levels: if bt in LF_data['EDP_limits'][dl].keys(): - # add a dot in bt between structure and height labels, if needed - if ((len(bt)>2) and (bt[-1] in ['L','M','H'])): + if (len(bt) > 2) and (bt[-1] in {'L', 'M', 'H'}): bt_exp = f'{bt[:-1]}.{bt[-1]}' st = bt[:-1] hc = bt[-1] @@ -2077,65 +2296,107 @@ def create_Hazus_EQ_fragility_db(source_file, if hc is not None: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['LF']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - frag_meta['Meta']['HeightClasses'][hc]['Description'] + ", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['LF']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['HeightClasses'][hc][ + 'Description' + ] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['LF']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['HeightClasses'][hc]['Comment'] + "\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['LF']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st][ + 'Comment' + ] + + "\n" + + frag_meta['Meta']['HeightClasses'][hc]['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } else: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['LF']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - 
frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['LF']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['LF']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['LF']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st][ + 'Comment' + ] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters - ds_meta = frag_meta['Meta']['StructuralSystems'][st]['DamageStates'] + ds_meta = frag_meta['Meta']['StructuralSystems'][st][ + 'DamageStates' + ] for LS_i in range(1, 5): - df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = \ - LF_data['EDP_limits'][dl][bt][LS_i - 1] - df_db.loc[counter, f'LS{LS_i}-Theta_1'] = \ - LF_data['Fragility_beta'][dl] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = LF_data[ + 'EDP_limits' + ][dl][bt][LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_1'] = LF_data[ + 'Fragility_beta' + ][dl] if LS_i == 4: p_coll = LF_data['P_collapse'][bt] - df_db.loc[counter, f'LS{LS_i}-DamageStateWeights'] = ( - f'{1.0 - p_coll} | {p_coll}') - - cmp_meta["LimitStates"].update({"LS4": { - "DS4": {"Description": ds_meta['DS4']}, - "DS5": {"Description": ds_meta['DS5']} - }}) + df_db.loc[ + counter, f'LS{LS_i}-DamageStateWeights' + ] = f'{1.0 - p_coll} | {p_coll}' + + cmp_meta["LimitStates"].update( + { + "LS4": { + "DS4": {"Description": ds_meta['DS4']}, + "DS5": {"Description": ds_meta['DS5']}, + } + } + ) else: - cmp_meta["LimitStates"].update({f"LS{LS_i}": { - f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + { + f"LS{LS_i}": { + f"DS{LS_i}": { + "Description": ds_meta[f"DS{LS_i}"] + } + } + } + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -2156,36 +2417,41 @@ def create_Hazus_EQ_fragility_db(source_file, # add metadata cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['GF']['Description']+ - f", {direction} Direction, {f_depth} Foundation" - ), - "Comments": ( - frag_meta['Meta']['Collections']['GF']['Comment'] - ), + frag_meta['Meta']['Collections']['GF']['Description'] + + f", {direction} Direction, {f_depth} Foundation" + ), + "Comments": (frag_meta['Meta']['Collections']['GF']['Comment']), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['Collections']['GF']['DamageStates'] df_db.loc[counter, 'LS1-Family'] = 'lognormal' - df_db.loc[counter, 'LS1-Theta_0'] = \ - GF_data['EDP_limits'][direction][f_depth] - df_db.loc[counter, 'LS1-Theta_1'] = \ - GF_data['Fragility_beta'][direction][f_depth] + df_db.loc[counter, 'LS1-Theta_0'] = GF_data['EDP_limits'][direction][ + f_depth + ] + df_db.loc[counter, 'LS1-Theta_1'] = GF_data['Fragility_beta'][direction][ + f_depth + ] p_complete = GF_data['P_Complete'] - df_db.loc[counter, 'LS1-DamageStateWeights'] = ( - f'{1.0 - p_complete} | {p_complete}') - - 
cmp_meta["LimitStates"].update({"LS1": { - "DS1": {"Description": ds_meta['DS1']}, - "DS2": {"Description": ds_meta['DS2']} - }}) + df_db.loc[ + counter, 'LS1-DamageStateWeights' + ] = f'{1.0 - p_complete} | {p_complete}' + + cmp_meta["LimitStates"].update( + { + "LS1": { + "DS1": {"Description": ds_meta['DS1']}, + "DS2": {"Description": ds_meta['DS2']}, + } + } + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -2214,11 +2480,13 @@ def create_Hazus_EQ_fragility_db(source_file, print("Successfully parsed and saved the fragility data from Hazus EQ") -def create_Hazus_EQ_repair_db(source_file, - meta_file='', - target_data_file='loss_repair_DB_Hazus_EQ_bldg.csv', - target_meta_file='loss_repair_DB_Hazus_EQ_bldg.json', - resolution='building'): +def create_Hazus_EQ_repair_db( + source_file, + meta_file='', + target_data_file='loss_repair_DB_Hazus_EQ_bldg.csv', + target_meta_file='loss_repair_DB_Hazus_EQ_bldg.json', + resolution='building', +): """ Create a database file based on the HAZUS EQ Technical Manual @@ -2238,9 +2506,9 @@ def create_Hazus_EQ_repair_db(source_file, target_meta_file: string Path where the repair DB metadata should be saved. A json file is expected. - resoltuion: string - If building, the function produces the conventional Hazus - fragilities. If story, the function produces story-level + resolution: string + If building, the function produces the conventional Hazus + fragilities. If story, the function produces story-level fragilities. """ @@ -2262,8 +2530,7 @@ def create_Hazus_EQ_repair_db(source_file, frag_meta = {} # prepare lists of labels for various building features - occupancies = list( - raw_data['Structural_Fragility_Groups']['Repair_cost'].keys()) + occupancies = list(raw_data['Structural_Fragility_Groups']['Repair_cost'].keys()) # initialize the output loss table # define the columns @@ -2279,27 +2546,24 @@ def create_Hazus_EQ_repair_db(source_file, # create the MultiIndex cmp_types = ['STR', 'NSD', 'NSA', 'LF'] - comps = [f'{cmp_type}.{occ_type}' - for cmp_type in cmp_types for occ_type in occupancies] + comps = [ + f'{cmp_type}.{occ_type}' + for cmp_type in cmp_types + for occ_type in occupancies + ] DVs = ['Cost', 'Time'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'DV']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = {} # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - GI = frag_meta["_GeneralInformation"] for key, item in deepcopy(GI).items(): - if key == 'ComponentGroups_Loss_Repair': GI.update({'ComponentGroups': item}) @@ -2312,30 +2576,31 @@ def create_Hazus_EQ_repair_db(source_file, S_data = raw_data['Structural_Fragility_Groups'] for occ_type in occupancies: - # create the component id cmp_id = f'STR.{occ_type}' cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['STR']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['STR']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['STR']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['STR']['Comment'] + + "\n" + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + 
), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each Damage State ds_meta = frag_meta['Meta']['Collections']['STR']['DamageStates'] for DS_i in range(1, 6): - - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) # DS4 and DS5 have identical repair consequences if DS_i == 5: @@ -2343,142 +2608,145 @@ def create_Hazus_EQ_repair_db(source_file, else: ds_i = DS_i - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = S_data['Repair_cost'][occ_type][ds_i-1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = S_data['Repair_cost'][ + occ_type + ][ds_i - 1] - df_db.loc[ - (cmp_id, 'Time'), - f'DS{DS_i}-Theta_0'] = S_data['Repair_time'][occ_type][ds_i-1] + df_db.loc[(cmp_id, 'Time'), f'DS{DS_i}-Theta_0'] = S_data['Repair_time'][ + occ_type + ][ds_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # Second, the non-structural drift sensitive one NSD_data = raw_data['NonStructural_Drift_Sensitive_Fragility_Groups'] for occ_type in occupancies: - # create the component id cmp_id = f'NSD.{occ_type}' cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['NSD']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['NSD']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['NSD']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['NSD']['Comment'] + + "\n" + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each Damage State ds_meta = frag_meta['Meta']['Collections']['NSD']['DamageStates'] for DS_i in range(1, 5): + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) - - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = NSD_data['Repair_cost'][occ_type][DS_i-1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = NSD_data[ + 'Repair_cost' + ][occ_type][DS_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # Third, the non-structural acceleration sensitive fragilities NSA_data = raw_data['NonStructural_Acceleration_Sensitive_Fragility_Groups'] for occ_type in occupancies: - # create the component id cmp_id = f'NSA.{occ_type}' cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['NSA']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['NSA']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['NSA']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['NSA']['Comment'] + + "\n" + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each 
Damage State ds_meta = frag_meta['Meta']['Collections']['NSA']['DamageStates'] for DS_i in range(1, 5): + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) - - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = NSA_data['Repair_cost'][occ_type][DS_i-1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = NSA_data[ + 'Repair_cost' + ][occ_type][DS_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # Fourth, the lifeline facilities - only at the building-level resolution if resolution == 'building': LF_data = raw_data['Lifeline_Facilities'] for occ_type in occupancies: - # create the component id cmp_id = f'LF.{occ_type}' cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['LF']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['LF']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['LF']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['LF']['Comment'] + + "\n" + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each Damage State ds_meta = frag_meta['Meta']['Collections']['LF']['DamageStates'] for DS_i in range(1, 6): - # DS4 and DS5 have identical repair consequences if DS_i == 5: ds_i = 4 else: ds_i = DS_i - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = LF_data['Repair_cost'][occ_type][ds_i - 1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = LF_data[ + 'Repair_cost' + ][occ_type][ds_i - 1] - df_db.loc[ - (cmp_id, 'Time'), - f'DS{DS_i}-Theta_0'] = LF_data['Repair_time'][occ_type][ds_i - 1] + df_db.loc[(cmp_id, 'Time'), f'DS{DS_i}-Theta_0'] = LF_data[ + 'Repair_time' + ][occ_type][ds_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # remove empty rows (from the end) df_db.dropna(how='all', inplace=True) # All Hazus components have complete fragility info, df_db['Incomplete'] = 0 - #df_db.loc[:, 'Incomplete'] = 0 + # df_db.loc[:, 'Incomplete'] = 0 # The damage quantity unit is the same for all consequence values df_db.loc[:, 'Quantity-Unit'] = "1 EA" @@ -2503,13 +2771,15 @@ def create_Hazus_EQ_repair_db(source_file, with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the repair consequence data from Hazus " - "EQ") + print( + "Successfully parsed and saved the repair consequence data from Hazus EQ" + ) -def create_Hazus_EQ_bldg_injury_db(source_file, - target_data_file='bldg_injury_DB_Hazus_EQ.csv', - target_meta_file='bldg_injury_DB_Hazus_EQ.json'): +def create_Hazus_EQ_bldg_injury_db( + source_file, + target_data_file='bldg_injury_DB_Hazus_EQ.csv', +): """ Create a database file based on the HAZUS EQ Technical Manual @@ -2524,9 +2794,6 @@ def create_Hazus_EQ_bldg_injury_db(source_file, target_data_file: string Path where the injury DB file should be saved. 
A csv file is expected. - target_meta_file: string - Path where the injury DB metadata should be saved. A json file is - expected. """ @@ -2534,16 +2801,10 @@ def create_Hazus_EQ_bldg_injury_db(source_file, with open(source_file, 'r', encoding='utf-8') as f: raw_data = json.load(f) - # parse the extra metadata file - if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: - frag_meta = json.load(f) - else: - frag_meta = {} - # prepare lists of labels for various building features building_types = list( - raw_data['Structural_Fragility_Groups']['P_collapse'].keys()) + raw_data['Structural_Fragility_Groups']['P_collapse'].keys() + ) # initialize the output loss table # define the columns @@ -2559,28 +2820,21 @@ def create_Hazus_EQ_bldg_injury_db(source_file, # create the MultiIndex cmp_types = ['STR', 'LF'] - comps = [f'{cmp_type}.{bt}' - for cmp_type in cmp_types for bt in building_types] + comps = [f'{cmp_type}.{bt}' for cmp_type in cmp_types for bt in building_types] DVs = ['S1', 'S2', 'S3', 'S4'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'DV']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # First, prepare the structural damage consequences S_data = raw_data['Structural_Fragility_Groups'] for bt in building_types: - # create the component id cmp_id = f'STR.{bt}' # store the consequence values for each Damage State for DS_i in range(1, 6): - # DS5 is stored under 'collapse' if DS_i == 5: ds_i = 'Collapse' @@ -2589,20 +2843,18 @@ def create_Hazus_EQ_bldg_injury_db(source_file, for S_i in range(1, 5): s_label = f'S{S_i}' - df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = ( - S_data['Injury_rates'][ds_i][bt][S_i-1]) + df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = S_data[ + 'Injury_rates' + ][ds_i][bt][S_i - 1] # Second, the lifeline facilities - LF_data = raw_data['Lifeline_Facilities'] for bt in building_types: - # create the component id cmp_id = f'STR.{bt}' # store the consequence values for each Damage State for DS_i in range(1, 6): - # DS5 is stored under 'collapse' if DS_i == 5: ds_i = 'Collapse' @@ -2611,8 +2863,9 @@ def create_Hazus_EQ_bldg_injury_db(source_file, for S_i in range(1, 5): s_label = f'S{S_i}' - df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = ( - S_data['Injury_rates'][ds_i][bt][S_i - 1]) + df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = S_data[ + 'Injury_rates' + ][ds_i][bt][S_i - 1] # remove empty rows df_db.dropna(how='all', inplace=True) @@ -2638,9 +2891,6 @@ def create_Hazus_EQ_bldg_injury_db(source_file, # save the consequence data df_db.to_csv(target_data_file) - # save the metadata - later - # with open(target_meta_file, 'w+') as f: - # json.dump(meta_dict, f, indent=2) - - print("Successfully parsed and saved the injury consequence data from Hazus " - "EQ") + print( + "Successfully parsed and saved the injury consequence data from Hazus EQ" + ) From 8e48d0be326b6dc885cababaf6863b17445aca9e Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 23 Mar 2024 02:54:06 -0700 Subject: [PATCH 29/48] Change Error to Warning --- pelicun/model/demand_model.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pelicun/model/demand_model.py b/pelicun/model/demand_model.py index 9c50472b4..bd5d8ff3b 100644 --- a/pelicun/model/demand_model.py +++ b/pelicun/model/demand_model.py @@ -317,7 +317,12 @@ def calibrate_model(self, config): """ if self.calibrated: - raise ValueError('DemandModel has been 
previously calibrated.') + self.log_msg( + 'WARNING: DemandModel has been previously calibrated.', + prepend_timestamp=False, + ) + + raise ValueError() def parse_settings(settings, demand_type): def parse_str_to_float(in_str, context_string): From d1303530b941a0f75ee9041ddfc1c0ef98e9c9cc Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 23 Mar 2024 04:25:40 -0700 Subject: [PATCH 30/48] More strict `pylint` checks for docstrings - Enables pylint warnings on docstring issues. - Fixes the resulting warnings for `uq.py`. --- .pylintrc | 10 + pelicun/uq.py | 504 +++++++++++++++++++++++++++++++------------------- 2 files changed, 321 insertions(+), 193 deletions(-) diff --git a/.pylintrc b/.pylintrc index 01bccf6ed..26b7a9985 100644 --- a/.pylintrc +++ b/.pylintrc @@ -335,6 +335,16 @@ docstring-min-length=-1 # List of decorators that define properties, such as abc.abstractproperty. property-classes=abc.abstractproperty +# +# Docstring parameter documentation: +# https://pylint.pycqa.org/en/1.7/technical_reference/extensions.html +# + +accept-no-raise-doc = no +accept-no-param-doc = no +accept-no-return-doc = no +accept-no-yields-doc = no + [TYPECHECK] diff --git a/pelicun/uq.py b/pelicun/uq.py index 90026cf54..8c06de48d 100644 --- a/pelicun/uq.py +++ b/pelicun/uq.py @@ -89,6 +89,17 @@ def scale_distribution(scale_factor, family, theta, truncation_limits=None): Defines the [a,b] truncation limits for the distribution. Use None to assign no limit in one direction. + Returns + ------- + tuple + A tuple containing the scaled parameters and truncation + limits: + - theta_new (float ndarray of length 2): Scaled parameters of + the distribution. + - truncation_limits (float ndarray of length 2 or None): + Scaled truncation limits for the distribution, or None if no + truncation is applied. + Raises ------ ValueError @@ -155,12 +166,14 @@ def mvn_orthotope_density(mu, COV, lower=np.nan, upper=np.nan): multivariate cases. If the distribution is non-truncated from above in a subset of the dimensions, use either `None` or assign an infinite value (i.e. numpy.inf) to those dimensions. + Returns ------- - alpha: float - Estimate of the probability density within the hyperrectangle - eps_alpha: float - Estimate of the error in alpha. + tuple + alpha: float + Estimate of the probability density within the hyperrectangle. + eps_alpha: float + Estimate of the error in the calculated probability density. """ @@ -232,16 +245,16 @@ def _get_theta(params, inits, dist_list): dist_list: list of str List of strings containing the names of the distributions. + Returns + ------- + Theta + The estimated parameters. + Raises ------ ValueError If any of the distributions is unsupported. - Returns - ------- - Theta: - The estimated parameters. - """ theta = np.zeros(inits.shape) @@ -277,15 +290,16 @@ def _get_limit_probs(limits, distribution, theta): theta: float ndarray The parameters of the specified distribution. + Returns + ------- + tuple + The CDF values. + Raises ------ ValueError If any of the distributions is unsupported. - Returns - ------- - The CDF values. - """ if distribution in {'normal', 'normal-stdev', 'lognormal'}: @@ -315,27 +329,29 @@ def _get_std_samples(samples, theta, tr_limits, dist_list): Parameters ---------- - samples: float ndarray, DxN + samples: float ndarray DxN 2D array of samples. Each row represents a sample. 
- theta: float ndarray, Dx2 + theta: float ndarray Dx2 2D array of theta values that represent each dimension of the samples - tr_limits: float ndarray, Dx2 + tr_limits: float ndarray Dx2 2D array with rows that represent [a, b] pairs of truncation limits dist_list: str ndarray of length D 1D array containing the names of the distributions + Returns + ------- + ndarray + float DxN ndarray of the samples transformed to standard normal + space, with each row representing a transformed sample in + standard normal space. + Raises ------ ValueError If any of the distributions is unsupported. - Returns - ------- - std_samples: float ndarray, DxN - The samples transformed to standard normal space. - """ std_samples = np.zeros(samples.shape) @@ -388,15 +404,16 @@ def _get_std_corr_matrix(std_samples): Array containing the standard normal samples. Each column is a sample. It should not contain Inf or NaN values. + Returns + ------- + ndarray + Correlation matrix. float ndarray, DxD + Raises ------ ValueError If any of the elements of std_samples is np.inf or np.nan - Returns - ------- - rho_hat: float ndarray, DxD - Correlation matrix. """ if True in np.isinf(std_samples) or True in np.isnan(std_samples): @@ -452,9 +469,21 @@ def _get_std_corr_matrix(std_samples): def _mvn_scale(x, rho): """ - Utility function used in _neg_log_likelihood - """ + Scaling utility function + + Parameters + ---------- + x: ndarray + Input array + rho: ndarray + Covariance matrix + Returns + ------- + ndarray + Scaled values + + """ x = np.atleast_2d(x) n_dims = x.shape[1] @@ -699,20 +728,22 @@ def fit_distribution_to_sample( Returns ------- - theta: float ndarray - Estimates of the parameters of the fitted probability distribution in - each dimension. The following parameters are returned for the supported - distributions: - normal - mean, coefficient of variation; - lognormal - median, log standard deviation; - Rho: float 2D ndarray, optional - In the multivariate case, returns the estimate of the correlation - matrix. + tuple + theta: float ndarray + Estimates of the parameters of the fitted probability + distribution in each dimension. The following parameters + are returned for the supported distributions: normal - + mean, coefficient of variation; lognormal - median, log + standard deviation; + Rho: float 2D ndarray, optional + In the multivariate case, returns the estimate of the + correlation matrix. Raises ------ ValueError If NaN values are produced during standard normal space transformation + """ samples = np.atleast_2d(raw_samples) @@ -956,6 +987,7 @@ def _OLS_percentiles(params, values, perc, family): ------ ValueError If `family` is not 'normal' or 'lognormal'. + """ if family == 'normal': @@ -1002,10 +1034,13 @@ def fit_distribution_to_percentiles(values, percentiles, families): Returns ------- - family: string - The optimal choice of family among the provided list of families - theta: array of float - Parameters of the fitted distribution. + tuple + family: string + The optimal choice of family among the provided list of + families + theta: array of float + Parameters of the fitted distribution. + """ out_list = [] @@ -1054,32 +1089,6 @@ class BaseRandomVariable(ABC): """ Base abstract class for different types of random variables. - Parameters - ---------- - name: string - A unique string that identifies the random variable. - theta: float scalar or ndarray, optional - Set of parameters that define the Cumulative Distribution - Function (CDF) of the variable given its distribution - type. 
The following parameters are expected currently for the - supported distribution types: - normal - mean, standard deviation; - lognormal - median, log standard deviation; - uniform - a, b, the lower and upper bounds of the distribution; - multinomial - ; - custom - according to the custom expression provided; - empirical and coupled_empirical - N/A; - deterministic - the deterministic value assigned to the variable. - multilinear_CDF - - f_map: function, optional - A user-defined function that is applied on the realizations before - returning a sample. - anchor: RandomVariable, optional - Anchors this to another variable. If the anchor is not None, this - variable will be perfectly correlated with its anchor. Note that - the attributes of this variable and its anchor do not have to be - identical. - """ def __init__( @@ -1093,7 +1102,16 @@ def __init__( Parameters ---------- - see the attributes of the RandomVariable class + name: string + A unique string that identifies the random variable. + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. Raises ------ @@ -1105,9 +1123,6 @@ def __init__( self.name = name self.distribution = None - # self.theta = np.atleast_1d(theta) - # self.truncation_limits = truncation_limits - # self._raw_samples = np.atleast_1d(raw_samples) self.f_map = f_map self._uni_samples = None self.RV_set = None @@ -1122,6 +1137,12 @@ def __init__( def sample(self): """ Return the empirical or generated sample. + + Returns + ------- + ndarray + The empirical or generated sample. + """ if self.f_map is not None: return self.f_map(self._sample) @@ -1130,7 +1151,13 @@ def sample(self): @sample.setter def sample(self, value): """ - Assign a sample to the random variable + Assign a sample to the random variable. + + Parameters + ---------- + value: ndarray + Sample to assign + """ self._sample = value self._sample_DF = pd.Series(value) @@ -1139,6 +1166,12 @@ def sample(self, value): def sample_DF(self): """ Return the empirical or generated sample in a pandas Series. + + Returns + ------- + ndarray + The empirical or generated sample in a pandas Series. + """ if self.f_map is not None: return self._sample_DF.apply(self.f_map) @@ -1149,6 +1182,12 @@ def sample_DF(self): def uni_sample(self): """ Return the sample from the controlling uniform distribution. + + Returns + ------- + ndarray + The sample from the controlling uniform distribution. + """ return self.anchor._uni_samples @@ -1161,6 +1200,7 @@ def uni_sample(self, value): ---------- value: float ndarray An array of floating point values in the [0, 1] domain. + """ self._uni_samples = value @@ -1170,6 +1210,42 @@ class CommonRandomVariable(BaseRandomVariable): Random variable that needs `values` in `inverse_transform` """ + # pylint: disable=super-init-not-called + @abstractmethod + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + """ + Instantiates a normal random variable. + + Parameters + ---------- + name: string + A unique string that identifies the random variable. + theta: 2-element float ndarray + Set of parameters that define the Cumulative Distribution + Function (CDF) of the variable: Mean, coefficient of + variation. 
+ truncation_limits: float ndarray, optional + Defines the np.array((a, b)) truncation limits for the + distribution. Use np.nan to assign no limit in one direction, + like so: np.array((a, np.nan)), or np.array((np.nan, b)). + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. + + """ + @abstractmethod def inverse_transform(self, values): """ @@ -1182,6 +1258,11 @@ def inverse_transform_sampling(self): """ Creates a sample using inverse probability integral transformation. + + Raises + ------ + ValueError + If there is no available uniform sample. """ if self.uni_sample is None: raise ValueError('No available uniform sample.') @@ -1193,6 +1274,42 @@ class SampleSizeRandomVariable(BaseRandomVariable): Random variable that needs `sample_size` in `inverse_transform` """ + # pylint: disable=super-init-not-called + @abstractmethod + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + """ + Instantiates a normal random variable. + + Parameters + ---------- + name: string + A unique string that identifies the random variable. + theta: 2-element float ndarray + Set of parameters that define the Cumulative Distribution + Function (CDF) of the variable: Mean, coefficient of + variation. + truncation_limits: float ndarray, optional + Defines the np.array((a, b)) truncation limits for the + distribution. Use np.nan to assign no limit in one direction, + like so: np.array((a, np.nan)), or np.array((np.nan, b)). + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. + + """ + @abstractmethod def inverse_transform(self, sample_size): """ @@ -1223,21 +1340,6 @@ def __init__( f_map=None, anchor=None, ): - """ - Instantiates a normal random variable. - - Parameters - ---------- - theta: 2-element float ndarray - Set of parameters that define the Cumulative Distribution - Function (CDF) of the variable: Mean, coefficient of - variation. - truncation_limits: float ndarray, optional - Defines the np.array((a, b)) truncation limits for the - distribution. Use np.nan to assign no limit in one direction, - like so: np.array((a, np.nan)), or np.array((np.nan, b)). - - """ super().__init__( name, f_map, @@ -1259,9 +1361,8 @@ def cdf(self, values): Returns ------- - - 1D float ndarray - CDF values + ndarray + 1D float ndarray containing CDF values """ mu, cov = self.theta[:2] @@ -1304,7 +1405,7 @@ def inverse_transform(self, values): Returns ------- - 1D float ndarray + ndarray Inverse CDF values Raises @@ -1359,20 +1460,6 @@ def __init__( f_map=None, anchor=None, ): - """ - Instantiates a lognormal random variable. - - Parameters - ---------- - theta: 2-element float ndarray - Set of parameters that define the Cumulative Distribution - Function (CDF) of the variable: Median, dispersion. - truncation_limits: float ndarray, optional - Defines the np.array((a, b)) truncation limits for the - distribution. 
Use np.nan to assign no limit in one direction, - like so: np.array((a, np.nan)), or np.array((np.nan, b)). - - """ super().__init__( name, f_map, @@ -1394,8 +1481,7 @@ def cdf(self, values): Returns ------- - - 1D float ndarray + ndarray CDF values """ @@ -1442,7 +1528,7 @@ def inverse_transform(self, values): Returns ------- - 1D float ndarray + ndarray Inverse CDF values """ @@ -1488,20 +1574,6 @@ def __init__( f_map=None, anchor=None, ): - """ - Instantiates a uniform random variable. - - Parameters - ---------- - theta: 2-element float ndarray - Set of parameters that define the Cumulative Distribution - Function (CDF) of the variable: min, max. - truncation_limits: float ndarray, optional - Defines the np.array((a, b)) truncation limits for the - distribution. Use np.nan to assign no limit in one direction, - like so: np.array((a, np.nan)), or np.array((np.nan, b)). - - """ super().__init__( name, f_map, @@ -1523,8 +1595,7 @@ def cdf(self, values): Returns ------- - - 1D float ndarray + ndarray CDF values """ @@ -1555,7 +1626,7 @@ def inverse_transform(self, values): Returns ------- - 1D float ndarray + ndarray Inverse CDF values """ @@ -1590,22 +1661,6 @@ def __init__( f_map=None, anchor=None, ): - """ - Instantiates a "multilinear CDF" random variable. - - Parameters - ---------- - theta: 2D float ndarray - A Nx2 numpy array defining the vertices of a multilinear CDF - curve in the form ((X_0, 0.00), (X_1, Y_1), ..., (X_n, - 1.00)). The first Y value has to be 0.00 and the last 1.00 - for a valid CDF, and the X_i's as well as the Y_i's should - be in increasing order, otherwise an error is raised. - truncation_limits: 2D float ndarray - Not supported for multilinear CDF. - Should be np.array((np.nan, np.nan)) - - """ super().__init__( name, f_map, @@ -1668,8 +1723,7 @@ def cdf(self, values): Returns ------- - - 1D float ndarray + ndarray CDF values """ @@ -1694,15 +1748,9 @@ def inverse_transform(self, values): Returns ------- - 1D float ndarray + ndarray Inverse CDF values - Raises - ------ - ValueError - If the probability massss within the truncation limits is - too small - """ x_i = [x[0] for x in self.theta] @@ -1734,19 +1782,6 @@ def __init__( f_map=None, anchor=None, ): - """ - Instantiates an empirical random variable. - - Parameters - ---------- - raw_samples: 1D float ndarray - Samples from which to draw empirical realizations. - truncation_limits: 2D float ndarray - Not supported for Empirical RVs. - Should be np.array((np.nan, np.nan)) - - """ - super().__init__( name, f_map, @@ -1776,7 +1811,7 @@ def inverse_transform(self, values): Returns ------- - 1D float ndarray + ndarray The empirical data points corresponding to the given normalized positions. @@ -1805,11 +1840,26 @@ def __init__( Parameters ---------- + name: string + A unique string that identifies the random variable. raw_samples: 1D float ndarray Samples from which to draw empirical realizations. truncation_limits: 2D float ndarray Not supported for CoupledEmpirical RVs. Should be np.array((np.nan, np.nan)) + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. 
+ + Raises + ------ + NotImplementedError + When truncation limits are provided """ super().__init__( @@ -1841,7 +1891,7 @@ def inverse_transform(self, sample_size): Returns ------- - 1D float ndarray + ndarray A new sample array derived from repeating the original dataset. @@ -1876,11 +1926,26 @@ def __init__( Parameters ---------- + name: string + A unique string that identifies the random variable. theta: 1-element float ndarray The value. truncation_limits: 2D float ndarray Not supported for Deterministic RVs. Should be np.array((np.nan, np.nan)) + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. + + Raises + ------ + NotImplementedError + When truncation limits are provided """ super().__init__( @@ -1907,7 +1972,7 @@ def inverse_transform(self, sample_size): Returns ------- - 1D float ndarray + ndarray Sample array containing the deterministic value. """ @@ -1930,20 +1995,6 @@ def __init__( f_map=None, anchor=None, ): - """ - Instantiates a multinomial random variable. - - Parameters - ---------- - theta: 2-element float ndarray - Likelihood of each unique event (the last event's likelihood - is adjusted automatically to ensure the likelihoods sum up - to one) - truncation_limits: 2D float ndarray - Not supported for Multinomial RVs. - Should be np.array((np.nan, np.nan)) - - """ super().__init__( name, f_map, @@ -1979,7 +2030,7 @@ def inverse_transform(self, values): Returns ------- - 1D int ndarray + ndarray Discrete events corresponding to the input values. """ @@ -2038,27 +2089,51 @@ def __init__(self, name, RV_list, Rho): @property def RV(self): """ - Return the random variable(s) assigned to the set + Returns the random variable(s) assigned to the set. + + Returns + ------- + ndarray + The random variable(s) assigned to the set. + """ return self._variables @property def size(self): """ - Return the size (i.e., number of variables in the) RV set + Returns the size (i.e., number of variables in the) RV set. + + Returns + ------- + ndarray + The size (i.e., number of variables in the) RV set. + """ return len(self._variables) @property def sample(self): """ - Return the sample of the variables in the set + Returns the sample of the variables in the set. + + Returns + ------- + ndarray + The sample of the variables in the set. + """ return {name: rv.sample for name, rv in self._variables.items()} def Rho(self, var_subset=None): """ - Return the (subset of the) correlation matrix. + Returns the (subset of the) correlation matrix. + + Returns + ------- + ndarray + The (subset of the) correlation matrix. + """ if var_subset is None: return self._Rho @@ -2134,10 +2209,11 @@ def orthotope_density(self, lower=np.nan, upper=np.nan, var_subset=None): Returns ------- - alpha: float - Estimate of the probability density within the orthotope. - eps_alpha: float - Estimate of the error in alpha. + tuple + alpha: float + Estimate of the probability density within the orthotope. + eps_alpha: float + Estimate of the error in alpha. """ @@ -2206,19 +2282,42 @@ def __init__(self, rng): @property def RV(self): """ - Return all random variable(s) in the registry + Returns all random variable(s) in the registry. + + Returns + ------- + dict + all random variable(s) in the registry. 
+ """ return self._variables def RVs(self, keys): """ - Return a subset of the random variables in the registry + Returns a subset of the random variables in the registry + + Parameters + ---------- + keys: list of str + Keys that define the subset. + + Returns + ------- + dict + A subset random variable(s) in the registry. + """ return {name: self._variables[name] for name in keys} def add_RV(self, RV): """ Add a new random variable to the registry. + + Raises + ------ + ValueError + When the RV already exists in the registry + """ if RV.name in self._variables: raise ValueError(f'RV {RV.name} already exists in the registry.') @@ -2228,6 +2327,12 @@ def add_RV(self, RV): def RV_set(self): """ Return the random variable set(s) in the registry. + + Returns + ------- + dict + The random variable set(s) in the registry. + """ return self._sets @@ -2240,7 +2345,13 @@ def add_RV_set(self, RV_set): @property def RV_sample(self): """ - Return the sample for every random variable in the registry + Return the sample for every random variable in the registry. + + Returns + ------- + dict + The sample for every random variable in the registry. + """ return {name: rv.sample for name, rv in self.RV.items()} @@ -2260,6 +2371,12 @@ def generate_sample(self, sample_size, method): with random sample location within each bin of the hypercube; 'LHS_midpoint' is like LHS, but the samples are assigned to the midpoints of the hypercube bins. + + Raises + ------ + NotImplementedError + When the RV parent class is Unknown + """ # Generate a dictionary with IDs of the free (non-anchored and @@ -2325,11 +2442,12 @@ def rv_class_map(distribution_name): Returns ------- - RandomVariable ojbect. + RandomVariable + RandomVariable class. Raises ------ - ValueError: + ValueError If the given distribution name does not correspond to a distribution class. 
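The four `accept-no-*-doc` options added to `.pylintrc` in this patch belong to pylint's docstring parameter checks: with each set to `no`, pylint flags any public callable whose docstring omits a required Parameters, Returns, Raises, or Yields section instead of silently accepting it, which is why the `uq.py` docstrings above gain explicit Returns and Raises blocks. A minimal sketch of a function that would satisfy these checks in the numpydoc style used throughout `uq.py` follows; the function and its behavior are illustrative only and are not part of the patch:

    def scale_value(value, scale_factor):
        """
        Scale a value by a positive factor.

        Parameters
        ----------
        value: float
            The value to scale.
        scale_factor: float
            The factor to multiply the value by. Must be positive.

        Returns
        -------
        float
            The scaled value.

        Raises
        ------
        ValueError
            If the scale factor is not positive.
        """
        # Guard clause matching the Raises section above.
        if scale_factor <= 0.0:
            raise ValueError('scale_factor must be positive.')
        return value * scale_factor

Sections are only expected where the corresponding behavior exists; for example, a function with no raise statements does not need a Raises block.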
From 608e251e81c07c5a2ead00097728c56684669935 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 23 Mar 2024 04:43:48 -0700 Subject: [PATCH 31/48] Improve formatting - Roll back formatting changes on DataFrame definition --- pelicun/resources/auto/Hazus_Earthquake_IM.py | 34 +++++++++++-------- .../resources/auto/Hazus_Earthquake_Story.py | 9 ++--- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 75f2ba7e0..b80f6a376 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -327,10 +327,12 @@ def auto_populate(AIM): else: LF = f'LF.{bt}.{dl}' + # fmt: off CMP = pd.DataFrame( - {f'{LF}': ['ea', 1, 1, 1, 'N/A']}, - index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], - ).T + {f'{LF}': ['ea', 1, 1, 1, 'N/A']}, + index = ['Units','Location','Direction','Theta_0','Family'] + ).T + # fmt: on # if needed, add components to simulate damage from ground failure if ground_failure: @@ -385,13 +387,13 @@ def auto_populate(AIM): bt = convertBridgeToHAZUSclass(GI) GI_ap['BridgeHazusClass'] = bt + # fmt: off CMP = pd.DataFrame( - { - f'HWB.GS.{bt[3:]}': ['ea', 1, 1, 1, 'N/A'], - 'HWB.GF': ['ea', 1, 1, 1, 'N/A'], - }, - index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + {f'HWB.GS.{bt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], + f'HWB.GF': [ 'ea', 1, 1, 1, 'N/A']}, + index = [ 'Units','Location','Direction','Theta_0','Family'] ).T + # fmt: on DL_ap = { "Asset": { @@ -415,13 +417,13 @@ def auto_populate(AIM): tt = convertTunnelToHAZUSclass(GI) GI_ap['TunnelHazusClass'] = tt + # fmt: off CMP = pd.DataFrame( - { - f'HTU.GS.{tt[3:]}': ['ea', 1, 1, 1, 'N/A'], - 'HTU.GF': ['ea', 1, 1, 1, 'N/A'], - }, - index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + {f'HTU.GS.{tt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], + f'HTU.GF': [ 'ea', 1, 1, 1, 'N/A']}, + index = [ 'Units','Location','Direction','Theta_0','Family'] ).T + # fmt: on DL_ap = { "Asset": { @@ -444,10 +446,12 @@ def auto_populate(AIM): rt = convertRoadToHAZUSclass(GI) GI_ap['RoadHazusClass'] = rt + # fmt: off CMP = pd.DataFrame( - {f'HRD.GF.{rt[3:]}': ['ea', 1, 1, 1, 'N/A']}, - index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + {f'HRD.GF.{rt[3:]}':[ 'ea', 1, 1, 1, 'N/A']}, + index = [ 'Units','Location','Direction','Theta_0','Family'] ).T + # fmt: on DL_ap = { "Asset": { diff --git a/pelicun/resources/auto/Hazus_Earthquake_Story.py b/pelicun/resources/auto/Hazus_Earthquake_Story.py index ef72836ea..7587aa1a9 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_Story.py +++ b/pelicun/resources/auto/Hazus_Earthquake_Story.py @@ -226,13 +226,10 @@ def auto_populate(AIM): FG_GF_H = f'GF.H.{foundation_type}' FG_GF_V = f'GF.V.{foundation_type}' - CMP_GF = pd.DataFrame( - { - f'{FG_GF_H}': ['ea', 1, 1, 1, 'N/A'], - f'{FG_GF_V}': ['ea', 1, 3, 1, 'N/A'], - }, - index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], + f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, + index = [ 'Units','Location','Direction','Theta_0','Family'] ).T CMP = pd.concat([CMP, CMP_GF], axis=0) From 622a1cdd15fc60688e444fd991f7cab0af58af91 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 23 Mar 2024 05:10:39 -0700 Subject: [PATCH 32/48] Roll back certain `db.py` changes --- pelicun/db.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pelicun/db.py b/pelicun/db.py index 
2cad7faaf..381101597 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -622,6 +622,7 @@ def create_FEMA_P58_repair_db( f"Lower Qty Cutoff, DS{DS_i}", f"Upper Qty Cutoff, DS{DS_i}", f"CV / Dispersion, DS{DS_i}", + # -------------------------- f"Best Fit, DS{DS_i}.1", f"Lower Qty Mean, DS{DS_i}.1", f"Upper Qty Mean, DS{DS_i}.1", @@ -629,6 +630,7 @@ def create_FEMA_P58_repair_db( f"Upper Qty Cutoff, DS{DS_i}.1", f"CV / Dispersion, DS{DS_i}.2", f"DS {DS_i}, Long Lead Time", + # -------------------------- f'Repair Cost, p10, DS{DS_i}', f'Repair Cost, p50, DS{DS_i}', f'Repair Cost, p90, DS{DS_i}', @@ -637,11 +639,14 @@ def create_FEMA_P58_repair_db( f'Time, p90, DS{DS_i}', f'Mean Value, DS{DS_i}', f'Mean Value, DS{DS_i}.1', + # -------------------------- # Columns added for the Environmental loss f"DS{DS_i} Best Fit", f"DS{DS_i} CV or Beta", + # -------------------------- f"DS{DS_i} Best Fit.1", f"DS{DS_i} CV or Beta.1", + # -------------------------- f"DS{DS_i} Embodied Carbon (kg CO2eq)", f"DS{DS_i} Embodied Energy (MJ)", ] @@ -2779,6 +2784,7 @@ def create_Hazus_EQ_repair_db( def create_Hazus_EQ_bldg_injury_db( source_file, target_data_file='bldg_injury_DB_Hazus_EQ.csv', + target_meta_file='bldg_injury_DB_Hazus_EQ.json', ): """ Create a database file based on the HAZUS EQ Technical Manual @@ -2794,6 +2800,9 @@ def create_Hazus_EQ_bldg_injury_db( target_data_file: string Path where the injury DB file should be saved. A csv file is expected. + target_meta_file: string + Path where the injury DB metadata should be saved. A json file is + expected. """ @@ -2801,6 +2810,13 @@ def create_Hazus_EQ_bldg_injury_db( with open(source_file, 'r', encoding='utf-8') as f: raw_data = json.load(f) + # parse the extra metadata file + if Path(meta_file).is_file(): + with open(meta_file, 'r') as f: + frag_meta = json.load(f) + else: + frag_meta = {} + # prepare lists of labels for various building features building_types = list( raw_data['Structural_Fragility_Groups']['P_collapse'].keys() @@ -2891,6 +2907,9 @@ def create_Hazus_EQ_bldg_injury_db( # save the consequence data df_db.to_csv(target_data_file) - print( - "Successfully parsed and saved the injury consequence data from Hazus EQ" - ) + # save the metadata - later + # with open(target_meta_file, 'w+') as f: + # json.dump(meta_dict, f, indent=2) + + print("Successfully parsed and saved the injury consequence data from Hazus " + "EQ") From 1243a74463469072fdd9eefc01bb7683b67a9b14 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 23 Mar 2024 10:16:45 -0700 Subject: [PATCH 33/48] manually merge `Hazus_Earthquake_IM.py` --- pelicun/resources/auto/Hazus_Earthquake_IM.py | 280 ++++++++++++++++-- 1 file changed, 261 insertions(+), 19 deletions(-) diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index b80f6a376..44cb2139d 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -36,8 +36,10 @@ # # Contributors: # Adam Zsarnóczay - +import os +import json import pandas as pd +import pelicun ap_DesignLevel = {1940: 'LC', 1975: 'MC', 2100: 'HC'} # original: @@ -67,6 +69,35 @@ } +# Convert common length units +def convertUnits(value, unit_in, unit_out): + aval_types = ['m', 'mm', 'cm', 'km', 'inch', 'ft', 'mile'] + m = 1.0 + mm = 0.001 * m + cm = 0.01 * m + km = 1000 * m + inch = 0.0254 * m + ft = 12.0 * inch + mile = 5280.0 * ft + scale_map = { + 'm': m, + 'mm': mm, + 'cm': cm, + 'km': km, + 'inch': inch, + 'ft': ft, + 'mile': mile, + } + 
if (unit_in not in aval_types) or (unit_out not in aval_types): + print( + f"The unit {unit_in} or {unit_out} " + f"are used in auto_population but not supported" + ) + return + value = value * scale_map[unit_in] / scale_map[unit_out] + return value + + def convertBridgeToHAZUSclass(AIM): # TODO: replace labels in AIM with standard CamelCase versions structureType = AIM["BridgeClass"] @@ -82,6 +113,8 @@ def convertBridgeToHAZUSclass(AIM): yr_built = AIM["YearBuilt"] num_span = AIM["NumOfSpans"] len_max_span = AIM["MaxSpanLength"] + len_unit = AIM["units"]["length"] + len_max_span = convertUnits(len_max_span, len_unit, "m") seismic = (int(state) == 6 and int(yr_built) >= 1975) or ( int(state) != 6 and int(yr_built) >= 1990 @@ -328,10 +361,10 @@ def auto_populate(AIM): LF = f'LF.{bt}.{dl}' # fmt: off - CMP = pd.DataFrame( - {f'{LF}': ['ea', 1, 1, 1, 'N/A']}, - index = ['Units','Location','Direction','Theta_0','Family'] - ).T + CMP = pd.DataFrame( # noqa + {f'{LF}': ['ea', 1, 1, 1, 'N/A']}, # noqa + index = ['Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa # fmt: on # if needed, add components to simulate damage from ground failure @@ -388,11 +421,11 @@ def auto_populate(AIM): GI_ap['BridgeHazusClass'] = bt # fmt: off - CMP = pd.DataFrame( - {f'HWB.GS.{bt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], - f'HWB.GF': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T + CMP = pd.DataFrame( # noqa + {f'HWB.GS.{bt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], # noqa + f'HWB.GF': [ 'ea', 1, 1, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa # fmt: on DL_ap = { @@ -418,11 +451,11 @@ def auto_populate(AIM): GI_ap['TunnelHazusClass'] = tt # fmt: off - CMP = pd.DataFrame( - {f'HTU.GS.{tt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], - f'HTU.GF': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T + CMP = pd.DataFrame( # noqa + {f'HTU.GS.{tt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], # noqa + f'HTU.GF': [ 'ea', 1, 1, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa # fmt: on DL_ap = { @@ -447,10 +480,10 @@ def auto_populate(AIM): GI_ap['RoadHazusClass'] = rt # fmt: off - CMP = pd.DataFrame( - {f'HRD.GF.{rt[3:]}':[ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T + CMP = pd.DataFrame( # noqa + {f'HRD.GF.{rt[3:]}':[ 'ea', 1, 1, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa # fmt: on DL_ap = { @@ -471,6 +504,215 @@ def auto_populate(AIM): } else: print("subtype not supported in HWY") + + elif assetType == "WaterDistributionNetwork": + + pipe_material_map = { + "CI": "B", + "AC": "B", + "RCC": "B", + "DI": "D", + "PVC": "D", + "DS": "B", + "BS": "D", + } + + # GI = AIM.get("GeneralInformation", None) + # if GI==None: + + # initialize the auto-populated GI + wdn_element_type = GI_ap.get("type", "MISSING") + asset_name = GI_ap.get("AIM_id", None) + + if wdn_element_type == "Pipe": + pipe_construction_year = GI_ap.get("year", None) + pipe_diameter = GI_ap.get("Diam", None) + # diamaeter value is a fundamental part of hydraulic + # performance assessment + if pipe_diameter is None: + raise ValueError( + f"pipe diamater in asset type {assetType}, \ + asset id \"{asset_name}\" has no diameter \ + value." 
+ ) + + pipe_length = GI_ap.get("Len", None) + # length value is a fundamental part of hydraulic performance assessment + if pipe_diameter is None: + raise ValueError( + f"pipe length in asset type {assetType}, \ + asset id \"{asset_name}\" has no diameter \ + value." + ) + + pipe_material = GI_ap.get("material", None) + + # pipe material can be not available or named "missing" in + # both case, pipe flexibility will be set to "missing" + + """ + The assumed logic (rullset) is that if the material is + missing, if the pipe is smaller than or equal to 20 + inches, the material is Cast Iron (CI) otherwise the pipe + material is steel. + If the material is steel (ST), either based on user specified + input or the assumption due to the lack of the user-input, the year + that the pipe is constructed define the flexibility status per HAZUS + instructions. If the pipe is built in 1935 or after, it is, the pipe + is Ductile Steel (DS), and otherwise it is Brittle Steel (BS). + If the pipe is missing construction year and is built by steel, + we assume consevatively that the pipe is brittle (i.e., BS) + """ + if pipe_material is None: + if pipe_diameter > 20 * 0.0254: # 20 inches in meter + print( + f"Asset {asset_name} is missing material. Material is\ + assumed to be Cast Iron" + ) + pipe_material = "CI" + else: + print( + f"Asset {asset_name} is missing material. Material is " + f"assumed to be Steel (ST)" + ) + pipe_material = "ST" + + if pipe_material == "ST": + if (pipe_construction_year is not None) and ( + pipe_construction_year >= 1935 + ): + print( + f"Asset {asset_name} has material of \"ST\" is assumed to be\ + Ductile Steel" + ) + pipe_material = "DS" + else: + print( + f'Asset {asset_name} has material of "ST" is assumed to be ' + f'Brittle Steel' + ) + pipe_material = "BS" + + pipe_flexibility = pipe_material_map.get(pipe_material, "missing") + + GI_ap["material flexibility"] = pipe_flexibility + GI_ap["material"] = pipe_material + + # Pipes are broken into 20ft segments (rounding up) and + # each segment is represented by an individual entry in + # the performance model, `CMP`. The damage capcity of each + # segment is assumed to be independent and driven by the + # same EDP. We therefore replicate the EDP associated with + # the pipe to the various locations assgined to the + # segments. + + # Determine number of segments + with open( + os.path.join( + os.path.dirname(pelicun.__file__), 'settings/default_units.json' + ), + 'r', + encoding='utf-8', + ) as f: + units = json.load(f) + pipe_length_unit = GI_ap['units']['length'] + pipe_length_unit_factor = units['length'][pipe_length_unit] + pipe_length_in_base_unit = pipe_length * pipe_length_unit_factor + reference_length_in_base_unit = 20.00 * units['length']['ft'] + if pipe_length_in_base_unit % reference_length_in_base_unit < 1e-2: + # If the lengths are equal, then that's one segment, not two. + num_segments = int( + pipe_length_in_base_unit / reference_length_in_base_unit + ) + else: + # In all other cases, round up. 
+ num_segments = ( + int(pipe_length_in_base_unit / reference_length_in_base_unit) + 1 + ) + if num_segments > 1: + location_string = f'1--{num_segments}' + else: + location_string = '1' + + # Define performance model + CMP = pd.DataFrame( + { + f'PWP.{pipe_flexibility}.GS': [ + 'ea', + location_string, + '0', + 1, + 'N/A', + ], + f'PWP.{pipe_flexibility}.GF': [ + 'ea', + location_string, + '0', + 1, + 'N/A', + ], + 'aggregate': ['ea', location_string, '0', 1, 'N/A'], + }, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + ).T + + # Set up the demand cloning configuration for the pipe + # segments, if required. + demand_config = {} + if num_segments > 1: + # determine the EDP tags available for cloning + response_data = pelicun.file_io.load_data('response.csv', None) + num_header_entries = len(response_data.columns.names) + # if 4, assume a hazard level tag is present and remove it + if num_header_entries == 4: + response_data.columns = pd.MultiIndex.from_tuples( + [x[1::] for x in response_data.columns] + ) + demand_cloning_config = {} + for edp in response_data.columns: + tag, location, direction = edp + + demand_cloning_config['-'.join(edp)] = [ + f'{tag}-{x}-{direction}' + for x in [f'{i+1}' for i in range(num_segments)] + ] + demand_config = {'DemandCloning': demand_cloning_config} + + # Create damage process + dmg_process = { + f"1_PWP.{pipe_flexibility}.GS": {"DS1": "aggregate_DS1"}, + f"2_PWP.{pipe_flexibility}.GF": {"DS1": "aggregate_DS1"}, + f"3_PWP.{pipe_flexibility}.GS": {"DS2": "aggregate_DS2"}, + f"4_PWP.{pipe_flexibility}.GF": {"DS2": "aggregate_DS2"}, + } + dmg_process_filename = 'dmg_process.json' + with open(dmg_process_filename, 'w', encoding='utf-8') as f: + json.dump(dmg_process, f, indent=2) + + # Define the auto-populated config + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Water", + "Material Flexibility": pipe_flexibility, + "PlanArea": "1", # Sina: does not make sense for water. 
+ # Kept it here since itw as also + # kept here for Transportation + }, + "Damage": { + "DamageProcess": "User Defined", + "DamageProcessFilePath": "dmg_process.json", + }, + "Demands": demand_config, + } + else: + print( + f"Water Distribution network element type {wdn_element_type} " + f"is not supported in Hazus Earthquake IM DL method" + ) + DL_ap = None + CMP = None + else: print( f"AssetType: {assetType} is not supported " From 313af7198bebe40e5736e156d6eb5641750537dd Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 23 Mar 2024 10:16:59 -0700 Subject: [PATCH 34/48] ignore flake8 warnings for specific lines --- pelicun/resources/auto/Hazus_Earthquake_Story.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pelicun/resources/auto/Hazus_Earthquake_Story.py b/pelicun/resources/auto/Hazus_Earthquake_Story.py index 7587aa1a9..0b2bd34eb 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_Story.py +++ b/pelicun/resources/auto/Hazus_Earthquake_Story.py @@ -224,13 +224,13 @@ def auto_populate(AIM): if ground_failure: foundation_type = 'S' - FG_GF_H = f'GF.H.{foundation_type}' - FG_GF_V = f'GF.V.{foundation_type}' - CMP_GF = pd.DataFrame( - {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], - f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T + FG_GF_H = f'GF.H.{foundation_type}' # noqa + FG_GF_V = f'GF.V.{foundation_type}' # noqa + CMP_GF = pd.DataFrame( # noqa + {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], # noqa + f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa CMP = pd.concat([CMP, CMP_GF], axis=0) From 1af9056acdbef6ff5767066e5dd209586ed0ce67 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sat, 23 Mar 2024 12:26:03 -0700 Subject: [PATCH 35/48] Add `super.()__init__()` calls in parent classes. --- pelicun/uq.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pelicun/uq.py b/pelicun/uq.py index 8c06de48d..eeef4b3a9 100644 --- a/pelicun/uq.py +++ b/pelicun/uq.py @@ -1245,6 +1245,11 @@ def __init__( identical. """ + super().__init__( + name, + f_map, + anchor, + ) @abstractmethod def inverse_transform(self, values): @@ -1309,6 +1314,11 @@ def __init__( identical. 
""" + super().__init__( + name, + f_map, + anchor, + ) @abstractmethod def inverse_transform(self, sample_size): From 15467d58444d98e48ccb78cbf001e4f902433300 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sun, 24 Mar 2024 09:47:20 -0700 Subject: [PATCH 36/48] Simplify test Removes unnecessary code --- pelicun/tests/test_model.py | 53 ++++++++++++------------------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 8cddf29ba..83c3e0f24 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -706,7 +706,7 @@ def test_load_cmp_model_1(self, asset_model): asset_model.cmp_marginal_params, check_index_type=False, check_column_type=False, - check_dtype=False + check_dtype=False, ) expected_cmp_units = pd.Series( @@ -1377,19 +1377,7 @@ def test__perform_dmg_task(self, assessment_instance): demand_model = assessment_instance.demand asset_model = assessment_instance.asset - data = [ - ['rad', 1e-11], - ['rad', 1e11], - ] - - index = pd.MultiIndex.from_tuples( - (('PID', '1', '1'), ('PID', '1', '2')), names=['type', 'loc', 'dir'] - ) - - demand_marginals = pd.DataFrame(data, index, columns=['Units', 'Theta_0']) - demand_model.load_model({'marginals': demand_marginals}) - sample_size = 5 - demand_model.generate_sample({"SampleSize": sample_size}) + sample_size = 3 cmp_marginals = pd.read_csv( 'pelicun/tests/data/model/' @@ -1406,26 +1394,15 @@ def test__perform_dmg_task(self, assessment_instance): ] ) - block_batch_size = 5 - qnt_samples = [] - pg_batch = damage_model._get_pg_batches(block_batch_size) - batches = pg_batch.index.get_level_values(0).unique() - for PGB_i in batches: - PGB = pg_batch.loc[PGB_i] - capacity_sample, lsds_sample = damage_model._generate_dmg_sample( - sample_size, PGB - ) - EDP_req = damage_model._get_required_demand_type(PGB) - demand_dict = damage_model._assemble_required_demand_data(EDP_req) - ds_sample = damage_model._evaluate_damage_state( - demand_dict, EDP_req, capacity_sample, lsds_sample - ) - qnt_sample = damage_model._prepare_dmg_quantities( - PGB, ds_sample, dropzero=False - ) - qnt_samples.append(qnt_sample) - qnt_sample = pd.concat(qnt_samples, axis=1) - qnt_sample.sort_index(axis=1, inplace=True) + qnt_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0', '0'): [1.0, 1.0, 0.0], + ('CMP.A', '1', '1', '0', '1'): [0.0, 0.0, 1.0], + ('CMP.B', '1', '1', '0', '0'): [0.0, 0.0, 1.0], + ('CMP.B', '1', '1', '0', '1'): [1.0, 1.0, 0.0], + }, + ) + qnt_sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] before = qnt_sample.copy() dmg_process = {"1_CMP.B": {"DS1": "CMP.A_DS1"}} @@ -1434,8 +1411,12 @@ def test__perform_dmg_task(self, assessment_instance): damage_model._perform_dmg_task(task, qnt_sample) after = qnt_sample - assert ('CMP.A', '1', '1', '0', '1') not in before.columns - assert ('CMP.A', '1', '1', '0', '1') in after.columns + assert after.to_dict() == { + ('CMP.A', '1', '1', '0', '0'): {0: 0.0, 1: 0.0, 2: 0.0}, + ('CMP.A', '1', '1', '0', '1'): {0: 1.0, 1: 1.0, 2: 1.0}, + ('CMP.B', '1', '1', '0', '0'): {0: 0.0, 1: 0.0, 2: 1.0}, + ('CMP.B', '1', '1', '0', '1'): {0: 1.0, 1: 1.0, 2: 0.0}, + } def test__get_pg_batches_1(self, assessment_instance): damage_model = assessment_instance.damage From d59f85e6fc225d1fc966155e443a803bf15df8c2 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sun, 24 Mar 2024 09:47:39 -0700 Subject: [PATCH 37/48] Remove unnecessary pylint error suppression --- pelicun/uq.py | 2 -- 1 file changed, 2 
deletions(-) diff --git a/pelicun/uq.py b/pelicun/uq.py index eeef4b3a9..67b51a66b 100644 --- a/pelicun/uq.py +++ b/pelicun/uq.py @@ -1210,7 +1210,6 @@ class CommonRandomVariable(BaseRandomVariable): Random variable that needs `values` in `inverse_transform` """ - # pylint: disable=super-init-not-called @abstractmethod def __init__( self, @@ -1279,7 +1278,6 @@ class SampleSizeRandomVariable(BaseRandomVariable): Random variable that needs `sample_size` in `inverse_transform` """ - # pylint: disable=super-init-not-called @abstractmethod def __init__( self, From 35f6145756be385b7a16c6150c70e0f559a5b1af Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sun, 24 Mar 2024 09:52:51 -0700 Subject: [PATCH 38/48] Remove `bldg` term from variables This merge was a bit messy, but I'll be more careful during the next one. --John --- pelicun/tools/DL_calculation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pelicun/tools/DL_calculation.py b/pelicun/tools/DL_calculation.py index eab06fd46..2eef2ab9a 100644 --- a/pelicun/tools/DL_calculation.py +++ b/pelicun/tools/DL_calculation.py @@ -1530,10 +1530,10 @@ def run_pelicun( loss_models, columns=['Repair'], index=drivers ) - elif bldg_repair_config['MapApproach'] == "User Defined": + elif repair_config['MapApproach'] == "User Defined": - if bldg_repair_config.get('MapFilePath', False) is not False: - loss_map_path = bldg_repair_config['MapFilePath'] + if repair_config.get('MapFilePath', False) is not False: + loss_map_path = repair_config['MapFilePath'] loss_map_path = loss_map_path.replace( 'CustomDLDataFolder', custom_dl_file_path) @@ -1733,7 +1733,7 @@ def run_pelicun( if loss_config is not None: if 'agg_repair' not in locals(): - agg_repair = PAL.bldg_repair.aggregate_losses() + agg_repair = PAL.repair.aggregate_losses() agg_repair_s = convert_to_SimpleIndex(agg_repair, axis=1) From b4e72aede90323da542de7c150134d39d94f07a8 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sun, 24 Mar 2024 10:09:36 -0700 Subject: [PATCH 39/48] Remove unnecessary lines from test --- pelicun/db.py | 12 ++++++------ pelicun/tests/test_model.py | 2 -- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pelicun/db.py b/pelicun/db.py index 381101597..86a18bf52 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -2810,12 +2810,12 @@ def create_Hazus_EQ_bldg_injury_db( with open(source_file, 'r', encoding='utf-8') as f: raw_data = json.load(f) - # parse the extra metadata file - if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: - frag_meta = json.load(f) - else: - frag_meta = {} + # # parse the extra metadata file + # if Path(meta_file).is_file(): + # with open(meta_file, 'r') as f: + # frag_meta = json.load(f) + # else: + # frag_meta = {} # prepare lists of labels for various building features building_types = list( diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 83c3e0f24..197517554 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -1374,7 +1374,6 @@ def test__evaluate_damage_state_and_prepare_dmg_quantities( def test__perform_dmg_task(self, assessment_instance): damage_model = assessment_instance.damage - demand_model = assessment_instance.demand asset_model = assessment_instance.asset sample_size = 3 @@ -1403,7 +1402,6 @@ def test__perform_dmg_task(self, assessment_instance): }, ) qnt_sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] - before = qnt_sample.copy() dmg_process = {"1_CMP.B": {"DS1": "CMP.A_DS1"}} dmg_process = {key: 
dmg_process[key] for key in sorted(dmg_process)} From 6f7e87c307685981e0ec354510b7521e3e755e9f Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sun, 24 Mar 2024 10:10:48 -0700 Subject: [PATCH 40/48] Formatting changes --- pelicun/tools/DL_calculation.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pelicun/tools/DL_calculation.py b/pelicun/tools/DL_calculation.py index 2eef2ab9a..c95687bfe 100644 --- a/pelicun/tools/DL_calculation.py +++ b/pelicun/tools/DL_calculation.py @@ -366,7 +366,8 @@ def run_pelicun( with open(config_path, 'r', encoding='utf-8') as f: config = json.load(f) - custom_dl_file_path = custom_model_dir #f"{config['commonFileDir']}/CustomDLModels/" + # f"{config['commonFileDir']}/CustomDLModels/" + custom_dl_file_path = custom_model_dir DL_config = config.get('DL', None) if not DL_config: @@ -857,7 +858,9 @@ def run_pelicun( if asset_config.get('ComponentDatabasePath', False) is not False: extra_comps = asset_config['ComponentDatabasePath'] - extra_comps = extra_comps.replace('CustomDLDataFolder', custom_dl_file_path) + extra_comps = extra_comps.replace( + 'CustomDLDataFolder', custom_dl_file_path + ) component_db += [ extra_comps, @@ -988,11 +991,13 @@ def run_pelicun( adf.loc['irreparable', ('LS1', 'Theta_0')] = 1e10 adf.loc['irreparable', 'Incomplete'] = 0 - # TODO: we can improve this by creating a water network-specific assessment class + # TODO: we can improve this by creating a water + # network-specific assessment class if "Water" in asset_config['ComponentDatabase']: # add a placeholder aggregate fragility that will never trigger - # damage, but allow damage processes to aggregate the various pipeline damages + # damage, but allow damage processes to aggregate the + # various pipeline damages adf.loc['aggregate', ('Demand', 'Directional')] = 1 adf.loc['aggregate', ('Demand', 'Offset')] = 0 adf.loc['aggregate', ('Demand', 'Type')] = 'Peak Ground Velocity' @@ -1302,7 +1307,9 @@ def run_pelicun( if repair_config.get('ConsequenceDatabasePath', False) is not False: extra_comps = repair_config['ConsequenceDatabasePath'] - extra_comps = extra_comps.replace('CustomDLDataFolder', custom_dl_file_path) + extra_comps = extra_comps.replace( + 'CustomDLDataFolder', custom_dl_file_path + ) consequence_db += [ extra_comps, From 3079ff1127456674eed8dc0bb3802b82e12976e8 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Sun, 24 Mar 2024 11:22:12 -0700 Subject: [PATCH 41/48] manually merge `Hazus_Earthquake_IM.py` (2) --- pelicun/resources/auto/Hazus_Earthquake_IM.py | 158 ++++++++++++++---- 1 file changed, 128 insertions(+), 30 deletions(-) diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 44cb2139d..5b38ca7c2 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -119,7 +119,6 @@ def convertBridgeToHAZUSclass(AIM): seismic = (int(state) == 6 and int(yr_built) >= 1975) or ( int(state) != 6 and int(yr_built) >= 1990 ) - # Use a catch-all, other class by default bridge_class = "HWB28" @@ -374,13 +373,13 @@ def auto_populate(AIM): FG_GF_H = f'GF.H.{foundation_type}' FG_GF_V = f'GF.V.{foundation_type}' - CMP_GF = pd.DataFrame( - { - f'{FG_GF_H}': ['ea', 1, 1, 1, 'N/A'], - f'{FG_GF_V}': ['ea', 1, 3, 1, 'N/A'], - }, - index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], - ).T + # fmt: off + CMP_GF = pd.DataFrame( # noqa + {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], # noqa + f'{FG_GF_V}':[ 'ea', 1, 3, 
1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on CMP = pd.concat([CMP, CMP_GF], axis=0) @@ -635,26 +634,14 @@ def auto_populate(AIM): location_string = '1' # Define performance model - CMP = pd.DataFrame( - { - f'PWP.{pipe_flexibility}.GS': [ - 'ea', - location_string, - '0', - 1, - 'N/A', - ], - f'PWP.{pipe_flexibility}.GF': [ - 'ea', - location_string, - '0', - 1, - 'N/A', - ], - 'aggregate': ['ea', location_string, '0', 1, 'N/A'], - }, - index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], - ).T + # fmt: off + CMP = pd.DataFrame( # noqa + {f'PWP.{pipe_flexibility}.GS': ['ea', location_string, '0', 1, 'N/A'], # noqa + f'PWP.{pipe_flexibility}.GF': ['ea', location_string, '0', 1, 'N/A'], # noqa + f'aggregate': ['ea', location_string, '0', 1, 'N/A']}, # noqa + index = ['Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on # Set up the demand cloning configuration for the pipe # segments, if required. @@ -696,8 +683,8 @@ def auto_populate(AIM): "ComponentDatabase": "Hazus Earthquake - Water", "Material Flexibility": pipe_flexibility, "PlanArea": "1", # Sina: does not make sense for water. - # Kept it here since itw as also - # kept here for Transportation + # Kept it here since itw as also + # kept here for Transportation }, "Damage": { "DamageProcess": "User Defined", @@ -705,6 +692,117 @@ def auto_populate(AIM): }, "Demands": demand_config, } + + elif wdn_element_type == "Tank": + + tank_cmp_lines = { + ("OG", "C", 1): {'PST.G.C.A.GS': ['ea', 1, 1, 1, 'N/A']}, + ("OG", "C", 0): {'PST.G.C.U.GS': ['ea', 1, 1, 1, 'N/A']}, + ("OG", "S", 1): {'PST.G.S.A.GS': ['ea', 1, 1, 1, 'N/A']}, + ("OG", "S", 0): {'PST.G.S.U.GS': ['ea', 1, 1, 1, 'N/A']}, + # Anchored status and Wood is not defined for On Ground tanks + ("OG", "W", 0): {'PST.G.W.GS': ['ea', 1, 1, 1, 'N/A']}, + # Anchored status and Steel is not defined for Above Ground tanks + ("AG", "S", 0): {'PST.A.S.GS': ['ea', 1, 1, 1, 'N/A']}, + # Anchored status and Concrete is not defined for Buried tanks. + ("B", "C", 0): {'PST.B.C.GF': ['ea', 1, 1, 1, 'N/A']}, + } + + # The default values are assumed: material = Concrete (C), + # location= On Ground (OG), and Anchored = 1 + tank_material = GI_ap.get("material", "C") + tank_location = GI_ap.get("location", "OG") + tank_anchored = GI_ap.get("anchored", int(1)) + + tank_material_allowable = {"C", "S"} + if tank_material not in tank_material_allowable: + raise ValueError( + f"Tank's material = \"{tank_material}\" is \ + not allowable in tank {asset_name}. The \ + material must be either C for concrete or S \ + for steel." + ) + + tank_location_allowable = {"AG", "OG", "B"} + if tank_location not in tank_location_allowable: + raise ValueError( + f"Tank's location = \"{tank_location}\" is \ + not allowable in tank {asset_name}. The \ + location must be either \"AG\" for Above \ + ground, \"OG\" for On Ground or \"BG\" for \ + Bellow Ground (burried) Tanks." + ) + + tank_anchored_allowable = {int(0), int(1)} + if tank_anchored not in tank_anchored_allowable: + raise ValueError( + f"Tank's anchored status = \"{tank_location}\ + \" is not allowable in tank {asset_name}. \ + The anchored status must be either integer\ + value 0 for unachored, or 1 for anchored" + ) + + if tank_location == "AG" and tank_material == "C": + print( + f"The tank {asset_name} is Above Ground (i.e., AG), but \ + the material type is Concrete (\"C\"). Tank type \"C\" is not \ + defiend for AG tanks. 
The tank is assumed to be Steel (\"S\")" + ) + tank_material = "S" + + if tank_location == "AG" and tank_material == "W": + print( + f"The tank {asset_name} is Above Ground (i.e., AG), but \ + the material type is Wood (\"W\"). Tank type \"W\" is not \ + defiend for AG tanks. The tank is assumed to be Steel (\"S\")" + ) + tank_material = "S" + + if tank_location == "B" and tank_material == "S": + print( + f"The tank {asset_name} is burried (i.e., B), but the\ + material type is Steel (\"S\"). \ + Tank type \"S\" is not defiend for\ + B tanks. The tank is assumed to be Concrete (\"C\")" + ) + tank_material = "C" + + if tank_location == "B" and tank_material == "W": + print( + f"The tank {asset_name} is burried (i.e., B), but the\ + material type is Wood (\"W\"). Tank type \"W\" is not defiend \ + for B tanks. The tank is assumed to be Concrete (\"C\")" + ) + tank_material = "C" + + if tank_anchored == 1: + # Since anchore status does nto matter, there is no need to + # print a warning + tank_anchored = 0 + + cur_tank_cmp_line = tank_cmp_lines[ + (tank_location, tank_material, tank_anchored) + ] + + CMP = pd.DataFrame( + cur_tank_cmp_line, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + ).T + + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Water", + "Material": tank_material, + "Location": tank_location, + "Anchored": tank_anchored, + "PlanArea": "1", # Sina: does not make sense for water. + # Kept it here since itw as also kept here for Transportation + }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + } + else: print( f"Water Distribution network element type {wdn_element_type} " From 2def079aea78b8c81c6a897e28f7eb5f290f0e91 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Mon, 25 Mar 2024 16:47:34 -0700 Subject: [PATCH 42/48] `-LOC` damage process, refactor damage model - Implemented a -LOC keyword for location-specific damage processes. - Redesigned damage evaluation to use a damage states when applying damage processes for more intuitive logic and to reduce if/else calls. - Modified the unit tests to align with the new approach and extended them to cover the -LOC damage process. - Updated loss estimation to bypass calculations if no damage is present, *subject to review*. --- pelicun/model/damage_model.py | 474 +++++++++--------- pelicun/model/loss_model.py | 12 + .../CMP_marginals_2.csv | 5 + pelicun/tests/test_model.py | 162 ++++-- 4 files changed, 375 insertions(+), 278 deletions(-) create mode 100755 pelicun/tests/data/model/test_DamageModel_perform_dmg_task/CMP_marginals_2.csv diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py index 9c3d73b61..a2fdc817b 100644 --- a/pelicun/model/damage_model.py +++ b/pelicun/model/damage_model.py @@ -49,13 +49,12 @@ """ -from copy import deepcopy import numpy as np import pandas as pd -from .pelicun_model import PelicunModel -from .. import base -from .. import uq -from .. 
import file_io +from pelicun.model.pelicun_model import PelicunModel +from pelicun import base +from pelicun import uq +from pelicun import file_io idx = base.idx @@ -894,8 +893,12 @@ def _evaluate_damage_state( # initialize the DataFrames that store the damage states and # quantities - ds_sample = capacity_sample.groupby(level=[0, 1, 2, 3, 4], axis=1).first() - ds_sample.loc[:, :] = np.zeros(ds_sample.shape, dtype=int) + ds_sample = pd.DataFrame( + 0, # fill value + columns=capacity_sample.columns.droplevel('ls').unique(), + index=capacity_sample.index, + dtype='int32', + ) # get a list of limit state ids among all components in the damage model ls_list = dmg_eval.columns.get_level_values(5).unique() @@ -932,7 +935,7 @@ def _evaluate_damage_state( return ds_sample - def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): + def _prepare_dmg_quantities(self, component_blocks, ds_sample, dropzero=True): """ Combine component quantity and damage state information in one DataFrame. @@ -943,8 +946,8 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): Parameters ---------- - PGB: DataFrame - A DataFrame that contains the Block identifier for each + component_blocks: DataFrame + A DataFrame that contains the number of blocks for each component. ds_sample: DataFrame A DataFrame that assigns a damage state to each component @@ -961,114 +964,88 @@ def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): """ - # Log a message indicating that the calculation of damage - # quantities is starting if self._asmnt.log.verbose: self.log_msg('Calculating damage quantities...', prepend_timestamp=True) - # Store the damage state sample as a local variable - dmg_ds = ds_sample - - # Retrieve the component quantity information from the asset - # model - cmp_qnt = self._asmnt.asset.cmp_sample # .values - # Retrieve the component marginal parameters from the asset - # model - cmp_params = self._asmnt.asset.cmp_marginal_params + # Retrieve the component quantity information and component + # marginal parameters from the asset model - # Combine the component quantity information for the columns - # in the damage state sample - dmg_qnt = pd.concat( - [cmp_qnt[PG[:4]] for PG in dmg_ds.columns], axis=1, keys=dmg_ds.columns - ) + # ('cmp', 'loc', 'dir', 'uid') -> component quantity series + component_quantities = self._asmnt.asset.cmp_sample.to_dict('series') + component_marginal_parameters = self._asmnt.asset.cmp_marginal_params - # Initialize a list to store the block weights - block_weights = [] + if (component_marginal_parameters is not None) and ( + 'Blocks' in component_marginal_parameters.columns + ): + # if this information is available, use it - # For each component in the list of PG blocks - for PG in PGB.index: - # Set the number of blocks to 1, unless specified - # otherwise in the component marginal parameters - blocks = 1 - if cmp_params is not None: - if 'Blocks' in cmp_params.columns: - blocks = cmp_params.loc[PG, 'Blocks'] - - # Calculate the weights as the reciprocal of the number of - # blocks - blocks_array = np.full(int(blocks), 1.0 / blocks) - block_weights += blocks_array.tolist() - - # Broadcast the block weights to match the shape of the damage - # quantity DataFrame - block_weights = np.broadcast_to( - block_weights, (dmg_qnt.shape[0], len(block_weights)) - ) + # ('cmp', 'loc', 'dir', 'uid) -> number of blocks + num_blocks = component_marginal_parameters['Blocks'].to_dict() - # Multiply the damage quantities by the block weights - dmg_qnt *= block_weights - - 
# Get the unique damage states from the damage state sample - # Note that these might be fewer than all possible Damage - # States - ds_list = np.unique(dmg_ds.values) - # Filter out any NaN values from the list of damage states - ds_list = ds_list[pd.notna(ds_list)].astype(int) - - # If the dropzero option is True, remove the zero damage state - # from the list of damage states - if dropzero: - ds_list = ds_list[ds_list != 0] - - # Only proceed with the calculation if there is at least one - # damage state in the list - if len(ds_list) > 0: - # Create a list of DataFrames, where each DataFrame stores - # the damage quantities for a specific damage state - res_list = [ - pd.DataFrame( - np.where(dmg_ds == ds_i, dmg_qnt, 0), - columns=dmg_ds.columns, - index=dmg_ds.index, - ) - for ds_i in ds_list - ] + def get_num_blocks(key): + return float(num_blocks[key]) - # Combine the damage quantity DataFrames into a single - # DataFrame - res_df = pd.concat( - res_list, axis=1, keys=[f'{ds_i:g}' for ds_i in ds_list] - ) - res_df.columns.names = ['ds', *res_df.columns.names[1::]] - # remove the block level from the columns - res_df.columns = res_df.columns.reorder_levels([1, 2, 3, 4, 0, 5]) - res_df = res_df.groupby(level=[0, 1, 2, 3, 4], axis=1).sum() + else: + # otherwise assume 1 block regardless of + # ('cmp', 'loc', 'dir', 'uid) key + def get_num_blocks(key): + return 1.00 + + # ('cmp', 'loc', 'dir', 'uid', 'block') -> damage state series + ds_sample_dict = ds_sample.to_dict('series') + + dmg_qnt_series_collection = {} + for key, ds_series in ds_sample_dict.items(): + component, location, direction, uid, block = key + ds_set = set(ds_series.values) + for ds in ds_set: + if ds == -1: + continue + if dropzero and ds == 0: + continue + else: + dmg_qnt_vals = np.where( + ds_series.values == ds, + component_quantities[ + component, location, direction, uid + ].values + / get_num_blocks((component, location, direction, uid)), + 0.00, + ) + if -1 in ds_set: + dmg_qnt_vals = np.where( + ds_series.values != -1, dmg_qnt_vals, np.nan + ) + dmg_qnt_series = pd.Series(dmg_qnt_vals) + dmg_qnt_series_collection[ + (component, location, direction, uid, block, str(ds)) + ] = dmg_qnt_series + + damage_quantities = pd.concat( + dmg_qnt_series_collection.values(), + axis=1, + keys=dmg_qnt_series_collection.keys(), + ) + damage_quantities.columns.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ds'] - # The damage states with no damaged quantities are dropped - # Note that some of these are not even valid DSs at the given PG - res_df = res_df.iloc[:, np.where(res_df.sum(axis=0) != 0)[0]] + # sum up block quantities + damage_quantities = damage_quantities.groupby( + ['cmp', 'loc', 'dir', 'uid', 'ds'], axis=1 + ).sum() - return res_df + return damage_quantities - def _perform_dmg_task(self, task, qnt_sample): + def _perform_dmg_task(self, task, ds_sample): """ Perform a task from a damage process. The method performs a task from a damage process on a given - quantity sample. The method first checks if the source - component specified in the task exists among the available - components in the quantity sample. If the source component is - not found, a warning message is logged and the method returns - the original quantity sample unchanged. Otherwise, the method - executes the events specified in the task. The events can be - triggered by a limit state exceedance or a damage state - occurrence. 
If the event is triggered by a damage state, the - method moves all quantities of the target component(s) into - the target damage state in pre-selected realizations. If the - target event is "NA", the method removes quantity information - from the target components in the pre-selected - realizations. After executing the events, the method returns - the updated quantity sample. + damage state sample. The events of the task are triggered by a + damage state occurrence. The method assigns target + component(s) into the target damage state based on the damage + state of the source component. If the target event is "NA", + the method removes damage state information from the target + components. Parameters ---------- @@ -1076,50 +1053,46 @@ def _perform_dmg_task(self, task, qnt_sample): A list representing a task from the damage process. The list contains two elements: - The first element is a string representing the source - component, e.g., `'CMP_A'`. + component, e.g., `'1_CMP_A'`. The number in the beginning + is used to order the tasks and is not considered here. - The second element is a dictionary representing the events triggered by the damage state of the source component. The keys of the dictionary are strings that represent the damage state of the source component, e.g., `'DS1'`. The values are lists of strings representing the target component(s) and event(s), e.g., - `['CMP_B', 'CMP_C']`. - qnt_sample : pandas DataFrame - A DataFrame representing the quantities of the components - in the damage sample. It is modified in place to represent - the quantities of the components in the damage sample - after the task has been performed. - - Raises - ------ - ValueError - If the source component is not found among the components - in the damage sample - ValueError - If the source event is not a limit state (LS) or damage - state (DS) - ValueError - If the target event is not a limit state (LS), damage - state (DS), or not available (NA) - ValueError - If the target event is a limit state (LS) + `['CMP_B.DS1', 'CMP_C.DS1']`. They could also be a + single element instead of a list. + Examples of a task: + ['1_CMP.A', {'DS1': ['CMP.B_DS1', 'CMP.C_DS2']}] + ['1_CMP.A', {'DS1': 'CMP.B_DS1', 'DS2': 'CMP.B_DS2'}] + ['1_CMP.A-LOC', {'DS1': 'CMP.B_DS1'}] + ds_sample : pandas DataFrame + A DataFrame representing the damage state of the + components. It is modified in place to represent the + damage states of the components after the task has been + performed. """ if self._asmnt.log.verbose: - self.log_msg('Applying task...', prepend_timestamp=True) + self.log_msg(f'Applying task {task}...', prepend_timestamp=True) - # get the list of available components - cmp_list = qnt_sample.columns.get_level_values(0).unique().tolist() + # parse task + source_cmp = task[0].split('_')[1] # source component + events = task[1] # prescribed events - # get the component quantities - cmp_qnt = self._asmnt.asset.cmp_sample - - # get the source component - source_cmp = task[0].split('_')[1] + # check for the `-LOC` suffix. If this is the case, we need to + # match locations. 
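+        # For example, the source of task '1_CMP.A-LOC' is 'CMP.A' with
+        # location matching enabled, whereas a plain '1_CMP.A' source
+        # applies the prescribed events to the target component(s) at
+        # every location.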
+ if source_cmp.endswith('-LOC'): + source_cmp = source_cmp.replace('-LOC', '') + match_locations = True + else: + match_locations = False - # check if it exists among the available ones - if source_cmp not in cmp_list: + # check if the source component exists in the damage state + # dataframe + if source_cmp not in ds_sample.columns.get_level_values('cmp'): self.log_msg( f"WARNING: Source component {source_cmp} in the prescribed " "damage process not found among components in the damage " @@ -1127,119 +1100,58 @@ def _perform_dmg_task(self, task, qnt_sample): "skipped.", prepend_timestamp=False, ) - return - # get the damage quantities for the source component - source_cmp_df = qnt_sample.loc[:, source_cmp] - - # execute the prescribed events - for source_event, target_infos in task[1].items(): - # events triggered by limit state exceedance - if source_event.startswith('LS'): - # ls_i = int(source_event[2:]) - # TODO: implement source LS support - raise ValueError('LS not supported yet.') - - # events triggered by damage state occurrence - if source_event.startswith('DS'): - # get the ID of the damage state that triggers the event - ds_list = [ - source_event[2:], - ] - - # if we are only looking for a single DS - if len(ds_list) == 1: - ds_target = ds_list[0] - - # get the realizations with non-zero quantity of the target DS - source_ds_vals = source_cmp_df.groupby(level=[3], axis=1).max() - - if ds_target in source_ds_vals.columns: - source_ds_vals = source_ds_vals[ds_target] - source_mask = source_cmp_df.loc[source_ds_vals > 0.0].index - else: - # if tge source_cmp is not in ds_target in any of the - # realizations, the prescribed event is not triggered - continue - - else: - pass # TODO: implement multiple DS support + # execute the events pres prescribed in the damage task + for source_event, target_infos in events.items(): - else: + # events can only be triggered by damage state occurrence + if not source_event.startswith('DS'): raise ValueError( f"Unable to parse source event in damage " f"process: {source_event}" ) + # get the ID of the damage state that triggers the event + ds_source = int(source_event[2:]) - # get the information about the events - target_infos = np.atleast_1d(target_infos) + # turn the target_infos into a list if it is a single + # argument, for consistency + if not isinstance(target_infos, list): + target_infos = [target_infos] - # for each event for target_info in target_infos: + # get the target component and event type target_cmp, target_event = target_info.split('_') - # ALL means all, but the source component - if target_cmp == 'ALL': - # copy the list of available components - target_cmp = deepcopy(cmp_list) - - # remove the source component - if source_cmp in target_cmp: - target_cmp.remove(source_cmp) - - # otherwise we target a specific component - elif target_cmp in cmp_list: - target_cmp = [ - target_cmp, - ] - - # trigger a limit state - if target_event.startswith('LS'): - # ls_i = int(target_event[2:]) - # TODO: implement target LS support - raise ValueError('LS not supported yet.') + if (target_cmp != 'ALL') and ( + target_cmp not in ds_sample.columns.get_level_values('cmp') + ): + self.log_msg( + f"WARNING: Target component {target_cmp} in the prescribed " + "damage process not found among components in the damage " + "sample. 
The corresponding part of the damage process is " + "skipped.", + prepend_timestamp=False, + ) + continue # trigger a damage state if target_event.startswith('DS'): - # get the target damage state ID - ds_i = target_event[2:] - - # move all quantities of the target component(s) into the - # target damage state in the pre-selected realizations - qnt_sample.loc[source_mask, target_cmp] = 0.0 - - for target_cmp_i in target_cmp: - locs = cmp_qnt[target_cmp_i].columns.get_level_values(0) - dirs = cmp_qnt[target_cmp_i].columns.get_level_values(1) - uids = cmp_qnt[target_cmp_i].columns.get_level_values(2) - for loc, direction, uid in zip(locs, dirs, uids): - # because we cannot be certain that ds_i had been - # triggered earlier, we have to add this damage - # state manually for each PG of each component, if needed - if ( - ds_i - not in qnt_sample[ - (target_cmp_i, loc, direction, uid) - ].columns - ): - qnt_sample[ - (target_cmp_i, loc, direction, uid, ds_i) - ] = 0.0 - - qnt_sample.loc[ - source_mask, - (target_cmp_i, loc, direction, uid, ds_i), - ] = cmp_qnt.loc[ - source_mask, (target_cmp_i, loc, direction, uid) - ].values - - # clear all damage information + + # get the ID of the damage state to switch the target + # components to + ds_target = int(target_event[2:]) + + # clear damage state information elif target_event == 'NA': - # remove quantity information from the target components - # in the pre-selected realizations - qnt_sample.loc[source_mask, target_cmp] = np.nan + if match_locations: + raise ValueError( + 'Invalid damage task configuration. Cannot match ' + 'locations when the target event is set to NA.' + ) + ds_target = -1 + # -1 stands for nan (ints don'ts support nan) else: raise ValueError( @@ -1247,11 +1159,79 @@ def _perform_dmg_task(self, task, qnt_sample): f"process: {target_event}" ) + if match_locations: + self._perform_dmg_event_loc( + ds_sample, source_cmp, ds_source, target_cmp, ds_target + ) + + else: + self._perform_dmg_event( + ds_sample, source_cmp, ds_source, target_cmp, ds_target + ) + if self._asmnt.log.verbose: self.log_msg( 'Damage process task successfully applied.', prepend_timestamp=False ) + def _perform_dmg_event( + self, ds_sample, source_cmp, ds_source, target_cmp, ds_target + ): + """ + Perform a damage event. + See `_perform_dmg_task`. + + """ + + # affected rows + row_selection = np.where( + # for many instances of source_cmp, we + # consider the highest damage state + ds_sample[source_cmp].max(axis=1).values + == ds_source + )[0] + # affected columns + if target_cmp == 'ALL': + column_selection = np.where( + ds_sample.columns.get_level_values('cmp') != source_cmp + )[0] + else: + column_selection = np.where( + ds_sample.columns.get_level_values('cmp') == target_cmp + )[0] + ds_sample.iloc[row_selection, column_selection] = ds_target + + def _perform_dmg_event_loc( + self, ds_sample, source_cmp, ds_source, target_cmp, ds_target + ): + """ + Perform a damage event matching locations. + See `_perform_dmg_task`. 
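+        For each location of the source component, realizations in
+        which the source component reaches `ds_source` at that
+        location have the target component(s) at the same location
+        switched to `ds_target`.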
+ + """ + + # get locations of source component + source_locs = set(ds_sample[source_cmp].columns.get_level_values('loc')) + for loc in source_locs: + # apply damage task matching locations + row_selection = np.where( + # for many instances of source_cmp, we + # consider the highest damage state + ds_sample[source_cmp, loc].max(axis=1).values + == ds_source + )[0] + + # affected columns + if target_cmp == 'ALL': + raise ValueError('Cannot combine `-LOC` with `ALL` keywords') + column_selection = np.where( + np.logical_and( + ds_sample.columns.get_level_values('cmp') == target_cmp, + ds_sample.columns.get_level_values('loc') == loc, + ) + )[0] + ds_sample.iloc[row_selection, column_selection] = ds_target + def _get_pg_batches(self, block_batch_size): """ Group performance groups into batches for efficient damage assessment. @@ -1508,8 +1488,6 @@ def calculate( # computing. # get the list of performance groups - qnt_samples = [] - self.log_msg( f'Number of Performance Groups in Asset Model:' f' {self._asmnt.asset.cmp_sample.shape[1]}', @@ -1531,12 +1509,14 @@ def calculate( ) # for PG_i in self._asmnt.asset.cmp_sample.columns: + ds_samples = [] for PGB_i in batches: - PGB = pg_batch.loc[PGB_i] + + component_blocks = pg_batch.loc[PGB_i] self.log_msg( f"Calculating damage for PG batch {PGB_i} with " - f"{int(PGB['Blocks'].sum())} blocks" + f"{int(component_blocks['Blocks'].sum())} blocks" ) # Generate an array with component capacities for each block and @@ -1544,11 +1524,11 @@ def calculate( # each component limit state. The latter is primarily needed to # handle limit states with multiple, mutually exclusive DS options capacity_sample, lsds_sample = self._generate_dmg_sample( - sample_size, PGB, scaling_specification + sample_size, component_blocks, scaling_specification ) # Get the required demand types for the analysis - EDP_req = self._get_required_demand_type(PGB) + EDP_req = self._get_required_demand_type(component_blocks) # Create the demand vector demand_dict = self._assemble_required_demand_data(EDP_req) @@ -1557,36 +1537,30 @@ def calculate( ds_sample = self._evaluate_damage_state( demand_dict, EDP_req, capacity_sample, lsds_sample ) - qnt_sample = self._prepare_dmg_quantities(PGB, ds_sample, dropzero=False) - qnt_samples.append(qnt_sample) - - qnt_sample = pd.concat(qnt_samples, axis=1) - - # Create a comprehensive table with all possible DSs to have a robust - # input for the damage processes evaluation below - qnt_sample = self._complete_ds_cols(qnt_sample) - qnt_sample.sort_index(axis=1, inplace=True) + ds_samples.append(ds_sample) + ds_sample = pd.concat(ds_samples, axis=1) self.log_msg("Raw damage calculation successful.", prepend_timestamp=False) # Apply the prescribed damage process, if any if dmg_process is not None: self.log_msg("Applying damage processes...") - # sort the processes + # Sort the damage processes tasks dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} + # Perform damage tasks in the sorted order for task in dmg_process.items(): - self._perform_dmg_task(task, qnt_sample) + self._perform_dmg_task(task, ds_sample) self.log_msg( "Damage processes successfully applied.", prepend_timestamp=False ) - # If requested, remove columns with no damage from the sample - if self._asmnt.options.list_all_ds is False: - qnt_sample = qnt_sample.iloc[:, np.where(qnt_sample.sum(axis=0) != 0)[0]] + qnt_sample = self._prepare_dmg_quantities( + pg_batch.reset_index('Batch', drop=True), ds_sample, dropzero=False + ) self.sample = qnt_sample diff --git 
a/pelicun/model/loss_model.py b/pelicun/model/loss_model.py index 16cbce25b..eb0b2d4de 100644 --- a/pelicun/model/loss_model.py +++ b/pelicun/model/loss_model.py @@ -758,6 +758,9 @@ def aggregate_losses(self): DV = self.sample + if DV is None: + return + # group results by DV type and location DVG = DV.groupby(level=[0, 4], axis=1).sum() @@ -866,6 +869,15 @@ def _generate_DV_sample(self, dmg_quantities, sample_size): # calculate the quantities for economies of scale self.log_msg("\nAggregating damage quantities...", prepend_timestamp=False) + # If everything is undamaged there are no losses + if set(dmg_quantities.columns.get_level_values('ds')) == {'0'}: + self._sample = None + self.log_msg( + "There is no damage---DV sample is set to None.", + prepend_timestamp=False, + ) + return + if self._asmnt.options.eco_scale["AcrossFloors"]: if self._asmnt.options.eco_scale["AcrossDamageStates"]: eco_levels = [ diff --git a/pelicun/tests/data/model/test_DamageModel_perform_dmg_task/CMP_marginals_2.csv b/pelicun/tests/data/model/test_DamageModel_perform_dmg_task/CMP_marginals_2.csv new file mode 100755 index 000000000..46aed067a --- /dev/null +++ b/pelicun/tests/data/model/test_DamageModel_perform_dmg_task/CMP_marginals_2.csv @@ -0,0 +1,5 @@ +,Units,Location,Direction,Theta_0,Blocks +CMP.A,ea,1,1,1,1 +CMP.A,ea,2,1,1,1 +CMP.B,ea,1,2,1,1 +CMP.B,ea,2,2,1,1 diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 197517554..51745c0d3 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -1077,7 +1077,7 @@ def test_save_load_sample(self, damage_model_with_sample, assessment_instance): _, units_from_variable = damage_model_with_sample.save_sample( save_units=True ) - assert units_from_variable.to_list() == ['ea'] * 20 + assert np.all(units_from_variable.to_numpy() == 'ea') def test_load_damage_model(self, damage_model_model_loaded): # should no longer be None @@ -1373,47 +1373,153 @@ def test__evaluate_damage_state_and_prepare_dmg_quantities( assert list(qnt_sample.columns)[0] == ('B.10.31.001', '2', '2', '0', '0') def test__perform_dmg_task(self, assessment_instance): + + x = assessment.Assessment() + x.log.verbose = False + assessment_instance = x + damage_model = assessment_instance.damage - asset_model = assessment_instance.asset - sample_size = 3 + # + # when CMP.B reaches DS1, CMP.A should be DS4 + # - cmp_marginals = pd.read_csv( - 'pelicun/tests/data/model/' - 'test_DamageModel_perform_dmg_task/CMP_marginals.csv', - index_col=0, + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '1', '1', '1'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '1', '1', '1'): [1, 0, 0], + }, + dtype='int32', ) - asset_model.load_cmp_model({'marginals': cmp_marginals}) - asset_model.generate_cmp_sample(sample_size) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - damage_model.load_damage_model( - [ - 'pelicun/tests/data/model/' - 'test_DamageModel_perform_dmg_task/fragility_DB_test.csv' - ] + dmg_process = {"1_CMP.B": {"DS1": "CMP.A_DS4"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 4, 1: 0, 2: 4}, + ('CMP.A', '1', '1', '1'): {0: 4, 1: 0, 2: 4}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '1', '1', '1'): {0: 1, 1: 0, 2: 0}, + } + + # + # when CMP.B reaches DS1, CMP.A should be NA (-1) + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '1', 
'1', '1'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '1', '1', '1'): [1, 0, 0], + }, + dtype='int32', ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.B": {"DS1": "CMP.A_NA"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: -1, 1: 0, 2: -1}, + ('CMP.A', '1', '1', '1'): {0: -1, 1: 0, 2: -1}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '1', '1', '1'): {0: 1, 1: 0, 2: 0}, + } + + # + # `-LOC` keyword + # when CMP.B reaches DS1, CMP.A should be DS4 + # matching locations + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '2', '1', '0'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '2', '1', '0'): [1, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.B-LOC": {"DS1": "CMP.A_DS4"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 0, 1: 0, 2: 4}, + ('CMP.A', '2', '1', '0'): {0: 4, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '2', '1', '0'): {0: 1, 1: 0, 2: 0}, + } + + # + # ALL keyword + # + # Whenever CMP.A reaches DS1, all other components should be + # set to DS2. + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [1, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 0], + ('CMP.C', '1', '1', '0'): [0, 0, 0], + ('CMP.D', '1', '1', '0'): [0, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.A": {"DS1": "ALL_DS2"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 1, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 2, 1: 0, 2: 0}, + ('CMP.C', '1', '1', '0'): {0: 2, 1: 0, 2: 0}, + ('CMP.D', '1', '1', '0'): {0: 2, 1: 0, 2: 0}, + } + + # + # NA keyword + # + # NA translates to -1 representing nan + # - qnt_sample = pd.DataFrame( + ds_sample = pd.DataFrame( { - ('CMP.A', '1', '1', '0', '0'): [1.0, 1.0, 0.0], - ('CMP.A', '1', '1', '0', '1'): [0.0, 0.0, 1.0], - ('CMP.B', '1', '1', '0', '0'): [0.0, 0.0, 1.0], - ('CMP.B', '1', '1', '0', '1'): [1.0, 1.0, 0.0], + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '1', '1', '1'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '1', '1', '1'): [1, 0, 0], }, + dtype='int32', ) - qnt_sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - dmg_process = {"1_CMP.B": {"DS1": "CMP.A_DS1"}} - dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} + dmg_process = {"1_CMP.B": {"DS1": "CMP.A_NA"}} for task in dmg_process.items(): - damage_model._perform_dmg_task(task, qnt_sample) - after = qnt_sample + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample assert after.to_dict() == { - ('CMP.A', '1', '1', '0', '0'): {0: 0.0, 1: 0.0, 2: 0.0}, - ('CMP.A', '1', '1', '0', '1'): {0: 1.0, 1: 1.0, 2: 1.0}, - ('CMP.B', '1', '1', '0', '0'): {0: 0.0, 1: 0.0, 2: 1.0}, - ('CMP.B', '1', '1', '0', '1'): {0: 1.0, 1: 1.0, 2: 0.0}, + ('CMP.A', '1', '1', '0'): {0: -1, 1: 0, 2: -1}, + ('CMP.A', '1', '1', '1'): {0: -1, 1: 0, 2: -1}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '1', '1', '1'): {0: 1, 1: 0, 2: 0}, } def 
test__get_pg_batches_1(self, assessment_instance): From 9d5233beee5bc20dd534eccd03a117e5f5f6013d Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Thu, 28 Mar 2024 06:20:50 -0700 Subject: [PATCH 43/48] Use `convert_units` for pipe segment calculation --- pelicun/resources/auto/Hazus_Earthquake_IM.py | 26 ++++++------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 5b38ca7c2..266132cff 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -606,28 +606,18 @@ def auto_populate(AIM): # segments. # Determine number of segments - with open( - os.path.join( - os.path.dirname(pelicun.__file__), 'settings/default_units.json' - ), - 'r', - encoding='utf-8', - ) as f: - units = json.load(f) + pipe_length_unit = GI_ap['units']['length'] - pipe_length_unit_factor = units['length'][pipe_length_unit] - pipe_length_in_base_unit = pipe_length * pipe_length_unit_factor - reference_length_in_base_unit = 20.00 * units['length']['ft'] - if pipe_length_in_base_unit % reference_length_in_base_unit < 1e-2: + pipe_length_feet = pelicun.base.convert_units( + pipe_length, unit=pipe_length_unit, to_unit='ft', category='length' + ) + reference_length = 20.00 # 20 ft + if pipe_length_feet % reference_length < 1e-2: # If the lengths are equal, then that's one segment, not two. - num_segments = int( - pipe_length_in_base_unit / reference_length_in_base_unit - ) + num_segments = int(pipe_length_feet / reference_length) else: # In all other cases, round up. - num_segments = ( - int(pipe_length_in_base_unit / reference_length_in_base_unit) + 1 - ) + num_segments = int(pipe_length_feet / reference_length) + 1 if num_segments > 1: location_string = f'1--{num_segments}' else: From 4bb6f5bd65c8699730afe63446f15d28ea2e1b67 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Thu, 28 Mar 2024 06:22:10 -0700 Subject: [PATCH 44/48] Update imports and formatting --- pelicun/resources/auto/Hazus_Earthquake_IM.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 266132cff..308f998b7 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -36,7 +36,6 @@ # # Contributors: # Adam Zsarnóczay -import os import json import pandas as pd import pelicun @@ -628,7 +627,7 @@ def auto_populate(AIM): CMP = pd.DataFrame( # noqa {f'PWP.{pipe_flexibility}.GS': ['ea', location_string, '0', 1, 'N/A'], # noqa f'PWP.{pipe_flexibility}.GF': ['ea', location_string, '0', 1, 'N/A'], # noqa - f'aggregate': ['ea', location_string, '0', 1, 'N/A']}, # noqa + 'aggregate': ['ea', location_string, '0', 1, 'N/A']}, # noqa index = ['Units','Location','Direction','Theta_0','Family'] # noqa ).T # noqa # fmt: on From 5a5fa077b4a84c90d481dc6e05db33edcf9c9273 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Thu, 28 Mar 2024 06:22:34 -0700 Subject: [PATCH 45/48] Location matching Utilizes location matching damage process tasks to aggregate pipe damage states. 
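For reference, the `-LOC` suffix introduced in PATCH 42 restricts each damage
process task to the locations where the source component reached the
triggering damage state. The sketch below mirrors the pattern of the unit
tests and applies such a process to a hand-built damage state sample through
the private `_perform_dmg_task` method; the component names ('PWP.B.GS'
standing in for a pipe ground-shaking fragility), the two locations, and the
three realizations are illustrative assumptions, not part of the patch.

    import pandas as pd
    from pelicun import assessment

    asmnt = assessment.Assessment()
    damage_model = asmnt.damage

    # Damage state per (cmp, loc, dir, uid) across three realizations.
    ds_sample = pd.DataFrame(
        {
            ('PWP.B.GS', '1', '0', '0'): [1, 0, 0],
            ('PWP.B.GS', '2', '0', '0'): [0, 2, 0],
            ('aggregate', '1', '0', '0'): [0, 0, 0],
            ('aggregate', '2', '0', '0'): [0, 0, 0],
        },
        dtype='int32',
    )
    ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid']

    # Location-matching tasks: pipe damage drives `aggregate` at the
    # same location only.
    dmg_process = {
        "1_PWP.B.GS-LOC": {"DS1": "aggregate_DS1"},
        "2_PWP.B.GS-LOC": {"DS2": "aggregate_DS2"},
    }
    for task in dmg_process.items():
        damage_model._perform_dmg_task(task, ds_sample)

    # `aggregate` ends up in DS1 at location 1 (realization 0) and in
    # DS2 at location 2 (realization 1); other entries stay at 0.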
--- pelicun/resources/auto/Hazus_Earthquake_IM.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 308f998b7..970f51fa3 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -656,10 +656,10 @@ def auto_populate(AIM): # Create damage process dmg_process = { - f"1_PWP.{pipe_flexibility}.GS": {"DS1": "aggregate_DS1"}, - f"2_PWP.{pipe_flexibility}.GF": {"DS1": "aggregate_DS1"}, - f"3_PWP.{pipe_flexibility}.GS": {"DS2": "aggregate_DS2"}, - f"4_PWP.{pipe_flexibility}.GF": {"DS2": "aggregate_DS2"}, + f"1_PWP.{pipe_flexibility}.GS-LOC": {"DS1": "aggregate_DS1"}, + f"2_PWP.{pipe_flexibility}.GF-LOC": {"DS1": "aggregate_DS1"}, + f"3_PWP.{pipe_flexibility}.GS-LOC": {"DS2": "aggregate_DS2"}, + f"4_PWP.{pipe_flexibility}.GF-LOC": {"DS2": "aggregate_DS2"}, } dmg_process_filename = 'dmg_process.json' with open(dmg_process_filename, 'w', encoding='utf-8') as f: From 22adc77a67aa5bb681132d70610083bf07b0a965 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Thu, 28 Mar 2024 06:23:43 -0700 Subject: [PATCH 46/48] Don't add `collapse` column for water networks --- pelicun/tools/DL_calculation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pelicun/tools/DL_calculation.py b/pelicun/tools/DL_calculation.py index c95687bfe..a7d1758f6 100644 --- a/pelicun/tools/DL_calculation.py +++ b/pelicun/tools/DL_calculation.py @@ -947,7 +947,7 @@ def run_pelicun( adf.loc['collapse', ('LS1', 'Theta_0')] = 1e10 adf.loc['collapse', 'Incomplete'] = 0 - else: + elif "Water" not in asset_config['ComponentDatabase']: # add a placeholder collapse fragility that will never trigger # collapse, but allow damage processes to work with collapse From c1f3d3ab8840e2c98cb6fc218e685cd7781ebbaa Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Thu, 28 Mar 2024 06:24:02 -0700 Subject: [PATCH 47/48] Improve formatting --- pelicun/tools/DL_calculation.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pelicun/tools/DL_calculation.py b/pelicun/tools/DL_calculation.py index a7d1758f6..768850824 100644 --- a/pelicun/tools/DL_calculation.py +++ b/pelicun/tools/DL_calculation.py @@ -1005,12 +1005,7 @@ def run_pelicun( adf.loc['aggregate', ('LS1', 'Theta_0')] = 1e10 adf.loc['aggregate', 'Incomplete'] = 0 - PAL.damage.load_damage_model( - component_db - + [ - adf, - ] - ) + PAL.damage.load_damage_model(component_db + [adf]) # load the damage process if needed dmg_process = None From 64ef134e704b7c8a325cef5d3b26adbf2f7e56a3 Mon Sep 17 00:00:00 2001 From: John Vouvakis Manousakis Date: Thu, 28 Mar 2024 09:23:18 -0700 Subject: [PATCH 48/48] Add support for more location matching cases Adds more cases and tests them: - `-LOC` + `NA` - `-LOC` + `ALL` + `NA` --- pelicun/model/damage_model.py | 25 +++++++------- pelicun/tests/test_model.py | 62 ++++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 16 deletions(-) diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py index a2fdc817b..ec1ba0d2c 100644 --- a/pelicun/model/damage_model.py +++ b/pelicun/model/damage_model.py @@ -1145,11 +1145,6 @@ def _perform_dmg_task(self, task, ds_sample): # clear damage state information elif target_event == 'NA': - if match_locations: - raise ValueError( - 'Invalid damage task configuration. Cannot match ' - 'locations when the target event is set to NA.' 
- ) ds_target = -1 # -1 stands for nan (ints don'ts support nan) @@ -1223,13 +1218,19 @@ def _perform_dmg_event_loc( # affected columns if target_cmp == 'ALL': - raise ValueError('Cannot combine `-LOC` with `ALL` keywords') - column_selection = np.where( - np.logical_and( - ds_sample.columns.get_level_values('cmp') == target_cmp, - ds_sample.columns.get_level_values('loc') == loc, - ) - )[0] + column_selection = np.where( + np.logical_and( + ds_sample.columns.get_level_values('cmp') != source_cmp, + ds_sample.columns.get_level_values('loc') == loc, + ) + )[0] + else: + column_selection = np.where( + np.logical_and( + ds_sample.columns.get_level_values('cmp') == target_cmp, + ds_sample.columns.get_level_values('loc') == loc, + ) + )[0] ds_sample.iloc[row_selection, column_selection] = ds_target def _get_pg_batches(self, block_batch_size): diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 51745c0d3..459b955a8 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -1374,10 +1374,6 @@ def test__evaluate_damage_state_and_prepare_dmg_quantities( def test__perform_dmg_task(self, assessment_instance): - x = assessment.Assessment() - x.log.verbose = False - assessment_instance = x - damage_model = assessment_instance.damage # @@ -1522,6 +1518,64 @@ def test__perform_dmg_task(self, assessment_instance): ('CMP.B', '1', '1', '1'): {0: 1, 1: 0, 2: 0}, } + # + # NA keyword combined with `-LOC` + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '2', '1', '0'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '2', '1', '0'): [1, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.B-LOC": {"DS1": "CMP.A_NA"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 0, 1: 0, 2: -1}, + ('CMP.A', '2', '1', '0'): {0: -1, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '2', '1', '0'): {0: 1, 1: 0, 2: 0}, + } + + # + # NA keyword combined with `-LOC` and `ALL` + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 1], + ('CMP.A', '2', '1', '0'): [1, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 0], + ('CMP.B', '2', '1', '0'): [0, 0, 0], + ('CMP.C', '1', '1', '0'): [0, 0, 0], + ('CMP.C', '2', '1', '0'): [0, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.A-LOC": {"DS1": "ALL_NA"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.A', '2', '1', '0'): {0: 1, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: -1}, + ('CMP.B', '2', '1', '0'): {0: -1, 1: 0, 2: 0}, + ('CMP.C', '1', '1', '0'): {0: 0, 1: 0, 2: -1}, + ('CMP.C', '2', '1', '0'): {0: -1, 1: 0, 2: 0}, + } + def test__get_pg_batches_1(self, assessment_instance): damage_model = assessment_instance.damage asset_model = assessment_instance.asset