diff --git a/petab/parameters.py b/petab/parameters.py index f794530c..8dbf441f 100644 --- a/petab/parameters.py +++ b/petab/parameters.py @@ -55,18 +55,8 @@ def get_parameter_df( return None parameter_df = pd.concat(dfs) - # Remove identical parameter definitions - parameter_df.drop_duplicates(inplace=True, ignore_index=False) # Check for contradicting parameter definitions - parameter_duplicates = set(parameter_df.index.values[ - parameter_df.index.duplicated()]) - if parameter_duplicates: - raise ValueError( - f'The values of {PARAMETER_ID} must be unique or' - ' identical between all parameter subset files. The' - ' following duplicates were found:\n' - f'{parameter_duplicates}' - ) + _check_for_contradicting_parameter_definitions(parameter_df) return parameter_df @@ -81,10 +71,24 @@ def get_parameter_df( except KeyError as e: raise KeyError( f"Parameter table missing mandatory field {PARAMETER_ID}.") from e + _check_for_contradicting_parameter_definitions(parameter_df) return parameter_df +def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame): + """ + Raises a ValueError for non-unique parameter IDs + """ + parameter_duplicates = set(parameter_df.index.values[ + parameter_df.index.duplicated()]) + if parameter_duplicates: + raise ValueError( + f'The values of `{PARAMETER_ID}` must be unique. The ' + f'following duplicates were found:\n{parameter_duplicates}' + ) + + def write_parameter_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: """Write PEtab parameter table diff --git a/tests/test_parameters.py b/tests/test_parameters.py index 1727a736..5527133c 100644 --- a/tests/test_parameters.py +++ b/tests/test_parameters.py @@ -81,11 +81,11 @@ def test_get_parameter_df(): PARAMETER_ID: ['id3'], PARAMETER_NAME: ['name3'] }) - parameter_dfs['subset2_overlap'] = pd.DataFrame(data={ + parameter_dfs['subset2_redundance'] = pd.DataFrame(data={ PARAMETER_ID: ['id2', 'id3'], PARAMETER_NAME: ['name2', 'name3'] }) - parameter_dfs['subset2_error'] = pd.DataFrame(data={ + parameter_dfs['subset2_contradiction'] = pd.DataFrame(data={ PARAMETER_ID: ['id2', 'id3'], PARAMETER_NAME: ['different_name2', 'name3'] }) @@ -98,15 +98,52 @@ def test_get_parameter_df(): assert(petab.get_parameter_df(parameter_files['complete']).equals( petab.get_parameter_df([parameter_files['subset1'], parameter_files['subset2_strict']]))) - # Check that identical parameter definitions are correctly combined - assert(petab.get_parameter_df(parameter_files['complete']).equals( - petab.get_parameter_df([parameter_files['subset1'], - parameter_files['subset2_overlap']]))) # Ensure an error is raised if there exist parameterId duplicates + # with identical parameter definitions + with pytest.raises(ValueError): + petab.get_parameter_df( + [parameter_files["subset1"], + parameter_files["subset2_redundance"]] + ) # with non-identical parameter definitions with pytest.raises(ValueError): - petab.get_parameter_df([parameter_files['subset1'], - parameter_files['subset2_error']]) + petab.get_parameter_df( + [parameter_files["subset1"], + parameter_files["subset2_contradiction"], + ] + ) + + # Ensure that parameters that differ only by parameterId + # are recognized as distinct + with tempfile.TemporaryDirectory() as directory: + parameter_dfs, parameter_files = ({}, {}) + parameter_dfs["complete"] = pd.DataFrame( + data={ + PARAMETER_ID: ["id1", "id2", "id3", "id4"], + NOMINAL_VALUE: [1, 1, 1, 1], + } + ) + parameter_dfs["subset1"] = pd.DataFrame( + data={PARAMETER_ID: ["id1", "id2"], NOMINAL_VALUE: [1, 1]} + ) + parameter_dfs["subset2"] = pd.DataFrame( + data={PARAMETER_ID: ["id3", "id4"], NOMINAL_VALUE: [1, 1]} + ) + for name, df in parameter_dfs.items(): + with tempfile.NamedTemporaryFile( + mode="w", delete=False, dir=directory + ) as fh: + parameter_files[name] = fh.name + parameter_dfs[name].to_csv(fh, sep="\t", index=False) + # from one parameter file + df_template = parameter_dfs["complete"].set_index(PARAMETER_ID) + df_test = petab.get_parameter_df(parameter_files["complete"]) + assert (df_template == df_test).all().all() + # several parameter files + assert petab.get_parameter_df(parameter_files["complete"]).equals( + petab.get_parameter_df([parameter_files["subset1"], + parameter_files["subset2"]]) + ) def test_write_parameter_df():