Skip to content

Commit

Permalink
Fix use of multiple parameter files (#156)
Browse files Browse the repository at this point in the history
* check for duplicate parameter definitions
Testing:
* duplicate parameterId to always raise an error
* check for parameters with only unique parameterId
  • Loading branch information
m-philipps authored Jun 14, 2022
1 parent 5a87f83 commit 9967ce1
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 19 deletions.
26 changes: 15 additions & 11 deletions petab/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,8 @@ def get_parameter_df(
return None

parameter_df = pd.concat(dfs)
# Remove identical parameter definitions
parameter_df.drop_duplicates(inplace=True, ignore_index=False)
# Check for contradicting parameter definitions
parameter_duplicates = set(parameter_df.index.values[
parameter_df.index.duplicated()])
if parameter_duplicates:
raise ValueError(
f'The values of {PARAMETER_ID} must be unique or'
' identical between all parameter subset files. The'
' following duplicates were found:\n'
f'{parameter_duplicates}'
)
_check_for_contradicting_parameter_definitions(parameter_df)

return parameter_df

Expand All @@ -81,10 +71,24 @@ def get_parameter_df(
except KeyError as e:
raise KeyError(
f"Parameter table missing mandatory field {PARAMETER_ID}.") from e
_check_for_contradicting_parameter_definitions(parameter_df)

return parameter_df


def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame):
"""
Raises a ValueError for non-unique parameter IDs
"""
parameter_duplicates = set(parameter_df.index.values[
parameter_df.index.duplicated()])
if parameter_duplicates:
raise ValueError(
f'The values of `{PARAMETER_ID}` must be unique. The '
f'following duplicates were found:\n{parameter_duplicates}'
)


def write_parameter_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
"""Write PEtab parameter table
Expand Down
53 changes: 45 additions & 8 deletions tests/test_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ def test_get_parameter_df():
PARAMETER_ID: ['id3'],
PARAMETER_NAME: ['name3']
})
parameter_dfs['subset2_overlap'] = pd.DataFrame(data={
parameter_dfs['subset2_redundance'] = pd.DataFrame(data={
PARAMETER_ID: ['id2', 'id3'],
PARAMETER_NAME: ['name2', 'name3']
})
parameter_dfs['subset2_error'] = pd.DataFrame(data={
parameter_dfs['subset2_contradiction'] = pd.DataFrame(data={
PARAMETER_ID: ['id2', 'id3'],
PARAMETER_NAME: ['different_name2', 'name3']
})
Expand All @@ -98,15 +98,52 @@ def test_get_parameter_df():
assert(petab.get_parameter_df(parameter_files['complete']).equals(
petab.get_parameter_df([parameter_files['subset1'],
parameter_files['subset2_strict']])))
# Check that identical parameter definitions are correctly combined
assert(petab.get_parameter_df(parameter_files['complete']).equals(
petab.get_parameter_df([parameter_files['subset1'],
parameter_files['subset2_overlap']])))
# Ensure an error is raised if there exist parameterId duplicates
# with identical parameter definitions
with pytest.raises(ValueError):
petab.get_parameter_df(
[parameter_files["subset1"],
parameter_files["subset2_redundance"]]
)
# with non-identical parameter definitions
with pytest.raises(ValueError):
petab.get_parameter_df([parameter_files['subset1'],
parameter_files['subset2_error']])
petab.get_parameter_df(
[parameter_files["subset1"],
parameter_files["subset2_contradiction"],
]
)

# Ensure that parameters that differ only by parameterId
# are recognized as distinct
with tempfile.TemporaryDirectory() as directory:
parameter_dfs, parameter_files = ({}, {})
parameter_dfs["complete"] = pd.DataFrame(
data={
PARAMETER_ID: ["id1", "id2", "id3", "id4"],
NOMINAL_VALUE: [1, 1, 1, 1],
}
)
parameter_dfs["subset1"] = pd.DataFrame(
data={PARAMETER_ID: ["id1", "id2"], NOMINAL_VALUE: [1, 1]}
)
parameter_dfs["subset2"] = pd.DataFrame(
data={PARAMETER_ID: ["id3", "id4"], NOMINAL_VALUE: [1, 1]}
)
for name, df in parameter_dfs.items():
with tempfile.NamedTemporaryFile(
mode="w", delete=False, dir=directory
) as fh:
parameter_files[name] = fh.name
parameter_dfs[name].to_csv(fh, sep="\t", index=False)
# from one parameter file
df_template = parameter_dfs["complete"].set_index(PARAMETER_ID)
df_test = petab.get_parameter_df(parameter_files["complete"])
assert (df_template == df_test).all().all()
# several parameter files
assert petab.get_parameter_df(parameter_files["complete"]).equals(
petab.get_parameter_df([parameter_files["subset1"],
parameter_files["subset2"]])
)


def test_write_parameter_df():
Expand Down

0 comments on commit 9967ce1

Please sign in to comment.