Skip to content

Commit

Permalink
Add a raw argument to the iamc.tabulate() (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored Jan 22, 2024
1 parent ce71f4a commit f06ed5d
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 187 deletions.
33 changes: 32 additions & 1 deletion ixmp4/core/iamc/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,22 @@
from ..base import BaseFacade
from .variable import VariableRepository

# Name of the column that holds the year or datetime value of a datapoint,
# keyed by datapoint type: "ANNUAL" and "CATEGORICAL" both map to "step_year",
# "DATETIME" maps to "step_time".
MAP_STEP_COLUMN = {
"ANNUAL": "step_year",
"CATEGORICAL": "step_year",
"DATETIME": "step_time",
}


class IamcRepository(BaseFacade):
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
self.variables = VariableRepository(_backend=self.backend)

def tabulate(self, join_runs=True, **filters) -> pd.DataFrame:
def tabulate(
self, join_runs: bool = True, raw: bool = False, **filters
) -> pd.DataFrame:
# return only default runs unless a run-filter is provided
if "run" not in filters:
filters["run"] = {"default_only": True}
Expand All @@ -24,4 +33,26 @@ def tabulate(self, join_runs=True, **filters) -> pd.DataFrame:
df = df.drop(columns=["time_series__id"])
df.unit = df.unit.replace({"dimensionless": ""})

# rename the step_[year/time/category] columns to the standard IAMC column names
if raw is False:
df.rename(columns={"step_category": "subannual"}, inplace=True)

if set(df.type.unique()).issubset(["ANNUAL", "CATEGORICAL"]):
df.rename(columns={"step_year": "year"}, inplace=True)
time_col = "year"
else:

# Row-wise helper passed to `DataFrame.apply(..., axis=1)`: despite its name,
# the `df` parameter is a single row (a pd.Series), not the enclosing frame.
# It looks up the step column for the row's `type` in MAP_STEP_COLUMN, copies
# that value into the row's "time" entry and returns the row.
# A `type` value missing from MAP_STEP_COLUMN raises KeyError.
def map_step_column(df: pd.Series) -> pd.Series:
df["time"] = df[MAP_STEP_COLUMN[df.type]]
return df

df = df.apply(map_step_column, axis=1)
time_col = "time"

columns = ["model", "scenario", "version"] if join_runs else []
columns += ["region", "variable", "unit"] + [time_col]
if "subannual" in df.columns:
columns += ["subannual"]
return df[columns + ["value"]]

return df
99 changes: 50 additions & 49 deletions tests/core/test_iamc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,52 +7,52 @@


@all_platforms
def test_run_annual_datapoints(test_mp, test_data_annual):
do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)
def test_run_annual_datapoints_raw(test_mp, test_data_annual):
do_run_datapoints(test_mp, test_data_annual, True, DataPoint.Type.ANNUAL)


@all_platforms
def test_run_annual_datapoints_from_pyam(test_mp, test_data_annual):
# convert to pyam.data format
def test_run_annual_datapoints_iamc(test_mp, test_data_annual):
# convert test data to standard IAMC format
df = test_data_annual.rename(columns={"step_year": "year"})
do_run_datapoints(test_mp, test_data_annual, arg_data=df)
do_run_datapoints(test_mp, df, False)


@all_platforms
@pytest.mark.parametrize("_type", (DataPoint.Type.CATEGORICAL, DataPoint.Type.DATETIME))
def test_run_inconsistent_annual_raises(test_mp, test_data_annual, _type):
with pytest.raises(SchemaError):
do_run_datapoints(test_mp, test_data_annual, _type)
do_run_datapoints(test_mp, test_data_annual, True, _type)


@all_platforms
def test_run_categorical_datapoints(test_mp, test_data_categorical):
do_run_datapoints(test_mp, test_data_categorical, DataPoint.Type.CATEGORICAL)
def test_run_categorical_datapoints_raw(test_mp, test_data_categorical):
do_run_datapoints(test_mp, test_data_categorical, True, DataPoint.Type.CATEGORICAL)


@all_platforms
@pytest.mark.parametrize("_type", (DataPoint.Type.ANNUAL, DataPoint.Type.DATETIME))
def test_run_inconsistent_categorical_raises(test_mp, test_data_categorical, _type):
with pytest.raises(SchemaError):
do_run_datapoints(test_mp, test_data_categorical, _type)
do_run_datapoints(test_mp, test_data_categorical, True, _type)


@all_platforms
def test_run_datetime_datapoints(test_mp, test_data_datetime):
do_run_datapoints(test_mp, test_data_datetime, DataPoint.Type.DATETIME)
def test_run_datetime_datapoints_raw(test_mp, test_data_datetime):
do_run_datapoints(test_mp, test_data_datetime, True, DataPoint.Type.DATETIME)


@all_platforms
@pytest.mark.parametrize("_type", (DataPoint.Type.ANNUAL, DataPoint.Type.CATEGORICAL))
def test_run_inconsistent_datetime_type_raises(test_mp, test_data_datetime, _type):
with pytest.raises(SchemaError):
do_run_datapoints(test_mp, test_data_datetime, _type)
do_run_datapoints(test_mp, test_data_datetime, True, _type)


@all_platforms
def test_unit_dimensionless(test_mp, test_data_annual):
def test_unit_dimensionless_raw(test_mp, test_data_annual):
test_data_annual.loc[0, "unit"] = ""
do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)
do_run_datapoints(test_mp, test_data_annual, True, DataPoint.Type.ANNUAL)


@all_platforms
Expand All @@ -63,52 +63,51 @@ def test_unit_as_string_dimensionless_raises(test_mp, test_data_annual):


@all_platforms
def test_run_tabulate_with_filter(test_mp, test_data_annual):
def test_run_tabulate_with_filter_raw(test_mp, test_data_annual):
# Filter run directly
add_regions(test_mp, test_data_annual["region"].unique())
add_units(test_mp, test_data_annual["unit"].unique())

run = test_mp.runs.create("Model", "Scenario")
run.iamc.add(test_data_annual, type=DataPoint.Type.ANNUAL)
obs = run.iamc.tabulate(
variable={"name": "Primary Energy"}, unit={"name": "EJ/yr"}
raw=True, variable={"name": "Primary Energy"}, unit={"name": "EJ/yr"}
).drop(["id", "type"], axis=1)
exp = test_data_annual[test_data_annual.variable == "Primary Energy"]
assert_unordered_equality(obs, exp, check_like=True)


def do_run_datapoints(test_mp, ixmp_data, type=None, arg_data=None):
# ixmp_data: expected return format from Run.iamc.tabulate() (column names 'step_*')
# arg_data: passed to Run.iamc.[add/remove](),
# can be ixmp4 or pyam format (column names 'year' or 'time')

if arg_data is None:
arg_data = ixmp_data.copy()
def do_run_datapoints(test_mp, data, raw=True, _type=None):
# Test adding, updating, removing data to a run
# either as ixmp4-database format (columns `step_[year/time/category]`)
# or as standard iamc format (column names 'year' or 'time')

# Define required regions and units in the database
add_regions(test_mp, ixmp_data["region"].unique())
add_units(test_mp, ixmp_data["unit"].unique())
add_regions(test_mp, data["region"].unique())
add_units(test_mp, data["unit"].unique())

run = test_mp.runs.create("Model", "Scenario")

# == Full Addition ==
# Save to database
run.iamc.add(arg_data, type=type)
run.iamc.add(data, type=_type)

# Retrieve from database via Run
ret = run.iamc.tabulate()
ret = ret.drop(columns=["id", "type"])
assert_unordered_equality(ixmp_data, ret, check_like=True)
ret = run.iamc.tabulate(raw=raw)
if raw:
ret = ret.drop(columns=["id", "type"])
assert_unordered_equality(data, ret, check_like=True)

# If not set as default, retrieve from database via Platform returns an empty frame
ret = test_mp.iamc.tabulate()
ret = test_mp.iamc.tabulate(raw=raw)
assert ret.empty

# Retrieve from database via Platform (including model, scenario, version columns)
ret = test_mp.iamc.tabulate(run={"default_only": False})
ret = ret.drop(columns=["id", "type"])
ret = test_mp.iamc.tabulate(raw=raw, run={"default_only": False})
if raw:
ret = ret.drop(columns=["id", "type"])

test_mp_data = ixmp_data.copy()
test_mp_data = data.copy()
test_mp_data["model"] = run.model.name
test_mp_data["scenario"] = run.scenario.name
test_mp_data["version"] = run.version
Expand All @@ -117,39 +116,41 @@ def do_run_datapoints(test_mp, ixmp_data, type=None, arg_data=None):

# Retrieve from database after setting the run to default
run.set_as_default()
ret = test_mp.iamc.tabulate()
ret = ret.drop(columns=["id", "type"])
ret = test_mp.iamc.tabulate(raw=raw)
if raw:
ret = ret.drop(columns=["id", "type"])
assert_unordered_equality(test_mp_data, ret, check_like=True)

# == Partial Removal ==
# Remove half the data
remove_data = arg_data.head(len(ixmp_data) // 2).drop(columns=["value"])
remaining_data = ixmp_data.tail(len(ixmp_data) // 2).reset_index(drop=True)
run.iamc.remove(remove_data, type=type)
remove_data = data.head(len(data) // 2).drop(columns=["value"])
remaining_data = data.tail(len(data) // 2).reset_index(drop=True)
run.iamc.remove(remove_data, type=_type)

# Retrieve from database
ret = run.iamc.tabulate()
ret = ret.drop(columns=["id", "type"])
ret = run.iamc.tabulate(raw=raw)
if raw:
ret = ret.drop(columns=["id", "type"])
assert_unordered_equality(remaining_data, ret, check_like=True)

# == Partial Update / Partial Addition ==
# Update all data values
ixmp_data["value"] = -9.9
arg_data["value"] = -9.9
data["value"] = -9.9

# Results in a half insert / half update
run.iamc.add(arg_data, type=type)
run.iamc.add(data, type=_type)

# Retrieve from database
ret = run.iamc.tabulate()
ret = ret.drop(columns=["id", "type"])
assert_unordered_equality(ixmp_data, ret, check_like=True)
ret = run.iamc.tabulate(raw=raw)
if raw:
ret = ret.drop(columns=["id", "type"])
assert_unordered_equality(data, ret, check_like=True)

# == Full Removal ==
# Remove all data
remove_data = arg_data.drop(columns=["value"])
run.iamc.remove(remove_data, type=type)
remove_data = data.drop(columns=["value"])
run.iamc.remove(remove_data, type=_type)

# Retrieve from database
ret = run.iamc.tabulate()
ret = run.iamc.tabulate(raw=raw)
assert ret.empty
137 changes: 0 additions & 137 deletions tests/core/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,140 +92,3 @@ def test_run_versions(self, test_mp):
# non-default version cannot be again set as un-default
with pytest.raises(IxmpError):
run2.unset_as_default()

# Run the shared add / tabulate / remove round-trip (`do_run_datapoints`) on the
# annual fixture, with the datapoint type given explicitly as ANNUAL.
def test_run_annual_datapoints(self, test_mp, test_data_annual):
do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)

# Same round-trip, but the frame handed to add/remove uses a "year" column
# instead of "step_year" and no datapoint type is passed; the expected
# tabulate() output stays the unrenamed `test_data_annual`.
def test_run_annual_datapoints_from_pyam(self, test_mp, test_data_annual):
# convert to pyam.data format
df = test_data_annual.rename(columns={"step_year": "year"})
do_run_datapoints(test_mp, test_data_annual, arg_data=df)

# Annual data declared as CATEGORICAL or DATETIME must be rejected:
# the round-trip helper is expected to raise SchemaError for both types.
@pytest.mark.parametrize(
"_type", (DataPoint.Type.CATEGORICAL, DataPoint.Type.DATETIME)
)
def test_run_inconsistent_annual_raises(self, test_mp, test_data_annual, _type):
with pytest.raises(SchemaError):
do_run_datapoints(test_mp, test_data_annual, _type)

# Run the shared add / tabulate / remove round-trip on the categorical fixture,
# with the datapoint type given explicitly as CATEGORICAL.
def test_run_categorical_datapoints(self, test_mp, test_data_categorical):
do_run_datapoints(test_mp, test_data_categorical, DataPoint.Type.CATEGORICAL)

# Categorical data declared as ANNUAL or DATETIME must be rejected:
# the round-trip helper is expected to raise SchemaError for both types.
@pytest.mark.parametrize("_type", (DataPoint.Type.ANNUAL, DataPoint.Type.DATETIME))
def test_run_inconsistent_categorical_raises(
self, test_mp, test_data_categorical, _type
):
with pytest.raises(SchemaError):
do_run_datapoints(test_mp, test_data_categorical, _type)

# Run the shared add / tabulate / remove round-trip on the datetime fixture,
# with the datapoint type given explicitly as DATETIME.
def test_run_datetime_datapoints(self, test_mp, test_data_datetime):
do_run_datapoints(test_mp, test_data_datetime, DataPoint.Type.DATETIME)

# Datetime data declared as ANNUAL or CATEGORICAL must be rejected:
# the round-trip helper is expected to raise SchemaError for both types.
@pytest.mark.parametrize(
"_type", (DataPoint.Type.ANNUAL, DataPoint.Type.CATEGORICAL)
)
def test_run_inconsistent_datetime_type_raises(
self, test_mp, test_data_datetime, _type
):
with pytest.raises(SchemaError):
do_run_datapoints(test_mp, test_data_datetime, _type)

# An empty-string unit must survive the full round-trip: the first row's unit
# is blanked before the annual data is run through `do_run_datapoints`.
def test_unit_dimensionless(self, test_mp, test_data_annual):
test_data_annual.loc[0, "unit"] = ""
do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)

# The literal unit name "dimensionless" is reserved: setting it on the first
# row must make the round-trip fail with a ValueError carrying that message.
def test_unit_as_string_dimensionless_raises(self, test_mp, test_data_annual):
test_data_annual.loc[0, "unit"] = "dimensionless"
with pytest.raises(ValueError, match="Unit name 'dimensionless' is reserved,"):
do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)

# Check that filters passed to `run.iamc.tabulate()` restrict the result:
# after adding the annual fixture, filtering on variable "Primary Energy" and
# unit "EJ/yr" must return exactly the fixture rows for that variable
# (compared after dropping the "id" and "type" columns from the result).
def test_run_tabulate_with_filter(self, test_mp, test_data_annual):
# Filter run directly
add_regions(test_mp, test_data_annual["region"].unique())
add_units(test_mp, test_data_annual["unit"].unique())

run = test_mp.runs.create("Model", "Scenario")
run.iamc.add(test_data_annual, type=DataPoint.Type.ANNUAL)
obs = run.iamc.tabulate(
variable={"name": "Primary Energy"}, unit={"name": "EJ/yr"}
).drop(["id", "type"], axis=1)
exp = test_data_annual[test_data_annual.variable == "Primary Energy"]
assert_unordered_equality(obs, exp, check_like=True)


def do_run_datapoints(test_mp, ixmp_data, type=None, arg_data=None):
    """Round-trip IAMC datapoints through a freshly created run.

    Covers, in order: full addition, retrieval via the run and via the
    platform (before and after setting the run as default), partial removal,
    a combined update/insert, and full removal.

    Parameters
    ----------
    test_mp
        Platform under test.
    ixmp_data
        Expected return format of ``Run.iamc.tabulate()``
        (column names ``step_*``).
    type
        Datapoint type forwarded as ``type=`` to ``run.iamc.add`` and
        ``run.iamc.remove``. The name shadows the builtin but is kept
        because callers pass it by position or keyword.
    arg_data
        Frame passed to ``Run.iamc.[add/remove]()``; can be ixmp4 or pyam
        format (column names 'year' or 'time'). Defaults to a copy of
        ``ixmp_data``. Rows are assumed to be in the same order as in
        ``ixmp_data``.
    """
    # Work on private copies: the "value" column is overwritten further down
    # and the caller's frames (typically shared fixtures) must not change.
    ixmp_data = ixmp_data.copy()
    arg_data = ixmp_data.copy() if arg_data is None else arg_data.copy()

    # Define required regions and units in the database
    add_regions(test_mp, ixmp_data["region"].unique())
    add_units(test_mp, ixmp_data["unit"].unique())

    run = test_mp.runs.create("Model", "Scenario")

    # == Full Addition ==
    # Save to database
    run.iamc.add(arg_data, type=type)

    # Retrieve from database via Run
    ret = run.iamc.tabulate()
    ret = ret.drop(columns=["id", "type"])
    assert_unordered_equality(ixmp_data, ret, check_like=True)

    # If not set as default, retrieve from database via Platform returns an empty frame
    ret = test_mp.iamc.tabulate()
    assert ret.empty

    # Retrieve from database via Platform (including model, scenario, version columns)
    ret = test_mp.iamc.tabulate(run={"default_only": False})
    ret = ret.drop(columns=["id", "type"])

    test_mp_data = ixmp_data.copy()
    test_mp_data["model"] = run.model.name
    test_mp_data["scenario"] = run.scenario.name
    test_mp_data["version"] = run.version
    test_mp_data = test_mp_data[ret.columns]
    assert_unordered_equality(test_mp_data, ret, check_like=True)

    # Retrieve from database after setting the run to default
    run.set_as_default()
    ret = test_mp.iamc.tabulate()
    ret = ret.drop(columns=["id", "type"])
    assert_unordered_equality(test_mp_data, ret, check_like=True)

    # == Partial Removal ==
    # Remove the first half of the data; everything from `half` onwards must
    # remain. Slicing with iloc (rather than tail(half)) keeps the middle row
    # in the expectation when the number of rows is odd.
    half = len(ixmp_data) // 2
    remove_data = arg_data.head(half).drop(columns=["value"])
    remaining_data = ixmp_data.iloc[half:].reset_index(drop=True)
    run.iamc.remove(remove_data, type=type)

    # Retrieve from database
    ret = run.iamc.tabulate()
    ret = ret.drop(columns=["id", "type"])
    assert_unordered_equality(remaining_data, ret, check_like=True)

    # == Partial Update / Partial Addition ==
    # Update all data values
    ixmp_data["value"] = -9.9
    arg_data["value"] = -9.9

    # Results in a half insert / half update
    run.iamc.add(arg_data, type=type)

    # Retrieve from database
    ret = run.iamc.tabulate()
    ret = ret.drop(columns=["id", "type"])
    assert_unordered_equality(ixmp_data, ret, check_like=True)

    # == Full Removal ==
    # Remove all data
    remove_data = arg_data.drop(columns=["value"])
    run.iamc.remove(remove_data, type=type)

    # Retrieve from database
    ret = run.iamc.tabulate()
    assert ret.empty

0 comments on commit f06ed5d

Please sign in to comment.