Add a raw argument to the iamc.tabulate() (#38)

iiasa · Jan 22, 2024 · f06ed5d · f06ed5d
1 parent ce71f4a
commit f06ed5d
Show file tree

Hide file tree

Showing 3 changed files with 82 additions and 187 deletions.
diff --git a/ixmp4/core/iamc/repository.py b/ixmp4/core/iamc/repository.py
@@ -3,13 +3,22 @@
 from ..base import BaseFacade
 from .variable import VariableRepository
 
+# column for the year or datetime value by datapoint type
+MAP_STEP_COLUMN = {
+    "ANNUAL": "step_year",
+    "CATEGORICAL": "step_year",
+    "DATETIME": "step_time",
+}
+
 
 class IamcRepository(BaseFacade):
     def __init__(self, **kwargs) -> None:
         super().__init__(**kwargs)
         self.variables = VariableRepository(_backend=self.backend)
 
-    def tabulate(self, join_runs=True, **filters) -> pd.DataFrame:
+    def tabulate(
+        self, join_runs: bool = True, raw: bool = False, **filters
+    ) -> pd.DataFrame:
         # return only default runs unless a run-filter is provided
         if "run" not in filters:
             filters["run"] = {"default_only": True}
@@ -24,4 +33,26 @@ def tabulate(self, join_runs=True, **filters) -> pd.DataFrame:
             df = df.drop(columns=["time_series__id"])
             df.unit = df.unit.replace({"dimensionless": ""})
 
+            # convert step_[year/time/category] columns to standard IAMC format
+            if raw is False:
+                df.rename(columns={"step_category": "subannual"}, inplace=True)
+
+                if set(df.type.unique()).issubset(["ANNUAL", "CATEGORICAL"]):
+                    df.rename(columns={"step_year": "year"}, inplace=True)
+                    time_col = "year"
+                else:
+
+                    def map_step_column(df: pd.Series):
+                        df["time"] = df[MAP_STEP_COLUMN[df.type]]
+                        return df
+
+                    df = df.apply(map_step_column, axis=1)
+                    time_col = "time"
+
+                columns = ["model", "scenario", "version"] if join_runs else []
+                columns += ["region", "variable", "unit"] + [time_col]
+                if "subannual" in df.columns:
+                    columns += ["subannual"]
+                return df[columns + ["value"]]
+
         return df
diff --git a/tests/core/test_iamc.py b/tests/core/test_iamc.py
@@ -7,52 +7,52 @@
 
 
 @all_platforms
-def test_run_annual_datapoints(test_mp, test_data_annual):
-    do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)
+def test_run_annual_datapoints_raw(test_mp, test_data_annual):
+    do_run_datapoints(test_mp, test_data_annual, True, DataPoint.Type.ANNUAL)
 
 
 @all_platforms
-def test_run_annual_datapoints_from_pyam(test_mp, test_data_annual):
-    # convert to pyam.data format
+def test_run_annual_datapoints_iamc(test_mp, test_data_annual):
+    # convert test data to standard IAMC format
     df = test_data_annual.rename(columns={"step_year": "year"})
-    do_run_datapoints(test_mp, test_data_annual, arg_data=df)
+    do_run_datapoints(test_mp, df, False)
 
 
 @all_platforms
 @pytest.mark.parametrize("_type", (DataPoint.Type.CATEGORICAL, DataPoint.Type.DATETIME))
 def test_run_inconsistent_annual_raises(test_mp, test_data_annual, _type):
     with pytest.raises(SchemaError):
-        do_run_datapoints(test_mp, test_data_annual, _type)
+        do_run_datapoints(test_mp, test_data_annual, True, _type)
 
 
 @all_platforms
-def test_run_categorical_datapoints(test_mp, test_data_categorical):
-    do_run_datapoints(test_mp, test_data_categorical, DataPoint.Type.CATEGORICAL)
+def test_run_categorical_datapoints_raw(test_mp, test_data_categorical):
+    do_run_datapoints(test_mp, test_data_categorical, True, DataPoint.Type.CATEGORICAL)
 
 
 @all_platforms
 @pytest.mark.parametrize("_type", (DataPoint.Type.ANNUAL, DataPoint.Type.DATETIME))
 def test_run_inconsistent_categorical_raises(test_mp, test_data_categorical, _type):
     with pytest.raises(SchemaError):
-        do_run_datapoints(test_mp, test_data_categorical, _type)
+        do_run_datapoints(test_mp, test_data_categorical, True, _type)
 
 
 @all_platforms
-def test_run_datetime_datapoints(test_mp, test_data_datetime):
-    do_run_datapoints(test_mp, test_data_datetime, DataPoint.Type.DATETIME)
+def test_run_datetime_datapoints_raw(test_mp, test_data_datetime):
+    do_run_datapoints(test_mp, test_data_datetime, True, DataPoint.Type.DATETIME)
 
 
 @all_platforms
 @pytest.mark.parametrize("_type", (DataPoint.Type.ANNUAL, DataPoint.Type.CATEGORICAL))
 def test_run_inconsistent_datetime_type_raises(test_mp, test_data_datetime, _type):
     with pytest.raises(SchemaError):
-        do_run_datapoints(test_mp, test_data_datetime, _type)
+        do_run_datapoints(test_mp, test_data_datetime, True, _type)
 
 
 @all_platforms
-def test_unit_dimensionless(test_mp, test_data_annual):
+def test_unit_dimensionless_raw(test_mp, test_data_annual):
     test_data_annual.loc[0, "unit"] = ""
-    do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)
+    do_run_datapoints(test_mp, test_data_annual, True, DataPoint.Type.ANNUAL)
 
 
 @all_platforms
@@ -63,52 +63,51 @@ def test_unit_as_string_dimensionless_raises(test_mp, test_data_annual):
 
 
 @all_platforms
-def test_run_tabulate_with_filter(test_mp, test_data_annual):
+def test_run_tabulate_with_filter_raw(test_mp, test_data_annual):
     # Filter run directly
     add_regions(test_mp, test_data_annual["region"].unique())
     add_units(test_mp, test_data_annual["unit"].unique())
 
     run = test_mp.runs.create("Model", "Scenario")
     run.iamc.add(test_data_annual, type=DataPoint.Type.ANNUAL)
     obs = run.iamc.tabulate(
-        variable={"name": "Primary Energy"}, unit={"name": "EJ/yr"}
+        raw=True, variable={"name": "Primary Energy"}, unit={"name": "EJ/yr"}
     ).drop(["id", "type"], axis=1)
     exp = test_data_annual[test_data_annual.variable == "Primary Energy"]
     assert_unordered_equality(obs, exp, check_like=True)
 
 
-def do_run_datapoints(test_mp, ixmp_data, type=None, arg_data=None):
-    # ixmp_data: expected return format from Run.iamc.tabulate() (column names 'step_*')
-    # arg_data: passed to Run.iamc.[add/remove](),
-    # can be ixmp4 or pyam format (column names 'year' or 'time')
-
-    if arg_data is None:
-        arg_data = ixmp_data.copy()
+def do_run_datapoints(test_mp, data, raw=True, _type=None):
+    # Test adding, updating, removing data to a run
+    # either as ixmp4-database format (columns `step_[year/time/category]`)
+    # or as standard IAMC format (column names 'year' or 'time')
 
     # Define required regions and units in the database
-    add_regions(test_mp, ixmp_data["region"].unique())
-    add_units(test_mp, ixmp_data["unit"].unique())
+    add_regions(test_mp, data["region"].unique())
+    add_units(test_mp, data["unit"].unique())
 
     run = test_mp.runs.create("Model", "Scenario")
 
     # == Full Addition ==
     # Save to database
-    run.iamc.add(arg_data, type=type)
+    run.iamc.add(data, type=_type)
 
     # Retrieve from database via Run
-    ret = run.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
-    assert_unordered_equality(ixmp_data, ret, check_like=True)
+    ret = run.iamc.tabulate(raw=raw)
+    if raw:
+        ret = ret.drop(columns=["id", "type"])
+    assert_unordered_equality(data, ret, check_like=True)
 
     # If not set as default, retrieve from database via Platform returns an empty frame
-    ret = test_mp.iamc.tabulate()
+    ret = test_mp.iamc.tabulate(raw=raw)
     assert ret.empty
 
     # Retrieve from database via Platform (including model, scenario, version columns)
-    ret = test_mp.iamc.tabulate(run={"default_only": False})
-    ret = ret.drop(columns=["id", "type"])
+    ret = test_mp.iamc.tabulate(raw=raw, run={"default_only": False})
+    if raw:
+        ret = ret.drop(columns=["id", "type"])
 
-    test_mp_data = ixmp_data.copy()
+    test_mp_data = data.copy()
     test_mp_data["model"] = run.model.name
     test_mp_data["scenario"] = run.scenario.name
     test_mp_data["version"] = run.version
@@ -117,39 +116,41 @@ def do_run_datapoints(test_mp, ixmp_data, type=None, arg_data=None):
 
     # Retrieve from database after setting the run to default
     run.set_as_default()
-    ret = test_mp.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
+    ret = test_mp.iamc.tabulate(raw=raw)
+    if raw:
+        ret = ret.drop(columns=["id", "type"])
     assert_unordered_equality(test_mp_data, ret, check_like=True)
 
     # == Partial Removal ==
     # Remove half the data
-    remove_data = arg_data.head(len(ixmp_data) // 2).drop(columns=["value"])
-    remaining_data = ixmp_data.tail(len(ixmp_data) // 2).reset_index(drop=True)
-    run.iamc.remove(remove_data, type=type)
+    remove_data = data.head(len(data) // 2).drop(columns=["value"])
+    remaining_data = data.tail(len(data) // 2).reset_index(drop=True)
+    run.iamc.remove(remove_data, type=_type)
 
     # Retrieve from database
-    ret = run.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
+    ret = run.iamc.tabulate(raw=raw)
+    if raw:
+        ret = ret.drop(columns=["id", "type"])
     assert_unordered_equality(remaining_data, ret, check_like=True)
 
     # == Partial Update / Partial Addition ==
     # Update all data values
-    ixmp_data["value"] = -9.9
-    arg_data["value"] = -9.9
+    data["value"] = -9.9
 
     # Results in a half insert / half update
-    run.iamc.add(arg_data, type=type)
+    run.iamc.add(data, type=_type)
 
     # Retrieve from database
-    ret = run.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
-    assert_unordered_equality(ixmp_data, ret, check_like=True)
+    ret = run.iamc.tabulate(raw=raw)
+    if raw:
+        ret = ret.drop(columns=["id", "type"])
+    assert_unordered_equality(data, ret, check_like=True)
 
     # == Full Removal ==
     # Remove all data
-    remove_data = arg_data.drop(columns=["value"])
-    run.iamc.remove(remove_data, type=type)
+    remove_data = data.drop(columns=["value"])
+    run.iamc.remove(remove_data, type=_type)
 
     # Retrieve from database
-    ret = run.iamc.tabulate()
+    ret = run.iamc.tabulate(raw=raw)
     assert ret.empty
diff --git a/tests/core/test_run.py b/tests/core/test_run.py
@@ -92,140 +92,3 @@ def test_run_versions(self, test_mp):
         # non-default version cannot be again set as un-default
         with pytest.raises(IxmpError):
             run2.unset_as_default()
-
-    def test_run_annual_datapoints(self, test_mp, test_data_annual):
-        do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)
-
-    def test_run_annual_datapoints_from_pyam(self, test_mp, test_data_annual):
-        # convert to pyam.data format
-        df = test_data_annual.rename(columns={"step_year": "year"})
-        do_run_datapoints(test_mp, test_data_annual, arg_data=df)
-
-    @pytest.mark.parametrize(
-        "_type", (DataPoint.Type.CATEGORICAL, DataPoint.Type.DATETIME)
-    )
-    def test_run_inconsistent_annual_raises(self, test_mp, test_data_annual, _type):
-        with pytest.raises(SchemaError):
-            do_run_datapoints(test_mp, test_data_annual, _type)
-
-    def test_run_categorical_datapoints(self, test_mp, test_data_categorical):
-        do_run_datapoints(test_mp, test_data_categorical, DataPoint.Type.CATEGORICAL)
-
-    @pytest.mark.parametrize("_type", (DataPoint.Type.ANNUAL, DataPoint.Type.DATETIME))
-    def test_run_inconsistent_categorical_raises(
-        self, test_mp, test_data_categorical, _type
-    ):
-        with pytest.raises(SchemaError):
-            do_run_datapoints(test_mp, test_data_categorical, _type)
-
-    def test_run_datetime_datapoints(self, test_mp, test_data_datetime):
-        do_run_datapoints(test_mp, test_data_datetime, DataPoint.Type.DATETIME)
-
-    @pytest.mark.parametrize(
-        "_type", (DataPoint.Type.ANNUAL, DataPoint.Type.CATEGORICAL)
-    )
-    def test_run_inconsistent_datetime_type_raises(
-        self, test_mp, test_data_datetime, _type
-    ):
-        with pytest.raises(SchemaError):
-            do_run_datapoints(test_mp, test_data_datetime, _type)
-
-    def test_unit_dimensionless(self, test_mp, test_data_annual):
-        test_data_annual.loc[0, "unit"] = ""
-        do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)
-
-    def test_unit_as_string_dimensionless_raises(self, test_mp, test_data_annual):
-        test_data_annual.loc[0, "unit"] = "dimensionless"
-        with pytest.raises(ValueError, match="Unit name 'dimensionless' is reserved,"):
-            do_run_datapoints(test_mp, test_data_annual, DataPoint.Type.ANNUAL)
-
-    def test_run_tabulate_with_filter(self, test_mp, test_data_annual):
-        # Filter run directly
-        add_regions(test_mp, test_data_annual["region"].unique())
-        add_units(test_mp, test_data_annual["unit"].unique())
-
-        run = test_mp.runs.create("Model", "Scenario")
-        run.iamc.add(test_data_annual, type=DataPoint.Type.ANNUAL)
-        obs = run.iamc.tabulate(
-            variable={"name": "Primary Energy"}, unit={"name": "EJ/yr"}
-        ).drop(["id", "type"], axis=1)
-        exp = test_data_annual[test_data_annual.variable == "Primary Energy"]
-        assert_unordered_equality(obs, exp, check_like=True)
-
-
-def do_run_datapoints(test_mp, ixmp_data, type=None, arg_data=None):
-    # ixmp_data: expected return format from Run.iamc.tabulate() (column names 'step_*')
-    # arg_data: passed to Run.iamc.[add/remove](),
-    # can be ixmp4 or pyam format (column names 'year' or 'time')
-
-    if arg_data is None:
-        arg_data = ixmp_data.copy()
-
-    # Define required regions and units in the database
-    add_regions(test_mp, ixmp_data["region"].unique())
-    add_units(test_mp, ixmp_data["unit"].unique())
-
-    run = test_mp.runs.create("Model", "Scenario")
-
-    # == Full Addition ==
-    # Save to database
-    run.iamc.add(arg_data, type=type)
-
-    # Retrieve from database via Run
-    ret = run.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
-    assert_unordered_equality(ixmp_data, ret, check_like=True)
-
-    # If not set as default, retrieve from database via Platform returns an empty frame
-    ret = test_mp.iamc.tabulate()
-    assert ret.empty
-
-    # Retrieve from database via Platform (including model, scenario, version columns)
-    ret = test_mp.iamc.tabulate(run={"default_only": False})
-    ret = ret.drop(columns=["id", "type"])
-
-    test_mp_data = ixmp_data.copy()
-    test_mp_data["model"] = run.model.name
-    test_mp_data["scenario"] = run.scenario.name
-    test_mp_data["version"] = run.version
-    test_mp_data = test_mp_data[ret.columns]
-    assert_unordered_equality(test_mp_data, ret, check_like=True)
-
-    # Retrieve from database after setting the run to default
-    run.set_as_default()
-    ret = test_mp.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
-    assert_unordered_equality(test_mp_data, ret, check_like=True)
-
-    # == Partial Removal ==
-    # Remove half the data
-    remove_data = arg_data.head(len(ixmp_data) // 2).drop(columns=["value"])
-    remaining_data = ixmp_data.tail(len(ixmp_data) // 2).reset_index(drop=True)
-    run.iamc.remove(remove_data, type=type)
-
-    # Retrieve from database
-    ret = run.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
-    assert_unordered_equality(remaining_data, ret, check_like=True)
-
-    # == Partial Update / Partial Addition ==
-    # Update all data values
-    ixmp_data["value"] = -9.9
-    arg_data["value"] = -9.9
-
-    # Results in a half insert / half update
-    run.iamc.add(arg_data, type=type)
-
-    # Retrieve from database
-    ret = run.iamc.tabulate()
-    ret = ret.drop(columns=["id", "type"])
-    assert_unordered_equality(ixmp_data, ret, check_like=True)
-
-    # == Full Removal ==
-    # Remove all data
-    remove_data = arg_data.drop(columns=["value"])
-    run.iamc.remove(remove_data, type=type)
-
-    # Retrieve from database
-    ret = run.iamc.tabulate()
-    assert ret.empty