Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add profiling & benchmarks for Indexset and Parameter -- clean #155

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ secrets.env
*.iml
# tests
tests/test-data
tests/fixtures/optimization/big/parameterdata.csv
.coverage*
.profiles
.benchmarks
Expand Down
11 changes: 1 addition & 10 deletions ixmp4/data/db/optimization/equation/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import copy
from typing import Any, ClassVar

from sqlalchemy.orm import validates
Expand Down Expand Up @@ -28,15 +27,7 @@ class Equation(base.BaseModel):
def validate_data(self, key: Any, data: dict[str, Any]) -> dict[str, Any]:
if not bool(data):
return data
data_to_validate = copy.deepcopy(data)
del data_to_validate["levels"]
del data_to_validate["marginals"]
if bool(data_to_validate):
_ = utils.validate_data(
host=self,
data=data_to_validate,
columns=self.columns,
)
utils.validate_data(host=self, data=data, columns=self.columns)
return data

__table_args__ = (db.UniqueConstraint("name", "run__id"),)
10 changes: 1 addition & 9 deletions ixmp4/data/db/optimization/parameter/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import copy
from typing import Any, ClassVar

from sqlalchemy.orm import validates
Expand Down Expand Up @@ -26,14 +25,7 @@ class Parameter(base.BaseModel):

@validates("data")
def validate_data(self, key: Any, data: dict[str, Any]) -> dict[str, Any]:
data_to_validate = copy.deepcopy(data)
del data_to_validate["values"]
del data_to_validate["units"]
_ = utils.validate_data(
host=self,
data=data_to_validate,
columns=self.columns,
)
utils.validate_data(host=self, data=data, columns=self.columns)
return data

__table_args__ = (db.UniqueConstraint("name", "run__id"),)
9 changes: 3 additions & 6 deletions ixmp4/data/db/optimization/table/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,11 @@ class Table(base.BaseModel):
columns: types.Mapped[list["Column"]] = db.relationship()
data: types.JsonDict = db.Column(db.JsonType, nullable=False, default={})

# TODO: should we pass self to validate_data to raise more specific errors?

@validates("data")
def validate_data(self, key: Any, data: dict[str, Any]) -> dict[str, Any]:
return utils.validate_data(
host=self,
data=data,
columns=self.columns,
utils.validate_data(
host=self, data=data, columns=self.columns, has_values_and_units=False
)
return data

__table_args__ = (db.UniqueConstraint("name", "run__id"),)
36 changes: 24 additions & 12 deletions ixmp4/data/db/optimization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,28 @@ def collect_indexsets_to_check(


def validate_data(
host: base.BaseModel, data: dict[str, Any], columns: list["Column"]
) -> dict[str, Any]:
data_frame: pd.DataFrame = pd.DataFrame.from_dict(data)
host: base.BaseModel,
data: dict[str, Any],
columns: list["Column"],
column_names: list[str] | None = None,
has_values_and_units: bool = True,
) -> None:
data_frame = pd.DataFrame.from_dict(data)

# We don't want to validate "values" and "units" when they are present
number_columns = (
len(data_frame.columns) - 2 if has_values_and_units else len(data_frame.columns)
)

# TODO for all of the following, we might want to create unique exceptions
# Could we make both more specific by specifying missing/extra columns?
if len(data_frame.columns) < len(columns):
if number_columns < len(columns):
raise host.DataInvalid(
f"While handling {host.__str__()}: \n"
f"Data is missing for some Columns! \n Data: {data} \n "
f"Columns: {[column.name for column in columns]}"
)
elif len(data_frame.columns) > len(columns):
elif number_columns > len(columns):
raise host.DataInvalid(
f"While handling {host.__str__()}: \n"
f"Trying to add data to unknown Columns! \n Data: {data} \n "
Expand All @@ -42,23 +52,25 @@ def validate_data(
"The data is missing values, please make sure it "
"does not contain None or NaN, either!"
)

limited_to_indexsets = collect_indexsets_to_check(columns=columns)

# We can make this more specific e.g. highlighting all duplicate rows via
# pd.DataFrame.duplicated(keep="False")
if data_frame.value_counts().max() > 1:
if data_frame[limited_to_indexsets.keys()].value_counts().max() > 1:
raise host.DataInvalid(
f"While handling {host.__str__()}: \n" "The data contains duplicate rows!"
)

# Can we make this more specific? Iterating over columns; if any is False,
# return its name or something?
limited_to_indexsets = collect_indexsets_to_check(columns=columns)
if not data_frame.isin(limited_to_indexsets).all(axis=None):
if (
not data_frame[limited_to_indexsets.keys()]
.isin(limited_to_indexsets)
.all(axis=None)
):
raise host.DataInvalid(
f"While handling {host.__str__()}: \n"
"The data contains values that are not allowed as per the IndexSets "
"and Columns it is constrained to!"
)

# we can assume the keys are always str
dict_data: dict[str, Any] = data_frame.to_dict(orient="list") # type: ignore[assignment]
return dict_data
11 changes: 1 addition & 10 deletions ixmp4/data/db/optimization/variable/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import copy
from typing import Any, ClassVar

from sqlalchemy.orm import validates
Expand Down Expand Up @@ -29,15 +28,7 @@ class OptimizationVariable(base.BaseModel):
def validate_data(self, key: Any, data: dict[str, Any]) -> dict[str, Any]:
if not bool(data):
return data
data_to_validate = copy.deepcopy(data)
del data_to_validate["levels"]
del data_to_validate["marginals"]
if bool(data_to_validate):
_ = utils.validate_data(
host=self,
data=data_to_validate,
columns=self.columns,
)
utils.validate_data(host=self, data=data, columns=self.columns)
return data

__table_args__ = (db.UniqueConstraint("name", "run__id"),)
Empty file.
Loading
Loading