From 3a79e86e035ded6fc6451d874294ac0af9f48f73 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 23 Jan 2025 12:17:20 +0100 Subject: [PATCH 01/10] allow to read a single document --- src/simtools/utils/general.py | 38 +++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index d9a5cf60f..63097c24d 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -135,7 +135,7 @@ def collect_data_from_http(url): return data -def collect_data_from_file(file_name): +def collect_data_from_file(file_name, yaml_document=None): """ Collect data from file based on its extension. @@ -143,6 +143,8 @@ def collect_data_from_file(file_name): ---------- file_name: str Name of the yaml/json/ascii file. + yaml_document: None, int + Return list of yaml documents or a single document (for yaml files with several documents). Returns ------- @@ -152,21 +154,31 @@ def collect_data_from_file(file_name): if is_url(file_name): return collect_data_from_http(file_name) + data = None + suffix = Path(file_name).suffix.lower() with open(file_name, encoding="utf-8") as file: - if Path(file_name).suffix.lower() == ".json": - return json.load(file) - - if Path(file_name).suffix.lower() == ".list": + if suffix == ".json": + data = json.load(file) + elif suffix == ".list": lines = file.readlines() - return [line.strip() for line in lines] + data = [line.strip() for line in lines] + elif suffix in [".yml", ".yaml"]: + try: + data = yaml.safe_load(file) + except yaml.constructor.ConstructorError: + data = _load_yaml_using_astropy(file) + except yaml.composer.ComposerError: + file.seek(0) + if yaml_document is None: + data = list(yaml.safe_load_all(file)) + try: + data = list(yaml.safe_load_all(file))[yaml_document] + except IndexError as exc: + raise InvalidConfigDataError( + f"YAML file {file_name} does not contain {yaml_document} documents." + ) from exc - try: - return yaml.safe_load(file) - except yaml.constructor.ConstructorError: - return _load_yaml_using_astropy(file) - except yaml.composer.ComposerError: - file.seek(0) - return list(yaml.safe_load_all(file)) + return data def collect_kwargs(label, in_kwargs): From 436a05987d29522bb2087f4c2ed6d0517e9b2e0b Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 23 Jan 2025 13:41:10 +0100 Subject: [PATCH 02/10] cleanup of constants --- src/simtools/constants.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/simtools/constants.py b/src/simtools/constants.py index b5185b218..38937c6cb 100644 --- a/src/simtools/constants.py +++ b/src/simtools/constants.py @@ -2,8 +2,11 @@ from importlib.resources import files +# Schema path +SCHEMA_PATH = files("simtools") / "schemas" # Path to metadata jsonschema -METADATA_JSON_SCHEMA = files("simtools") / "schemas/metadata.metaschema.yml" - +METADATA_JSON_SCHEMA = SCHEMA_PATH / "metadata.metaschema.yml" +# Path to model parameter metaschema +MODEL_PARAMETER_METASCHEMA = SCHEMA_PATH / "model_parameter.metaschema.yml" # Path to model parameter schema files -MODEL_PARAMETER_SCHEMA_PATH = files("simtools") / "schemas/model_parameters" +MODEL_PARAMETER_SCHEMA_PATH = SCHEMA_PATH / "model_parameters" From 3d06f750f35b43353f769a18cd7529ff63a06f70 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 23 Jan 2025 13:41:53 +0100 Subject: [PATCH 03/10] get schema version from file --- src/simtools/data_model/model_data_writer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/simtools/data_model/model_data_writer.py b/src/simtools/data_model/model_data_writer.py index 507e34811..882aeefd3 100644 --- a/src/simtools/data_model/model_data_writer.py +++ b/src/simtools/data_model/model_data_writer.py @@ -10,7 +10,7 @@ from astropy.io.registry.base import IORegistryError import simtools.utils.general as gen -from simtools.constants import MODEL_PARAMETER_SCHEMA_PATH +from simtools.constants import MODEL_PARAMETER_METASCHEMA, MODEL_PARAMETER_SCHEMA_PATH from simtools.data_model import validate_data from simtools.data_model.metadata_collector import MetadataCollector from simtools.io_operations import io_handler @@ -176,7 +176,7 @@ def dump_model_parameter( return _json_dict def get_validated_parameter_dict( - self, parameter_name, value, instrument, parameter_version, schema_version="0.1.0" + self, parameter_name, value, instrument, parameter_version, schema_version=None ): """ Get validated parameter dictionary. @@ -209,6 +209,11 @@ def get_validated_parameter_dict( value, unit = value_conversion.split_value_and_unit(value) + if schema_version is None: + schema_version = gen.collect_data_from_file(MODEL_PARAMETER_METASCHEMA, 0).get( + "version", "0.0.0" + ) + data_dict = { "schema_version": schema_version, "parameter": parameter_name, From 399ede570ca9b4b5ada9584bf1d14fcf770452ec Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 29 Jan 2025 13:58:58 +0100 Subject: [PATCH 04/10] Generalize reading of schema files --- ...onvert_all_model_parameters_from_simtel.py | 28 +------ .../validate_file_using_schema.py | 4 +- src/simtools/constants.py | 4 + src/simtools/data_model/metadata_collector.py | 5 +- src/simtools/data_model/metadata_model.py | 4 +- src/simtools/data_model/model_data_writer.py | 29 +------ src/simtools/data_model/schema.py | 77 +++++++++++++++++++ src/simtools/data_model/validate_data.py | 7 +- src/simtools/layout/array_layout.py | 9 ++- src/simtools/model/array_model.py | 4 +- src/simtools/utils/general.py | 13 ++-- src/simtools/utils/names.py | 7 +- tests/conftest.py | 4 - .../unit_tests/data_model/test_data_reader.py | 7 +- .../data_model/test_metadata_collector.py | 16 ++-- .../data_model/test_metadata_model.py | 45 +++++++---- .../data_model/test_model_data_writer.py | 19 +---- tests/unit_tests/data_model/test_schema.py | 43 +++++++++++ .../data_model/test_validate_data.py | 14 ++-- tests/unit_tests/model/test_array_model.py | 3 +- tests/unit_tests/utils/test_general.py | 4 +- 21 files changed, 212 insertions(+), 134 deletions(-) create mode 100644 src/simtools/data_model/schema.py create mode 100644 tests/unit_tests/data_model/test_schema.py diff --git a/src/simtools/applications/convert_all_model_parameters_from_simtel.py b/src/simtools/applications/convert_all_model_parameters_from_simtel.py index 7c0624eec..a2ad911f8 100644 --- a/src/simtools/applications/convert_all_model_parameters_from_simtel.py +++ b/src/simtools/applications/convert_all_model_parameters_from_simtel.py @@ -61,7 +61,7 @@ import simtools.data_model.model_data_writer as writer import simtools.utils.general as gen from simtools.configuration import configurator -from simtools.constants import MODEL_PARAMETER_SCHEMA_PATH +from simtools.data_model import schema from simtools.io_operations.io_handler import IOHandler from simtools.simtel.simtel_config_reader import SimtelConfigReader @@ -108,26 +108,6 @@ def _parse(label=None, description=None): return config.initialize(simulation_model=["telescope", "parameter_version"]) -def get_list_of_parameters_and_schema_files(schema_directory): - """ - Return list of parameters and schema files located in schema file directory. - - Returns - ------- - list - List of parameters found in schema file directory. - list - List of schema files found in schema file directory. - - """ - schema_files = sorted(Path(schema_directory).rglob("*.schema.yml")) - parameters = [] - for schema_file in schema_files: - schema_dict = gen.collect_data_from_file(file_name=schema_file) - parameters.append(schema_dict.get("name")) - return parameters, schema_files - - def get_list_of_simtel_parameters(simtel_config_file, logger): """ Return list of simtel parameters found in simtel configuration file. @@ -205,7 +185,7 @@ def get_number_of_camera_pixel(args_dict, logger): """ try: simtel_config_reader = SimtelConfigReader( - schema_file=MODEL_PARAMETER_SCHEMA_PATH / "camera_pixels.schema.yml", + schema_file=schema.model_parameter_schema_file("camera_pixels"), simtel_config_file=args_dict["simtel_cfg_file"], simtel_telescope_name=args_dict["simtel_telescope_name"], ) @@ -239,8 +219,8 @@ def read_and_export_parameters(args_dict, logger): List of simtools parameter not found in simtel configuration file. """ - _parameters, _schema_files = get_list_of_parameters_and_schema_files( - args_dict.get("schema_directory", MODEL_PARAMETER_SCHEMA_PATH) + _parameters, _schema_files = schema.model_parameter_schema_files( + args_dict.get("schema_directory") ) _simtel_parameters = get_list_of_simtel_parameters(args_dict["simtel_cfg_file"], logger) diff --git a/src/simtools/applications/validate_file_using_schema.py b/src/simtools/applications/validate_file_using_schema.py index 1ea17ca5e..d309db7c8 100644 --- a/src/simtools/applications/validate_file_using_schema.py +++ b/src/simtools/applications/validate_file_using_schema.py @@ -41,7 +41,7 @@ import simtools.utils.general as gen from simtools.configuration import configurator from simtools.constants import MODEL_PARAMETER_SCHEMA_PATH -from simtools.data_model import metadata_collector, metadata_model, validate_data +from simtools.data_model import metadata_collector, metadata_model, schema, validate_data def _parse(label, description): @@ -160,7 +160,7 @@ def validate_data_files(args_dict, logger): for file_name in _get_json_file_list(args_dict.get("file_directory")): tmp_args_dict["file_name"] = file_name parameter_name = re.sub(r"-\d+\.\d+\.\d+", "", file_name.stem) - schema_file = MODEL_PARAMETER_SCHEMA_PATH / f"{parameter_name}.schema.yml" + schema_file = (schema.model_parameter_schema_file(f"{parameter_name}"),) tmp_args_dict["schema"] = schema_file tmp_args_dict["data_type"] = "model_parameter" tmp_args_dict["require_exact_data_type"] = args_dict["require_exact_data_type"] diff --git a/src/simtools/constants.py b/src/simtools/constants.py index 38937c6cb..bb764104c 100644 --- a/src/simtools/constants.py +++ b/src/simtools/constants.py @@ -8,5 +8,9 @@ METADATA_JSON_SCHEMA = SCHEMA_PATH / "metadata.metaschema.yml" # Path to model parameter metaschema MODEL_PARAMETER_METASCHEMA = SCHEMA_PATH / "model_parameter.metaschema.yml" +# Path to model parameter description metaschema +MODEL_PARAMETER_DESCRIPTION_METASCHEMA = ( + SCHEMA_PATH / "model_parameter_and_data_schema.metaschema.yml" +) # Path to model parameter schema files MODEL_PARAMETER_SCHEMA_PATH = SCHEMA_PATH / "model_parameters" diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index a693fb221..226aaaf3f 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -10,13 +10,12 @@ import getpass import logging import uuid -from importlib.resources import files from pathlib import Path import simtools.constants import simtools.utils.general as gen import simtools.version -from simtools.data_model import metadata_model +from simtools.data_model import metadata_model, schema from simtools.io_operations import io_handler from simtools.utils import names @@ -135,7 +134,7 @@ def get_data_model_schema_file_name(self): # from data model name if self.data_model_name: self._logger.debug(f"Schema file from data model name: {self.data_model_name}") - return f"{files('simtools')}/schemas/model_parameters/{self.data_model_name}.schema.yml" + return schema.model_parameter_schema_file(self.data_model_name) # from input metadata try: diff --git a/src/simtools/data_model/metadata_model.py b/src/simtools/data_model/metadata_model.py index 2344576f3..16f9ed3b6 100644 --- a/src/simtools/data_model/metadata_model.py +++ b/src/simtools/data_model/metadata_model.py @@ -13,8 +13,8 @@ import jsonschema -import simtools.constants import simtools.utils.general as gen +from simtools.constants import METADATA_JSON_SCHEMA from simtools.data_model import format_checkers from simtools.utils import names @@ -94,7 +94,7 @@ def _load_schema(schema_file=None, schema_version=None): """ if schema_file is None: - schema_file = files("simtools").joinpath(simtools.constants.METADATA_JSON_SCHEMA) + schema_file = METADATA_JSON_SCHEMA try: schema = gen.collect_data_from_file(file_name=schema_file) diff --git a/src/simtools/data_model/model_data_writer.py b/src/simtools/data_model/model_data_writer.py index 882aeefd3..966cc4495 100644 --- a/src/simtools/data_model/model_data_writer.py +++ b/src/simtools/data_model/model_data_writer.py @@ -10,8 +10,7 @@ from astropy.io.registry.base import IORegistryError import simtools.utils.general as gen -from simtools.constants import MODEL_PARAMETER_METASCHEMA, MODEL_PARAMETER_SCHEMA_PATH -from simtools.data_model import validate_data +from simtools.data_model import schema, validate_data from simtools.data_model.metadata_collector import MetadataCollector from simtools.io_operations import io_handler from simtools.utils import names, value_conversion @@ -200,7 +199,8 @@ def get_validated_parameter_dict( Validated parameter dictionary. """ self._logger.debug(f"Getting validated parameter dictionary for {instrument}") - schema_file = self._read_model_parameter_schema(parameter_name) + schema_file = schema.model_parameter_schema_file(parameter_name) + self.schema_dict = gen.collect_data_from_file(schema_file) try: # e.g. instrument is 'North" site = names.validate_site_name(instrument) @@ -209,13 +209,8 @@ def get_validated_parameter_dict( value, unit = value_conversion.split_value_and_unit(value) - if schema_version is None: - schema_version = gen.collect_data_from_file(MODEL_PARAMETER_METASCHEMA, 0).get( - "version", "0.0.0" - ) - data_dict = { - "schema_version": schema_version, + "schema_version": schema.model_parameter_schema_version(schema_version), "parameter": parameter_name, "instrument": instrument, "site": site, @@ -232,22 +227,6 @@ def get_validated_parameter_dict( is_model_parameter=True, ) - def _read_model_parameter_schema(self, parameter_name): - """ - Read model parameter schema. - - Parameters - ---------- - parameter_name: str - Name of the parameter. - """ - schema_file = MODEL_PARAMETER_SCHEMA_PATH / f"{parameter_name}.schema.yml" - try: - self.schema_dict = gen.collect_data_from_file(file_name=schema_file) - except FileNotFoundError as exc: - raise FileNotFoundError(f"Schema file not found: {schema_file}") from exc - return schema_file - def _get_parameter_type(self): """ Return parameter type from schema. diff --git a/src/simtools/data_model/schema.py b/src/simtools/data_model/schema.py new file mode 100644 index 000000000..01d15a415 --- /dev/null +++ b/src/simtools/data_model/schema.py @@ -0,0 +1,77 @@ +"""Module providing functionality to read schema.""" + +from pathlib import Path + +import simtools.utils.general as gen +from simtools.constants import MODEL_PARAMETER_METASCHEMA, MODEL_PARAMETER_SCHEMA_PATH + + +def model_parameter_schema_files(schema_directory=MODEL_PARAMETER_SCHEMA_PATH): + """ + Return list of parameters and schema files located in schema file directory. + + Returns + ------- + list + List of parameters found in schema file directory. + list + List of schema files found in schema file directory. + + """ + schema_files = sorted(Path(schema_directory).rglob("*.schema.yml")) + if not schema_files: + raise FileNotFoundError(f"No schema files found in {schema_directory}") + parameters = [] + for schema_file in schema_files: + schema_dict = gen.collect_data_from_file(file_name=schema_file) + parameters.append(schema_dict.get("name")) + return parameters, schema_files + + +def model_parameter_schema_file(parameter): + """ + Return schema file path for a given model parameter. + + Parameters + ---------- + parameter: str + Model parameter name. + + Returns + ------- + Path + Schema file path. + + """ + schema_file = MODEL_PARAMETER_SCHEMA_PATH / f"{parameter}.schema.yml" + if not schema_file.exists(): + raise FileNotFoundError(f"Schema file not found: {schema_file}") + return schema_file + + +def model_parameter_schema_version(schema_version=None): + """ + Validate and return schema versions. + + If no schema_version is given, the most recent version is provided. + + Parameters + ---------- + schema_version: str + Schema version. + + Returns + ------- + str + Schema version. + + """ + schemas = gen.collect_data_from_file(MODEL_PARAMETER_METASCHEMA) + + if schema_version is None and schemas: + return schemas[0].get("version") + + if any(schema.get("version") == schema_version for schema in schemas): + return schema_version + + raise ValueError(f"Schema version {schema_version} not found in {MODEL_PARAMETER_METASCHEMA}.") diff --git a/src/simtools/data_model/validate_data.py b/src/simtools/data_model/validate_data.py index 0c20dd653..f6768f370 100644 --- a/src/simtools/data_model/validate_data.py +++ b/src/simtools/data_model/validate_data.py @@ -12,8 +12,7 @@ from astropy.utils.diff import report_diff_values import simtools.utils.general as gen -from simtools.constants import MODEL_PARAMETER_SCHEMA_PATH -from simtools.data_model import format_checkers +from simtools.data_model import format_checkers, schema from simtools.utils import value_conversion __all__ = ["DataValidator"] @@ -129,7 +128,7 @@ def validate_model_parameter(par_dict): Validated data dictionary """ data_validator = DataValidator( - schema_file=MODEL_PARAMETER_SCHEMA_PATH / f"{par_dict['parameter']}.schema.yml", + schema_file=schema.model_parameter_schema_file(f"{par_dict['parameter']}"), data_dict=par_dict, check_exact_data_type=False, ) @@ -716,6 +715,8 @@ def _read_validation_schema(self, schema_file, parameter=None): KeyError if 'data' can not be read from dict in schema file + TODO - understand if parts of it can be replaced + """ try: if Path(schema_file).is_dir(): diff --git a/src/simtools/layout/array_layout.py b/src/simtools/layout/array_layout.py index 9c039bdf7..02172b823 100644 --- a/src/simtools/layout/array_layout.py +++ b/src/simtools/layout/array_layout.py @@ -9,7 +9,7 @@ from astropy.table import QTable import simtools.utils.general as gen -from simtools.data_model import data_reader +from simtools.data_model import data_reader, schema from simtools.io_operations import io_handler from simtools.layout.geo_coordinates import GeoCoordinates from simtools.layout.telescope_position import TelescopePosition @@ -582,7 +582,10 @@ def export_telescope_list_table(self, crs_name): return table def export_one_telescope_as_json( - self, crs_name, parameter_version=None, schema_version="0.2.0" + self, + crs_name, + parameter_version=None, + schema_version=None, ): """ Return a list containing a single telescope in simtools-DB-style json. @@ -626,7 +629,7 @@ def export_one_telescope_as_json( ] return { - "schema_version": schema_version, + "schema_version": schema.model_parameter_schema_version(schema_version), "parameter": parameter_name, "instrument": table["telescope_name"][0], "site": self.site, diff --git a/src/simtools/model/array_model.py b/src/simtools/model/array_model.py index 29f52cf1f..e103e5929 100644 --- a/src/simtools/model/array_model.py +++ b/src/simtools/model/array_model.py @@ -6,7 +6,7 @@ import astropy.units as u from astropy.table import QTable -from simtools.data_model import data_reader +from simtools.data_model import data_reader, schema from simtools.db import db_handler from simtools.io_operations import io_handler from simtools.model.site_model import SiteModel @@ -320,7 +320,7 @@ def _get_telescope_position_parameter( Dict with telescope position parameters. """ return { - "schema_version": "0.1.0", + "schema_version": schema.model_parameter_schema_version(), "parameter": "array_element_position_ground", "instrument": telescope_name, "site": site, diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index 1bfd4a599..8c5a89081 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -172,12 +172,13 @@ def collect_data_from_file(file_name, yaml_document=None): file.seek(0) if yaml_document is None: data = list(yaml.safe_load_all(file)) - try: - data = list(yaml.safe_load_all(file))[yaml_document] - except IndexError as exc: - raise InvalidConfigDataError( - f"YAML file {file_name} does not contain {yaml_document} documents." - ) from exc + else: + try: + data = list(yaml.safe_load_all(file))[yaml_document] + except IndexError as exc: + raise InvalidConfigDataError( + f"YAML file {file_name} does not contain {yaml_document} documents." + ) from exc return data diff --git a/src/simtools/utils/names.py b/src/simtools/utils/names.py index e4ee803ca..e8141b45b 100644 --- a/src/simtools/utils/names.py +++ b/src/simtools/utils/names.py @@ -3,11 +3,12 @@ import logging import re from functools import cache -from importlib.resources import files from pathlib import Path import yaml +from simtools.constants import MODEL_PARAMETER_SCHEMA_PATH, SCHEMA_PATH + _logger = logging.getLogger(__name__) __all__ = [ @@ -34,7 +35,7 @@ def array_elements(): dict Array elements. """ - with open(files("simtools") / "schemas/array_elements.yml", encoding="utf-8") as file: + with open(Path(SCHEMA_PATH) / "array_elements.yml", encoding="utf-8") as file: return yaml.safe_load(file)["data"] @@ -59,7 +60,7 @@ def site_names(): @cache def load_model_parameters(class_key_list): model_parameters = {} - schema_files = list(Path(files("simtools") / "schemas/model_parameters").rglob("*.yml")) + schema_files = list(Path(MODEL_PARAMETER_SCHEMA_PATH).rglob("*.yml")) for schema_file in schema_files: with open(schema_file, encoding="utf-8") as f: data = yaml.safe_load(f) diff --git a/tests/conftest.py b/tests/conftest.py index 94781c303..e29c2bf81 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,10 +62,6 @@ def io_handler(tmp_test_directory, data_path): @pytest.fixture def _mock_settings_env_vars(tmp_test_directory): """Removes all environment variable from the test system and explicitly sets those needed.""" - _url = ( - "https://gitlab.cta-observatory.org/cta-science/simulations/" - "simulation-model/model_parameters/-/raw/main" - ) with mock.patch.dict( os.environ, diff --git a/tests/unit_tests/data_model/test_data_reader.py b/tests/unit_tests/data_model/test_data_reader.py index 21787225d..6f7808ec6 100644 --- a/tests/unit_tests/data_model/test_data_reader.py +++ b/tests/unit_tests/data_model/test_data_reader.py @@ -2,7 +2,6 @@ import json import logging -from importlib.resources import files import astropy.units as u import jsonschema @@ -10,7 +9,7 @@ from astropy.io.registry.base import IORegistryError from astropy.table import Table -from simtools.data_model import data_reader +from simtools.data_model import data_reader, schema logger = logging.getLogger() @@ -102,12 +101,10 @@ def test_read_value_from_file_and_validate( assert "Successful validation of yaml/json file" in caplog.text # schema explicitly given - schema_dir = files("simtools").joinpath("schemas/model_parameters/") - schema_file = str(schema_dir) + "/reference_point_altitude.schema.yml" with caplog.at_level("DEBUG"): data_reader.read_value_from_file( reference_point_altitude_file, - schema_file=schema_file, + schema_file=schema.model_parameter_schema_file("reference_point_altitude"), validate=True, ) assert "Successful validation of yaml/json file" in caplog.text diff --git a/tests/unit_tests/data_model/test_metadata_collector.py b/tests/unit_tests/data_model/test_metadata_collector.py index b5c43d7cd..b12d8bc9f 100644 --- a/tests/unit_tests/data_model/test_metadata_collector.py +++ b/tests/unit_tests/data_model/test_metadata_collector.py @@ -6,41 +6,41 @@ import logging import time import uuid -from importlib.resources import files from pathlib import Path import pytest import simtools.data_model.metadata_collector as metadata_collector import simtools.utils.general as gen +from simtools.constants import METADATA_JSON_SCHEMA, SCHEMA_PATH +from simtools.data_model import schema from simtools.utils import names logger = logging.getLogger() def test_get_data_model_schema_file_name(): - schema_file_path = files("simtools") / "schemas" # from args_dict / command line args_dict = {"no_schema": "schema_file.yml"} _collector = metadata_collector.MetadataCollector(args_dict) schema_file = _collector.get_data_model_schema_file_name() assert schema_file is None - args_dict = {"schema": str(schema_file_path / "metadata.metaschema.yml")} + args_dict = {"schema": str(METADATA_JSON_SCHEMA)} _collector = metadata_collector.MetadataCollector(args_dict) schema_file = _collector.get_data_model_schema_file_name() assert schema_file == args_dict["schema"] # from metadata _collector.top_level_meta["cta"]["product"]["data"]["model"]["url"] = str( - schema_file_path / "top_level_meta.schema.yml" + SCHEMA_PATH / "top_level_meta.schema.yml" ) schema_file = _collector.get_data_model_schema_file_name() # test that priority is given to args_dict (if not none) assert schema_file == args_dict["schema"] _collector.args_dict["schema"] = None schema_file = _collector.get_data_model_schema_file_name() - assert schema_file == str(schema_file_path / "top_level_meta.schema.yml") + assert schema_file == str(SCHEMA_PATH / "top_level_meta.schema.yml") _collector.top_level_meta["cta"]["product"]["data"]["model"].pop("url") schema_file = _collector.get_data_model_schema_file_name() @@ -49,9 +49,7 @@ def test_get_data_model_schema_file_name(): # from data model_name _collector.data_model_name = "array_coordinates" schema_file = _collector.get_data_model_schema_file_name() - assert Path(schema_file) == ( - files("simtools") / "schemas/model_parameters" / "array_coordinates.schema.yml" - ) + assert Path(schema_file) == (schema.model_parameter_schema_file(_collector.data_model_name)) # from input metadata _collector.input_metadata = { @@ -64,7 +62,7 @@ def test_get_data_model_schema_file_name(): def test_get_data_model_schema_dict(args_dict_site): metadata = metadata_collector.MetadataCollector(args_dict=args_dict_site) - metadata.schema_file_name = "simtools/schemas/metadata.metaschema.yml" + metadata.schema_file_name = METADATA_JSON_SCHEMA assert isinstance(metadata.get_data_model_schema_dict(), dict) diff --git a/tests/unit_tests/data_model/test_metadata_model.py b/tests/unit_tests/data_model/test_metadata_model.py index 55b4ee225..2c5bf0cf4 100644 --- a/tests/unit_tests/data_model/test_metadata_model.py +++ b/tests/unit_tests/data_model/test_metadata_model.py @@ -1,11 +1,11 @@ import logging -from importlib.resources import files from pathlib import Path import jsonschema import pytest import yaml +from simtools.constants import MODEL_PARAMETER_DESCRIPTION_METASCHEMA, MODEL_PARAMETER_METASCHEMA from simtools.data_model import metadata_model from simtools.utils import general as gen @@ -29,18 +29,17 @@ def test_load_schema(caplog, tmp_test_directory): with pytest.raises(FileNotFoundError): metadata_model._load_schema(schema_file="not_existing_file") - schema_file = files("simtools") / "schemas" / "model_parameter.metaschema.yml" # schema versions with pytest.raises(ValueError, match=r"^Schema version not given in"): - metadata_model._load_schema(schema_file) + metadata_model._load_schema(MODEL_PARAMETER_METASCHEMA) - _schema_1, _ = metadata_model._load_schema(schema_file, "0.1.0") + _schema_1, _ = metadata_model._load_schema(MODEL_PARAMETER_METASCHEMA, "0.1.0") assert _schema_1["version"] == "0.1.0" - _schema_2, _ = metadata_model._load_schema(schema_file, "0.2.0") + _schema_2, _ = metadata_model._load_schema(MODEL_PARAMETER_METASCHEMA, "0.2.0") assert _schema_2["version"] == "0.2.0" with pytest.raises(ValueError, match=r"^Schema version 0.2 not found in"): - metadata_model._load_schema(schema_file, "0.2") + metadata_model._load_schema(MODEL_PARAMETER_METASCHEMA, "0.2") # test a single doc yaml file (write a temporary schema file; to make sure it is a single doc) tmp_schema_file = Path(tmp_test_directory) / "schema.yml" @@ -74,49 +73,63 @@ def test_validate_schema(tmp_test_directory): def test_validate_schema_astropy_units(caplog): - _schema = files("simtools") / "schemas" / "model_parameter_and_data_schema.metaschema.yml" - success_string = "Successful validation of data using schema from" _dict_1 = gen.collect_data_from_file(file_name="tests/resources/num_gains.schema.yml") with caplog.at_level(logging.DEBUG): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) assert success_string in caplog.text # m and cm _dict_1["data"][0]["unit"] = "m" with caplog.at_level(logging.DEBUG): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) assert success_string in caplog.text _dict_1["data"][0]["unit"] = "cm" with caplog.at_level(logging.DEBUG): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) assert success_string in caplog.text # combined units _dict_1["data"][0]["unit"] = "cm/s" with caplog.at_level(logging.DEBUG): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) assert success_string in caplog.text _dict_1["data"][0]["unit"] = "km/ s" with caplog.at_level(logging.DEBUG): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) assert success_string in caplog.text # dimensionless _dict_1["data"][0]["unit"] = "dimensionless" with caplog.at_level(logging.DEBUG): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) assert success_string in caplog.text _dict_1["data"][0]["unit"] = "" with caplog.at_level(logging.DEBUG): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) assert success_string in caplog.text # not good _dict_1["data"][0]["unit"] = "not_a_unit" with pytest.raises(ValueError, match="'not_a_unit' is not a valid Unit"): - metadata_model.validate_schema(data=_dict_1, schema_file=_schema) + metadata_model.validate_schema( + data=_dict_1, schema_file=MODEL_PARAMETER_DESCRIPTION_METASCHEMA + ) def test_resolve_references(): diff --git a/tests/unit_tests/data_model/test_model_data_writer.py b/tests/unit_tests/data_model/test_model_data_writer.py index 580d1622a..d1170a895 100644 --- a/tests/unit_tests/data_model/test_model_data_writer.py +++ b/tests/unit_tests/data_model/test_model_data_writer.py @@ -11,6 +11,7 @@ import simtools.data_model.model_data_writer as writer import simtools.utils.general as gen +from simtools.data_model import schema from simtools.data_model.model_data_writer import JsonNumpyEncoder logger = logging.getLogger() @@ -266,7 +267,7 @@ def test_get_validated_parameter_dict(): assert w1.get_validated_parameter_dict( parameter_name="num_gains", value=2, instrument="MSTN-01", parameter_version="0.0.1" ) == { - "schema_version": "0.1.0", + "schema_version": schema.model_parameter_schema_version(), "parameter": "num_gains", "instrument": "MSTN-01", "site": "North", @@ -284,7 +285,7 @@ def test_get_validated_parameter_dict(): instrument="LSTN-01", parameter_version="0.0.1", ) == { - "schema_version": "0.1.0", + "schema_version": schema.model_parameter_schema_version(), "parameter": "transit_time_error", "instrument": "LSTN-01", "site": "North", @@ -302,7 +303,7 @@ def test_get_validated_parameter_dict(): instrument="North", parameter_version="0.0.1", ) == { - "schema_version": "0.1.0", + "schema_version": schema.model_parameter_schema_version(), "parameter": "reference_point_altitude", "instrument": "North", "site": "North", @@ -391,15 +392,3 @@ def test_parameter_is_a_file(num_gains_schema): w1.schema_dict["data"] = [] assert not w1._parameter_is_a_file() - - -def test_read_model_parameter_schema(): - w1 = writer.ModelDataWriter() - - schema_file = str(w1._read_model_parameter_schema("num_gains")) - - assert "simtools/schemas/model_parameters/num_gains.schema.yml" in schema_file - assert isinstance(w1.schema_dict, dict) - - with pytest.raises(FileNotFoundError, match=r"^Schema file not found:"): - w1._read_model_parameter_schema("not_a_parameter") diff --git a/tests/unit_tests/data_model/test_schema.py b/tests/unit_tests/data_model/test_schema.py new file mode 100644 index 000000000..d84e73ba5 --- /dev/null +++ b/tests/unit_tests/data_model/test_schema.py @@ -0,0 +1,43 @@ +#!/usr/bin/python3 + +import pytest + +from simtools.data_model import schema + + +def test_model_parameter_schema_files(tmp_test_directory): + + par, files = schema.model_parameter_schema_files() + assert len(files) + assert files[0].is_file() + assert "num_gains" in par + + # no files in the directory + with pytest.raises(FileNotFoundError, match=r"^No schema files"): + schema.model_parameter_schema_files(tmp_test_directory) + + # directory does not exist + with pytest.raises(FileNotFoundError, match=r"^No schema files"): + schema.model_parameter_schema_files("not_a_directory") + + +def test_model_parameter_schema_file(): + + schema_file = str(schema.model_parameter_schema_file("num_gains")) + + assert "simtools/schemas/model_parameters/num_gains.schema.yml" in schema_file + + with pytest.raises(FileNotFoundError, match=r"^Schema file not found:"): + schema.model_parameter_schema_file("not_a_parameter") + + +def test_model_parameter_schema_version(): + + most_recent = schema.model_parameter_schema_version() + assert most_recent == "0.2.0" + + assert schema.model_parameter_schema_version("0.2.0") == "0.2.0" + assert schema.model_parameter_schema_version("0.1.0") == "0.1.0" + + with pytest.raises(ValueError, match=r"^Schema version 0.0.1 not found in"): + schema.model_parameter_schema_version("0.0.1") diff --git a/tests/unit_tests/data_model/test_validate_data.py b/tests/unit_tests/data_model/test_validate_data.py index 2b3a90647..b6ffdf84c 100644 --- a/tests/unit_tests/data_model/test_validate_data.py +++ b/tests/unit_tests/data_model/test_validate_data.py @@ -4,7 +4,6 @@ import re import shutil import sys -from importlib.resources import files import jsonschema import numpy as np @@ -14,7 +13,7 @@ from astropy.table import Column, Table from astropy.utils.diff import report_diff_values -from simtools.data_model import validate_data +from simtools.data_model import schema, validate_data logger = logging.getLogger() @@ -624,11 +623,9 @@ def test_read_validation_schema(tmp_test_directory): # incomplete test def test_validate_data_dict(): - schema_dir = files("simtools").joinpath("schemas/model_parameters/") - # parameter with unit data_validator = validate_data.DataValidator( - schema_file=str(schema_dir) + "/reference_point_altitude.schema.yml" + schema_file=schema.model_parameter_schema_file("reference_point_altitude") ) data_validator.data_dict = { "name": "reference_point_altitude", @@ -639,7 +636,7 @@ def test_validate_data_dict(): # parameter without unit data_validator_2 = validate_data.DataValidator( - schema_file=str(schema_dir) + "/num_gains.schema.yml" + schema_file=schema.model_parameter_schema_file("num_gains") ) data_validator_2.data_dict = {"name": "num_gains", "value": [2], "unit": [""]} data_validator_2._validate_data_dict() @@ -662,7 +659,7 @@ def test_validate_data_dict(): data_validator_2._validate_data_dict() data_validator_3 = validate_data.DataValidator( - schema_file=str(schema_dir) + "/random_focal_length.schema.yml" + schema_file=schema.model_parameter_schema_file("random_focal_length") ) data_validator_3.data_dict = { "name": "random_focal_length", @@ -676,9 +673,8 @@ def test_validate_data_dict(): def test_convert_results_to_model_format(): - schema_dir = files("simtools").joinpath("schemas/model_parameters/") data_validator_3 = validate_data.DataValidator( - schema_file=str(schema_dir) + "/random_focal_length.schema.yml" + schema_file=schema.model_parameter_schema_file("random_focal_length") ) data_validator_3.data_dict = { "name": "random_focal_length", diff --git a/tests/unit_tests/model/test_array_model.py b/tests/unit_tests/model/test_array_model.py index 77d469d74..211f4c73d 100644 --- a/tests/unit_tests/model/test_array_model.py +++ b/tests/unit_tests/model/test_array_model.py @@ -7,6 +7,7 @@ from astropy import units as u from astropy.table import QTable +from simtools.data_model import schema from simtools.model.array_model import ArrayModel logger = logging.getLogger() @@ -109,7 +110,7 @@ def test_get_telescope_position_parameter(array_model, io_handler): assert am._get_telescope_position_parameter( "LSTN-01", "North", 10.0 * u.m, 200.0 * u.cm, 30.0 * u.m, "2.0.0" ) == { - "schema_version": "0.1.0", + "schema_version": schema.model_parameter_schema_version(), "parameter": "array_element_position_ground", "instrument": "LSTN-01", "site": "North", diff --git a/tests/unit_tests/utils/test_general.py b/tests/unit_tests/utils/test_general.py index 69ca0aa4c..5ade80306 100644 --- a/tests/unit_tests/utils/test_general.py +++ b/tests/unit_tests/utils/test_general.py @@ -14,6 +14,7 @@ from astropy.table import Table import simtools.utils.general as gen +from simtools.constants import MODEL_PARAMETER_METASCHEMA url_desy = "https://www.desy.de" url_simtools = "https://raw.githubusercontent.com/gammasim/simtools/main/" @@ -43,8 +44,7 @@ def test_collect_dict_data(io_handler, caplog) -> None: assert len(_dict) > 0 # file with several documents - _file = "src/simtools/schemas/model_parameter.metaschema.yml" - _list = gen.collect_data_from_file(_file) + _list = gen.collect_data_from_file(MODEL_PARAMETER_METASCHEMA) assert isinstance(_list, list) assert len(_list) > 0 From c57d437ad03317a55a9ad357c4b550ea47673b7a Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 29 Jan 2025 14:02:27 +0100 Subject: [PATCH 05/10] docs --- docs/source/api-reference/data_model.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/api-reference/data_model.md b/docs/source/api-reference/data_model.md index 45efa4963..ac852a32c 100644 --- a/docs/source/api-reference/data_model.md +++ b/docs/source/api-reference/data_model.md @@ -50,6 +50,15 @@ Data products ingested or produced by simtools generally follows the CTAO data m :members: ``` +(datamodelschema)= + +## schema + +```{eval-rst} +.. automodule:: data_model.schema + :members: +``` + (datamodelvalidatedata)= ## validate_data From 19714337ef7eb96e2548eca03b6238214e7a77bc Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 29 Jan 2025 14:23:02 +0100 Subject: [PATCH 06/10] fix integration tests --- src/simtools/applications/validate_file_using_schema.py | 2 +- src/simtools/data_model/metadata_collector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simtools/applications/validate_file_using_schema.py b/src/simtools/applications/validate_file_using_schema.py index d309db7c8..6055b44d5 100644 --- a/src/simtools/applications/validate_file_using_schema.py +++ b/src/simtools/applications/validate_file_using_schema.py @@ -160,7 +160,7 @@ def validate_data_files(args_dict, logger): for file_name in _get_json_file_list(args_dict.get("file_directory")): tmp_args_dict["file_name"] = file_name parameter_name = re.sub(r"-\d+\.\d+\.\d+", "", file_name.stem) - schema_file = (schema.model_parameter_schema_file(f"{parameter_name}"),) + schema_file = schema.model_parameter_schema_file(f"{parameter_name}") tmp_args_dict["schema"] = schema_file tmp_args_dict["data_type"] = "model_parameter" tmp_args_dict["require_exact_data_type"] = args_dict["require_exact_data_type"] diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index 226aaaf3f..93838925d 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -134,7 +134,7 @@ def get_data_model_schema_file_name(self): # from data model name if self.data_model_name: self._logger.debug(f"Schema file from data model name: {self.data_model_name}") - return schema.model_parameter_schema_file(self.data_model_name) + return str(schema.model_parameter_schema_file(self.data_model_name)) # from input metadata try: From 3bc938c7d7b858f71618851f601e3855dafa40ee Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 29 Jan 2025 17:09:06 +0100 Subject: [PATCH 07/10] simplify (codesmell) --- src/simtools/utils/general.py | 46 ++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index 8c5a89081..b021ef0dd 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -155,32 +155,34 @@ def collect_data_from_file(file_name, yaml_document=None): if is_url(file_name): return collect_data_from_http(file_name) - data = None suffix = Path(file_name).suffix.lower() with open(file_name, encoding="utf-8") as file: if suffix == ".json": - data = json.load(file) - elif suffix == ".list": - lines = file.readlines() - data = [line.strip() for line in lines] - elif suffix in [".yml", ".yaml"]: - try: - data = yaml.safe_load(file) - except yaml.constructor.ConstructorError: - data = _load_yaml_using_astropy(file) - except yaml.composer.ComposerError: - file.seek(0) - if yaml_document is None: - data = list(yaml.safe_load_all(file)) - else: - try: - data = list(yaml.safe_load_all(file))[yaml_document] - except IndexError as exc: - raise InvalidConfigDataError( - f"YAML file {file_name} does not contain {yaml_document} documents." - ) from exc + return json.load(file) + if suffix == ".list": + return [line.strip() for line in file.readlines()] + if suffix in [".yml", ".yaml"]: + return _collect_data_from_yaml_file(file, file_name, yaml_document) + return None - return data + +def _collect_data_from_yaml_file(file, file_name, yaml_document): + """Collect data from a yaml file.""" + try: + return yaml.safe_load(file) + except yaml.constructor.ConstructorError: + return _load_yaml_using_astropy(file) + except yaml.composer.ComposerError: + pass + file.seek(0) + if yaml_document is None: + return list(yaml.safe_load_all(file)) + try: + return list(yaml.safe_load_all(file))[yaml_document] + except IndexError as exc: + raise InvalidConfigDataError( + f"YAML file {file_name} does not contain {yaml_document} documents." + ) from exc def collect_kwargs(label, in_kwargs): From 70bde6319fe01f73fe79a079e09c3f086034cef4 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 29 Jan 2025 17:37:22 +0100 Subject: [PATCH 08/10] coverage --- tests/unit_tests/utils/test_general.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit_tests/utils/test_general.py b/tests/unit_tests/utils/test_general.py index 5ade80306..4dba9861e 100644 --- a/tests/unit_tests/utils/test_general.py +++ b/tests/unit_tests/utils/test_general.py @@ -48,6 +48,21 @@ def test_collect_dict_data(io_handler, caplog) -> None: assert isinstance(_list, list) assert len(_list) > 0 + # file with several documents - get first document + _dict = gen.collect_data_from_file(MODEL_PARAMETER_METASCHEMA, 0) + assert _dict["version"] != "0.1.0" + + with pytest.raises(gen.InvalidConfigDataError, match=r"^YAML file"): + gen.collect_data_from_file(MODEL_PARAMETER_METASCHEMA, 999) + + # document type not supported + assert ( + gen.collect_data_from_file( + "tests/resources/run1_proton_za20deg_azm0deg_North_1LST_test-lst-array.corsika.zst" + ) + is None + ) + def test_collect_dict_from_url(io_handler) -> None: _file = "tests/resources/num_gains.schema.yml" From e72e985933a62abdc2f403984235974c2ec74e01 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 29 Jan 2025 20:34:36 +0100 Subject: [PATCH 09/10] simplification of validate_data --- src/simtools/data_model/validate_data.py | 23 ++-------- .../data_model/test_validate_data.py | 43 +++---------------- 2 files changed, 11 insertions(+), 55 deletions(-) diff --git a/src/simtools/data_model/validate_data.py b/src/simtools/data_model/validate_data.py index f6768f370..a6d980798 100644 --- a/src/simtools/data_model/validate_data.py +++ b/src/simtools/data_model/validate_data.py @@ -157,9 +157,7 @@ def _validate_data_dict(self, is_model_parameter=False, lists_as_strings=False): if is_model_parameter: self._prepare_model_parameter() - if not (_name := self.data_dict.get("name") or self.data_dict.get("parameter")): - raise KeyError("Data dict does not contain a 'name' or 'parameter' key.") - self._data_description = self._read_validation_schema(self.schema_file_name, _name) + self._data_description = self._read_validation_schema(self.schema_file_name) value_as_list, unit_as_list = self._get_value_and_units_as_lists() @@ -691,7 +689,7 @@ def _interval_check(data, axis_range, range_type): return False - def _read_validation_schema(self, schema_file, parameter=None): + def _read_validation_schema(self, schema_file): """ Read validation schema from file. @@ -699,11 +697,6 @@ def _read_validation_schema(self, schema_file, parameter=None): ---------- schema_file: Path Schema file describing input data. - If this is a directory, a filename of - '.schema.yml' is assumed. - parameter: str - Parameter name of required schema - (if None, return first schema in file) Returns ------- @@ -714,19 +707,11 @@ def _read_validation_schema(self, schema_file, parameter=None): ------ KeyError if 'data' can not be read from dict in schema file - - TODO - understand if parts of it can be replaced - """ try: - if Path(schema_file).is_dir(): - return gen.collect_data_from_file( - file_name=Path(schema_file) / (parameter + ".schema.yml"), - )["data"] return gen.collect_data_from_file(file_name=schema_file)["data"] - except KeyError: - self._logger.error(f"Error reading validation schema from {schema_file}") - raise + except KeyError as exc: + raise KeyError(f"Error reading validation schema from {schema_file}") from exc def _get_data_description(self, column_name=None, status_test=False): """ diff --git a/tests/unit_tests/data_model/test_validate_data.py b/tests/unit_tests/data_model/test_validate_data.py index b6ffdf84c..f9a3e3631 100644 --- a/tests/unit_tests/data_model/test_validate_data.py +++ b/tests/unit_tests/data_model/test_validate_data.py @@ -2,7 +2,6 @@ import logging import re -import shutil import sys import jsonschema @@ -586,38 +585,18 @@ def test_read_validation_schema(tmp_test_directory): data_validator._read_validation_schema(schema_file=None) # file given - data_validator._read_validation_schema(schema_file=mirror_2f_schema_file) + _schema = data_validator._read_validation_schema(schema_file=mirror_2f_schema_file) + assert isinstance(_schema, list) # file does not exist with pytest.raises(FileNotFoundError): data_validator._read_validation_schema(schema_file="this_file_does_not_exist.yml") - # file given and parameter name given - data_validator._read_validation_schema( - schema_file=mirror_2f_schema_file, - parameter="mirror_2f_measurement", - ) - - # copy the schema file to a temporary directory; this is to test - # that the schema file is read from the temporary directory with the - # correct path / name - shutil.copy( - mirror_2f_schema_file, - tmp_test_directory / "mirror_2f_measurement.schema.yml", - ) - data_validator._read_validation_schema( - schema_file=str(tmp_test_directory), parameter="mirror_2f_measurement" - ) - - _incomplete_schema = {"description": "test schema"} - # write yaml file in temp directory - with open(tmp_test_directory / "incomplete_schema.schema.yml", "w") as _file: - yaml.dump(_incomplete_schema, _file) - - with pytest.raises(KeyError): - data_validator._read_validation_schema( - schema_file=str(tmp_test_directory), parameter="incomplete_schema" - ) + # read a 'wrong' schema file with no 'data' key included + with open(tmp_test_directory / "wrong_schema.yml", "w") as _file: + yaml.dump({"wrong_key": []}, _file) + with pytest.raises(KeyError, match=r"Error reading validation schema from .*wrong_schema.yml"): + data_validator._read_validation_schema(schema_file=tmp_test_directory / "wrong_schema.yml") # incomplete test @@ -644,14 +623,6 @@ def test_validate_data_dict(): data_validator_2.data_dict = {"name": "num_gains", "value": np.array([2]), "unit": [""]} data_validator_2._validate_data_dict() - data_validator.data_dict = { - "no_name": "test_data", - "value": [1.0, 2.0, 3.0], - "unit": ["", "", ""], - } - with pytest.raises(KeyError): - data_validator._validate_data_dict() - data_validator_2.data_dict = {"name": "num_gains", "value": [2], "unit": [None]} data_validator_2._validate_data_dict() From 7f9961caf887fa2d263e4cdcba0163acf9b43d9a Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 3 Feb 2025 10:35:24 +0100 Subject: [PATCH 10/10] add get_ to function names in schema module --- ...onvert_all_model_parameters_from_simtel.py | 4 ++-- .../validate_file_using_schema.py | 2 +- src/simtools/data_model/metadata_collector.py | 2 +- src/simtools/data_model/model_data_writer.py | 4 ++-- src/simtools/data_model/schema.py | 6 ++--- src/simtools/data_model/validate_data.py | 2 +- src/simtools/layout/array_layout.py | 2 +- src/simtools/model/array_model.py | 2 +- .../unit_tests/data_model/test_data_reader.py | 2 +- .../data_model/test_metadata_collector.py | 2 +- .../data_model/test_model_data_writer.py | 6 ++--- tests/unit_tests/data_model/test_schema.py | 24 +++++++++---------- .../data_model/test_validate_data.py | 8 +++---- tests/unit_tests/model/test_array_model.py | 2 +- 14 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/simtools/applications/convert_all_model_parameters_from_simtel.py b/src/simtools/applications/convert_all_model_parameters_from_simtel.py index a2ad911f8..d8db15904 100644 --- a/src/simtools/applications/convert_all_model_parameters_from_simtel.py +++ b/src/simtools/applications/convert_all_model_parameters_from_simtel.py @@ -185,7 +185,7 @@ def get_number_of_camera_pixel(args_dict, logger): """ try: simtel_config_reader = SimtelConfigReader( - schema_file=schema.model_parameter_schema_file("camera_pixels"), + schema_file=schema.get_model_parameter_schema_file("camera_pixels"), simtel_config_file=args_dict["simtel_cfg_file"], simtel_telescope_name=args_dict["simtel_telescope_name"], ) @@ -219,7 +219,7 @@ def read_and_export_parameters(args_dict, logger): List of simtools parameter not found in simtel configuration file. """ - _parameters, _schema_files = schema.model_parameter_schema_files( + _parameters, _schema_files = schema.get_get_model_parameter_schema_files( args_dict.get("schema_directory") ) _simtel_parameters = get_list_of_simtel_parameters(args_dict["simtel_cfg_file"], logger) diff --git a/src/simtools/applications/validate_file_using_schema.py b/src/simtools/applications/validate_file_using_schema.py index 6055b44d5..dc0ae9a3e 100644 --- a/src/simtools/applications/validate_file_using_schema.py +++ b/src/simtools/applications/validate_file_using_schema.py @@ -160,7 +160,7 @@ def validate_data_files(args_dict, logger): for file_name in _get_json_file_list(args_dict.get("file_directory")): tmp_args_dict["file_name"] = file_name parameter_name = re.sub(r"-\d+\.\d+\.\d+", "", file_name.stem) - schema_file = schema.model_parameter_schema_file(f"{parameter_name}") + schema_file = schema.get_model_parameter_schema_file(f"{parameter_name}") tmp_args_dict["schema"] = schema_file tmp_args_dict["data_type"] = "model_parameter" tmp_args_dict["require_exact_data_type"] = args_dict["require_exact_data_type"] diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index 93838925d..e97fd091e 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -134,7 +134,7 @@ def get_data_model_schema_file_name(self): # from data model name if self.data_model_name: self._logger.debug(f"Schema file from data model name: {self.data_model_name}") - return str(schema.model_parameter_schema_file(self.data_model_name)) + return str(schema.get_model_parameter_schema_file(self.data_model_name)) # from input metadata try: diff --git a/src/simtools/data_model/model_data_writer.py b/src/simtools/data_model/model_data_writer.py index 966cc4495..1651275b0 100644 --- a/src/simtools/data_model/model_data_writer.py +++ b/src/simtools/data_model/model_data_writer.py @@ -199,7 +199,7 @@ def get_validated_parameter_dict( Validated parameter dictionary. """ self._logger.debug(f"Getting validated parameter dictionary for {instrument}") - schema_file = schema.model_parameter_schema_file(parameter_name) + schema_file = schema.get_model_parameter_schema_file(parameter_name) self.schema_dict = gen.collect_data_from_file(schema_file) try: # e.g. instrument is 'North" @@ -210,7 +210,7 @@ def get_validated_parameter_dict( value, unit = value_conversion.split_value_and_unit(value) data_dict = { - "schema_version": schema.model_parameter_schema_version(schema_version), + "schema_version": schema.get_model_parameter_schema_version(schema_version), "parameter": parameter_name, "instrument": instrument, "site": site, diff --git a/src/simtools/data_model/schema.py b/src/simtools/data_model/schema.py index 01d15a415..1e0d13cb6 100644 --- a/src/simtools/data_model/schema.py +++ b/src/simtools/data_model/schema.py @@ -6,7 +6,7 @@ from simtools.constants import MODEL_PARAMETER_METASCHEMA, MODEL_PARAMETER_SCHEMA_PATH -def model_parameter_schema_files(schema_directory=MODEL_PARAMETER_SCHEMA_PATH): +def get_get_model_parameter_schema_files(schema_directory=MODEL_PARAMETER_SCHEMA_PATH): """ Return list of parameters and schema files located in schema file directory. @@ -28,7 +28,7 @@ def model_parameter_schema_files(schema_directory=MODEL_PARAMETER_SCHEMA_PATH): return parameters, schema_files -def model_parameter_schema_file(parameter): +def get_model_parameter_schema_file(parameter): """ Return schema file path for a given model parameter. @@ -49,7 +49,7 @@ def model_parameter_schema_file(parameter): return schema_file -def model_parameter_schema_version(schema_version=None): +def get_model_parameter_schema_version(schema_version=None): """ Validate and return schema versions. diff --git a/src/simtools/data_model/validate_data.py b/src/simtools/data_model/validate_data.py index a6d980798..55e4dc4f8 100644 --- a/src/simtools/data_model/validate_data.py +++ b/src/simtools/data_model/validate_data.py @@ -128,7 +128,7 @@ def validate_model_parameter(par_dict): Validated data dictionary """ data_validator = DataValidator( - schema_file=schema.model_parameter_schema_file(f"{par_dict['parameter']}"), + schema_file=schema.get_model_parameter_schema_file(f"{par_dict['parameter']}"), data_dict=par_dict, check_exact_data_type=False, ) diff --git a/src/simtools/layout/array_layout.py b/src/simtools/layout/array_layout.py index 02172b823..fd82ec591 100644 --- a/src/simtools/layout/array_layout.py +++ b/src/simtools/layout/array_layout.py @@ -629,7 +629,7 @@ def export_one_telescope_as_json( ] return { - "schema_version": schema.model_parameter_schema_version(schema_version), + "schema_version": schema.get_model_parameter_schema_version(schema_version), "parameter": parameter_name, "instrument": table["telescope_name"][0], "site": self.site, diff --git a/src/simtools/model/array_model.py b/src/simtools/model/array_model.py index e103e5929..e8c1e1b36 100644 --- a/src/simtools/model/array_model.py +++ b/src/simtools/model/array_model.py @@ -320,7 +320,7 @@ def _get_telescope_position_parameter( Dict with telescope position parameters. """ return { - "schema_version": schema.model_parameter_schema_version(), + "schema_version": schema.get_model_parameter_schema_version(), "parameter": "array_element_position_ground", "instrument": telescope_name, "site": site, diff --git a/tests/unit_tests/data_model/test_data_reader.py b/tests/unit_tests/data_model/test_data_reader.py index 6f7808ec6..ad01860c3 100644 --- a/tests/unit_tests/data_model/test_data_reader.py +++ b/tests/unit_tests/data_model/test_data_reader.py @@ -104,7 +104,7 @@ def test_read_value_from_file_and_validate( with caplog.at_level("DEBUG"): data_reader.read_value_from_file( reference_point_altitude_file, - schema_file=schema.model_parameter_schema_file("reference_point_altitude"), + schema_file=schema.get_model_parameter_schema_file("reference_point_altitude"), validate=True, ) assert "Successful validation of yaml/json file" in caplog.text diff --git a/tests/unit_tests/data_model/test_metadata_collector.py b/tests/unit_tests/data_model/test_metadata_collector.py index b12d8bc9f..f7f4dbf4b 100644 --- a/tests/unit_tests/data_model/test_metadata_collector.py +++ b/tests/unit_tests/data_model/test_metadata_collector.py @@ -49,7 +49,7 @@ def test_get_data_model_schema_file_name(): # from data model_name _collector.data_model_name = "array_coordinates" schema_file = _collector.get_data_model_schema_file_name() - assert Path(schema_file) == (schema.model_parameter_schema_file(_collector.data_model_name)) + assert Path(schema_file) == (schema.get_model_parameter_schema_file(_collector.data_model_name)) # from input metadata _collector.input_metadata = { diff --git a/tests/unit_tests/data_model/test_model_data_writer.py b/tests/unit_tests/data_model/test_model_data_writer.py index d1170a895..5184fc9d1 100644 --- a/tests/unit_tests/data_model/test_model_data_writer.py +++ b/tests/unit_tests/data_model/test_model_data_writer.py @@ -267,7 +267,7 @@ def test_get_validated_parameter_dict(): assert w1.get_validated_parameter_dict( parameter_name="num_gains", value=2, instrument="MSTN-01", parameter_version="0.0.1" ) == { - "schema_version": schema.model_parameter_schema_version(), + "schema_version": schema.get_model_parameter_schema_version(), "parameter": "num_gains", "instrument": "MSTN-01", "site": "North", @@ -285,7 +285,7 @@ def test_get_validated_parameter_dict(): instrument="LSTN-01", parameter_version="0.0.1", ) == { - "schema_version": schema.model_parameter_schema_version(), + "schema_version": schema.get_model_parameter_schema_version(), "parameter": "transit_time_error", "instrument": "LSTN-01", "site": "North", @@ -303,7 +303,7 @@ def test_get_validated_parameter_dict(): instrument="North", parameter_version="0.0.1", ) == { - "schema_version": schema.model_parameter_schema_version(), + "schema_version": schema.get_model_parameter_schema_version(), "parameter": "reference_point_altitude", "instrument": "North", "site": "North", diff --git a/tests/unit_tests/data_model/test_schema.py b/tests/unit_tests/data_model/test_schema.py index d84e73ba5..0ba33e77b 100644 --- a/tests/unit_tests/data_model/test_schema.py +++ b/tests/unit_tests/data_model/test_schema.py @@ -5,39 +5,39 @@ from simtools.data_model import schema -def test_model_parameter_schema_files(tmp_test_directory): +def test_get_get_model_parameter_schema_files(tmp_test_directory): - par, files = schema.model_parameter_schema_files() + par, files = schema.get_get_model_parameter_schema_files() assert len(files) assert files[0].is_file() assert "num_gains" in par # no files in the directory with pytest.raises(FileNotFoundError, match=r"^No schema files"): - schema.model_parameter_schema_files(tmp_test_directory) + schema.get_get_model_parameter_schema_files(tmp_test_directory) # directory does not exist with pytest.raises(FileNotFoundError, match=r"^No schema files"): - schema.model_parameter_schema_files("not_a_directory") + schema.get_get_model_parameter_schema_files("not_a_directory") -def test_model_parameter_schema_file(): +def test_get_model_parameter_schema_file(): - schema_file = str(schema.model_parameter_schema_file("num_gains")) + schema_file = str(schema.get_model_parameter_schema_file("num_gains")) assert "simtools/schemas/model_parameters/num_gains.schema.yml" in schema_file with pytest.raises(FileNotFoundError, match=r"^Schema file not found:"): - schema.model_parameter_schema_file("not_a_parameter") + schema.get_model_parameter_schema_file("not_a_parameter") -def test_model_parameter_schema_version(): +def test_get_model_parameter_schema_version(): - most_recent = schema.model_parameter_schema_version() + most_recent = schema.get_model_parameter_schema_version() assert most_recent == "0.2.0" - assert schema.model_parameter_schema_version("0.2.0") == "0.2.0" - assert schema.model_parameter_schema_version("0.1.0") == "0.1.0" + assert schema.get_model_parameter_schema_version("0.2.0") == "0.2.0" + assert schema.get_model_parameter_schema_version("0.1.0") == "0.1.0" with pytest.raises(ValueError, match=r"^Schema version 0.0.1 not found in"): - schema.model_parameter_schema_version("0.0.1") + schema.get_model_parameter_schema_version("0.0.1") diff --git a/tests/unit_tests/data_model/test_validate_data.py b/tests/unit_tests/data_model/test_validate_data.py index f9a3e3631..2b8d1e9ed 100644 --- a/tests/unit_tests/data_model/test_validate_data.py +++ b/tests/unit_tests/data_model/test_validate_data.py @@ -604,7 +604,7 @@ def test_validate_data_dict(): # parameter with unit data_validator = validate_data.DataValidator( - schema_file=schema.model_parameter_schema_file("reference_point_altitude") + schema_file=schema.get_model_parameter_schema_file("reference_point_altitude") ) data_validator.data_dict = { "name": "reference_point_altitude", @@ -615,7 +615,7 @@ def test_validate_data_dict(): # parameter without unit data_validator_2 = validate_data.DataValidator( - schema_file=schema.model_parameter_schema_file("num_gains") + schema_file=schema.get_model_parameter_schema_file("num_gains") ) data_validator_2.data_dict = {"name": "num_gains", "value": [2], "unit": [""]} data_validator_2._validate_data_dict() @@ -630,7 +630,7 @@ def test_validate_data_dict(): data_validator_2._validate_data_dict() data_validator_3 = validate_data.DataValidator( - schema_file=schema.model_parameter_schema_file("random_focal_length") + schema_file=schema.get_model_parameter_schema_file("random_focal_length") ) data_validator_3.data_dict = { "name": "random_focal_length", @@ -645,7 +645,7 @@ def test_validate_data_dict(): def test_convert_results_to_model_format(): data_validator_3 = validate_data.DataValidator( - schema_file=schema.model_parameter_schema_file("random_focal_length") + schema_file=schema.get_model_parameter_schema_file("random_focal_length") ) data_validator_3.data_dict = { "name": "random_focal_length", diff --git a/tests/unit_tests/model/test_array_model.py b/tests/unit_tests/model/test_array_model.py index ed2621478..6846e3376 100644 --- a/tests/unit_tests/model/test_array_model.py +++ b/tests/unit_tests/model/test_array_model.py @@ -109,7 +109,7 @@ def test_get_telescope_position_parameter(array_model, io_handler): assert am._get_telescope_position_parameter( "LSTN-01", "North", 10.0 * u.m, 200.0 * u.cm, 30.0 * u.m, "2.0.0" ) == { - "schema_version": schema.model_parameter_schema_version(), + "schema_version": schema.get_model_parameter_schema_version(), "parameter": "array_element_position_ground", "instrument": "LSTN-01", "site": "North",