From 0603d71d282fd90125b573f38abfa92e34086364 Mon Sep 17 00:00:00 2001 From: Rowland Ogwara Date: Sat, 2 Apr 2022 21:46:16 -0500 Subject: [PATCH] chore(repository): Use builders to load proper dictionary (#11) Use builder pattern to load dictionary to allow support for more dictionary schema locations in the future --- .pre-commit-config.yaml | 10 +- .secrets.baseline | 18 +-- setup.cfg | 2 +- src/psqlgml/__init__.py | 9 +- src/psqlgml/cli.py | 51 ++++-- src/psqlgml/dictionaries/__init__.py | 0 src/psqlgml/dictionaries/readers.py | 112 +++++++++++++ src/psqlgml/dictionaries/repository.py | 150 ++++++++++++++++++ .../schemas.py} | 62 +------- src/psqlgml/repository.py | 109 ------------- src/psqlgml/schema.py | 5 +- src/psqlgml/validators.py | 14 +- tests/conftest.py | 16 +- tests/integration/conftest.py | 6 +- tests/integration/test_dictionary.py | 21 +-- tests/integration/test_git_repository.py | 49 ++++++ tests/unit/test_dictionary.py | 18 +-- tests/unit/test_git_repository.py | 61 +++++++ tests/unit/test_local_repository.py | 46 ++++++ tests/unit/test_repository.py | 65 -------- tests/unit/test_schema.py | 5 +- tests/unit/test_validators.py | 5 +- tox.ini | 4 +- 23 files changed, 520 insertions(+), 318 deletions(-) create mode 100644 src/psqlgml/dictionaries/__init__.py create mode 100644 src/psqlgml/dictionaries/readers.py create mode 100644 src/psqlgml/dictionaries/repository.py rename src/psqlgml/{dictionary.py => dictionaries/schemas.py} (71%) delete mode 100644 src/psqlgml/repository.py create mode 100644 tests/integration/test_git_repository.py create mode 100644 tests/unit/test_git_repository.py create mode 100644 tests/unit/test_local_repository.py delete mode 100644 tests/unit/test_repository.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 308c0c8..45ef438 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v4.1.0 
hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -8,11 +8,11 @@ repos: args: [--remove] - id: check-yaml - repo: https://github.com/psf/black - rev: 21.8b0 + rev: 22.3.0 hooks: - id: black - repo: https://github.com/pycqa/isort - rev: 5.9.3 + rev: 5.10.1 hooks: - id: isort name: isort (python) @@ -23,12 +23,12 @@ repos: name: isort (pyi) types: [ pyi ] - repo: https://github.com/Yelp/detect-secrets - rev: v1.1.0 + rev: v1.2.0 hooks: - id: detect-secrets args: [ '--baseline', '.secrets.baseline' ] - repo: https://github.com/pycqa/flake8 - rev: '3.9.2' + rev: 4.0.1 hooks: - id: flake8 - repo: https://github.com/pre-commit/pygrep-hooks diff --git a/.secrets.baseline b/.secrets.baseline index 62b988b..aa510a3 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,5 +1,5 @@ { - "version": "1.1.0", + "version": "1.2.0", "plugins_used": [ { "name": "ArtifactoryDetector" @@ -103,29 +103,29 @@ } ], "results": { - "tests/unit/test_repository.py": [ + "tests/integration/test_git_repository.py": [ { "type": "Hex High Entropy String", - "filename": "tests/unit/test_repository.py", + "filename": "tests/integration/test_git_repository.py", "hashed_secret": "d262efa598a19d120989a1e19864171abe8efcb1", "is_verified": false, - "line_number": 41 + "line_number": 19 }, { "type": "Hex High Entropy String", - "filename": "tests/unit/test_repository.py", + "filename": "tests/integration/test_git_repository.py", "hashed_secret": "902b84b9dcfc39d52a0c959c5a10487e675c4667", "is_verified": false, - "line_number": 44 + "line_number": 20 }, { "type": "Hex High Entropy String", - "filename": "tests/unit/test_repository.py", + "filename": "tests/integration/test_git_repository.py", "hashed_secret": "4a9e2768850629c1bc1a0f9fac1fba06d6f549c9", "is_verified": false, - "line_number": 47 + "line_number": 21 } ] }, - "generated_at": "2021-08-19T18:02:21Z" + "generated_at": "2022-04-03T02:16:12Z" } diff --git a/setup.cfg b/setup.cfg index 700c0ab..1ab45e2 100644 --- a/setup.cfg +++ 
b/setup.cfg @@ -30,7 +30,7 @@ install_requires = attrs click colored - dulwich + dulwich>=0.20.31 graphviz Jinja2 jsonschema diff --git a/src/psqlgml/__init__.py b/src/psqlgml/__init__.py index 469ce24..e204096 100644 --- a/src/psqlgml/__init__.py +++ b/src/psqlgml/__init__.py @@ -1,6 +1,7 @@ from pkg_resources import get_distribution -from psqlgml.dictionary import Association, Dictionary, from_object, load, load_local +from psqlgml.dictionaries.readers import DictionaryReader, load, load_local +from psqlgml.dictionaries.schemas import Association, Dictionary, from_object from psqlgml.resources import ResourceFile, load_by_resource, load_resource from psqlgml.schema import generate from psqlgml.schema import read as read_schema @@ -11,7 +12,9 @@ GmlEdge, GmlNode, GmlSchema, + RenderFormat, SystemAnnotation, + ValidatorType, ) from psqlgml.validators import DataViolation, ValidationRequest, validate from psqlgml.visualization import draw @@ -22,12 +25,14 @@ "Association", "DataViolation", "Dictionary", + "DictionaryReader", "DictionarySchema", "DictionarySchemaDict", "GmlData", "GmlEdge", "GmlSchema", "ResourceFile", + "RenderFormat", "SystemAnnotation", "ValidationRequest", "draw", @@ -39,4 +44,6 @@ "from_object", "read_schema", "validate", + "ValidatorType", + "VERSION", ] diff --git a/src/psqlgml/cli.py b/src/psqlgml/cli.py index 5b3f59c..6a8a453 100644 --- a/src/psqlgml/cli.py +++ b/src/psqlgml/cli.py @@ -7,10 +7,7 @@ import click import yaml -from psqlgml import VERSION -from psqlgml import dictionary as d -from psqlgml import schema, validators, visualization -from psqlgml.types import RenderFormat, ValidatorType +import psqlgml __all__: List[str] = [] @@ -23,7 +20,7 @@ class LoggingConfig: @click.group() -@click.version_option(VERSION) +@click.version_option(psqlgml.VERSION) def app() -> None: """psqlgml script for generating, validating and viewing graph data""" global logger @@ -72,19 +69,35 @@ def app() -> None: is_flag=True, help="Force regeneration if 
already exists", ) +@click.option( + "-t", + "--tag/--no-tag", + type=bool, + default=True, + is_flag=True, + help="True if specified version is a tag, defaults to True", +) @app.command(name="generate") def schema_gen( - dictionary: str, output_dir: str, version: str, name: str, schema_path: str, force: bool + dictionary: str, + output_dir: str, + version: str, + name: str, + schema_path: str, + force: bool, + tag: bool, ) -> None: """Generate schema for specified dictionary""" global logger logger.debug(f"Generating psqlgml schema for {dictionary} Dictionary") - loaded_dictionary = d.load( - version=version, name=name, git_url=dictionary, schema_path=schema_path, overwrite=force + current_dictionary = ( + psqlgml.DictionaryReader(name, version) + .git(url=dictionary, schema_path=schema_path, overwrite=force, is_tag=tag) + .read() ) - schema_file = schema.generate( - loaded_dictionary=loaded_dictionary, + schema_file = psqlgml.generate( + loaded_dictionary=current_dictionary, output_location=output_dir, ) logging.info(f"schema generation completed successfully: {schema_file}") @@ -122,17 +135,17 @@ def validate_file( data_file: str, dictionary: str, data_dir: str, - validator: ValidatorType, + validator: psqlgml.ValidatorType, ) -> None: global logger logger.debug(f"running {validator} validators for {data_dir}/{data_file}") - gml_schema = schema.read(dictionary, version) - loaded = d.load(name=dictionary, version=version) - request = validators.ValidationRequest( + gml_schema = psqlgml.read_schema(dictionary, version) + loaded = psqlgml.load(name=dictionary, version=version) + request = psqlgml.ValidationRequest( data_file=data_file, data_dir=data_dir, schema=gml_schema, dictionary=loaded ) - validators.validate( + psqlgml.validate( request=request, validator=validator, print_error=True, @@ -161,10 +174,14 @@ def validate_file( @click.option("-s", "--show/--no-show", is_flag=True, default=True) @app.command(name="visualize", help="Visualize a resource file using 
graphviz") def visualize_data( - output_dir: str, data_dir: str, data_file: str, output_format: RenderFormat, show: bool + output_dir: str, + data_dir: str, + data_file: str, + output_format: psqlgml.RenderFormat, + show: bool, ) -> None: - visualization.draw(data_dir, data_file, output_dir, output_format, show_rendered=show) + psqlgml.draw(data_dir, data_file, output_dir, output_format, show_rendered=show) def configure_logger(cfg: LoggingConfig) -> None: diff --git a/src/psqlgml/dictionaries/__init__.py b/src/psqlgml/dictionaries/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/psqlgml/dictionaries/readers.py b/src/psqlgml/dictionaries/readers.py new file mode 100644 index 0000000..0548c7b --- /dev/null +++ b/src/psqlgml/dictionaries/readers.py @@ -0,0 +1,112 @@ +import logging +import os +from pathlib import Path +from typing import Optional, cast + +from psqlgml.dictionaries import repository, schemas + +__all__ = ["load", "load_local", "DictionaryReader"] + +logger = logging.getLogger(__name__) + + +class DictionaryReader: + def __init__(self, name: str, version: str) -> None: + self.name = name + self.version = version + + self._url: Optional[str] = None + self._is_tag: bool = True + self._overwrite: bool = False + self._schema_path: str = "gdcdictionary/schemas" + self._base_dir: Path = Path( + os.getenv("GML_DICTIONARY_HOME", f"{Path.home()}/.gml/dictionaries") + ) + + self.reader: Optional[repository.Repository] = None + + def local(self, base_directory: Optional[Path] = None) -> "DictionaryReader": + logger.debug(f"Reading local Dictionary {self.name}: {self.version} @ {base_directory}") + self._base_dir = base_directory or self._base_dir + return self + + def git( + self, + url: str, + overwrite: bool, + schema_path: str = "gdcdictionary/schemas", + is_tag: bool = True, + ) -> "DictionaryReader": + logger.debug(f"Reading remote Dictionary {self.name}: {self.version} @ {url}") + + self._url = url + self._is_tag = is_tag + 
self._overwrite = overwrite + self._schema_path = schema_path + return self + + def is_preloaded_dictionary(self) -> bool: + """Checks if a dictionary with name and version has been previously loaded""" + return Path(f"{self._base_dir}/{self.name}/{self.version}").exists() + + def read(self) -> schemas.Dictionary: + if self.is_preloaded_dictionary() and not self._overwrite: + return repository.LocalRepository(name=self.name, base_directory=self._base_dir).read( + self.version + ) + return repository.GitRepository( + name=self.name, + url=cast(str, self._url), + schema_path=self._schema_path, + force=self._overwrite, + is_tag=self._is_tag, + ).read(self.version) + + +def load_local( + name: str, version: str, dictionary_location: Optional[str] = None +) -> schemas.Dictionary: + """Attempts to load a previously downloaded dictionary from a local location + + Args: + name: name/label used to save the dictionary locally + version: version number of the saved dictionary + dictionary_location: base directory where all dictionaries are dumped + Returns: + A Dictionary instance read from the previously downloaded dictionary files + """ + base_path = Path(dictionary_location) if dictionary_location else None + return DictionaryReader(name, version).local(base_path).read() + + +def load( + version: str, + overwrite: bool = False, + name: str = "gdcdictionary", + schema_path: str = "gdcdictionary/schemas", + git_url: str = "https://github.com/NCI-GDC/gdcdictionary.git", + is_tag: bool = True, +) -> schemas.Dictionary: + """Downloads and loads a dictionary instance based on the input parameters + + Args: + version: dictionary version number + overwrite: force a re-download of the dictionary files, defaults to false + name: name/label used to save the dictionary locally, defaults to gdcdictionary + schema_path: path to the dictionary files within the dictionary git repository + git_url: URL to the git repository + is_tag: True if version names a tag, False if it names a remote branch + Returns: + A Dictionary instance + 
""" + + return ( + DictionaryReader(name, version) + .git( + url=git_url, + is_tag=is_tag, + overwrite=overwrite, + schema_path=schema_path, + ) + .read() + ) diff --git a/src/psqlgml/dictionaries/repository.py b/src/psqlgml/dictionaries/repository.py new file mode 100644 index 0000000..c69e359 --- /dev/null +++ b/src/psqlgml/dictionaries/repository.py @@ -0,0 +1,150 @@ +import abc +import logging +import os +from pathlib import Path +from typing import Optional + +import attr +from dulwich import objects, porcelain + +__all__ = ["Repository", "LocalRepository", "GitRepository"] + +from psqlgml.dictionaries import schemas + +logger = logging.getLogger(__name__) + + +@attr.s(auto_attribs=True) +class Repository(abc.ABC): + name: str + + def get_dictionary_directory(self, version: str) -> Path: + """Local directory where dictionary files will be dumped""" + dict_home = os.getenv("GML_DICTIONARY_HOME", f"{Path.home()}/.gml/dictionaries") + return Path(f"{dict_home}/{self.name}/{version}") + + @abc.abstractmethod + def read(self, version: str) -> schemas.Dictionary: + """Reads the specified dictionary version from the repository""" + ... 
+ + +@attr.s(auto_attribs=True) +class LocalRepository(Repository): + base_directory: Optional[Path] = None + + def get_dictionary_directory(self, version: str) -> Path: + base_dir = self.base_directory or Path( + f"{os.getenv('GML_DICTIONARY_HOME', f'{Path.home()}/.gml/dictionaries')}" + ) + return Path(f"{base_dir}/{self.name}/{version}") + + def read(self, version: str) -> schemas.Dictionary: + dict_path = self.get_dictionary_directory(version) + + if not dict_path.exists(): + logger.info(f"No local dictionary with name: {self.name}, version: {version} found") + raise IOError(f"No local dictionary found with name: {self.name}, version: {version}") + + return schemas.Dictionary( + name=self.name, + version=version, + url=str(dict_path), + schema=schemas.load_schemas(str(dict_path)), + ) + + +@attr.s(auto_attribs=True) +class GitRepository(Repository): + url: str + force: bool = False + is_tag: bool = True + origin: bytes = b"origin" + schema_path: str = "gdcdictionary/schemas" + repo: porcelain.BaseRepo = None + lazy_load: bool = False + + def __attrs_post_init__(self) -> None: + if not self.lazy_load: + self.clone() + + @property + def local_directory(self) -> Path: + git_home = os.getenv("GML_GIT_HOME", f"{Path.home()}/.gml/git") + local_dir = Path(f"{git_home}/{self.name}") + local_dir.mkdir(parents=True, exist_ok=True) + return local_dir + + @property + def is_cloned(self) -> bool: + return os.path.exists("{}/.git".format(self.local_directory)) + + def get_commit_ref(self, version: str) -> str: + if self.is_tag: + return f"refs/tags/{version}" + return f"refs/remotes/{self.origin.decode()}/{version}" + + def read(self, version: str) -> schemas.Dictionary: + self.clone() + + commit_id = self.get_commit_id(self.get_commit_ref(version)) + dictionary_dir = self.get_dictionary_directory(version) + + if dictionary_dir.exists() and not self.force: + return schemas.Dictionary( + url=self.url, + name=self.name, + version=version, + 
schema=schemas.load_schemas(str(dictionary_dir)), + ) + + dictionary_dir.mkdir(parents=True, exist_ok=True) + commit_tree: objects.Tree = porcelain.get_object_by_path( + self.repo, self.schema_path, committish=commit_id + ) + + # dump schema files to dump location + for entry in commit_tree.items(): + + file_name = entry.path.decode() + blob = self.repo.get_object(entry.sha) + + # skip sub folders + if not isinstance(blob, objects.Blob): + logger.debug(f"Skipping extra folders in schema directory {file_name}") + continue + + with open(f"{dictionary_dir}/{file_name}", "wb") as f: + f.write(blob.as_raw_string()) + return schemas.Dictionary( + url=self.url, + name=self.name, + version=version, + schema=schemas.load_schemas(str(dictionary_dir)), + ) + + def get_commit_id(self, commit_ref: str) -> bytes: + obj: objects.ShaFile = porcelain.parse_object(self.repo, commit_ref) + if isinstance(obj, objects.Commit): + return obj.id + if isinstance(obj, objects.Tag): + return obj.object[1] + raise ValueError(f"Unrecognized commit {commit_ref}") + + def clone(self) -> None: + + if self.repo: + return + + if not self.is_cloned: + logger.debug(f"cloning new repository {self.url} into {self.local_directory}") + + self.repo = porcelain.clone( + self.url, + target=self.local_directory, + depth=1, + checkout=False, + origin=self.origin.decode(), + ) + else: + self.repo = porcelain.Repo(self.local_directory) diff --git a/src/psqlgml/dictionary.py b/src/psqlgml/dictionaries/schemas.py similarity index 71% rename from src/psqlgml/dictionary.py rename to src/psqlgml/dictionaries/schemas.py index 7657718..df358f5 100644 --- a/src/psqlgml/dictionary.py +++ b/src/psqlgml/dictionaries/schemas.py @@ -8,14 +8,12 @@ import yaml from jsonschema import RefResolver -from psqlgml import repository, types, typings +from psqlgml import types, typings from psqlgml.types import DictionarySchema __all__ = [ "Association", "Dictionary", - "load", - "load_local", "from_object", ] @@ -205,64 +203,6 @@ def 
_load_schema(schemas: List[types.DictionarySchemaDict]) -> Dict[str, Diction return loaded -def load( - version: str, - overwrite: bool = False, - name: str = "gdcdictionary", - schema_path: str = "gdcdictionary/schemas", - git_url: str = "https://github.com/NCI-GDC/gdcdictionary.git", -) -> Dictionary: - """Downloads and loads a dictionary instance based on the input parameters - - Args: - version: dictionary version number - overwrite: force a re-download of the dictionary files, defaults to false - name: name/label used to save the dictionary locally, defaults to gdcdictionary - schema_path: path to the dictionary files with the dictionary git repository - git_url: URL to the git repository - Returns: - A Dictionary instance - """ - - local_dictionary = load_local(name, version) if not overwrite else None - if local_dictionary: - return local_dictionary - - logger.info(f"Attempting to read dictionary from location {git_url}") - repo = repository.RepoMeta(remote_git_url=git_url, name=name) - checkout_command = repository.RepoCheckout( - repo=repo, path=schema_path, commit=version, override=overwrite - ) - checkout_dir = repository.checkout(checkout_command) - - logger.info(f"loading dictionary from {checkout_dir}") - loaded_schema = load_schemas(checkout_dir, DEFAULT_META_SCHEMA, DEFAULT_DEFINITIONS) - return Dictionary(url=git_url, name=name, version=version, schema=loaded_schema) - - -def load_local( - name: str, version: str, dictionary_location: Optional[str] = None -) -> Optional[Dictionary]: - """Attempts to load a previously downloaded dictionary from a local location - - Args: - name: name/label used to save the dictionary locally - version: version number of the saved dictionary - dictionary_location: base directory where all dictionaries are dumped - Returns: - A Dictionary instance if dictionary files were proviously downloaded, else None - """ - dict_home = os.getenv("GML_DICTIONARY_HOME", f"{Path.home()}/.gml/dictionaries") - dictionary_location = 
dictionary_location or dict_home - directory = Path(f"{dictionary_location}/{name}/{version}") - if not directory.exists(): - logger.info(f"No local copy of dictionary with name: {name}, version: {version} found") - return None - logger.info(f"Attempting to load dictionary from local path {directory}") - s = load_schemas(str(directory), DEFAULT_META_SCHEMA, DEFAULT_DEFINITIONS) - return Dictionary(name=name, version=version, schema=s, url=str(directory)) - - def from_object( schema: Dict[str, types.DictionarySchemaDict], name: str, version: str ) -> Dictionary: diff --git a/src/psqlgml/repository.py b/src/psqlgml/repository.py deleted file mode 100644 index 607c0ff..0000000 --- a/src/psqlgml/repository.py +++ /dev/null @@ -1,109 +0,0 @@ -import logging -import os -from pathlib import Path - -import attr -from dulwich import objects, porcelain - -__all__ = [ - "checkout", - "clone", - "RepoMeta", - "RepoCheckout", -] - -logger = logging.getLogger(__name__) - - -@attr.s(auto_attribs=True, frozen=True) -class RepoMeta: - name: str - remote_git_url: str - origin: bytes = b"origin" - - @property - def git_dir(self) -> str: - git_home = os.getenv("GML_GIT_HOME", f"{Path.home()}/.gml/git") - dir_home = f"{git_home}/{self.name}" - os.makedirs(dir_home, exist_ok=True) - return dir_home - - @property - def is_cloned(self) -> bool: - return os.path.exists("{}/.git".format(self.git_dir)) - - -@attr.s(auto_attribs=True) -class RepoCheckout: - repo: RepoMeta - commit: str - path: str - origin: bytes = b"origin" - is_tag: bool = True - override: bool = False - - @property - def ref(self) -> str: - if self.is_tag: - return f"refs/tags/{self.commit}" - return f"refs/remotes/{self.origin.decode()}/{self.commit}" - - -def get_checkout_dir(repo_name: str, commit_ref: str) -> str: - dict_home = os.getenv("GML_DICTIONARY_HOME", f"{Path.home()}/.gml/dictionaries") - chk = f"{dict_home}/{repo_name}/{commit_ref}" - return chk - - -def get_commit_id(repo: porcelain.Repo, commit_ref: str) -> 
bytes: - obj: objects.ShaFile = porcelain.parse_object(repo, commit_ref) - if isinstance(obj, objects.Commit): - return obj.id - if isinstance(obj, objects.Tag): - return obj.object[1] - raise ValueError(f"Unrecognized commit {commit_ref}") - - -def clone(repo_meta: RepoMeta) -> porcelain.Repo: - if not repo_meta.is_cloned: - logger.debug( - f"cloning new repository {repo_meta.remote_git_url} into {repo_meta.git_dir}" - ) - - porcelain.clone( - repo_meta.remote_git_url, - target=repo_meta.git_dir, - depth=1, - checkout=False, - origin=repo_meta.origin, - ) - return porcelain.Repo(repo_meta.git_dir) - - -def checkout(command: RepoCheckout) -> str: - repo = clone(command.repo) - commit_id = get_commit_id(repo, command.ref) - chk_dir = get_checkout_dir(command.repo.name, command.commit) - - if os.path.exists(chk_dir) and not command.override: - return chk_dir - - os.makedirs(chk_dir, exist_ok=True) - commit_tree: objects.Tree = porcelain.get_object_by_path( - repo, command.path, committish=commit_id - ) - - # dump schema files to dump location - for entry in commit_tree.items(): - - file_name = entry.path.decode() - blob = repo.get_object(entry.sha) - - # skip sub folders - if not isinstance(blob, objects.Blob): - logger.debug(f"Skipping extra folders in schema directory {file_name}") - continue - - with open(f"{chk_dir}/{file_name}", "wb") as f: - f.write(blob.as_raw_string()) - return chk_dir diff --git a/src/psqlgml/schema.py b/src/psqlgml/schema.py index 6ad29b6..1203b48 100644 --- a/src/psqlgml/schema.py +++ b/src/psqlgml/schema.py @@ -7,7 +7,8 @@ import jinja2 as j import yaml -from psqlgml import dictionary, resources, types +from psqlgml import resources, types +from psqlgml.dictionaries import schemas __all__ = [ "generate", @@ -24,7 +25,7 @@ def generate( - loaded_dictionary: dictionary.Dictionary, + loaded_dictionary: schemas.Dictionary, output_location: Optional[str] = None, template_name: str = "schema.jinja2", ) -> str: diff --git 
a/src/psqlgml/validators.py b/src/psqlgml/validators.py index 1bd7ced..1964b6a 100644 --- a/src/psqlgml/validators.py +++ b/src/psqlgml/validators.py @@ -5,8 +5,8 @@ import colored from jsonschema import Draft7Validator -import psqlgml.types -from psqlgml import dictionary, resources, types, typings +from psqlgml import resources, types, typings +from psqlgml.dictionaries import schemas __all__ = [ "AssociationValidator", @@ -37,7 +37,7 @@ class ValidationRequest: data_dir: str data_file: str schema: types.GmlSchema - dictionary: dictionary.Dictionary + dictionary: schemas.Dictionary _payload: Dict[str, types.GmlData] = attr.ib(default=None) @@ -72,7 +72,7 @@ def validate(self) -> Dict[str, Set[DataViolation]]: ... @property - def dictionary(self) -> dictionary.Dictionary: + def dictionary(self) -> schemas.Dictionary: return self.request.dictionary def report_violation( @@ -240,7 +240,7 @@ def register_validator(self, validator_type: Type[Validator]) -> None: v = validator_type(request=self.request) self.validators.append(v) - def register_validator_type(self, validator_type: psqlgml.types.ValidatorType) -> None: + def register_validator_type(self, validator_type: types.ValidatorType) -> None: validators = VALIDATORS[validator_type] for validator in validators: self.register_validator(validator) @@ -274,7 +274,7 @@ def validate(self) -> Dict[str, Set[DataViolation]]: def validate( request: ValidationRequest, - validator: psqlgml.types.ValidatorType = "ALL", + validator: types.ValidatorType = "ALL", print_error: bool = False, ) -> Dict[str, Set[DataViolation]]: register_defaults = True if validator == "ALL" else False @@ -292,7 +292,7 @@ def validate( return violations -def print_violations(violations: Dict[str, Set[DataViolation]], d: dictionary.Dictionary) -> None: +def print_violations(violations: Dict[str, Set[DataViolation]], d: schemas.Dictionary) -> None: for resource_file, sub_violations in violations.items(): clr = "red" if sub_violations else "green" 
print( diff --git a/tests/conftest.py b/tests/conftest.py index d04fcee..85f3bec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,31 +3,31 @@ import pkg_resources import pytest -from psqlgml import dictionary, schema, types +import psqlgml from tests.helpers import SchemaInfo @pytest.fixture(scope="session") -def data_dir(): +def data_dir() -> str: return pkg_resources.resource_filename("tests", "data") @pytest.fixture(scope="session") -def local_dictionary(data_dir: str) -> dictionary.Dictionary: - return dictionary.load_local(version="0.1.0", name="dictionary", dictionary_location=data_dir) +def local_dictionary(data_dir: str) -> psqlgml.Dictionary: + return psqlgml.load_local(version="0.1.0", name="dictionary", dictionary_location=data_dir) @pytest.fixture() -def local_schema(local_dictionary: dictionary.Dictionary, tmpdir: Path) -> SchemaInfo: - schema.generate(loaded_dictionary=local_dictionary, output_location=str(tmpdir)) +def local_schema(local_dictionary: psqlgml.Dictionary, tmpdir: Path) -> SchemaInfo: + psqlgml.generate(loaded_dictionary=local_dictionary, output_location=str(tmpdir)) return SchemaInfo( version=local_dictionary.version, name=local_dictionary.name, source_dir=str(tmpdir) ) @pytest.fixture() -def test_schema(local_schema: SchemaInfo) -> types.GmlSchema: - return schema.read( +def test_schema(local_schema: SchemaInfo) -> psqlgml.GmlSchema: + return psqlgml.read_schema( name=local_schema.name, version=local_schema.version, schema_location=local_schema.source_dir, diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index d2fbc19..818b67c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,7 +1,7 @@ import pytest from click.testing import CliRunner -from psqlgml import dictionary +import psqlgml @pytest.fixture(scope="session") @@ -10,5 +10,5 @@ def cli_runner(): @pytest.fixture(scope="session") -def remote_dictionary() -> dictionary.Dictionary: - return 
dictionary.load(version="2.3.0") +def remote_dictionary() -> psqlgml.Dictionary: + return psqlgml.load(version="2.3.0") diff --git a/tests/integration/test_dictionary.py b/tests/integration/test_dictionary.py index 56eb92e..634a29e 100644 --- a/tests/integration/test_dictionary.py +++ b/tests/integration/test_dictionary.py @@ -1,24 +1,12 @@ -from pathlib import Path - import pytest -from psqlgml import dictionary, repository +import psqlgml pytestmark = [pytest.mark.slow, pytest.mark.dictionary] REMOTE_GIT_URL = "https://github.com/NCI-GDC/gdcdictionary.git" -@pytest.fixture() -def dictionary_path() -> Path: - repo = repository.RepoMeta(remote_git_url=REMOTE_GIT_URL, name="smiths") - command = repository.RepoCheckout(repo=repo, path="gdcdictionary/schemas", commit="2.3.0") - - chk_dir = Path(repository.checkout(command)) - assert chk_dir.exists() - return chk_dir - - -def test_remote_dictionary(remote_dictionary) -> None: +def test_remote_dictionary(remote_dictionary: psqlgml.Dictionary) -> None: assert len(remote_dictionary.links) == 92 assert len(remote_dictionary.all_associations()) == 360 @@ -30,8 +18,9 @@ def test_remote_dictionary(remote_dictionary) -> None: ) -def test_dictionary_loading(dictionary_path) -> None: - d1 = dictionary.load_local(version="2.3.0", name="gdcdictionary") +@pytest.mark.usefixtures("remote_dictionary") +def test_dictionary_loading() -> None: + d1 = psqlgml.load_local(version="2.3.0", name="gdcdictionary") assert d1.schema assert len(d1.schema) == 81 program = d1.schema["program"] diff --git a/tests/integration/test_git_repository.py b/tests/integration/test_git_repository.py new file mode 100644 index 0000000..fe60691 --- /dev/null +++ b/tests/integration/test_git_repository.py @@ -0,0 +1,49 @@ +import pytest + +from psqlgml.dictionaries import repository + +pytestmark = [pytest.mark.slow, pytest.mark.dictionary] +REMOTE_GIT_URL = "https://github.com/NCI-GDC/gdcdictionary.git" + + +def test_clone_git_repo() -> None: + rm = 
repository.GitRepository(url=REMOTE_GIT_URL, name="smiths") + rm.clone() + assert rm.is_cloned + assert rm.repo.head() + + +@pytest.mark.parametrize( + "commit, is_tag, ref", + [ + ("2.4.0", True, b"f7ba557228bc113c92387c4eb6160621d27b53ef"), + ("2.3.0", True, b"1595aef2484ab6fa6c945950b296c4031c2606fd"), + ("release/avery", False, b"7107e8116ce6ed8185626570dcba14b46e8e4d27"), + ], +) +def test_get_git_commit_id(commit: str, is_tag: bool, ref: bytes) -> None: + rm = repository.GitRepository(url=REMOTE_GIT_URL, name="smiths", is_tag=is_tag) + rm.clone() + + assert ref == rm.get_commit_id(rm.get_commit_ref(commit)) + + +@pytest.mark.slow +@pytest.mark.parametrize("lazy", [True, False]) +def test_read_remote_dictionary(lazy: bool) -> None: + project = repository.GitRepository( + url=REMOTE_GIT_URL, + name="smiths", + force=True, + schema_path="gdcdictionary/schemas", + lazy_load=lazy, + ) + chk_dir = project.get_dictionary_directory("2.3.0") + dictionary = project.read("2.3.0") + assert chk_dir.exists() + + entries = [f.name for f in chk_dir.iterdir()] + assert "program.yaml" in entries + + assert dictionary.name == "smiths" + assert dictionary.version == "2.3.0" diff --git a/tests/unit/test_dictionary.py b/tests/unit/test_dictionary.py index 5b9671b..7eff713 100644 --- a/tests/unit/test_dictionary.py +++ b/tests/unit/test_dictionary.py @@ -2,7 +2,7 @@ import pytest -from psqlgml import dictionary +from psqlgml.dictionaries import schemas from tests import helpers pytestmark = [pytest.mark.dictionary] @@ -16,13 +16,13 @@ DUMMY_SCHEMA = {"$ref": "_meta.yaml#/properties"} -def test_schema_resolution(local_dictionary: dictionary.Dictionary) -> None: +def test_schema_resolution(local_dictionary: schemas.Dictionary) -> None: assert local_dictionary.schema -@mock.patch.dict(dictionary.RESOLVERS, {"_meta.yaml": dictionary.Resolver("_meta.yaml", META)}) +@mock.patch.dict(schemas.RESOLVERS, {"_meta.yaml": schemas.Resolver("_meta.yaml", META)}) def test_resolvers(): - resolved 
= dictionary.resolve_schema(DUMMY_SCHEMA) + resolved = schemas.resolve_schema(DUMMY_SCHEMA) assert "name" in resolved assert "age" in resolved @@ -33,10 +33,10 @@ def test_dictionary(local_dictionary) -> None: def test_association__instance() -> None: - a1 = dictionary.Association("src", "dst", "member_of", "link1") - a2 = dictionary.Association("src", "dst", "member_of", "link1") - a3 = dictionary.Association("src", "dst", "member_of", "link2") - a4 = dictionary.Association("src", "dst", "member_of", "link2", is_reference=True) + a1 = schemas.Association("src", "dst", "member_of", "link1") + a2 = schemas.Association("src", "dst", "member_of", "link1") + a3 = schemas.Association("src", "dst", "member_of", "link2") + a4 = schemas.Association("src", "dst", "member_of", "link2", is_reference=True) assert a1 == a2 assert a1 != a3 @@ -45,6 +45,6 @@ def test_association__instance() -> None: def test_from_objects() -> None: - d = dictionary.from_object(helpers.MiniDictionary.schema, name="mini", version="1.0.0") + d = schemas.from_object(helpers.MiniDictionary.schema, name="mini", version="1.0.0") assert {"cases", "projects", "portions", "samples", "centers", "programs"} == d.links assert len(d.schema) == 6 diff --git a/tests/unit/test_git_repository.py b/tests/unit/test_git_repository.py new file mode 100644 index 0000000..ab4719f --- /dev/null +++ b/tests/unit/test_git_repository.py @@ -0,0 +1,61 @@ +import os +from pathlib import Path +from unittest import mock + +import pkg_resources +import pytest + +from psqlgml.dictionaries import repository + +REMOTE_GIT_URL = "https://github.com/NCI-GDC/gdcdictionary.git" + + +@pytest.mark.parametrize( + "default_base, expectation", + [ + (True, f"{Path.home()}/.gml/dictionaries/dictionary/0.1.0"), + (False, f"{pkg_resources.resource_filename('tests', 'data')}/dictionary/0.1.0"), + ], +) +def test_get_dictionary_dir(data_dir: str, default_base: str, expectation) -> None: + """Tests dictionary directory is set properly with and 
without env variables""" + + gml_dir = f"{Path.home()}/.gml/dictionaries" if default_base else data_dir + with mock.patch.dict(os.environ, {"GML_DICTIONARY_HOME": gml_dir}): + + repo = repository.GitRepository(name="dictionary", url=REMOTE_GIT_URL, lazy_load=True) + assert repo.name == "dictionary" + assert Path(expectation) == repo.get_dictionary_directory("0.1.0") + + +@pytest.mark.parametrize( + "local_git_home", + [ + f"{Path.home()}/.gml/git", + f"{pkg_resources.resource_filename('tests', 'data')}", + ], +) +def test_get_local_git_dir(local_git_home: str) -> None: + """Tests dictionary directory is set properly with and without env variables""" + + with mock.patch.dict(os.environ, {"GML_GIT_HOME": local_git_home}): + + repo = repository.GitRepository(name="dictionary", url=REMOTE_GIT_URL, lazy_load=True) + assert repo.name == "dictionary" + assert Path(f"{local_git_home}/dictionary") == repo.local_directory + + +def test_lazy_load_no_clone(tmpdir: Path) -> None: + with mock.patch.dict(os.environ, {"GML_GIT_HOME": str(tmpdir)}): + rm = repository.GitRepository(url=REMOTE_GIT_URL, name="smiths", lazy_load=True) + assert rm.is_cloned is False + + +@pytest.mark.parametrize( + "is_tag, expected_ref", [(True, "refs/tags/0.1.0"), (False, "refs/remotes/origin/0.1.0")] +) +def test_get_commit_ref(is_tag: bool, expected_ref: str) -> None: + rm = repository.GitRepository( + url=REMOTE_GIT_URL, name="smiths", lazy_load=True, is_tag=is_tag + ) + assert expected_ref == rm.get_commit_ref("0.1.0") diff --git a/tests/unit/test_local_repository.py b/tests/unit/test_local_repository.py new file mode 100644 index 0000000..9a9cfab --- /dev/null +++ b/tests/unit/test_local_repository.py @@ -0,0 +1,46 @@ +from pathlib import Path + +import pkg_resources +import pytest + +from psqlgml.dictionaries import repository + + +@pytest.mark.parametrize( + "default_base, expectation", + [ + (True, f"{Path.home()}/.gml/dictionaries/dictionary/0.1.0"), + (False, 
f"{pkg_resources.resource_filename('tests', 'data')}/dictionary/0.1.0"), + ], +) +def test_get_local_dictionary_dir(data_dir: str, default_base: bool, expectation: str) -> None: + """Tests initializing a Local repository""" + + base_dir = None if default_base else Path(data_dir) + repo = repository.LocalRepository(name="dictionary", base_directory=base_dir) + + assert repo.name == "dictionary" + assert Path(expectation) == repo.get_dictionary_directory("0.1.0") + + +def test_load_local_dictionary(data_dir: str) -> None: + + base_dir = Path(data_dir) + repo = repository.LocalRepository(name="dictionary", base_directory=base_dir) + dictionary = repo.read("0.1.0") + assert dictionary + assert dictionary.name == "dictionary" + assert dictionary.version == "0.1.0" + + +def test_load_invalid_local_dictionary(data_dir: str) -> None: + + base_dir = Path(data_dir) + repo = repository.LocalRepository(name="dictionary", base_directory=base_dir) + with pytest.raises(IOError) as exc_info: + repo.read("0.2.0") + assert exc_info.type == IOError + assert ( + exc_info.value.args[0] + == "No local dictionary found with name: dictionary, version: 0.2.0" + ) diff --git a/tests/unit/test_repository.py b/tests/unit/test_repository.py deleted file mode 100644 index e4665ef..0000000 --- a/tests/unit/test_repository.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -from pathlib import Path -from unittest import mock - -from psqlgml import repository - -REMOTE_GIT_URL = "https://github.com/NCI-GDC/gdcdictionary.git" - - -def test_init_with_home() -> None: - rm = repository.RepoMeta(remote_git_url=REMOTE_GIT_URL, name="smiths") - assert rm.git_dir == f"{Path.home()}/.gml/git/smiths" - - -def test_init_repo_meta(tmpdir) -> None: - - with mock.patch.dict(os.environ, {"GML_GIT_HOME": f"{tmpdir}/.gml/git"}): - rm = repository.RepoMeta(remote_git_url=REMOTE_GIT_URL, name="smiths") - assert rm.git_dir == f"{tmpdir}/.gml/git/smiths" - assert rm.is_cloned is False - - -def test_clone_repo() -> None: - 
rm = repository.RepoMeta(remote_git_url=REMOTE_GIT_URL, name="smiths") - r = repository.clone(rm) - assert rm.is_cloned - assert r.head() - - -def test_get_checkout_dir(tmpdir: Path) -> None: - with mock.patch.dict(os.environ, {"GML_DICTIONARY_HOME": f"{tmpdir}/dictionaries"}): - chk_dir = repository.get_checkout_dir(repo_name="smokes", commit_ref="sss") - assert chk_dir == f"{tmpdir}/dictionaries/smokes/sss" - - -def test_get_commit_id() -> None: - rm = repository.RepoMeta(remote_git_url=REMOTE_GIT_URL, name="smiths") - repo = repository.clone(rm) - - commands = { - b"f7ba557228bc113c92387c4eb6160621d27b53ef": repository.RepoCheckout( - repo=rm, path="", commit="2.4.0" - ), - b"1595aef2484ab6fa6c945950b296c4031c2606fd": repository.RepoCheckout( - repo=rm, path="", commit="2.3.0" - ), - b"7107e8116ce6ed8185626570dcba14b46e8e4d27": repository.RepoCheckout( - repo=rm, path="", commit="release/avery", is_tag=False - ), - } - for sha, command in commands.items(): - assert sha == repository.get_commit_id(repo, command.ref) - - -def test_checkout() -> None: - repo = repository.RepoMeta(remote_git_url=REMOTE_GIT_URL, name="smiths") - command = repository.RepoCheckout( - repo=repo, path="gdcdictionary/schemas", commit="2.3.0", override=True - ) - - chk_dir = Path(repository.checkout(command)) - assert chk_dir.exists() - - entries = [f.name for f in chk_dir.iterdir()] - assert "program.yaml" in entries diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index f01efd7..6b8136e 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -4,11 +4,12 @@ import pytest import yaml -from psqlgml import dictionary, schema +from psqlgml import schema +from psqlgml.dictionaries import schemas from tests.helpers import SchemaInfo -def test_generate(local_dictionary: dictionary.Dictionary, tmpdir: Path) -> None: +def test_generate(local_dictionary: schemas.Dictionary, tmpdir: Path) -> None: output_location = f"{tmpdir}" schema.generate( 
output_location=output_location, diff --git a/tests/unit/test_validators.py b/tests/unit/test_validators.py index 576aa67..a74fd11 100644 --- a/tests/unit/test_validators.py +++ b/tests/unit/test_validators.py @@ -2,7 +2,8 @@ import pytest -from psqlgml import dictionary, types, typings, validators +from psqlgml import types, typings, validators +from psqlgml.dictionaries import schemas pytestmark = [pytest.mark.validation] @@ -14,7 +15,7 @@ def __call__(self, data_file: str) -> validators.ValidationRequest: @pytest.fixture() def validation_request( - data_dir: str, local_dictionary: dictionary.Dictionary, test_schema: types.GmlSchema + data_dir: str, local_dictionary: schemas.Dictionary, test_schema: types.GmlSchema ) -> Callable[[str], validators.ValidationRequest]: def create_request(data_file: str) -> validators.ValidationRequest: return validators.ValidationRequest( diff --git a/tox.ini b/tox.ini index 1fa7a89..d45469f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,7 @@ [tox] minversion = 3.4.0 envlist = py{39,38,37,36} +isolated_build = True skip_missing_interpreters = true [pytest] @@ -11,7 +12,8 @@ markers = dictionary: dictionary loading test validation: data validation tests testpaths = - tests + tests/unit + tests/integration [flake8] ignore = E203, E266, E501, W503, F403, F401