Skip to content

Commit

Permalink
chore(repository): Use builders to load proper dictionary (#11)
Browse files Browse the repository at this point in the history
Use builder pattern to load dictionary to allow support for more dictionary schema locations in the future
  • Loading branch information
kulgan authored Apr 3, 2022
1 parent c6a4d11 commit 0603d71
Show file tree
Hide file tree
Showing 23 changed files with 520 additions and 318 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
rev: v4.1.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: fix-encoding-pragma
args: [--remove]
- id: check-yaml
- repo: https://github.com/psf/black
rev: 21.8b0
rev: 22.3.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.9.3
rev: 5.10.1
hooks:
- id: isort
name: isort (python)
Expand All @@ -23,12 +23,12 @@ repos:
name: isort (pyi)
types: [ pyi ]
- repo: https://github.com/Yelp/detect-secrets
rev: v1.1.0
rev: v1.2.0
hooks:
- id: detect-secrets
args: [ '--baseline', '.secrets.baseline' ]
- repo: https://github.com/pycqa/flake8
rev: '3.9.2'
rev: 4.0.1
hooks:
- id: flake8
- repo: https://github.com/pre-commit/pygrep-hooks
Expand Down
18 changes: 9 additions & 9 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "1.1.0",
"version": "1.2.0",
"plugins_used": [
{
"name": "ArtifactoryDetector"
Expand Down Expand Up @@ -103,29 +103,29 @@
}
],
"results": {
"tests/unit/test_repository.py": [
"tests/integration/test_git_repository.py": [
{
"type": "Hex High Entropy String",
"filename": "tests/unit/test_repository.py",
"filename": "tests/integration/test_git_repository.py",
"hashed_secret": "d262efa598a19d120989a1e19864171abe8efcb1",
"is_verified": false,
"line_number": 41
"line_number": 19
},
{
"type": "Hex High Entropy String",
"filename": "tests/unit/test_repository.py",
"filename": "tests/integration/test_git_repository.py",
"hashed_secret": "902b84b9dcfc39d52a0c959c5a10487e675c4667",
"is_verified": false,
"line_number": 44
"line_number": 20
},
{
"type": "Hex High Entropy String",
"filename": "tests/unit/test_repository.py",
"filename": "tests/integration/test_git_repository.py",
"hashed_secret": "4a9e2768850629c1bc1a0f9fac1fba06d6f549c9",
"is_verified": false,
"line_number": 47
"line_number": 21
}
]
},
"generated_at": "2021-08-19T18:02:21Z"
"generated_at": "2022-04-03T02:16:12Z"
}
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ install_requires =
attrs
click
colored
dulwich
dulwich>=0.20.31
graphviz
Jinja2
jsonschema
Expand Down
9 changes: 8 additions & 1 deletion src/psqlgml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pkg_resources import get_distribution

from psqlgml.dictionary import Association, Dictionary, from_object, load, load_local
from psqlgml.dictionaries.readers import DictionaryReader, load, load_local
from psqlgml.dictionaries.schemas import Association, Dictionary, from_object
from psqlgml.resources import ResourceFile, load_by_resource, load_resource
from psqlgml.schema import generate
from psqlgml.schema import read as read_schema
Expand All @@ -11,7 +12,9 @@
GmlEdge,
GmlNode,
GmlSchema,
RenderFormat,
SystemAnnotation,
ValidatorType,
)
from psqlgml.validators import DataViolation, ValidationRequest, validate
from psqlgml.visualization import draw
Expand All @@ -22,12 +25,14 @@
"Association",
"DataViolation",
"Dictionary",
"DictionaryReader",
"DictionarySchema",
"DictionarySchemaDict",
"GmlData",
"GmlEdge",
"GmlSchema",
"ResourceFile",
"RenderFormat",
"SystemAnnotation",
"ValidationRequest",
"draw",
Expand All @@ -39,4 +44,6 @@
"from_object",
"read_schema",
"validate",
"ValidatorType",
"VERSION",
]
51 changes: 34 additions & 17 deletions src/psqlgml/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@
import click
import yaml

from psqlgml import VERSION
from psqlgml import dictionary as d
from psqlgml import schema, validators, visualization
from psqlgml.types import RenderFormat, ValidatorType
import psqlgml

__all__: List[str] = []

Expand All @@ -23,7 +20,7 @@ class LoggingConfig:


@click.group()
@click.version_option(VERSION)
@click.version_option(psqlgml.VERSION)
def app() -> None:
"""psqlgml script for generating, validating and viewing graph data"""
global logger
Expand Down Expand Up @@ -72,19 +69,35 @@ def app() -> None:
is_flag=True,
help="Force regeneration if already exists",
)
@click.option(
"-t",
"--tag/--no-tag",
type=bool,
default=True,
is_flag=True,
help="True if specified version is a tag, defaults to True",
)
@app.command(name="generate")
def schema_gen(
dictionary: str, output_dir: str, version: str, name: str, schema_path: str, force: bool
dictionary: str,
output_dir: str,
version: str,
name: str,
schema_path: str,
force: bool,
tag: bool,
) -> None:
"""Generate schema for specified dictionary"""
global logger
logger.debug(f"Generating psqlgml schema for {dictionary} Dictionary")

loaded_dictionary = d.load(
version=version, name=name, git_url=dictionary, schema_path=schema_path, overwrite=force
current_dictionary = (
psqlgml.DictionaryReader(name, version)
.git(url=dictionary, schema_path=schema_path, overwrite=force, is_tag=tag)
.read()
)
schema_file = schema.generate(
loaded_dictionary=loaded_dictionary,
schema_file = psqlgml.generate(
loaded_dictionary=current_dictionary,
output_location=output_dir,
)
logging.info(f"schema generation completed successfully: {schema_file}")
Expand Down Expand Up @@ -122,17 +135,17 @@ def validate_file(
data_file: str,
dictionary: str,
data_dir: str,
validator: ValidatorType,
validator: psqlgml.ValidatorType,
) -> None:
global logger
logger.debug(f"running {validator} validators for {data_dir}/{data_file}")

gml_schema = schema.read(dictionary, version)
loaded = d.load(name=dictionary, version=version)
request = validators.ValidationRequest(
gml_schema = psqlgml.read_schema(dictionary, version)
loaded = psqlgml.load(name=dictionary, version=version)
request = psqlgml.ValidationRequest(
data_file=data_file, data_dir=data_dir, schema=gml_schema, dictionary=loaded
)
validators.validate(
psqlgml.validate(
request=request,
validator=validator,
print_error=True,
Expand Down Expand Up @@ -161,10 +174,14 @@ def validate_file(
@click.option("-s", "--show/--no-show", is_flag=True, default=True)
@app.command(name="visualize", help="Visualize a resource file using graphviz")
def visualize_data(
output_dir: str, data_dir: str, data_file: str, output_format: RenderFormat, show: bool
output_dir: str,
data_dir: str,
data_file: str,
output_format: psqlgml.RenderFormat,
show: bool,
) -> None:

visualization.draw(data_dir, data_file, output_dir, output_format, show_rendered=show)
psqlgml.draw(data_dir, data_file, output_dir, output_format, show_rendered=show)


def configure_logger(cfg: LoggingConfig) -> None:
Expand Down
Empty file.
112 changes: 112 additions & 0 deletions src/psqlgml/dictionaries/readers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import logging
import os
from pathlib import Path
from typing import Optional, cast

from psqlgml.dictionaries import repository, schemas

__all__ = ["load", "load_local", "DictionaryReader"]

logger = logging.getLogger(__name__)


class DictionaryReader:
    """Builder that records where a dictionary lives and then reads it.

    Create it with the dictionary ``name`` and ``version``, optionally
    configure a source with :meth:`local` and/or :meth:`git` (both return
    ``self`` so the calls can be chained), then call :meth:`read`.
    """

    def __init__(self, name: str, version: str) -> None:
        """Store the dictionary identity and set default source settings.

        Args:
            name: name/label of the dictionary
            version: version of the dictionary to read
        """
        self.name = name
        self.version = version

        # Remote (git) settings; these stay at their defaults until git() is called.
        self._url: Optional[str] = None
        self._is_tag: bool = True
        self._overwrite: bool = False
        self._schema_path: str = "gdcdictionary/schemas"

        # GML_DICTIONARY_HOME, when set, replaces the per-user default location.
        default_home = f"{Path.home()}/.gml/dictionaries"
        self._base_dir: Path = Path(os.getenv("GML_DICTIONARY_HOME", default_home))

        self.reader: Optional[repository.Repository] = None

    def local(self, base_directory: Optional[Path] = None) -> "DictionaryReader":
        """Point the reader at a local base directory.

        Args:
            base_directory: directory to use; when falsy the current base
                directory is left unchanged
        Returns:
            This reader, to allow chaining
        """
        logger.debug(f"Reading local Dictionary {self.name}: {self.version} @ {base_directory}")
        if base_directory:
            self._base_dir = base_directory
        return self

    def git(
        self,
        url: str,
        overwrite: bool,
        schema_path: str = "gdcdictionary/schemas",
        is_tag: bool = True,
    ) -> "DictionaryReader":
        """Record the git settings used when the dictionary is read remotely.

        Args:
            url: URL handed to the git repository reader
            overwrite: when True, ``read`` skips the local copy even if one exists
            schema_path: schema path handed to the git repository reader
            is_tag: flag handed to the git repository reader
        Returns:
            This reader, to allow chaining
        """
        logger.debug(f"Reading remote Dictionary {self.name}: {self.version} @ {url}")

        self._schema_path = schema_path
        self._overwrite = overwrite
        self._is_tag = is_tag
        self._url = url
        return self

    def is_preloaded_dictionary(self) -> bool:
        """Checks if a dictionary with name and version has been previously loaded"""
        version_dir = Path(f"{self._base_dir}/{self.name}/{self.version}")
        return version_dir.exists()

    def read(self) -> schemas.Dictionary:
        """Read the dictionary, using the local copy when one is available.

        The local repository is used only when the version directory already
        exists and ``overwrite`` was not requested; in every other case the
        git repository reader is used.

        Returns:
            The Dictionary returned by the selected repository's ``read``
        """
        use_local_copy = self.is_preloaded_dictionary() and not self._overwrite
        if not use_local_copy:
            # NOTE(review): cast() only satisfies the type checker; _url is still
            # None here when git() was never called - confirm GitRepository copes.
            remote = repository.GitRepository(
                name=self.name,
                url=cast(str, self._url),
                schema_path=self._schema_path,
                force=self._overwrite,
                is_tag=self._is_tag,
            )
            return remote.read(self.version)

        local_copy = repository.LocalRepository(name=self.name, base_directory=self._base_dir)
        return local_copy.read(self.version)


def load_local(
    name: str, version: str, dictionary_location: Optional[str] = None
) -> schemas.Dictionary:
    """Reads a dictionary through a DictionaryReader configured for a local location

    Args:
        name: name/label used to save the dictionary locally
        version: version number of the saved dictionary
        dictionary_location: base directory where all dictionaries are dumped,
            when empty or omitted the reader's default base directory is used
    Returns:
        The Dictionary instance produced by ``DictionaryReader.read``
    """
    # NOTE(review): no git URL is configured on this path; what happens when the
    # version has not been downloaded depends on DictionaryReader.read - confirm.
    reader = DictionaryReader(name, version)
    if dictionary_location:
        location: Optional[Path] = Path(dictionary_location)
    else:
        location = None
    return reader.local(location).read()


def load(
    version: str,
    overwrite: bool = False,
    name: str = "gdcdictionary",
    schema_path: str = "gdcdictionary/schemas",
    git_url: str = "https://github.com/NCI-GDC/gdcdictionary.git",
    is_tag: bool = True,
) -> schemas.Dictionary:
    """Loads a dictionary instance through a git-configured DictionaryReader

    A previously loaded copy of the same name and version is reused by the
    reader unless ``overwrite`` is set.

    Args:
        version: dictionary version number
        overwrite: skip any previously loaded copy and read from git, defaults to false
        name: name/label used to save the dictionary locally, defaults to gdcdictionary
        schema_path: path to the dictionary files within the dictionary git repository
        git_url: URL to the git repository
        is_tag: whether version is a tag (True) or a commit (False), defaults to True
    Returns:
        A Dictionary instance
    """
    reader = DictionaryReader(name, version)
    configured = reader.git(
        url=git_url,
        overwrite=overwrite,
        schema_path=schema_path,
        is_tag=is_tag,
    )
    return configured.read()
Loading

0 comments on commit 0603d71

Please sign in to comment.