From be3aa205e33bae9169853cd7c40f519b8a49a699 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 10:36:14 +0200 Subject: [PATCH 01/26] load ontologies --- kgforge/core/archetypes/model.py | 4 +- kgforge/specializations/models/demo_model.py | 6 +- .../specializations/models/rdf/collectors.py | 16 + .../models/rdf/pyshacl_shape_wrapper.py | 288 ++++++++++++++++++ ...vice.py => rdf_model_directory_service.py} | 60 +++- ..._service.py => rdf_model_store_service.py} | 116 ++++--- .../models/rdf/{service.py => rdf_service.py} | 155 +++------- kgforge/specializations/models/rdf_model.py | 38 +-- .../specializations/stores/bluebrain_nexus.py | 2 +- .../specializations/stores/nexus/service.py | 29 +- 10 files changed, 505 insertions(+), 209 deletions(-) create mode 100644 kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py rename kgforge/specializations/models/rdf/{directory_service.py => rdf_model_directory_service.py} (62%) rename kgforge/specializations/models/rdf/{store_service.py => rdf_model_store_service.py} (64%) rename kgforge/specializations/models/rdf/{service.py => rdf_service.py} (57%) diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index 461a5326..d2002d4e 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -193,7 +193,9 @@ def _initialize_service(self, source: str, **source_config) -> Any: @staticmethod @abstractmethod - def _service_from_directory(dirpath: Path, context_iri: Optional[str]) -> Any: + def _service_from_directory( + ontologies_path: Path, shapes_path: Path, context_iri: Optional[str] + ) -> Any: pass @staticmethod diff --git a/kgforge/specializations/models/demo_model.py b/kgforge/specializations/models/demo_model.py index c00e728f..cc873adf 100644 --- a/kgforge/specializations/models/demo_model.py +++ b/kgforge/specializations/models/demo_model.py @@ -15,7 +15,7 @@ import json import re from pathlib import Path -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple, Union, Type from kgforge.core import Resource from kgforge.core.archetypes import Mapping, Model @@ -71,11 +71,11 @@ def _mappings(self, source: str) -> Dict[str, List[str]]: raise ValueError("unrecognized source") return mappings - def mapping(self, entity: str, source: str, type: Callable) -> Mapping: + def mapping(self, entity: str, source: str, type: Type[Mapping]) -> Mapping: filename = f"{entity}.hjson" filepath = Path(self.source, "mappings", source, type.__name__, filename) if filepath.is_file(): - return type.load(filepath) + return type.load(filepath) # TODO should be str raise ValueError("unrecognized entity type or source") diff --git a/kgforge/specializations/models/rdf/collectors.py b/kgforge/specializations/models/rdf/collectors.py index 8e3bac1b..efedef71 100644 --- a/kgforge/specializations/models/rdf/collectors.py +++ b/kgforge/specializations/models/rdf/collectors.py @@ -488,3 +488,19 @@ def get_node_path(node: NodeProperties, path: URIRef, field: str): else: result.append(values) return result + + +ALL_COLLECTORS = [ + AndCollector, + OrCollector, + PropertyCollector, + NodeCollector, + PropertyCollector, + MinCountCollector, + DatatypeCollector, + InCollector, + ClassCollector, + NodeKindCollector, + XoneCollector, + HasValueCollector +] diff --git a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py new file mode 100644 index 00000000..bddfbba9 --- /dev/null +++ 
b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py @@ -0,0 +1,288 @@ +import pyshacl +from pyshacl import Shape, ShapesGraph +from rdflib import Graph, URIRef +from pyshacl.constraints import ALL_CONSTRAINT_PARAMETERS + + +from kgforge.specializations.models.rdf.collectors import ALL_COLLECTORS + +from time import perf_counter +from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Type, Union, Dict + +from rdflib import BNode, Literal, URIRef + +from pyshacl.consts import ( + SH_Info, + SH_resultSeverity, + SH_Warning, +) +from pyshacl.errors import ConstraintLoadError, ConstraintLoadWarning, ReportableRuntimeError, \ + ShapeLoadError + +from pyshacl.pytypes import GraphLike + +if TYPE_CHECKING: + from pyshacl.constraints import ConstraintComponent + from pyshacl.shapes_graph import ShapesGraph + + +ALL_COLLECTORS_MAP = {c.constraint(): c for c in ALL_COLLECTORS} + + +class ShapeWrapper(Shape): + __slots__ = ('__dict__',) + + def __init__(self, shape: Shape) -> None: + super().__init__(shape.sg, shape.node, shape._p, shape._path, shape.logger) + + def parameters(self): + return ( + p for p, v in self.sg.predicate_objects(self.node) + if p in ALL_CONSTRAINT_PARAMETERS + ) + + def traverse(self, predecessors: Set[URIRef]) -> Tuple[List, Dict]: + """ traverses the Shape SHACL properties to collect constrained properties + + This function is injected to pyshacl Shape object in order to traverse the Shacl graph. + It will call a specific collector depending on the SHACL property present in the NodeShape + + Args: + predecessors: list of nodes that have being traversed, used to break circular + recursion + + Returns: + properties, attributes: Tuple(list,dict), the collected properties and attributes + respectively gathered from the collectors + """ + + parameters = self.parameters() + properties = list() + attributes = dict() + done_collectors = set() + for param in iter(parameters): + if param in ALL_COLLECTORS_MAP: + constraint_collector = ALL_COLLECTORS_MAP[param] + if constraint_collector not in done_collectors: + c = constraint_collector(self) + predecessors.add(self.node) + props, attrs = c.collect(predecessors) + if attrs: + attributes.update(attrs) + if props: + properties.extend(props) + done_collectors.add(constraint_collector) + if predecessors: + predecessors.remove(self.node) + else: + # FIXME: there are some SHACL constrains that are not implemented + # raise IndexError(f"{param} not implemented!") + pass + + return properties, attributes + + def validate( + self, + target_graph: GraphLike, + focus: Optional[ + Union[ + Tuple[Union[URIRef, BNode]], + List[Union[URIRef, BNode]], + Set[Union[URIRef, BNode]], + Union[URIRef, BNode], + ] + ] = None, + abort_on_first: Optional[bool] = False, + allow_infos: Optional[bool] = False, + allow_warnings: Optional[bool] = False, + _evaluation_path: Optional[List] = None, + ): + if self.deactivated: + if self.sg.debug: + self.logger.debug(f"Skipping shape because it is deactivated: {str(self)}") + return True, [] + if focus is not None: + lh_shape = False + rh_shape = True + self.logger.debug(f"Running evaluation of Shape {str(self)}") + if not isinstance(focus, (tuple, list, set)): + focus = [focus] + self.logger.debug(f"Shape was passed {len(focus)} Focus Node/s to evaluate.") + if len(focus) < 1: + return True, [] + else: + lh_shape = True + rh_shape = False + self.logger.debug(f"Checking if Shape {str(self)} defines its own targets.") + self.logger.debug("Identifying targets to find focus nodes.") + focus = 
self.focus_nodes(target_graph) + self.logger.debug(f"Found {len(focus)} Focus Nodes to evaluate.") + if len(focus) < 1: + # It's possible for shapes to have _no_ focus nodes + # (they are called in other ways) + if self.sg.debug: + self.logger.debug( + f"Skipping shape {str(self)} because it found no focus nodes.") + return True, [] + else: + self.logger.debug(f"Running evaluation of Shape {str(self)}") + if _evaluation_path is None: + _evaluation_path = [] + print(len(_evaluation_path)) + # elif len(_evaluation_path) >= 30: + # # 27 is the depth required to successfully do the meta-shacl test on shacl.ttl + # path_str = " -> ".join((str(e) for e in _evaluation_path)) + # raise ReportableRuntimeError("Evaluation path too deep!\n{}".format(path_str)) + t1 = perf_counter() + # Lazy import here to avoid an import loop + CONSTRAINT_PARAMETERS, PARAMETER_MAP = getattr( + pyshacl.module, 'CONSTRAINT_PARAMS', (None, None)) + if not CONSTRAINT_PARAMETERS or not PARAMETER_MAP: + from pyshacl.constraints import ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP + + setattr(pyshacl.shape, 'CONSTRAINT_PARAMS', + (ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP)) + + CONSTRAINT_PARAMETERS = ALL_CONSTRAINT_PARAMETERS + PARAMETER_MAP = CONSTRAINT_PARAMETERS_MAP + if self.sg.js_enabled or self._advanced: + search_parameters = CONSTRAINT_PARAMETERS.copy() + constraint_map = PARAMETER_MAP.copy() + if self._advanced: + from pyshacl.constraints.advanced import ExpressionConstraint, SH_expression + + search_parameters.append(SH_expression) + constraint_map[SH_expression] = ExpressionConstraint + if self.sg.js_enabled: + from pyshacl.extras.js.constraint import JSConstraint, SH_js + + search_parameters.append(SH_js) + constraint_map[SH_js] = JSConstraint + else: + search_parameters = CONSTRAINT_PARAMETERS + constraint_map = PARAMETER_MAP + parameters = (p for p, v in self.sg.predicate_objects(self.node) if p in search_parameters) + reports = [] + focus_value_nodes = self.value_nodes(target_graph, focus) + filter_reports: bool = False + allow_conform: bool = False + allowed_severities: Set[URIRef] = set() + if allow_infos: + allowed_severities.add(SH_Info) + if allow_warnings: + allowed_severities.add(SH_Info) + allowed_severities.add(SH_Warning) + if allow_infos or allow_warnings: + if self.severity in allowed_severities: + allow_conform = True + else: + filter_reports = True + + non_conformant = False + done_constraints = set() + run_count = 0 + _evaluation_path.append(self) + if self.sg.debug: + path_str = " -> ".join((str(e) for e in _evaluation_path)) + self.logger.debug(f"Current shape evaluation path: {path_str}") + constraint_components = [constraint_map[p] for p in iter(parameters)] + constraint_component: Type['ConstraintComponent'] + for constraint_component in constraint_components: + if constraint_component in done_constraints: + continue + try: + # if self.sg.debug: + # self.logger.debug(f"Constructing Constraint Component: {repr(constraint_component)}") + c = constraint_component(self) + except ConstraintLoadWarning as w: + self.logger.warning(repr(w)) + continue + except ConstraintLoadError as e: + self.logger.error(repr(e)) + raise e + _e_p_copy = _evaluation_path[:] + _e_p_copy.append(c) + if self.sg.debug: + self.logger.debug(f"Checking conformance for constraint: {str(c)}") + ct1 = perf_counter() + if self.sg.debug: + path_str = " -> ".join((str(e) for e in _e_p_copy)) + self.logger.debug(f"Current constraint evaluation path: {path_str}") + _is_conform, _reports = 
c.evaluate(target_graph, focus_value_nodes, _e_p_copy) + ct2 = perf_counter() + if self.sg.debug: + elapsed = ct2 - ct1 + self.logger.debug( + f"Milliseconds to check constraint {str(c)}: {elapsed * 1000.0:.3f}ms") + if _is_conform: + self.logger.debug(f"DataGraph conforms to constraint {c}.") + elif allow_conform: + self.logger.debug( + f"Focus nodes do _not_ conform to constraint {c} but given severity is allowed.") + else: + self.logger.debug(f"Focus nodes do _not_ conform to constraint {c}.") + if lh_shape or (not rh_shape): + for v_str, v_node, v_parts in _reports: + self.logger.debug(v_str) + + if _is_conform or allow_conform: + ... + elif filter_reports: + all_allow = True + for v_str, v_node, v_parts in _reports: + severity_bits = list( + filter(lambda p: p[0] == v_node and p[1] == SH_resultSeverity, v_parts)) + if severity_bits: + all_allow = all_allow and (severity_bits[0][2] in allowed_severities) + non_conformant = non_conformant or (not all_allow) + else: + non_conformant = non_conformant or (not _is_conform) + reports.extend(_reports) + run_count += 1 + done_constraints.add(constraint_component) + if non_conformant and abort_on_first: + break + applicable_custom_constraints = self.find_custom_constraints() + for a in applicable_custom_constraints: + if non_conformant and abort_on_first: + break + _e_p_copy2 = _evaluation_path[:] + validator = a.make_validator_for_shape(self) + _e_p_copy2.append(validator) + _is_conform, _r = validator.evaluate(target_graph, focus_value_nodes, _e_p_copy2) + non_conformant = non_conformant or (not _is_conform) + reports.extend(_r) + run_count += 1 + t2 = perf_counter() + if self.sg.debug: + elapsed = t2 - t1 + self.logger.debug( + f"Milliseconds to evaluate shape {str(self)}: {elapsed * 1000.0:.3f}ms") + # print(_evaluation_path, "Passes" if not non_conformant else "Fails") + return (not non_conformant), reports + + + +class ShapesGraphWrapper(ShapesGraph): + + def __init__(self, graph: Graph) -> None: + super().__init__(graph) + # the following line triggers the shape loading -> see pyshacl.ShapesGraph + self._shapes = self.shapes + + def lookup_shape_from_node(self, node: URIRef) -> Optional[ShapeWrapper]: + """ Overwrite function to inject the transverse function for only to requested nodes. + + Args: + node (URIRef): The node to look up. + + Returns: + Shape: The Shacl shape of the requested node. 
+ """ + shape: Shape = self._node_shape_cache[node] + if shape: + return ShapeWrapper(shape) + # if not hasattr(shape_wrapper, "traverse"): + # shape_wrapper.traverse = types.MethodType(traverse, shape_wrapper) + # return shape_wrapper + return None \ No newline at end of file diff --git a/kgforge/specializations/models/rdf/directory_service.py b/kgforge/specializations/models/rdf/rdf_model_directory_service.py similarity index 62% rename from kgforge/specializations/models/rdf/directory_service.py rename to kgforge/specializations/models/rdf/rdf_model_directory_service.py index 789c8e9a..8253e25b 100644 --- a/kgforge/specializations/models/rdf/directory_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_directory_service.py @@ -21,22 +21,23 @@ from kgforge.core.commons.context import Context from kgforge.specializations.models.rdf.node_properties import NodeProperties -from kgforge.specializations.models.rdf.service import RdfService, ShapesGraphWrapper +from kgforge.specializations.models.rdf.rdf_service import RdfService +from kgforge.specializations.models.rdf.pyshacl_shape_wrapper import ShapesGraphWrapper class DirectoryService(RdfService): - def __init__(self, dirpath: Path, context_iri: str) -> None: - self._graph = load_rdf_files(dirpath) - self._sg = ShapesGraphWrapper(self._graph) - super().__init__(self._graph, context_iri) + def __init__(self, ontologies_path: Path, shapes_path: Path, context_iri: str) -> None: + g = Graph() + g = load_rdf_files(ontologies_path, g) + g = load_rdf_files(shapes_path, g) - def schema_source_id(self, schema_iri: str) -> str: - # FIXME should return the file path where the schema is in - return schema_iri + self._graph = g + self._shapes_graph = ShapesGraphWrapper(self._graph) + super().__init__(self._graph, context_iri) def materialize(self, iri: URIRef) -> NodeProperties: - sh = self._sg.lookup_shape_from_node(iri) + sh = self._shapes_graph.lookup_shape_from_node(iri) predecessors = set() props, attrs = sh.traverse(predecessors) if props: @@ -61,7 +62,22 @@ def resolve_context(self, iri: str) -> Dict: def generate_context(self) -> Dict: return self._generate_context() - def _build_shapes_map(self) -> Dict: + def _build_ontology_map(self) -> Dict[str, URIRef]: + query = """ + PREFIX rdfs: + PREFIX sh: + SELECT ?id ?label WHERE { + ?id a owl:Class ; + rdfs:label ?label + } + """ # TODO CHANGE + res = self._graph.query(query) + return { + row["label"]: URIRef(row["id"]) + for row in res + } + + def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: query = """ PREFIX rdfs: PREFIX sh: @@ -73,18 +89,32 @@ def _build_shapes_map(self) -> Dict: ?shape a rdfs:Class } } - } ORDER BY ?type""" + } ORDER BY ?type + """ res = self._graph.query(query) - return {row["type"]: row["shape"] for row in res} + + class_being_shaped_id_to_shape_uri: Dict[str, URIRef] = { + row["type"]: URIRef(row["shape"]) + for row in res + } + + # FIXME should return the file path where the schema is in + schema_to_file = dict( + (e, "") # TODO file source + for e in class_being_shaped_id_to_shape_uri.values() + ) + + return schema_to_file, class_being_shaped_id_to_shape_uri -def load_rdf_files(path: Path) -> Graph: - memory_graph = Graph() +def load_rdf_files(path: Path, memory_graph: Graph) -> Graph: extensions = [".ttl", ".n3", ".json", ".rdf"] for f in path.rglob(os.path.join("*.*")): if f.suffix in extensions: file_format = guess_format(f.name) if file_format is None: file_format = "json-ld" - memory_graph.parse(f.as_posix(), format=file_format) + 
t = f.as_posix() + memory_graph.parse(t, format=file_format) + return memory_graph diff --git a/kgforge/specializations/models/rdf/store_service.py b/kgforge/specializations/models/rdf/rdf_model_store_service.py similarity index 64% rename from kgforge/specializations/models/rdf/store_service.py rename to kgforge/specializations/models/rdf/rdf_model_store_service.py index eb600b12..bd29c4eb 100644 --- a/kgforge/specializations/models/rdf/store_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_store_service.py @@ -19,14 +19,16 @@ from rdflib import URIRef, Namespace, Graph from kgforge.core.commons.exceptions import RetrievalError -from kgforge.core.conversions.rdf import as_jsonld, as_graph +from kgforge.core.conversions.rdf import as_jsonld from kgforge.core.archetypes import Store from kgforge.specializations.models.rdf.node_properties import NodeProperties -from kgforge.specializations.models.rdf.service import RdfService, ShapesGraphWrapper +from kgforge.specializations.models.rdf.pyshacl_shape_wrapper import ShapesGraphWrapper, \ + ShapeWrapper +from kgforge.specializations.models.rdf.rdf_service import RdfService from kgforge.specializations.stores.nexus import Service -class StoreService(RdfService): +class RdfModelStoreService(RdfService): def __init__(self, default_store: Store, context_iri: Optional[str] = None, context_store: Optional[Store] = None) -> None: @@ -34,21 +36,22 @@ def __init__(self, default_store: Store, context_iri: Optional[str] = None, self.default_store = default_store self.context_store = context_store or default_store # FIXME: define a store independent strategy - self.NXV = Namespace(self.default_store.service.namespace) if hasattr(self.default_store.service, "namespace") \ + self.NXV = Namespace(self.default_store.service.namespace) \ + if hasattr(self.default_store.service, "namespace") \ else Namespace(Service.NEXUS_NAMESPACE_FALLBACK) - self.store_metadata_iri = self.default_store.service.store_context if hasattr(self.default_store.service, "store_context") \ + + self.store_metadata_iri = self.default_store.service.store_context \ + if hasattr(self.default_store.service, "store_context") \ else Namespace(Service.NEXUS_CONTEXT_FALLBACK) - self._shapes_to_resources: Dict + self._imported = [] - self._graph = Graph() - self._sg = ShapesGraphWrapper(self._graph) - super().__init__(self._graph, context_iri) - def schema_source_id(self, schema_iri: str) -> str: - return self._shapes_to_resources[schema_iri] + g = Graph() + self._shapes_graph = ShapesGraphWrapper(g) + super().__init__(g, context_iri) def materialize(self, iri: URIRef) -> NodeProperties: - shape = self._type_shape(iri) + shape: ShapeWrapper = self._load_and_get_type_shape(iri) predecessors = set() props, attrs = shape.traverse(predecessors) if props: @@ -57,24 +60,22 @@ def materialize(self, iri: URIRef) -> NodeProperties: def _validate(self, iri: str, data_graph: Graph) -> Tuple[bool, Graph, str]: # _type_shape will make sure all the shapes for this type are in the graph - self._type_shape(iri) + self._load_and_get_type_shape(URIRef(iri)) return validate(data_graph, shacl_graph=self._graph) def resolve_context(self, iri: str) -> Dict: - if iri in self._context_cache: - return self._context_cache[iri] - document = self.recursive_resolve(iri) - self._context_cache.update({iri: document}) - return document + if iri not in self._context_cache: + self._context_cache[iri] = self.recursive_resolve(iri) + + return self._context_cache[iri] def generate_context(self) -> Dict: - for v in 
self._shapes_to_resources.values(): - self._load_shape(v) - # reloads the shapes graph - self._sg = ShapesGraphWrapper(self._graph) + for v in self.schema_to_source.values(): + self._load_shape_and_reload_shapes_graph(v) + return self._generate_context() - def _build_shapes_map(self) -> Dict: + def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: query = f""" PREFIX rdfs: PREFIX sh: @@ -89,22 +90,24 @@ def _build_shapes_map(self) -> Dict: }} }} }} ORDER BY ?type""" + # make sure to get all types limit = 100 offset = 0 count = limit - class_to_shapes = {} - shape_resource = {} + class_being_shaped_id_to_shape_uri = {} + schema_to_resource: Dict[URIRef, URIRef] = {} + while count == limit: resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset) for r in resources: shape_uri = URIRef(r.shape) - class_to_shapes[r.type] = shape_uri - shape_resource[shape_uri] = URIRef(r.resource_id) + class_being_shaped_id_to_shape_uri[r.type] = shape_uri + schema_to_resource[shape_uri] = URIRef(r.resource_id) count = len(resources) - offset += limit - self._shapes_to_resources = shape_resource - return class_to_shapes + offset += count + + return schema_to_resource, class_being_shaped_id_to_shape_uri def recursive_resolve(self, context: Union[Dict, List, str]) -> Dict: document = {} @@ -113,10 +116,12 @@ def recursive_resolve(self, context: Union[Dict, List, str]) -> Dict: context.remove(self.store_metadata_iri) if hasattr(self.default_store.service, "store_local_context") and\ self.default_store.service.store_local_context in context: + context.remove(self.default_store.service.store_local_context) for x in context: document.update(self.recursive_resolve(x)) elif isinstance(context, str): + try: local_only = not self.default_store == self.context_store doc = self.default_store.service.resolve_context(context, local_only=local_only) @@ -125,12 +130,13 @@ def recursive_resolve(self, context: Union[Dict, List, str]) -> Dict: doc = self.context_store.service.resolve_context(context, local_only=False) except ValueError as e: raise e + document.update(self.recursive_resolve(doc)) elif isinstance(context, dict): document.update(context) return document - def _load_shape(self, resource_id): + def _load_shape(self, resource_id: URIRef): if resource_id not in self._imported: try: shape = self.context_store.retrieve(resource_id, version=None, cross_bucket=False) @@ -139,8 +145,11 @@ def _load_shape(self, resource_id): # failed, don't try to load again self._imported.append(resource_id) else: - json_dict = as_jsonld(shape, form="compacted", store_metadata=False, model_context=None, - metadata_context=None, context_resolver=self.context_store.service.resolve_context) + json_dict = as_jsonld( + shape, form="compacted", store_metadata=False, model_context=None, + metadata_context=None, + context_resolver=self.context_store.service.resolve_context + ) # this double conversion was due blank nodes were not "regenerated" with json-ld temp_graph = Graph().parse(data=json.dumps(json_dict), format="json-ld") self._graph.parse(data=temp_graph.serialize(format="n3"), format="n3") @@ -149,12 +158,39 @@ def _load_shape(self, resource_id): for dependency in shape.imports: self._load_shape(self.context.expand(dependency)) - def _type_shape(self, iri: URIRef): + def _load_and_get_type_shape(self, iri: URIRef) -> ShapeWrapper: try: - shape = self._sg.lookup_shape_from_node(iri) + return self._shapes_graph.lookup_shape_from_node(iri) except KeyError: - 
self._load_shape(self._shapes_to_resources[iri]) - # reloads the shapes graph - self._sg = ShapesGraphWrapper(self._graph) - shape = self._sg.lookup_shape_from_node(iri) - return shape + shape_resource_id = self.schema_to_source[iri] + self._load_shape_and_reload_shapes_graph(shape_resource_id) + return self._shapes_graph.lookup_shape_from_node(iri) + + def _load_shape_and_reload_shapes_graph(self, iri: URIRef): + self._load_shape(iri) + # reloads the shapes graph + self._shapes_graph = ShapesGraphWrapper(self._graph) + + def _build_ontology_map(self): + query = """ + PREFIX rdfs: + PREFIX sh: + SELECT ?id ?label WHERE { + ?id a owl:Class ; + rdfs:label ?label + } + """ + # make sure to get all types + limit = 100 + offset = 0 + count = limit + class_resource: Dict[URIRef, URIRef] = dict() + + while count == limit: + resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset) + for r in resources: + class_resource[r.label] = URIRef(r.id) + count = len(resources) + offset += count + + return class_resource diff --git a/kgforge/specializations/models/rdf/service.py b/kgforge/specializations/models/rdf/rdf_service.py similarity index 57% rename from kgforge/specializations/models/rdf/service.py rename to kgforge/specializations/models/rdf/rdf_service.py index f9894745..2fdbef8c 100644 --- a/kgforge/specializations/models/rdf/service.py +++ b/kgforge/specializations/models/rdf/rdf_service.py @@ -14,7 +14,7 @@ import types from typing import List, Dict, Tuple, Set, Optional from abc import abstractmethod -from pyshacl.constraints import ALL_CONSTRAINT_PARAMETERS +from typing import List, Dict, Tuple, Set, Optional from pyshacl.shape import Shape from pyshacl.shapes_graph import ShapesGraph from rdflib import Graph, URIRef, RDF, XSD @@ -23,108 +23,10 @@ from kgforge.core.commons.context import Context from kgforge.core.commons.exceptions import ConfigurationError from kgforge.core.conversions.rdf import as_graph -from kgforge.specializations.models.rdf.collectors import (AndCollector, NodeCollector, - PropertyCollector, MinCountCollector, - DatatypeCollector, InCollector, - ClassCollector, NodeKindCollector, - OrCollector, XoneCollector, - HasValueCollector) + from kgforge.specializations.models.rdf.node_properties import NodeProperties from kgforge.specializations.models.rdf.utils import as_term -ALL_COLLECTORS = [ - AndCollector, - OrCollector, - PropertyCollector, - NodeCollector, - PropertyCollector, - MinCountCollector, - DatatypeCollector, - InCollector, - ClassCollector, - NodeKindCollector, - XoneCollector, - HasValueCollector -] -ALL_COLLECTORS_MAP = {c.constraint(): c for c in ALL_COLLECTORS} - - -def traverse(self, predecessors: Set[URIRef]) -> Tuple[List, Dict]: - """ traverses the Shape SACL properties to collect constrained properties - - This function is injected to pyshacl Shape object in order to traverse the Shacl graph. 
- It will call a specific collector depending on the SHACL property present in the NodeShape - - Args: - predecessors: list of nodes that have being traversed, used to break circular - recursion - - Returns: - properties, attributes: Tuple(list,dict), the collected properties and attributes - respectively gathered from the collectors - """ - - parameters = self.parameters() - properties = [] - attributes = {} - done_collectors = set() - for param in iter(parameters): - if param in ALL_COLLECTORS_MAP: - constraint_collector = ALL_COLLECTORS_MAP[param] - if constraint_collector not in done_collectors: - c = constraint_collector(self) - predecessors.add(self.node) - props, attrs = c.collect(predecessors) - if attrs: - attributes.update(attrs) - if props: - properties.extend(props) - done_collectors.add(constraint_collector) - if predecessors: - predecessors.remove(self.node) - else: - # FIXME: there are some SHACL constrains that are not implemented - # raise IndexError(f"{param} not implemented!") - pass - - return properties, attributes - - -class ShapeWrapper(Shape): - __slots__ = ('__dict__',) - - def __init__(self, shape: Shape) -> None: - super().__init__(shape.sg, shape.node, shape._p, shape._path, shape.logger) - - def parameters(self): - return (p for p, v in self.sg.predicate_objects(self.node) - if p in ALL_CONSTRAINT_PARAMETERS) - - -class ShapesGraphWrapper(ShapesGraph): - - def __init__(self, graph: Graph) -> None: - super().__init__(graph) - # the following line triggers the shape loading - self._shapes = self.shapes - - def lookup_shape_from_node(self, node: URIRef) -> Shape: - """ Overwrite function to inject the transverse function for only to requested nodes. - - Args: - node (URIRef): The node to look up. - - Returns: - Shape: The Shacl shape of the requested node. - """ - shape = self._node_shape_cache[node] - if shape: - shape_wrapper = ShapeWrapper(self._node_shape_cache[node]) - if not hasattr(shape_wrapper, "traverse"): - shape_wrapper.traverse = types.MethodType(traverse, shape_wrapper) - return shape_wrapper - return shape - class RdfService: @@ -134,14 +36,14 @@ def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: raise ConfigurationError("RdfModel requires a context") self._graph = graph self._context_cache = {} - self.classes_to_shapes = self._build_shapes_map() - resolved_context = self.resolve_context(context_iri) - self.context = Context(resolved_context, context_iri) - self.types_to_shapes: Dict = self._build_types_to_shapes() + self.schema_to_source, self.classes_to_shapes = self._build_shapes_map() + self.label_to_ontology_id: Dict[str, URIRef] = self._build_ontology_map() - def schema_source_id(self, schema_iri: str) -> str: - # POLICY Should return the id of the resource containing the schema - raise NotImplementedError() + self.context = Context(self.resolve_context(context_iri), context_iri) + self.types_to_shapes = self._build_types_to_shapes() + + def schema_source(self, schema_iri: str) -> str: + return self.schema_to_source[URIRef(schema_iri)] @abstractmethod def materialize(self, iri: URIRef) -> NodeProperties: @@ -156,18 +58,23 @@ def materialize(self, iri: URIRef) -> NodeProperties: raise NotImplementedError() def validate(self, resource: Resource, type_: str): - try: - if isinstance(resource.type, list) and type_ is None: - raise ValueError("Resource has list of types as attribute and type_ parameter is not specified. 
" - "Please provide a type_ parameter to validate against it.") - if type_ is None: - shape_iri = self.types_to_shapes[resource.type] - else: - shape_iri = self.types_to_shapes[type_] - except AttributeError: - raise TypeError("resource requires a type attribute") + + if "type" not in resource.__dict__: + raise TypeError("Resource requires a type attribute") + + if isinstance(resource.type, list) and type_ is None: + raise ValueError( + "Resource has list of types as attribute and type_ parameter is not specified. " + "Please provide a type_ parameter to validate against it." + ) + + shape_iri = self.types_to_shapes.get(resource.type if type_ is None else type_, None) + + if shape_iri is None: + raise ValueError(f"Unknown type {type_}") data_graph = as_graph(resource, False, self.context, None, None) + return self._validate(shape_iri, data_graph) @abstractmethod @@ -185,7 +92,7 @@ def generate_context(self) -> Dict: raise NotImplementedError() @abstractmethod - def _build_shapes_map(self) -> Dict: + def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: """Queries the source and returns a map of owl:Class to sh:NodeShape""" raise NotImplementedError() @@ -196,11 +103,13 @@ def _build_types_to_shapes(self): for k, v in self.classes_to_shapes.items(): term = self.context.find_term(str(k)) if term: - key = term.name if term.name not in types_to_shapes: types_to_shapes[term.name] = v else: - print("WARN: duplicated term", key, k, [key], v) + print("WARN: duplicated term", term.name, k, [term.name], v) + else: + print(f"WARN: missing term: {str(k)} in context") + return types_to_shapes def _generate_context(self) -> Dict: @@ -233,7 +142,7 @@ def traverse_properties(properties) -> Tuple[Dict, Dict]: term_obj.update({"@type": "@id"}) else: try: - px, ns, n = self.graph.compute_qname(obj_type) + px, ns, n = self._graph.compute_qname(obj_type) l_prefixes.update({px: str(ns)}) if str(ns) == str(XSD): term_obj.update({"@type": ":".join((px, n))}) @@ -270,3 +179,7 @@ def traverse_properties(properties) -> Tuple[Dict, Dict]: context.update({key: terms[key] for key in sorted(terms)}) return {"@context": context} if len(context) > 0 else None + + @abstractmethod + def _build_ontology_map(self): + pass diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index be8e81b1..11397f62 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -27,9 +27,9 @@ from kgforge.core.commons.exceptions import ValidationError from kgforge.core.commons.execution import run from kgforge.specializations.models.rdf.collectors import NodeProperties -from kgforge.specializations.models.rdf.directory_service import DirectoryService -from kgforge.specializations.models.rdf.service import RdfService -from kgforge.specializations.models.rdf.store_service import StoreService +from kgforge.specializations.models.rdf.rdf_model_directory_service import DirectoryService +from kgforge.specializations.models.rdf.rdf_service import RdfService +from kgforge.specializations.models.rdf.rdf_model_store_service import RdfModelStoreService from kgforge.specializations.models.rdf.utils import as_term DEFAULT_VALUE = { @@ -90,22 +90,20 @@ def _generate_context(self) -> Context: # Templates. 
def _template(self, type: str, only_required: bool) -> Dict: - try: - uri = self.service.types_to_shapes[type] - except KeyError: - raise ValueError("type '" + type + "' not found in " + self.source) + uri = self.get_shape_from_type(type) node_properties = self.service.materialize(uri) dictionary = parse_attributes(node_properties, only_required, None) return dictionary - # Validation. + def get_shape_from_type(self, type: str): + if type not in self.service.types_to_shapes: + raise ValueError(f"Type {type} not found") def schema_id(self, type: str) -> str: - try: - shape_iri = self.service.types_to_shapes[type] - return str(self.service.schema_source_id(shape_iri)) - except KeyError: - raise ValueError("type not found") + shape_iri = self.get_shape_from_type(type) + return str(self.service.schema_source(shape_iri)) + + # Validation. def validate(self, data: Union[Resource, List[Resource]], execute_actions_before: bool, type_: str) -> None: run(self._validate_one, self._validate_many, data, execute_actions=execute_actions_before, @@ -133,8 +131,12 @@ def _validate_one(self, resource: Resource, type_: str) -> None: # Utils. @staticmethod - def _service_from_directory(dirpath: Path, context_iri: str, **dir_config) -> RdfService: - return DirectoryService(dirpath, context_iri) + def _service_from_directory( + ontologies_path: Path, shapes_path: Path, context_iri: str, **dir_config + ) -> RdfService: + return DirectoryService( + ontologies_path=ontologies_path, shapes_path=shapes_path, context_iri=context_iri + ) @staticmethod def _service_from_store(store: Callable, context_config: Optional[Dict], **source_config) -> Any: @@ -156,11 +158,11 @@ def _service_from_store(store: Callable, context_config: Optional[Dict], **sourc source_config.pop("bucket", None) context_store: Store = store(context_endpoint, context_bucket, context_token, **source_config) # FIXME: define a store independent StoreService - service = StoreService(default_store, context_iri, context_store) + service = RdfModelStoreService(default_store, context_iri, context_store) else: - service = StoreService(default_store, context_iri, None) + service = RdfModelStoreService(default_store, context_iri, None) else: - service = StoreService(default_store) + service = RdfModelStoreService(default_store) return service diff --git a/kgforge/specializations/stores/bluebrain_nexus.py b/kgforge/specializations/stores/bluebrain_nexus.py index 95350ba7..273c7b61 100644 --- a/kgforge/specializations/stores/bluebrain_nexus.py +++ b/kgforge/specializations/stores/bluebrain_nexus.py @@ -1015,7 +1015,7 @@ def rewrite_uri(self, uri: str, context: Context, **kwargs) -> str: return uri -def _create_select_query(vars_, statements, distinct, search_in_graph): +def _create_select_query(vars_, statements, distinct: bool, search_in_graph: bool): where_clauses = ( f"{{ Graph ?g {{{statements}}}}}" if search_in_graph else f"{{{statements}}}" ) diff --git a/kgforge/specializations/stores/nexus/service.py b/kgforge/specializations/stores/nexus/service.py index e4699bef..93847f12 100644 --- a/kgforge/specializations/stores/nexus/service.py +++ b/kgforge/specializations/stores/nexus/service.py @@ -107,8 +107,8 @@ def __init__( self.namespace = namespace self.project_property = project_property self.store_metadata_keys = [ - "_constrainedBy", "_createdAt", "_createdBy", "_deprecated", "_incoming", "_outgoing", - "_project", "_rev", "_schemaProject", "_self", "_updatedAt", "_updatedBy" + "_constrainedBy", "_createdAt", "_createdBy", "_deprecated", "_incoming", + 
"_outgoing", "_project", "_rev", "_schemaProject", "_self", "_updatedAt", "_updatedBy" ] self.deprecated_property = deprecated_property @@ -249,11 +249,13 @@ def get_project_context(self) -> Dict: def resolve_context(self, iri: str, local_only: Optional[bool] = False) -> Dict: if iri in self.context_cache: return self.context_cache[iri] + + context_to_resolve = ( + self.store_local_context if iri == self.store_context else iri + ) + url = "/".join((self.url_resolver, "_", quote_plus(context_to_resolve))) + try: - context_to_resolve = ( - self.store_local_context if iri == self.store_context else iri - ) - url = "/".join((self.url_resolver, "_", quote_plus(context_to_resolve))) response = requests.get(url, headers=self.headers) response.raise_for_status() resource = response.json() @@ -272,13 +274,18 @@ def resolve_context(self, iri: str, local_only: Optional[bool] = False) -> Dict: if '_deprecated' in resource and resource['_deprecated']: raise ConfigurationError(f"Context {context_to_resolve} exists but was deprecated") document = json.loads(json.dumps(resource["@context"])) + if isinstance(document, list): if self.store_context in document: document.remove(self.store_context) if self.store_local_context in document: document.remove(self.store_local_context) - self.context_cache.update({context_to_resolve: document}) - return document + + self.context_cache[context_to_resolve] = document + + # TODO context_to_resolve may be different from iri. Why is having it in the cache + # already leading to different outcome? (see first 2 lines of function) + return self.context_cache[context_to_resolve] def batch_request( self, @@ -461,9 +468,9 @@ def _prepare_uri(self, resource, schema_uri=None) -> Tuple[str, Dict]: return url, params def sync_metadata(self, resource: Resource, result: Dict) -> None: + metadata = ( - {"id": resource.id} - if hasattr(resource, "id") + {"id": resource.id} if hasattr(resource, "id") else ( {"id": resource.__getattribute__("@id")} if hasattr(resource, "@id") @@ -474,8 +481,10 @@ def sync_metadata(self, resource: Resource, result: Dict) -> None: keys.extend(["_index", "_score", "id", "@id"]) only_meta = {k: v for k, v in result.items() if k in keys} metadata.update(_remove_ld_keys(only_meta, self.metadata_context, False)) + if not hasattr(resource, "id") and not hasattr(resource, "@id"): resource.id = result.get("id", result.get("@id", None)) + resource._store_metadata = wrap_dict(metadata) def synchronize_resource( From a4f2a3d1bc1076e0615a2c3859a9ba05c013a6bb Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 12:00:02 +0200 Subject: [PATCH 02/26] source or ontology/shape path --- kgforge/core/archetypes/model.py | 17 ++++++++++++++--- kgforge/specializations/models/demo_model.py | 7 +++++-- .../models/rdf/pyshacl_shape_wrapper.py | 2 +- .../models/rdf/rdf_model_directory_service.py | 16 ++++++++++++---- kgforge/specializations/models/rdf_model.py | 19 ++++++++++++++----- 5 files changed, 46 insertions(+), 15 deletions(-) diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index d2002d4e..55373f74 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -180,9 +180,19 @@ def _initialize_service(self, source: str, **source_config) -> Any: origin = source_config.pop("origin") context_config = source_config.pop("context", {}) context_iri = context_config.get("iri", None) + if origin == "directory": - dirpath = Path(source) - return self._service_from_directory(dirpath, context_iri) + + ontology_path = 
Path(source_config["ontology_path"]) \ + if "ontology_path" in source_config else None + shapes_path = Path(source_config["shapes_path"]) \ + if "shapes_path" in source_config else None + source_path = Path(source) + + return self._service_from_directory( + source_path=source_path, ontologies_path=ontology_path, shapes_path=shapes_path, + context_iri=context_iri + ) if origin == "url": return self._service_from_url(source, context_iri) if origin == "store": @@ -194,7 +204,8 @@ def _initialize_service(self, source: str, **source_config) -> Any: @staticmethod @abstractmethod def _service_from_directory( - ontologies_path: Path, shapes_path: Path, context_iri: Optional[str] + source_path: Optional[Path], ontologies_path: Optional[Path], + shapes_path: Optional[Path], context_iri: Optional[str] ) -> Any: pass diff --git a/kgforge/specializations/models/demo_model.py b/kgforge/specializations/models/demo_model.py index cc873adf..b6256bf7 100644 --- a/kgforge/specializations/models/demo_model.py +++ b/kgforge/specializations/models/demo_model.py @@ -96,8 +96,11 @@ def _validate_one(self, resource: Resource, type_: str) -> None: # Utils. @staticmethod - def _service_from_directory(dirpath: Path, context_iri: str, **dir_config): - return ModelLibrary(dirpath) + def _service_from_directory( + source_path: Optional[Path], ontologies_path: Optional[Path], + shapes_path: Optional[Path], context_iri: Optional[str] + ): + return ModelLibrary(source_path) class ModelLibrary: diff --git a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py index bddfbba9..dada1b98 100644 --- a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py +++ b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py @@ -279,7 +279,7 @@ def lookup_shape_from_node(self, node: URIRef) -> Optional[ShapeWrapper]: Returns: Shape: The Shacl shape of the requested node. """ - shape: Shape = self._node_shape_cache[node] + shape: Shape = self._node_shape_cache.get(node, None) if shape: return ShapeWrapper(shape) # if not hasattr(shape_wrapper, "traverse"): diff --git a/kgforge/specializations/models/rdf/rdf_model_directory_service.py b/kgforge/specializations/models/rdf/rdf_model_directory_service.py index 8253e25b..4bf90d38 100644 --- a/kgforge/specializations/models/rdf/rdf_model_directory_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_directory_service.py @@ -13,7 +13,7 @@ # along with Blue Brain Nexus Forge. If not, see . 
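# With this patch, DirectoryService (below) accepts either a single source directory
# or separate ontology and shape directories. A minimal instantiation sketch; the
# directory names and context IRI are hypothetical:

from pathlib import Path

service = DirectoryService(
    source_path=None,                           # unused when the split paths are given
    ontologies_path=Path("./ontologies"),       # hypothetical directory of ontology files
    shapes_path=Path("./shapes"),               # hypothetical directory of SHACL shape files
    context_iri="https://example.org/context",  # hypothetical context IRI
)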
import os from pathlib import Path -from typing import Dict, Tuple +from typing import Dict, Tuple, Optional from pyshacl import validate from rdflib import Graph, URIRef @@ -27,10 +27,18 @@ class DirectoryService(RdfService): - def __init__(self, ontologies_path: Path, shapes_path: Path, context_iri: str) -> None: + def __init__(self, source_path: Path, ontologies_path: Optional[Path], + shapes_path: Optional[Path], context_iri: str) -> None: + g = Graph() - g = load_rdf_files(ontologies_path, g) - g = load_rdf_files(shapes_path, g) + if ontologies_path is None and shapes_path is None: + if source_path is None: + raise Exception("Must specify source path") + else: + g = load_rdf_files(source_path, g) + else: + g = load_rdf_files(ontologies_path, g) + g = load_rdf_files(shapes_path, g) self._graph = g self._shapes_graph = ShapesGraphWrapper(self._graph) diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index 11397f62..9d90aaf6 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -105,7 +105,8 @@ def schema_id(self, type: str) -> str: # Validation. - def validate(self, data: Union[Resource, List[Resource]], execute_actions_before: bool, type_: str) -> None: + def validate(self, data: Union[Resource, List[Resource]], execute_actions_before: bool, + type_: str) -> None: run(self._validate_one, self._validate_many, data, execute_actions=execute_actions_before, exception=ValidationError, monitored_status="_validated", type_=type_) @@ -132,14 +133,20 @@ def _validate_one(self, resource: Resource, type_: str) -> None: @staticmethod def _service_from_directory( - ontologies_path: Path, shapes_path: Path, context_iri: str, **dir_config + source_path: Optional[Path], + ontologies_path: Optional[Path], + shapes_path: Optional[Path], + context_iri: str, + **dir_config ) -> RdfService: return DirectoryService( + source_path=source_path, ontologies_path=ontologies_path, shapes_path=shapes_path, context_iri=context_iri ) @staticmethod - def _service_from_store(store: Callable, context_config: Optional[Dict], **source_config) -> Any: + def _service_from_store(store: Callable, context_config: Optional[Dict], + **source_config) -> Any: endpoint = source_config.get("endpoint") token = source_config.get("token") bucket = source_config["bucket"] @@ -156,7 +163,8 @@ def _service_from_store(store: Callable, context_config: Optional[Dict], **sourc source_config.pop("endpoint", None) source_config.pop("token", None) source_config.pop("bucket", None) - context_store: Store = store(context_endpoint, context_bucket, context_token, **source_config) + context_store: Store = store(context_endpoint, context_bucket, context_token, + **source_config) # FIXME: define a store independent StoreService service = RdfModelStoreService(default_store, context_iri, context_store) else: @@ -186,7 +194,8 @@ def parse_attributes(node: NodeProperties, only_required: bool, return attributes -def parse_properties(items: List[NodeProperties], only_required: bool, inherited_constraint: str) -> Dict: +def parse_properties(items: List[NodeProperties], only_required: bool, + inherited_constraint: str) -> Dict: props = {} for item in items: props.update(parse_attributes(item, only_required, inherited_constraint)) From f7ddd2dd49259024d4b99f3c18894a836b1c60f8 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 15:22:39 +0200 Subject: [PATCH 03/26] rm shapes and ontology path separation --- kgforge/core/archetypes/model.py | 16 
+---- kgforge/specializations/models/demo_model.py | 7 +-- .../models/rdf/pyshacl_shape_wrapper.py | 21 +------ .../models/rdf/rdf_model_directory_service.py | 18 +----- kgforge/specializations/models/rdf_model.py | 13 +---- .../specializations/stores/nexus/service.py | 58 +++++++++---------- 6 files changed, 39 insertions(+), 94 deletions(-) diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index 55373f74..69c50e62 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -183,16 +183,7 @@ def _initialize_service(self, source: str, **source_config) -> Any: if origin == "directory": - ontology_path = Path(source_config["ontology_path"]) \ - if "ontology_path" in source_config else None - shapes_path = Path(source_config["shapes_path"]) \ - if "shapes_path" in source_config else None - source_path = Path(source) - - return self._service_from_directory( - source_path=source_path, ontologies_path=ontology_path, shapes_path=shapes_path, - context_iri=context_iri - ) + return self._service_from_directory(dir_path=Path(source), context_iri=context_iri) if origin == "url": return self._service_from_url(source, context_iri) if origin == "store": @@ -203,10 +194,7 @@ def _initialize_service(self, source: str, **source_config) -> Any: @staticmethod @abstractmethod - def _service_from_directory( - source_path: Optional[Path], ontologies_path: Optional[Path], - shapes_path: Optional[Path], context_iri: Optional[str] - ) -> Any: + def _service_from_directory(dir_path: Path, context_iri: Optional[str]) -> Any: pass @staticmethod diff --git a/kgforge/specializations/models/demo_model.py b/kgforge/specializations/models/demo_model.py index b6256bf7..ac03d518 100644 --- a/kgforge/specializations/models/demo_model.py +++ b/kgforge/specializations/models/demo_model.py @@ -96,11 +96,8 @@ def _validate_one(self, resource: Resource, type_: str) -> None: # Utils. 
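# After this patch the directory origin is configured with a single source path again.
# A minimal model-section configuration sketch that exercises _service_from_directory
# via Model._initialize_service; the path and context IRI are hypothetical:

config = {
    "Model": {
        "name": "DemoModel",
        "origin": "directory",
        "source": "./model-directory",                      # hypothetical directory of model files
        "context": {"iri": "https://example.org/context"},  # hypothetical context IRI
    },
}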
@staticmethod - def _service_from_directory( - source_path: Optional[Path], ontologies_path: Optional[Path], - shapes_path: Optional[Path], context_iri: Optional[str] - ): - return ModelLibrary(source_path) + def _service_from_directory(dir_path: Path, context_iri: Optional[str]): + return ModelLibrary(dir_path) class ModelLibrary: diff --git a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py index dada1b98..09fcf8f1 100644 --- a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py +++ b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py @@ -1,30 +1,11 @@ -import pyshacl from pyshacl import Shape, ShapesGraph from rdflib import Graph, URIRef from pyshacl.constraints import ALL_CONSTRAINT_PARAMETERS +from typing import List, Optional, Set, Tuple, Dict from kgforge.specializations.models.rdf.collectors import ALL_COLLECTORS -from time import perf_counter -from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Type, Union, Dict - -from rdflib import BNode, Literal, URIRef - -from pyshacl.consts import ( - SH_Info, - SH_resultSeverity, - SH_Warning, -) -from pyshacl.errors import ConstraintLoadError, ConstraintLoadWarning, ReportableRuntimeError, \ - ShapeLoadError - -from pyshacl.pytypes import GraphLike - -if TYPE_CHECKING: - from pyshacl.constraints import ConstraintComponent - from pyshacl.shapes_graph import ShapesGraph - ALL_COLLECTORS_MAP = {c.constraint(): c for c in ALL_COLLECTORS} diff --git a/kgforge/specializations/models/rdf/rdf_model_directory_service.py b/kgforge/specializations/models/rdf/rdf_model_directory_service.py index 4bf90d38..fa916a61 100644 --- a/kgforge/specializations/models/rdf/rdf_model_directory_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_directory_service.py @@ -27,20 +27,8 @@ class DirectoryService(RdfService): - def __init__(self, source_path: Path, ontologies_path: Optional[Path], - shapes_path: Optional[Path], context_iri: str) -> None: - - g = Graph() - if ontologies_path is None and shapes_path is None: - if source_path is None: - raise Exception("Must specify source path") - else: - g = load_rdf_files(source_path, g) - else: - g = load_rdf_files(ontologies_path, g) - g = load_rdf_files(shapes_path, g) - - self._graph = g + def __init__(self, dir_path: Path, context_iri: str) -> None: + self._graph = load_rdf_files_into_graph(dir_path, Graph()) self._shapes_graph = ShapesGraphWrapper(self._graph) super().__init__(self._graph, context_iri) @@ -115,7 +103,7 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: return schema_to_file, class_being_shaped_id_to_shape_uri -def load_rdf_files(path: Path, memory_graph: Graph) -> Graph: +def load_rdf_files_into_graph(path: Path, memory_graph: Graph) -> Graph: extensions = [".ttl", ".n3", ".json", ".rdf"] for f in path.rglob(os.path.join("*.*")): if f.suffix in extensions: diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index 9d90aaf6..a8573a2f 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -132,17 +132,8 @@ def _validate_one(self, resource: Resource, type_: str) -> None: # Utils. 
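# load_rdf_files_into_graph (above) takes the target graph as an argument and returns
# it, so several directories can be chained into one graph, as patch 01 did with
# separate ontology and shape folders. A short usage sketch; the directory names are
# hypothetical:

from pathlib import Path
from rdflib import Graph

graph = Graph()
graph = load_rdf_files_into_graph(Path("./ontologies"), graph)  # hypothetical directory
graph = load_rdf_files_into_graph(Path("./shapes"), graph)      # hypothetical directory
print(len(graph))  # total number of triples parsed from both directories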
@staticmethod - def _service_from_directory( - source_path: Optional[Path], - ontologies_path: Optional[Path], - shapes_path: Optional[Path], - context_iri: str, - **dir_config - ) -> RdfService: - return DirectoryService( - source_path=source_path, - ontologies_path=ontologies_path, shapes_path=shapes_path, context_iri=context_iri - ) + def _service_from_directory(dir_path: Path, context_iri: str, **dir_config) -> RdfService: + return DirectoryService(dir_path=dir_path, context_iri=context_iri) @staticmethod def _service_from_store(store: Callable, context_config: Optional[Dict], diff --git a/kgforge/specializations/stores/nexus/service.py b/kgforge/specializations/stores/nexus/service.py index 93847f12..ba9b671b 100644 --- a/kgforge/specializations/stores/nexus/service.py +++ b/kgforge/specializations/stores/nexus/service.py @@ -247,44 +247,44 @@ def get_project_context(self) -> Dict: return context def resolve_context(self, iri: str, local_only: Optional[bool] = False) -> Dict: - if iri in self.context_cache: - return self.context_cache[iri] - context_to_resolve = ( self.store_local_context if iri == self.store_context else iri ) - url = "/".join((self.url_resolver, "_", quote_plus(context_to_resolve))) - try: - response = requests.get(url, headers=self.headers) - response.raise_for_status() - resource = response.json() - except Exception: - if not local_only: - try: - context = Context(context_to_resolve) - except URLError: - raise ValueError(f"{context_to_resolve} is not resolvable") + if context_to_resolve not in self.context_cache: - document = context.document["@context"] + url = "/".join((self.url_resolver, "_", quote_plus(context_to_resolve))) + + try: + response = requests.get(url, headers=self.headers) + response.raise_for_status() + resource = response.json() + except Exception as e: + if not local_only: + try: + context = Context(context_to_resolve) + except URLError: + raise ValueError(f"{context_to_resolve} is not resolvable") + + document = context.document["@context"] + else: + raise ValueError(f"{context_to_resolve} is not resolvable") else: - raise ValueError(f"{context_to_resolve} is not resolvable") - else: - # Make sure context is not deprecated - if '_deprecated' in resource and resource['_deprecated']: - raise ConfigurationError(f"Context {context_to_resolve} exists but was deprecated") - document = json.loads(json.dumps(resource["@context"])) + # Make sure context is not deprecated + if '_deprecated' in resource and resource['_deprecated']: + raise ConfigurationError( + f"Context {context_to_resolve} exists but was deprecated" + ) + document = json.loads(json.dumps(resource["@context"])) - if isinstance(document, list): - if self.store_context in document: - document.remove(self.store_context) - if self.store_local_context in document: - document.remove(self.store_local_context) + if isinstance(document, list): + if self.store_context in document: + document.remove(self.store_context) + if self.store_local_context in document: + document.remove(self.store_local_context) - self.context_cache[context_to_resolve] = document + self.context_cache[context_to_resolve] = document - # TODO context_to_resolve may be different from iri. Why is having it in the cache - # already leading to different outcome? 
(see first 2 lines of function) return self.context_cache[context_to_resolve] def batch_request( From 90196d207cfd69a92dda67a44d4540a87d5ef3f0 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 16:11:18 +0200 Subject: [PATCH 04/26] getter for store and model in forge --- kgforge/core/forge.py | 159 +++++++++--------- .../models/rdf/rdf_model_directory_service.py | 3 +- 2 files changed, 85 insertions(+), 77 deletions(-) diff --git a/kgforge/core/forge.py b/kgforge/core/forge.py index db28427c..c8950779 100644 --- a/kgforge/core/forge.py +++ b/kgforge/core/forge.py @@ -243,6 +243,14 @@ def __init__(self, configuration: Union[str, Dict], **kwargs) -> None: # Formatters. self._formatters: Optional[Dict[str, str]] = config.pop("Formatters", None) + def get_model(self) -> Model: + """Exposes the model.""" + return self._model + + def get_store(self) -> Store: + """Exposes the store.""" + return self._store + @catch def prefixes(self, pretty: bool = True) -> Optional[Dict[str, str]]: """ @@ -282,7 +290,7 @@ def types(self, pretty: bool = True) -> Optional[List[str]]: @catch def template( - self, type: str, only_required: bool = False, output: str = "hjson" + self, type: str, only_required: bool = False, output: str = "hjson" ) -> Optional[Dict]: """ Print the schema associated with a given resource type (must be listed in forge.types(...)) in hjson (output='hjson') or JSON (output='json') format. @@ -366,18 +374,18 @@ def resolvers(self, output: str = "print") -> Optional[Dict]: @catch def resolve( - self, - text: Union[str, List[str], Resource], - scope: Optional[str] = None, - resolver: Optional[str] = None, - target: Optional[str] = None, - type: Optional[str] = None, - strategy: Union[ResolvingStrategy, str] = ResolvingStrategy.BEST_MATCH, - resolving_context: Optional[Any] = None, - property_to_resolve: Optional[str] = None, - merge_inplace_as: Optional[str] = None, - limit: Optional[int] = 10, - threshold: Optional[float] = 0.5, + self, + text: Union[str, List[str], Resource], + scope: Optional[str] = None, + resolver: Optional[str] = None, + target: Optional[str] = None, + type: Optional[str] = None, + strategy: Union[ResolvingStrategy, str] = ResolvingStrategy.BEST_MATCH, + resolving_context: Optional[Any] = None, + property_to_resolve: Optional[str] = None, + merge_inplace_as: Optional[str] = None, + limit: Optional[int] = 10, + threshold: Optional[float] = 0.5, ) -> Optional[Union[Resource, List[Resource], Dict[str, List[Resource]]]]: """ Resolve text(s) or a resource into existing resources (from the configured Store) depending on the resolving strategy. @@ -462,7 +470,8 @@ def resolve( # Formatting User Interface. @catch - def format(self, what: str = None, *args, formatter: Union[Formatter, str] = Formatter.STR, uri: str = None, **kwargs) -> str: + def format(self, what: str = None, *args, formatter: Union[Formatter, str] = Formatter.STR, + uri: str = None, **kwargs) -> str: """ Select a configured formatter (see https://nexus-forge.readthedocs.io/en/latest/interaction.html#formatting) string (identified by 'what') and format it using provided '*args' :param what: a configured str format name. Required formatter:str = Formatter.STR @@ -520,7 +529,7 @@ def sources(self, pretty: bool = True) -> Optional[List[str]]: @catch def mappings( - self, source: str, pretty: bool = True + self, source: str, pretty: bool = True ) -> Optional[Dict[str, List[str]]]: """ Print(pretty=True) or return (pretty=False) configured mappings for a given source. 
@@ -534,7 +543,7 @@ def mappings( @catch def mapping( - self, entity: str, source: str, type: Callable = DictionaryMapping + self, entity: str, source: str, type: Callable = DictionaryMapping ) -> Mapping: """ Return a Mapping object of type 'type' for a resource type 'entity' and a source. @@ -548,11 +557,11 @@ def mapping( @catch def map( - self, - data: Any, - mapping: Union[Mapping, List[Mapping]], - mapper: Callable = DictionaryMapper, - na: Union[Any, List[Any]] = None, + self, + data: Any, + mapping: Union[Mapping, List[Mapping]], + mapper: Callable = DictionaryMapper, + na: Union[Any, List[Any]] = None, ) -> Union[Resource, List[Resource]]: """ Transform data to resources using transformations rules provided as mappings. The format of the data to transform @@ -570,10 +579,10 @@ def map( @catch def reshape( - self, - data: Union[Resource, List[Resource]], - keep: List[str], - versioned: bool = False, + self, + data: Union[Resource, List[Resource]], + keep: List[str], + versioned: bool = False, ) -> Union[Resource, List[Resource]]: """ Keep only a provided list of properties ('keep') from a resource of list of resources. @@ -591,11 +600,11 @@ def reshape( @catch def retrieve( - self, - id: str, - version: Optional[Union[int, str]] = None, - cross_bucket: bool = False, - **params + self, + id: str, + version: Optional[Union[int, str]] = None, + cross_bucket: bool = False, + **params ) -> Resource: """ Retrieve a resource by its identifier from the configured store and possibly at a given version. @@ -636,12 +645,12 @@ def search(self, *filters, **params) -> List[Resource]: @catch def sparql( - self, - query: str, - debug: bool = False, - limit: Optional[int] = None, - offset: Optional[int] = None, - **params + self, + query: str, + debug: bool = False, + limit: Optional[int] = None, + offset: Optional[int] = None, + **params ) -> List[Resource]: """ Search for resources using a SPARQL query. See SPARQL docs: https://www.w3.org/TR/sparql11-query. @@ -657,11 +666,11 @@ def sparql( @catch def elastic( - self, - query: str, - debug: bool = False, - limit: Optional[int] = None, - offset: Optional[int] = None, + self, + query: str, + debug: bool = False, + limit: Optional[int] = None, + offset: Optional[int] = None, ) -> List[Resource]: """ Search for resources using an ElasticSearch DSL query. See ElasticSearch DSL docs: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html. @@ -676,13 +685,13 @@ def elastic( @catch def download( - self, - data: Union[Resource, List[Resource]], - follow: str = "distribution.contentUrl", - path: str = ".", - overwrite: bool = False, - cross_bucket: bool = False, - content_type: str = None + self, + data: Union[Resource, List[Resource]], + follow: str = "distribution.contentUrl", + path: str = ".", + overwrite: bool = False, + cross_bucket: bool = False, + content_type: str = None ) -> None: """ Download files attached to a resource or a list of resources. @@ -701,7 +710,7 @@ def download( # No @catch because the error handling is done by execution.run(). def register( - self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None + self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None ) -> None: """ Store a resource or list of resources in the configured Store. @@ -714,7 +723,7 @@ def register( # No @catch because the error handling is done by execution.run(). 
def update( - self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None + self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None ) -> None: """ Update a resource or a list of resources in the configured Store. @@ -774,10 +783,10 @@ def attach(self, path: str, content_type: str = None) -> LazyAction: @catch def as_json( - self, - data: Union[Resource, List[Resource]], - expanded: bool = False, - store_metadata: bool = False, + self, + data: Union[Resource, List[Resource]], + expanded: bool = False, + store_metadata: bool = False, ) -> Union[Dict, List[Dict]]: """ Convert a resource or a list of resources to JSON. @@ -798,11 +807,11 @@ def as_json( @catch def as_jsonld( - self, - data: Union[Resource, List[Resource]], - form: str = Form.COMPACTED.value, - store_metadata: bool = False, - **params + self, + data: Union[Resource, List[Resource]], + form: str = Form.COMPACTED.value, + store_metadata: bool = False, + **params ) -> Union[Dict, List[Dict]]: """ Convert a resource or a list of resources to JSON-LD. @@ -825,7 +834,7 @@ def as_jsonld( @catch def as_graph( - self, data: Union[Resource, List[Resource]], store_metadata: bool = False + self, data: Union[Resource, List[Resource]], store_metadata: bool = False ) -> Graph: """ Convert a resource or a list of resources to a RDFLib Graph object: https://rdflib.readthedocs.io/en/stable/intro_to_graphs.html. @@ -844,12 +853,12 @@ def as_graph( @catch def as_dataframe( - self, - data: Union[Resource, List[Resource]], - na: Union[Any, List[Any]] = [None], - nesting: str = ".", - expanded: bool = False, - store_metadata: bool = False, + self, + data: Union[Resource, List[Resource]], + na: Union[Any, List[Any]] = [None], + nesting: str = ".", + expanded: bool = False, + store_metadata: bool = False, ) -> DataFrame: """ Convert a resource or a list of resources to pandas.DataFrame. @@ -874,7 +883,7 @@ def as_dataframe( @catch def from_json( - self, data: Union[Dict, List[Dict]], na: Union[Any, List[Any]] = None + self, data: Union[Dict, List[Dict]], na: Union[Any, List[Any]] = None ) -> Union[Resource, List[Resource]]: """ Convert a JSON document or a list of JSON documents to a resource or a list of resources. @@ -887,7 +896,7 @@ def from_json( @catch def from_jsonld( - self, data: Union[Dict, List[Dict]] + self, data: Union[Dict, List[Dict]] ) -> Union[Resource, List[Resource]]: """ Convert a JSON-LD document or a list of JSON-LD documents to a resource or a list of resources. @@ -899,11 +908,11 @@ def from_jsonld( @catch def from_graph( - self, - data: Graph, - type: Union[str, List[str]] = None, - frame: Dict = None, - use_model_context=False, + self, + data: Graph, + type: Union[str, List[str]] = None, + frame: Dict = None, + use_model_context=False, ) -> Union[Resource, List[Resource]]: """ Convert a RDFLib.Graph object to a resource or a list of resources. What to convert from the RDFLib.Graph can be @@ -920,7 +929,7 @@ def from_graph( @catch def from_dataframe( - self, data: DataFrame, na: Union[Any, List[Any]] = np.nan, nesting: str = "." + self, data: DataFrame, na: Union[Any, List[Any]] = np.nan, nesting: str = "." ) -> Union[Resource, List[Resource]]: """ Convert a pandas.DataFrame to a resource or a list of resources. 
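The conversion helpers whose signatures are reindented above are easiest to grasp from a short usage sketch (the configuration path and resource contents are illustrative, and the "compacted" form string is assumed to match Form.COMPACTED.value):

from kgforge.core import KnowledgeGraphForge, Resource

forge = KnowledgeGraphForge("config.yml")       # placeholder configuration file
person = Resource(type="Person", name="Jane Doe")
doc = forge.as_json(person)                     # plain JSON dict
ld = forge.as_jsonld(person, form="compacted")  # JSON-LD document
df = forge.as_dataframe([person])               # pandas DataFrame, dot-nested columns
back = forge.from_json(doc)                     # back to a Resource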
@@ -942,7 +951,7 @@ def get_model_context(self): def prepare_resolvers( - config: Dict, store_config: Dict + config: Dict, store_config: Dict ) -> Dict[str, Dict[str, Resolver]]: return { scope: dict(prepare_resolver(x, store_config) for x in configs) diff --git a/kgforge/specializations/models/rdf/rdf_model_directory_service.py b/kgforge/specializations/models/rdf/rdf_model_directory_service.py index fa916a61..63bfde3a 100644 --- a/kgforge/specializations/models/rdf/rdf_model_directory_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_directory_service.py @@ -110,7 +110,6 @@ def load_rdf_files_into_graph(path: Path, memory_graph: Graph) -> Graph: file_format = guess_format(f.name) if file_format is None: file_format = "json-ld" - t = f.as_posix() - memory_graph.parse(t, format=file_format) + memory_graph.parse(f.as_posix(), format=file_format) return memory_graph From 1532fe0a14ca2db38ef441add1ede9d9cdcb3112 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 16:46:11 +0200 Subject: [PATCH 05/26] expose unimplemented method to tests --- kgforge/core/archetypes/model.py | 3 ++- .../{rdf_service.py => rdf_model_service.py} | 13 ++++++---- ...py => rdf_model_service_from_directory.py} | 4 +-- ...ice.py => rdf_model_service_from_store.py} | 4 +-- kgforge/specializations/models/rdf_model.py | 26 ++++++++++--------- .../specializations/models/test_rdf_model.py | 4 ++- 6 files changed, 31 insertions(+), 23 deletions(-) rename kgforge/specializations/models/rdf/{rdf_service.py => rdf_model_service.py} (95%) rename kgforge/specializations/models/rdf/{rdf_model_directory_service.py => rdf_model_service_from_directory.py} (96%) rename kgforge/specializations/models/rdf/{rdf_model_store_service.py => rdf_model_service_from_store.py} (98%) diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index 69c50e62..c5a8528d 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -19,6 +19,7 @@ import hjson from pandas import DataFrame +from rdflib import URIRef from kgforge.core import Resource from kgforge.core.archetypes import Mapping @@ -151,7 +152,7 @@ def mapping(self, entity: str, source: str, type: Callable) -> Mapping: # Validation. - def schema_id(self, type: str) -> str: + def schema_id(self, type: str) -> URIRef: # POLICY Should retrieve the schema id of the given type. 
not_supported() diff --git a/kgforge/specializations/models/rdf/rdf_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py similarity index 95% rename from kgforge/specializations/models/rdf/rdf_service.py rename to kgforge/specializations/models/rdf/rdf_model_service.py index 2fdbef8c..549ba259 100644 --- a/kgforge/specializations/models/rdf/rdf_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -28,7 +28,10 @@ from kgforge.specializations.models.rdf.utils import as_term -class RdfService: +class RdfModelService: + + schema_to_source: Dict[URIRef, str] + classes_to_shapes: Dict[str, URIRef] def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: @@ -40,10 +43,10 @@ def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: self.label_to_ontology_id: Dict[str, URIRef] = self._build_ontology_map() self.context = Context(self.resolve_context(context_iri), context_iri) - self.types_to_shapes = self._build_types_to_shapes() + self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes() - def schema_source(self, schema_iri: str) -> str: - return self.schema_to_source[URIRef(schema_iri)] + def schema_source(self, schema_iri: URIRef) -> str: + return self.schema_to_source[schema_iri] @abstractmethod def materialize(self, iri: URIRef) -> NodeProperties: @@ -96,7 +99,7 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: """Queries the source and returns a map of owl:Class to sh:NodeShape""" raise NotImplementedError() - def _build_types_to_shapes(self): + def _build_types_to_shapes(self) -> Dict[str, URIRef]: """Iterates the classes_to_shapes dictionary to create a term to shape dictionary filtering the terms available in the context """ types_to_shapes: Dict = {} diff --git a/kgforge/specializations/models/rdf/rdf_model_directory_service.py b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py similarity index 96% rename from kgforge/specializations/models/rdf/rdf_model_directory_service.py rename to kgforge/specializations/models/rdf/rdf_model_service_from_directory.py index 63bfde3a..5d2f5c46 100644 --- a/kgforge/specializations/models/rdf/rdf_model_directory_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py @@ -21,11 +21,11 @@ from kgforge.core.commons.context import Context from kgforge.specializations.models.rdf.node_properties import NodeProperties -from kgforge.specializations.models.rdf.rdf_service import RdfService +from kgforge.specializations.models.rdf.rdf_model_service import RdfModelService from kgforge.specializations.models.rdf.pyshacl_shape_wrapper import ShapesGraphWrapper -class DirectoryService(RdfService): +class RdfModelServiceFromDirectory(RdfModelService): def __init__(self, dir_path: Path, context_iri: str) -> None: self._graph = load_rdf_files_into_graph(dir_path, Graph()) diff --git a/kgforge/specializations/models/rdf/rdf_model_store_service.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py similarity index 98% rename from kgforge/specializations/models/rdf/rdf_model_store_service.py rename to kgforge/specializations/models/rdf/rdf_model_service_from_store.py index bd29c4eb..60f343c3 100644 --- a/kgforge/specializations/models/rdf/rdf_model_store_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py @@ -24,11 +24,11 @@ from kgforge.specializations.models.rdf.node_properties import NodeProperties from 
kgforge.specializations.models.rdf.pyshacl_shape_wrapper import ShapesGraphWrapper, \ ShapeWrapper -from kgforge.specializations.models.rdf.rdf_service import RdfService +from kgforge.specializations.models.rdf.rdf_model_service import RdfModelService from kgforge.specializations.stores.nexus import Service -class RdfModelStoreService(RdfService): +class RdfModelServiceFromStore(RdfModelService): def __init__(self, default_store: Store, context_iri: Optional[str] = None, context_store: Optional[Store] = None) -> None: diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index a8573a2f..1cfb735c 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -27,9 +27,9 @@ from kgforge.core.commons.exceptions import ValidationError from kgforge.core.commons.execution import run from kgforge.specializations.models.rdf.collectors import NodeProperties -from kgforge.specializations.models.rdf.rdf_model_directory_service import DirectoryService -from kgforge.specializations.models.rdf.rdf_service import RdfService -from kgforge.specializations.models.rdf.rdf_model_store_service import RdfModelStoreService +from kgforge.specializations.models.rdf.rdf_model_service_from_directory import RdfModelServiceFromDirectory +from kgforge.specializations.models.rdf.rdf_model_service import RdfModelService +from kgforge.specializations.models.rdf.rdf_model_service_from_store import RdfModelServiceFromStore from kgforge.specializations.models.rdf.utils import as_term DEFAULT_VALUE = { @@ -95,13 +95,15 @@ def _template(self, type: str, only_required: bool) -> Dict: dictionary = parse_attributes(node_properties, only_required, None) return dictionary - def get_shape_from_type(self, type: str): + def get_shape_from_type(self, type: str) -> URIRef: if type not in self.service.types_to_shapes: raise ValueError(f"Type {type} not found") + return self.service.types_to_shapes[type] - def schema_id(self, type: str) -> str: - shape_iri = self.get_shape_from_type(type) - return str(self.service.schema_source(shape_iri)) + def schema_id(self, type: str) -> URIRef: + shape_iri: URIRef = self.get_shape_from_type(type) + e = self.service.schema_source(shape_iri) + return e # Validation. @@ -132,8 +134,8 @@ def _validate_one(self, resource: Resource, type_: str) -> None: # Utils. 
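# Illustration of the type -> shape -> schema chain made explicit above
# (assumes an existing RdfModel instance `model`; the "Activity" type is
# the one exercised by the tests):
shape = model.service.types_to_shapes["Activity"]  # type name -> shape URIRef
assert model.schema_id("Activity") == model.service.schema_source(shape)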
@staticmethod - def _service_from_directory(dir_path: Path, context_iri: str, **dir_config) -> RdfService: - return DirectoryService(dir_path=dir_path, context_iri=context_iri) + def _service_from_directory(dir_path: Path, context_iri: str, **dir_config) -> RdfModelService: + return RdfModelServiceFromDirectory(dir_path=dir_path, context_iri=context_iri) @staticmethod def _service_from_store(store: Callable, context_config: Optional[Dict], @@ -157,11 +159,11 @@ def _service_from_store(store: Callable, context_config: Optional[Dict], context_store: Store = store(context_endpoint, context_bucket, context_token, **source_config) # FIXME: define a store independent StoreService - service = RdfModelStoreService(default_store, context_iri, context_store) + service = RdfModelServiceFromStore(default_store, context_iri, context_store) else: - service = RdfModelStoreService(default_store, context_iri, None) + service = RdfModelServiceFromStore(default_store, context_iri, None) else: - service = RdfModelStoreService(default_store) + service = RdfModelServiceFromStore(default_store) return service diff --git a/tests/specializations/models/test_rdf_model.py b/tests/specializations/models/test_rdf_model.py index c3895c26..f7dedffc 100644 --- a/tests/specializations/models/test_rdf_model.py +++ b/tests/specializations/models/test_rdf_model.py @@ -13,6 +13,7 @@ # along with Blue Brain Nexus Forge. If not, see . import json import pytest +from rdflib import URIRef from kgforge.core import Resource from kgforge.core.commons.exceptions import ValidationError @@ -97,7 +98,8 @@ def valid_activity_resource(self, activity_json): @pytest.mark.parametrize("type_,", TYPES_SCHEMAS_MAP.keys()) def test_type_to_schema(self, rdf_model: RdfModel, type_): # FIXME TYPES_SCHEMAS_MAP should be a type to file dictionary - assert rdf_model.schema_id(type_) == TYPES_SCHEMAS_MAP[type_] + # see _build_shapes_map from RdfModelServiceFromDirectory + assert rdf_model.schema_id(type_) == URIRef(TYPES_SCHEMAS_MAP[type_]) def test_validate_one(self, rdf_model: RdfModel, valid_activity_resource): rdf_model.validate(valid_activity_resource, False, type_="Activity") From ff4dee1ce81632d595faf5eb1fc7819ca52a7594 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 17:08:41 +0200 Subject: [PATCH 06/26] sparql query rewriter out of store --- kgforge/core/archetypes/store.py | 152 ++--------------- kgforge/core/commons/sparql_query_rewriter.py | 161 ++++++++++++++++++ tests/core/archetypes/test_store.py | 2 +- 3 files changed, 179 insertions(+), 136 deletions(-) create mode 100644 kgforge/core/commons/sparql_query_rewriter.py diff --git a/kgforge/core/archetypes/store.py b/kgforge/core/archetypes/store.py index b65435d5..06e97ba4 100644 --- a/kgforge/core/archetypes/store.py +++ b/kgforge/core/archetypes/store.py @@ -34,62 +34,15 @@ QueryingError, ) from kgforge.core.commons.execution import not_supported, run +from kgforge.core.commons.sparql_query_rewriter import handle_query from kgforge.core.reshaping import collect_values # NB: Do not 'from kgforge.core.archetypes import Resolver' to avoid cyclic dependency. 
-# FIXME: need to find a comprehensive way (different than list) to get all SPARQL reserved clauses from kgforge.core.wrappings.dict import DictWrapper DEFAULT_LIMIT = 100 DEFAULT_OFFSET = 0 -SPARQL_CLAUSES = [ - "where", - "filter", - "select", - "union", - "limit", - "construct", - "optional", - "bind", - "values", - "offset", - "order by", - "prefix", - "graph", - "distinct", - "in", - "as", - "base", - "prefix", - "reduced", - "describe", - "ask", - "named", - "asc", - "desc", - "from", - "optional", - "graph", - "regex", - "union", - "str", - "lang", - "langmatches", - "datatype", - "bound", - "sameTerm", - "isIRI", - "isURI", - "isBLANK", - "isLITERAL", - "group", - "by", - "order", - "minus", - "not", - "exists" -] class Store(ABC): @@ -426,21 +379,28 @@ def search( not_supported() def sparql( - self, query: str, debug: bool, limit: int = DEFAULT_LIMIT, offset: int = DEFAULT_OFFSET, + self, query: str, + debug: bool, + limit: int = DEFAULT_LIMIT, + offset: int = DEFAULT_OFFSET, **params ) -> List[Resource]: rewrite = params.get("rewrite", True) - qr = ( - rewrite_sparql(query, self.model_context, self.service.metadata_context) - if self.model_context is not None and rewrite - else query + + qr = handle_query( + query=query, + model_context=self.model_context, + metadata_context=self.service.metadata_context, + rewrite=rewrite, + limit=limit, + offset=offset, + default_limit=DEFAULT_LIMIT, + default_offset=DEFAULT_OFFSET ) - if limit: - qr = _replace_in_sparql(qr, "LIMIT", limit, DEFAULT_LIMIT, r" LIMIT \d+") - if offset: - qr = _replace_in_sparql(qr, "OFFSET", offset, DEFAULT_OFFSET, r" OFFSET \d+") + if debug: self._debug_query(qr) + return self._sparql(qr) def _sparql(self, query: str) -> List[Resource]: @@ -527,81 +487,3 @@ def rewrite_uri(self, uri: str, context: Context, **kwargs) -> str: :return: str """ pass - - -def _replace_in_sparql(qr, what, value, default_value, search_regex, replace_if_in_query=True): - is_what_in_query = bool(re.search(f"{search_regex}", qr, flags=re.IGNORECASE)) - if is_what_in_query and value and not replace_if_in_query: - raise QueryingError( - f"Value for '{what}' is present in the provided query and set as argument: set 'replace_if_in_query' to True to replace '{what}' when present in the query.") - replace_value = f" {what} {value}" if value else ( - f" {what} {default_value}" if default_value else None) - if is_what_in_query and replace_if_in_query and replace_value: - qr = re.sub(f"{search_regex}", replace_value, qr, flags=re.IGNORECASE) - if not is_what_in_query and replace_value: - qr = f"{qr} {replace_value}" - return qr - - -def rewrite_sparql(query: str, context: Context, metadata_context) -> str: - """Rewrite local property and type names from Model.template() as IRIs. - - Local names are mapped to IRIs by using a JSON-LD context, i.e. { "@context": { ... }} from a kgforge.core.commons.Context. - In the case of contexts using prefixed names, prefixes are added to the SPARQL query prologue. - In the case of non available contexts and vocab then the query is returned unchanged. 
- """ - ctx = {} - if metadata_context and metadata_context.document: - ctx.update({ - k: v["@id"] if isinstance(v, Dict) and "@id" in v else v - for k, v in metadata_context.document["@context"].items() - }) - ctx.update({ - k: v["@id"] if isinstance(v, Dict) and "@id" in v else v - for k, v in context.document["@context"].items() - }) - prefixes = context.prefixes - has_prefixes = prefixes is not None and len(prefixes.keys()) > 0 - if ctx.get("type") == "@type": - if "rdf" in prefixes: - ctx["type"] = "rdf:type" - else: - ctx["type"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" - - def replace(match: Match) -> str: - m4 = match.group(4) - if m4 is None: - return match.group(0) - - v = ( - ctx.get(m4, ":" + m4 if context.has_vocab() else None) - if str(m4).lower() not in SPARQL_CLAUSES and not str(m4).startswith("https") - else m4 - ) - if v is None: - raise QueryingError( - f"Failed to construct a valid SPARQL query: add '{m4}'" - f", define an @vocab in the configured JSON-LD context or provide a fully correct SPARQL query." - ) - m5 = match.group(5) - if "//" in v: - return f"<{v}>{m5}" - - return f"{v}{m5}" - - g4 = r"([a-zA-Z_]+)" - g5 = r"([.;]?)" - g0 = rf"((?<=[\s,[(/|!^])((a|true|false)|{g4}){g5}(?=[\s,\])/|?*+]))" - g6 = r"(('[^']+')|('''[^\n\r]+''')|(\"[^\"]+\")|(\"\"\"[^\n\r]+\"\"\"))" - rx = rf"{g0}|{g6}|(?<=< )(.*)(?= >)" - qr = re.sub(rx, replace, query, flags=re.VERBOSE | re.MULTILINE) - - if not has_prefixes or "prefix" in str(qr).lower(): - return qr - - pfx = "\n".join(f"PREFIX {k}: <{v}>" for k, v in prefixes.items()) - - if context.has_vocab(): - pfx = "\n".join([pfx, f"PREFIX : <{context.vocab}>"]) - - return f"{pfx}\n{qr}" diff --git a/kgforge/core/commons/sparql_query_rewriter.py b/kgforge/core/commons/sparql_query_rewriter.py new file mode 100644 index 00000000..cd9c8c35 --- /dev/null +++ b/kgforge/core/commons/sparql_query_rewriter.py @@ -0,0 +1,161 @@ +import re +from typing import Any, Dict, List, Match, Optional, Tuple, Union, Type + +from kgforge.core.commons.context import Context +from kgforge.core.commons.exceptions import QueryingError + + +# FIXME: need to find a comprehensive way (different than list) to get all SPARQL reserved clauses +SPARQL_CLAUSES = [ + "where", + "filter", + "select", + "union", + "limit", + "construct", + "optional", + "bind", + "values", + "offset", + "order by", + "prefix", + "graph", + "distinct", + "in", + "as", + "base", + "prefix", + "reduced", + "describe", + "ask", + "named", + "asc", + "desc", + "from", + "optional", + "graph", + "regex", + "union", + "str", + "lang", + "langmatches", + "datatype", + "bound", + "sameTerm", + "isIRI", + "isURI", + "isBLANK", + "isLITERAL", + "group", + "by", + "order", + "minus", + "not", + "exists" +] + + +def rewrite_sparql(query: str, context: Context, metadata_context: Context) -> str: + """Rewrite local property and type names from Model.template() as IRIs. + + Local names are mapped to IRIs by using a JSON-LD context, i.e. { "@context": { ... }} + from a kgforge.core.commons.Context. + In the case of contexts using prefixed names, prefixes are added to the SPARQL query prologue. + In the case of non-available contexts and vocab then the query is returned unchanged. 
+ """ + ctx = {} + if metadata_context and metadata_context.document: + ctx.update({ + k: v["@id"] if isinstance(v, Dict) and "@id" in v else v + for k, v in metadata_context.document["@context"].items() + }) + ctx.update({ + k: v["@id"] if isinstance(v, Dict) and "@id" in v else v + for k, v in context.document["@context"].items() + }) + prefixes = context.prefixes + has_prefixes = prefixes is not None and len(prefixes.keys()) > 0 + if ctx.get("type") == "@type": + if "rdf" in prefixes: + ctx["type"] = "rdf:type" + else: + ctx["type"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + + def replace(match: Match) -> str: + m4 = match.group(4) + if m4 is None: + return match.group(0) + else: + v = ( + ctx.get(m4, ":" + m4 if context.has_vocab() else None) + if str(m4).lower() not in SPARQL_CLAUSES + and not str(m4).startswith("https") + else m4 + ) + if v is None: + raise QueryingError( + f"Failed to construct a valid SPARQL query: add '{m4}'" + f", define an @vocab in the configured JSON-LD context or " + f"provide a fully correct SPARQL query." + ) + m5 = match.group(5) + if "//" in v: + return f"<{v}>{m5}" + else: + return f"{v}{m5}" + + g4 = r"([a-zA-Z_]+)" + g5 = r"([.;]?)" + g0 = rf"((?<=[\s,[(/|!^])((a|true|false)|{g4}){g5}(?=[\s,\])/|?*+]))" + g6 = r"(('[^']+')|('''[^\n\r]+''')|(\"[^\"]+\")|(\"\"\"[^\n\r]+\"\"\"))" + rx = rf"{g0}|{g6}|(?<=< )(.*)(?= >)" + qr = re.sub(rx, replace, query, flags=re.VERBOSE | re.MULTILINE) + + if not has_prefixes or "prefix" in str(qr).lower(): + return qr + else: + pfx = "\n".join(f"PREFIX {k}: <{v}>" for k, v in prefixes.items()) + if context.has_vocab(): + pfx = "\n".join([pfx, f"PREFIX : <{context.vocab}>"]) + return f"{pfx}\n{qr}" + + +def _replace_in_sparql(qr, what, value, default_value, search_regex, replace_if_in_query=True): + + is_what_in_query = bool(re.search(f"{search_regex}", qr, flags=re.IGNORECASE)) + if is_what_in_query and value and not replace_if_in_query: + raise QueryingError( + f"Value for '{what}' is present in the provided query and set as argument: " + f"set 'replace_if_in_query' to True to replace '{what}' when present in the query." 
+ ) + replace_value = f" {what} {value}" if value else \ + (f" {what} {default_value}" if default_value else None) + + if is_what_in_query and replace_if_in_query and replace_value: + qr = re.sub(f"{search_regex}", replace_value, qr, flags=re.IGNORECASE) + + if not is_what_in_query and replace_value: + qr = f"{qr} {replace_value}" + return qr + + +def handle_query( + query: str, rewrite: bool, + limit: Optional[int], + offset: Optional[int], + default_limit: int, + default_offset: int, + model_context: Context, + metadata_context: Context +): + qr = ( + rewrite_sparql(query, model_context, metadata_context) + if model_context is not None and rewrite + else query + ) + if limit: + qr = _replace_in_sparql(qr, "LIMIT", limit, default_limit, r" LIMIT \d+") + if offset: + qr = _replace_in_sparql(qr, "OFFSET", offset, default_offset, r" OFFSET \d+") + + return qr diff --git a/tests/core/archetypes/test_store.py b/tests/core/archetypes/test_store.py index bfc0b460..02edbc8c 100644 --- a/tests/core/archetypes/test_store.py +++ b/tests/core/archetypes/test_store.py @@ -16,7 +16,7 @@ import pytest from kgforge.core import Resource, KnowledgeGraphForge -from kgforge.core.archetypes.store import rewrite_sparql, _replace_in_sparql +from kgforge.core.commons.sparql_query_rewriter import rewrite_sparql, _replace_in_sparql from kgforge.core.commons.context import Context from kgforge.core.commons.exceptions import DownloadingError, FreezingError, QueryingError from kgforge.specializations.resources import Dataset From 358a70c86970e11f373990a7f5483641bbf390ce Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 17:32:34 +0200 Subject: [PATCH 07/26] enable sparql query from model --- kgforge/core/archetypes/model.py | 36 ++ kgforge/core/archetypes/store.py | 18 +- kgforge/core/commons/sparql_query_rewriter.py | 16 +- .../models/rdf/rdf_model_service.py | 14 +- .../rdf/rdf_model_service_from_directory.py | 28 +- .../rdf/rdf_model_service_from_store.py | 46 +-- kgforge/specializations/models/rdf_model.py | 8 +- tests/data/shacl-model/commons/ontology-1.ttl | 375 ++++++++++++++++++ .../specializations/models/test_rdf_model.py | 22 +- 9 files changed, 500 insertions(+), 63 deletions(-) create mode 100644 tests/data/shacl-model/commons/ontology-1.ttl diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index c5a8528d..0f8e97fa 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -20,6 +20,8 @@ import hjson from pandas import DataFrame from rdflib import URIRef +from rdflib.plugins.sparql.processor import SPARQLResult + from kgforge.core import Resource from kgforge.core.archetypes import Mapping @@ -28,6 +30,11 @@ from kgforge.core.commons.exceptions import ConfigurationError, ValidationError from kgforge.core.commons.execution import not_supported, run from kgforge.core.commons.imports import import_class +from kgforge.core.commons.sparql_query_rewriter import handle_sparql_query + + +DEFAULT_LIMIT = 100 +DEFAULT_OFFSET = 0 class Model(ABC): @@ -115,6 +122,35 @@ def _template(self, type: str, only_required: bool) -> Dict: # Mappings. + def sparql( + self, query: str, + debug: bool, + limit: int = DEFAULT_LIMIT, + offset: int = DEFAULT_OFFSET, + **params + ) -> List[Resource]: + rewrite = params.get("rewrite", True) + + qr = handle_sparql_query( + query=query, + model_context=self.context(), + metadata_context=None, # TODO something else? 
+ rewrite=rewrite, + limit=limit, + offset=offset, + default_limit=DEFAULT_LIMIT, + default_offset=DEFAULT_OFFSET, + debug=debug + ) + + return self._sparql(qr) + + def _sparql(self, query: str) -> SPARQLResult: + # POLICY Should notify of failures with exception QueryingError including a message. + # POLICY Resource _store_metadata should not be set (default is None). + # POLICY Resource _synchronized should not be set (default is False). + not_supported() + def sources(self, pretty: bool) -> Optional[List[str]]: sources = sorted(self._sources()) if pretty: diff --git a/kgforge/core/archetypes/store.py b/kgforge/core/archetypes/store.py index 06e97ba4..c1bf8a26 100644 --- a/kgforge/core/archetypes/store.py +++ b/kgforge/core/archetypes/store.py @@ -34,7 +34,7 @@ QueryingError, ) from kgforge.core.commons.execution import not_supported, run -from kgforge.core.commons.sparql_query_rewriter import handle_query +from kgforge.core.commons.sparql_query_rewriter import handle_sparql_query, _debug_query from kgforge.core.reshaping import collect_values # NB: Do not 'from kgforge.core.archetypes import Resolver' to avoid cyclic dependency. @@ -387,7 +387,7 @@ def sparql( ) -> List[Resource]: rewrite = params.get("rewrite", True) - qr = handle_query( + qr = handle_sparql_query( query=query, model_context=self.model_context, metadata_context=self.service.metadata_context, @@ -395,12 +395,10 @@ def sparql( limit=limit, offset=offset, default_limit=DEFAULT_LIMIT, - default_offset=DEFAULT_OFFSET + default_offset=DEFAULT_OFFSET, + debug=debug ) - if debug: - self._debug_query(qr) - return self._sparql(qr) def _sparql(self, query: str) -> List[Resource]: @@ -472,14 +470,6 @@ def _initialize_service( # POLICY Should initialize the access to the store according to its configuration. pass - @staticmethod - def _debug_query(query): - if isinstance(query, Dict): - print("Submitted query:", query) - else: - print(*["Submitted query:", *query.splitlines()], sep="\n ") - print() - def rewrite_uri(self, uri: str, context: Context, **kwargs) -> str: """Rewrite a given uri using the store Context :param uri: a URI to rewrite. 
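To make the delegation concrete, here is how the centralized helper behaves, based on the _replace_in_sparql defined in this series (expected outputs written as comments):

from kgforge.core.commons.sparql_query_rewriter import _replace_in_sparql

q = "SELECT ?id WHERE { ?id a owl:Class } LIMIT 10"
# An existing LIMIT is replaced when replace_if_in_query is True (the default):
_replace_in_sparql(q, "LIMIT", 50, 100, r" LIMIT \d+")
# -> 'SELECT ?id WHERE { ?id a owl:Class } LIMIT 50'
# A missing OFFSET is appended after the query:
_replace_in_sparql(q, "OFFSET", 20, 0, r" OFFSET \d+")
# -> 'SELECT ?id WHERE { ?id a owl:Class } LIMIT 10  OFFSET 20'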
diff --git a/kgforge/core/commons/sparql_query_rewriter.py b/kgforge/core/commons/sparql_query_rewriter.py index cd9c8c35..2c334232 100644 --- a/kgforge/core/commons/sparql_query_rewriter.py +++ b/kgforge/core/commons/sparql_query_rewriter.py @@ -139,14 +139,15 @@ def _replace_in_sparql(qr, what, value, default_value, search_regex, replace_if_ return qr -def handle_query( +def handle_sparql_query( query: str, rewrite: bool, limit: Optional[int], offset: Optional[int], default_limit: int, default_offset: int, model_context: Context, - metadata_context: Context + metadata_context: Optional[Context], + debug: bool ): qr = ( rewrite_sparql(query, model_context, metadata_context) @@ -158,4 +159,15 @@ def handle_query( if offset: qr = _replace_in_sparql(qr, "OFFSET", offset, default_offset, r" OFFSET \d+") + if debug: + _debug_query(qr) + return qr + + +def _debug_query(query): + if isinstance(query, Dict): + print("Submitted query:", query) + else: + print(*["Submitted query:", *query.splitlines()], sep="\n ") + print() diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py index 549ba259..fe7b683c 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -11,6 +11,7 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Blue Brain Nexus Forge. If not, see . + import types from typing import List, Dict, Tuple, Set, Optional from abc import abstractmethod @@ -18,6 +19,7 @@ from pyshacl.shape import Shape from pyshacl.shapes_graph import ShapesGraph from rdflib import Graph, URIRef, RDF, XSD +from rdflib.plugins.sparql.processor import SPARQLResult from kgforge.core import Resource from kgforge.core.commons.context import Context @@ -29,7 +31,6 @@ class RdfModelService: - schema_to_source: Dict[URIRef, str] classes_to_shapes: Dict[str, URIRef] @@ -40,7 +41,7 @@ def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: self._graph = graph self._context_cache = {} self.schema_to_source, self.classes_to_shapes = self._build_shapes_map() - self.label_to_ontology_id: Dict[str, URIRef] = self._build_ontology_map() + # self.label_to_ontology_id: Dict[str, URIRef] = self._build_ontology_map() self.context = Context(self.resolve_context(context_iri), context_iri) self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes() @@ -48,6 +49,9 @@ def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: def schema_source(self, schema_iri: URIRef) -> str: return self.schema_to_source[schema_iri] + def sparql(self, query: str) -> SPARQLResult: + return self._graph.query(query) + @abstractmethod def materialize(self, iri: URIRef) -> NodeProperties: """Triggers the collection of properties of a given Shape node @@ -183,6 +187,6 @@ def traverse_properties(properties) -> Tuple[Dict, Dict]: return {"@context": context} if len(context) > 0 else None - @abstractmethod - def _build_ontology_map(self): - pass + # @abstractmethod + # def _build_ontology_map(self): + # pass diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py index 5d2f5c46..f43af09a 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py @@ -58,20 +58,20 @@ def resolve_context(self, iri: str) -> Dict: def 
generate_context(self) -> Dict: return self._generate_context() - def _build_ontology_map(self) -> Dict[str, URIRef]: - query = """ - PREFIX rdfs: - PREFIX sh: - SELECT ?id ?label WHERE { - ?id a owl:Class ; - rdfs:label ?label - } - """ # TODO CHANGE - res = self._graph.query(query) - return { - row["label"]: URIRef(row["id"]) - for row in res - } + # def _build_ontology_map(self) -> Dict[str, URIRef]: + # query = """ + # PREFIX rdfs: + # PREFIX sh: + # SELECT ?id ?label WHERE { + # ?id a owl:Class ; + # rdfs:label ?label + # } + # """ # TODO CHANGE + # res = self._graph.query(query) + # return { + # row["label"]: URIRef(row["id"]) + # for row in res + # } def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: query = """ diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py index 60f343c3..f1ca4970 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py @@ -171,26 +171,26 @@ def _load_shape_and_reload_shapes_graph(self, iri: URIRef): # reloads the shapes graph self._shapes_graph = ShapesGraphWrapper(self._graph) - def _build_ontology_map(self): - query = """ - PREFIX rdfs: - PREFIX sh: - SELECT ?id ?label WHERE { - ?id a owl:Class ; - rdfs:label ?label - } - """ - # make sure to get all types - limit = 100 - offset = 0 - count = limit - class_resource: Dict[URIRef, URIRef] = dict() - - while count == limit: - resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset) - for r in resources: - class_resource[r.label] = URIRef(r.id) - count = len(resources) - offset += count - - return class_resource + # def _build_ontology_map(self): + # query = """ + # PREFIX rdfs: + # PREFIX sh: + # SELECT ?id ?label WHERE { + # ?id a owl:Class ; + # rdfs:label ?label + # } + # """ + # # make sure to get all types + # limit = 100 + # offset = 0 + # count = limit + # class_resource: Dict[URIRef, URIRef] = dict() + # + # while count == limit: + # resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset) + # for r in resources: + # class_resource[r.label] = URIRef(r.id) + # count = len(resources) + # offset += count + # + # return class_resource diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index 1cfb735c..528f0f77 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -19,6 +19,8 @@ from pyshacl.consts import SH from rdflib import URIRef, Literal from rdflib.namespace import XSD +from rdflib.query import Result +from rdflib.plugins.sparql.processor import SPARQLResult from kgforge.core import Resource from kgforge.core.archetypes import Model, Store @@ -87,6 +89,9 @@ def _generate_context(self) -> Context: if document: return Context(document) + def _sparql(self, query) -> SPARQLResult: + return self.service.sparql(query) + # Templates. def _template(self, type: str, only_required: bool) -> Dict: @@ -102,8 +107,7 @@ def get_shape_from_type(self, type: str) -> URIRef: def schema_id(self, type: str) -> URIRef: shape_iri: URIRef = self.get_shape_from_type(type) - e = self.service.schema_source(shape_iri) - return e + return self.service.schema_source(shape_iri) # Validation. 
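With Model.sparql and RdfModel._sparql in place, the shapes and ontology graph can be queried directly from the model; a sketch (the data directory is the one used by the tests, the context file path is a placeholder):

from kgforge.specializations.models.rdf_model import RdfModel

model = RdfModel(
    "tests/data/shacl-model",
    context={"iri": "tests/data/shacl-model/context.json"},  # placeholder iri
    origin="directory",
)
# handle_sparql_query prepends known prefixes and injects LIMIT/OFFSET
# before the query reaches the rdflib graph behind the service.
result = model.sparql(
    "SELECT ?id ?label WHERE { ?id a owl:Class ; rdfs:label ?label }",
    debug=True,
    limit=10,
)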
diff --git a/tests/data/shacl-model/commons/ontology-1.ttl b/tests/data/shacl-model/commons/ontology-1.ttl new file mode 100644 index 00000000..a416005a --- /dev/null +++ b/tests/data/shacl-model/commons/ontology-1.ttl @@ -0,0 +1,375 @@ +@prefix bmo: . +@prefix ns1: . +@prefix nsg: . +@prefix owl: . +@prefix rdf: . +@prefix xsd: . +@prefix prov: . +@prefix rdfs: . +@prefix skos: . +@prefix parms: . +@prefix schema: . + + + rdf:type owl:Ontology ; + "parms"^^xsd:string ; + schema:title "Brain Modeling Parameter Ontology"^^xsd:string ; + rdfs:label "Brain Modeling Parameter Ontology"^^xsd:string ; + owl:versionInfo "R4"^^xsd:string . + +ns1:preferredNamespacePrefix a owl:AnnotationProperty . + +schema:Dataset a owl:Class . + +schema:name a owl:AnnotationProperty ; + rdfs:label "name"@en ; + skos:altLabel "name"@en . + +schema:sameAs a owl:ObjectProperty ; + rdfs:label "sameAs"@en ; + rdfs:subPropertyOf owl:topObjectProperty . + +schema:unitCode a owl:AnnotationProperty ; + rdfs:label "unitCode"@en ; + skos:altLabel "units"@en . + +rdfs:isDefinedBy rdfs:label "isDefinedBy"@en . + +rdfs:label rdfs:label "label"@en . + +owl:equivalentClass a owl:AnnotationProperty ; + rdfs:label "equivalentClass"@en . + +owl:topDataProperty rdfs:label "Attributes"@en . + +skos:altLabel a owl:AnnotationProperty ; + skos:altLabel "altLabel"@en . + +skos:definition a owl:AnnotationProperty ; + rdfs:label "definition"@en . + +skos:example a owl:AnnotationProperty . + +skos:notation a owl:AnnotationProperty . + +skos:note a owl:AnnotationProperty . + +skos:prefLabel a owl:AnnotationProperty ; + rdfs:label "prefLabel"@en . + +bmo:AtlasDistanceToLayer a owl:Class ; + rdfs:label "AtlasDistanceToLayer"@en ; + rdfs:subClassOf bmo:ModelBrainParameter . + +bmo:AtlasETypeRatio a owl:Class ; + rdfs:label "AtlasETypeRatio"@en ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:altLabel "Atlas e-type ratio"@en ; + skos:prefLabel "Atlas EType Ratio"@en . + +bmo:BrainVolumeParameter a owl:Class ; + rdfs:label "Model Brain Volume Parameter"@en ; + rdfs:subClassOf bmo:ModelBrainParameter . + +bmo:ConductanceDensity a owl:Class ; + rdfs:label "Conductance Density"@en ; + rdfs:subClassOf [ a owl:Restriction ; + owl:onProperty bmo:compartment ; + owl:someValuesFrom bmo:NeuronPart ], + [ a owl:Restriction ; + owl:onProperty nsg:ion ; + owl:someValuesFrom bmo:Ion ], + bmo:EModelParameter, + bmo:NeuronPartFeature . + +bmo:ConnectivityModelCoefficient a owl:Class ; + rdfs:label "Connectivity Model Coefficient"@en ; + rdfs:subClassOf bmo:ModelConnectivityParameter . + +bmo:DendriteSynapseDensity a owl:Class ; + rdfs:label "DendriteSynapseDensity"@en ; + rdfs:subClassOf bmo:ModelBrainParameter . + +bmo:EModelParameterConstraint a owl:Class ; + rdfs:label "EModel Parameter Constraint"@en ; + rdfs:subClassOf bmo:ModelBrainParameterConstraint . + +bmo:ETypeRatio a owl:Class ; + rdfs:label "E-TypeRatio"@en ; + rdfs:seeAlso ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "The E-type ratio is the ratio of an e-type for a given m-type, brain region and layer. For a given m-type, all e-type ratios add up to 1."^^xsd:string ; + skos:prefLabel "E-Type Ratio"^^xsd:string . + +bmo:ElectricalStimulus a owl:Class ; + rdfs:label "Electrical Stimulus"@en ; + rdfs:isDefinedBy ; + rdfs:seeAlso ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:editorialNote "How is this related to 'Protocol' in BluePyEModel"^^xsd:string . 
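The "all e-type ratios add up to 1" constraint in the ETypeRatio definition above, stated as a check (values invented for illustration):

etype_ratios = {"cADpyr": 0.80, "cNAC": 0.15, "bNAC": 0.05}  # made-up ratios
assert abs(sum(etype_ratios.values()) - 1.0) < 1e-9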
+ +bmo:METypeRatio a owl:Class ; + rdfs:label "ME-Type Ratio"@en ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:prefLabel "ME-Type Ratio"^^xsd:string . + +bmo:ModelBiochemicalReactionParameter a owl:Class ; + rdfs:label "Model Biochemical Reaction Parameter"@en ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Parameter used to represent a specific biological or theoretical value that is used in the modeling of biochemical reactions."^^xsd:string . + + a owl:Ontology ; + rdfs:label "Brain Modeling Parameter Ontology"^^xsd:string ; + ns1:preferredNamespacePrefix "parms"^^xsd:string ; + schema:title "Brain Modeling Parameter Ontology"^^xsd:string ; + owl:versionInfo "R4"^^xsd:string . + +parms:Dep a owl:Class ; + rdfs:label "Dep"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Relaxation time constant for depression, as measured by fitting the TM model to electrophysiological traces."@en . + +parms:Fac a owl:Class ; + rdfs:label "Fac"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Relaxation time constant for facilitation, as measured by fitting the TM model to electrophysiological traces."@en . + +parms:GABAB_ratio a owl:Class ; + rdfs:label "GABAB_ratio"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Synaptic conductance of GABAB proportional to the value for GABAA."@en . + +parms:NMDA_ratio a owl:Class ; + rdfs:label "NMDA_ratio"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Synaptic conductance of NMDA proportional to the value for AMPA."@en . + +parms:Nrrp a owl:Class ; + rdfs:label "Nrrp"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Number of tital release sites for given contact."@en . + +parms:Use a owl:Class ; + rdfs:label "Use"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Utilization of synaptic efficiency as measured by fitting the TM model to electrophysiological traces."@en . + +parms:conductance a owl:Class ; + rdfs:label "conductance"@en ; + schema:unitCode "uS" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Synaptic conductance."@en . + +parms:TimeConstantInMsForRecoveryFromDepression a owl:Class ; + rdfs:label "d"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "time constant (in ms) for recovery from depression, following a Gamma distribution"@en . + +parms:DecayTimeConstant a owl:Class ; + rdfs:label "dtc"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "decay time constant (in ms), following a truncated Normal distribution"@en . + +parms:SynapticReversalPotential a owl:Class ; + rdfs:label "e"@en ; + schema:unitCode "mV" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Synaptic reversal potential"@en . + +parms:e_GABAA a owl:Class ; + rdfs:label "e_GABAA"@en ; + schema:unitCode "mV" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "GABAA reversal potential"@en . + +parms:e_GABAB a owl:Class ; + rdfs:label "e_GABAB"@en ; + schema:unitCode "mV" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "GABAB reversal potential"@en . 
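Dep, Fac and Use above are the recovery, facilitation and utilization parameters of the Tsodyks-Markram (TM) synapse model that their definitions reference; for orientation only, one textbook discrete-time formulation (not code from this repository, parameter values illustrative):

import math

def tm_update(u, r, dt, use, dep, fac):
    """One inter-spike update; u: running utilization, r: available resources."""
    decay = math.exp(-dt / fac)
    u = u * decay + use * (1.0 - u * decay)       # facilitation of utilization
    r = r * (1.0 - u) * math.exp(-dt / dep) + 1.0 - math.exp(-dt / dep)  # recovery
    return u, r                                   # release amplitude ~ u * r

u, r = 0.0, 1.0
for _ in range(5):  # five presynaptic spikes, 20 ms apart
    u, r = tm_update(u, r, dt=20.0, use=0.5, dep=670.0, fac=17.0)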
+ +parms:TimeConstantInMsForRecoveryFromFacilitation a owl:Class ; + rdfs:label "f"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "time constant (in ms) for recovery from facilitation, following a Gamma distribution"@en . + +parms:gmax a owl:Class ; + rdfs:label "gmax"@en ; + schema:unitCode "uS" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "weight conversion factor (from nS to uS)"@en . + +parms:gsyn a owl:Class ; + rdfs:label "gsyn"@en ; + schema:unitCode "nS" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "the peak conductance (in nS) for a single synaptic contact, following a Gamma distribution"@en . + +parms:gsynSRSF a owl:Class ; + rdfs:label "gsynSRSF"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "the scale factor for the conductance; SRSF: 'synaptic receptor scaling factor'"@en . + +parms:InitialConcentrationOfMg a owl:Class ; + rdfs:label "mg"@en ; + schema:unitCode "mM" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Initial concentration of Mg2+"@en . + +parms:nrrp a owl:Class ; + rdfs:label "nrrp"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "number of vesicles in readily releasable pool, following a Poisson distribution"@en . + +parms:scale_mg a owl:Class ; + rdfs:label "scale_mg"@en ; + schema:unitCode "mM" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Scale parameter for determining mg concentration"@en . + +parms:slope_mg a owl:Class ; + rdfs:label "slope_mg"@en ; + schema:unitCode "1/mV" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Slope parameter for determining mg concentration"@en . + +parms:tau_d_AMPA a owl:Class ; + rdfs:label "tau_d_AMPA"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Decay time for AMPA currents."@en . + +parms:tau_d_GABAA a owl:Class ; + rdfs:label "tau_d_GABAA"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "GABAA-R synaptic conductance decay time constant"@en . + +parms:tau_d_GABAB a owl:Class ; + rdfs:label "tau_d_GABAB"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "GABAB-R synaptic conductance decay time constant"@en . + +parms:tau_d_NMDA a owl:Class ; + rdfs:label "tau_d_NMDA"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Decay time for NMDA currents."@en . + +parms:tau_r_AMPA a owl:Class ; + rdfs:label "tau_r_AMPA"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Rise time for AMPA currents."@en . + +parms:tau_r_GABAA a owl:Class ; + rdfs:label "tau_r_GABAA"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "GABAA-R synaptic conductance rise time constant"@en . + +parms:tau_r_GABAB a owl:Class ; + rdfs:label "tau_r_GABAB"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "GABAB-R synaptic conductance rise time constant"@en . + +parms:tau_r_NMDA a owl:Class ; + rdfs:label "tau_r_NMDA"@en ; + schema:unitCode "ms" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Rise time for NMDA currents."@en . 
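The tau_r_*/tau_d_* pairs above are the rise and decay time constants of the usual peak-normalized double-exponential conductance kernel; a generic sketch, not repository code (kinetic values are illustrative):

import math

def g_syn(t, gmax, tau_r, tau_d):
    """Conductance at time t (ms) after a spike, peak-normalized to gmax.

    Assumes tau_d > tau_r.
    """
    t_peak = (tau_r * tau_d) / (tau_d - tau_r) * math.log(tau_d / tau_r)
    norm = 1.0 / (math.exp(-t_peak / tau_d) - math.exp(-t_peak / tau_r))
    return gmax * norm * (math.exp(-t / tau_d) - math.exp(-t / tau_r))

print(g_syn(1.0, gmax=1.0, tau_r=0.2, tau_d=1.7))  # AMPA-like rise/decay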
+ +parms:u a owl:Class ; + rdfs:label "u"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "utilization of synaptic efficacy, following a truncated Normal distribution"@en . + +parms:u0 a owl:Class ; + rdfs:label "u0"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Initial value of u, which is the running value of Use"@en . + +parms:uHillCoefficient a owl:Class ; + rdfs:label "uHillCoefficient"@en ; + schema:unitCode "Unitless" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "A coefficient describing the scaling of u to be done by the simulator"@en . + +parms:SynapticConductanceWeight a owl:Class ; + rdfs:label "weight"@en ; + schema:unitCode "uS" ; + rdfs:subClassOf bmo:ModelBrainParameter ; + skos:definition "Synaptic conductance."@en . + +nsg:hasBaselDendriteFeature rdfs:label "hasBaselDendriteFeature"@en . + +nsg:hasConstraint a owl:ObjectProperty ; + rdfs:label "hasConstraint"@en . + +nsg:hasLayerLocation rdfs:label "hasLayerLocation"@en . + +nsg:hasObliqDendrite rdfs:label "hasObliqDendrite"@en . + +nsg:hasTuftDendriteFeature rdfs:label "hasTuftDendriteFeature"@en . + +schema:QuantitativeValue a owl:Class ; + rdfs:label "Quantitative Value"@en ; + rdfs:subClassOf prov:Entity . + +schema:value a owl:ObjectProperty ; + rdfs:label "value"@en ; + rdfs:subPropertyOf owl:topObjectProperty ; + skos:altLabel "value"@en . + +bmo:EModelParameter a owl:Class ; + rdfs:label "EModel Parameter"^^xsd:string ; + rdfs:subClassOf bmo:ModelBrainParameter . + +bmo:Ion a owl:Class . + +bmo:ModelBrainParameterConstraint a owl:Class ; + rdfs:label "Model Brain Parameter Constraint"@en ; + rdfs:subClassOf [ a owl:Restriction ; + owl:onProperty bmo:constraints ; + owl:someValuesFrom bmo:ModelBrainParameter ], + prov:Entity . + +bmo:ModelConnectivityParameter a owl:Class ; + rdfs:label "Model Connectivity Parameter"@en ; + rdfs:subClassOf bmo:ModelBrainParameter . + +bmo:NeuronPart a owl:Class . + +bmo:NeuronPartFeature a owl:Class . + +bmo:constraints a owl:ObjectProperty ; + rdfs:label "constraints"@en ; + skos:altLabel "constraint"@en . + +prov:Entity a owl:Class ; + rdfs:label "Entity"@en ; + skos:prefLabel "Entity"^^xsd:string . + +bmo:ModelBrainParameter a owl:Class ; + rdfs:label "Model Brain Parameter"@en ; + rdfs:subClassOf [ a owl:Restriction ; + owl:onProperty schema:value ; + owl:someValuesFrom schema:QuantitativeValue ], + prov:Entity ; + skos:definition "A brain parameter is a parameter used to represent a specific biological or theoretical value that is used in the construction of a brain model."^^xsd:string . 
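A quick rdflib sanity check of the new ontology file (the query mirrors the one added in test_query_model further below, with the prefixes declared explicitly):

from rdflib import Graph

g = Graph()
g.parse("tests/data/shacl-model/commons/ontology-1.ttl", format="turtle")
result = g.query("""
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?id ?label WHERE { ?id a owl:Class ; rdfs:label ?label }
""")
for row in result:
    print(row.id, row.label)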
+ diff --git a/tests/specializations/models/test_rdf_model.py b/tests/specializations/models/test_rdf_model.py index f7dedffc..4968b44d 100644 --- a/tests/specializations/models/test_rdf_model.py +++ b/tests/specializations/models/test_rdf_model.py @@ -24,9 +24,11 @@ @pytest.fixture def rdf_model(context_iri_file): - return RdfModel(full_path_relative_to_root("tests/data/shacl-model"), - context={"iri": context_iri_file}, - origin="directory") + return RdfModel( + full_path_relative_to_root("tests/data/shacl-model"), + context={"iri": context_iri_file}, + origin="directory" + ) class TestVocabulary: @@ -121,3 +123,17 @@ def test_validate_many(self, rdf_model: RdfModel, valid_activity_resource, assert (valid_activity_resource._last_action.operation == invalid_activity_resource._last_action.operation == rdf_model._validate_many.__name__) + + + def test_query_model(self, rdf_model: RdfModel): + query = """ + PREFIX rdfs: + PREFIX sh: + SELECT ?id ?label WHERE { + ?id a owl:Class ; + rdfs:label ?label + } + """ + res = rdf_model.sparql(query, debug=True) + print(res) + res.serialize() From 02f05551f03ed268b8ca5b3dd421da7ca3102f41 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 17:33:57 +0200 Subject: [PATCH 08/26] rm comments --- .../models/rdf/rdf_model_service.py | 6 ----- .../rdf/rdf_model_service_from_directory.py | 15 +----------- .../rdf/rdf_model_service_from_store.py | 23 ------------------- 3 files changed, 1 insertion(+), 43 deletions(-) diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py index fe7b683c..734a626c 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -41,8 +41,6 @@ def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: self._graph = graph self._context_cache = {} self.schema_to_source, self.classes_to_shapes = self._build_shapes_map() - # self.label_to_ontology_id: Dict[str, URIRef] = self._build_ontology_map() - self.context = Context(self.resolve_context(context_iri), context_iri) self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes() @@ -186,7 +184,3 @@ def traverse_properties(properties) -> Tuple[Dict, Dict]: context.update({key: terms[key] for key in sorted(terms)}) return {"@context": context} if len(context) > 0 else None - - # @abstractmethod - # def _build_ontology_map(self): - # pass diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py index f43af09a..8248fb19 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py @@ -58,20 +58,7 @@ def resolve_context(self, iri: str) -> Dict: def generate_context(self) -> Dict: return self._generate_context() - # def _build_ontology_map(self) -> Dict[str, URIRef]: - # query = """ - # PREFIX rdfs: - # PREFIX sh: - # SELECT ?id ?label WHERE { - # ?id a owl:Class ; - # rdfs:label ?label - # } - # """ # TODO CHANGE - # res = self._graph.query(query) - # return { - # row["label"]: URIRef(row["id"]) - # for row in res - # } + def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: query = """ diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py index f1ca4970..ce535f01 100644 --- 
a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py @@ -171,26 +171,3 @@ def _load_shape_and_reload_shapes_graph(self, iri: URIRef): # reloads the shapes graph self._shapes_graph = ShapesGraphWrapper(self._graph) - # def _build_ontology_map(self): - # query = """ - # PREFIX rdfs: - # PREFIX sh: - # SELECT ?id ?label WHERE { - # ?id a owl:Class ; - # rdfs:label ?label - # } - # """ - # # make sure to get all types - # limit = 100 - # offset = 0 - # count = limit - # class_resource: Dict[URIRef, URIRef] = dict() - # - # while count == limit: - # resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset) - # for r in resources: - # class_resource[r.label] = URIRef(r.id) - # count = len(resources) - # offset += count - # - # return class_resource From 246027d1ddc959df9793091caac6a942cd31cb09 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 24 Oct 2023 17:36:02 +0200 Subject: [PATCH 09/26] mark test that is expected to fail --- tests/specializations/models/test_rdf_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/specializations/models/test_rdf_model.py b/tests/specializations/models/test_rdf_model.py index 4968b44d..7e8cbced 100644 --- a/tests/specializations/models/test_rdf_model.py +++ b/tests/specializations/models/test_rdf_model.py @@ -98,6 +98,7 @@ def valid_activity_resource(self, activity_json): return resource @pytest.mark.parametrize("type_,", TYPES_SCHEMAS_MAP.keys()) + @pytest.mark.xfail def test_type_to_schema(self, rdf_model: RdfModel, type_): # FIXME TYPES_SCHEMAS_MAP should be a type to file dictionary # see _build_shapes_map from RdfModelServiceFromDirectory @@ -135,5 +136,4 @@ def test_query_model(self, rdf_model: RdfModel): } """ res = rdf_model.sparql(query, debug=True) - print(res) - res.serialize() + # TODO assertion \ No newline at end of file From ded1a4020f6d822a624c83c910598f0a174c3a49 Mon Sep 17 00:00:00 2001 From: mouffok Date: Wed, 25 Oct 2023 10:39:24 +0200 Subject: [PATCH 10/26] query rewriting rewriting --- kgforge/core/archetypes/resolver.py | 4 +- kgforge/core/commons/es_query_builder.py | 4 +- kgforge/core/commons/query_builder.py | 3 +- kgforge/core/commons/sparql_query_builder.py | 53 +++++++++------ kgforge/core/commons/sparql_query_rewriter.py | 64 ++++++++++++------- .../specializations/stores/bluebrain_nexus.py | 24 +++---- .../stores/test_bluebrain_nexus.py | 4 +- 7 files changed, 94 insertions(+), 62 deletions(-) diff --git a/kgforge/core/archetypes/resolver.py b/kgforge/core/archetypes/resolver.py index 696197ad..c37b81d9 100644 --- a/kgforge/core/archetypes/resolver.py +++ b/kgforge/core/archetypes/resolver.py @@ -227,9 +227,9 @@ def _build_resolving_query(text, query_template, deprecated_property, filters, s Filter(operator=FilterOperator.EQUAL, path=path, value=value) ) target_query_statements, target_query_filters = query_builder.build( - None, None, resolving_context, *configured_target_filters + None, None, resolving_context, configured_target_filters ) - + target_query_statements = ";\n ".join(target_query_statements) target_query_filters = "\n ".join(target_query_filters) first_filters = f"{first_filters} ; \n {target_query_statements}" diff --git a/kgforge/core/commons/es_query_builder.py b/kgforge/core/commons/es_query_builder.py index ee42fc8f..c2402bf4 100644 --- a/kgforge/core/commons/es_query_builder.py +++ b/kgforge/core/commons/es_query_builder.py @@ -40,7 +40,7 @@ def build( schema: 
Dict, resolvers: Optional[List["Resolver"]], context: Context, - *filters, + filters: List[Filter], **params, ) -> Tuple[List, List, List]: @@ -59,7 +59,7 @@ def build( m._update_from_dict(schema) dynamic = m._meta["dynamic"] if "dynamic" in m._meta else dynamic - for index, f in enumerate(*filters): + for index, f in enumerate(filters): _filter = None must = None must_not = None diff --git a/kgforge/core/commons/query_builder.py b/kgforge/core/commons/query_builder.py index 4ef4cd53..72c2cc25 100644 --- a/kgforge/core/commons/query_builder.py +++ b/kgforge/core/commons/query_builder.py @@ -18,6 +18,7 @@ from kgforge.core.commons.attributes import repr_class from kgforge.core.commons.context import Context from kgforge.core.resource import Resource +from kgforge.core.wrappings import Filter class QueryBuilder(ABC): @@ -30,7 +31,7 @@ def build( schema: Any, resolvers: Optional[List["Resolver"]], context: Context, - *filters, + filters: List[Filter], **params ) -> Any: pass diff --git a/kgforge/core/commons/sparql_query_builder.py b/kgforge/core/commons/sparql_query_builder.py index 7cf7ee1c..0fb28792 100644 --- a/kgforge/core/commons/sparql_query_builder.py +++ b/kgforge/core/commons/sparql_query_builder.py @@ -27,6 +27,7 @@ from kgforge.core.commons.files import is_valid_url from kgforge.core.commons.parser import _parse_type from kgforge.core.commons.query_builder import QueryBuilder +from kgforge.core.wrappings.paths import Filter class CategoryDataType(Enum): @@ -49,7 +50,7 @@ class CategoryDataType(Enum): CategoryDataType.DATETIME: lambda x: f'"{x}"^^xsd:dateTime', CategoryDataType.NUMBER: lambda x: x, CategoryDataType.LITERAL: lambda x: f'"{x}"', - CategoryDataType.BOOLEAN: lambda x: "'true'^^xsd:boolean" if x is True else "'false'^^xsd:boolean", + CategoryDataType.BOOLEAN: lambda x: "'true'^^xsd:boolean" if x else "'false'^^xsd:boolean", } sparql_operator_map = { @@ -66,11 +67,11 @@ class SPARQLQueryBuilder(QueryBuilder): @staticmethod def build( - schema: Dict, - resolvers: Optional[List[Resolver]], - context: Context, - *filters, - **params, + schema: Dict, + resolvers: Optional[List[Resolver]], + context: Context, + filters: List[Filter], + **params, ) -> Tuple[List, List]: statements = [] @@ -91,9 +92,9 @@ def build( property_path = "/".join(f.path) try: if ( - last_path in ["type", "@type"] - or last_path in ["id", "@id"] - or (last_term is not None and last_term.type == "@id") + last_path in ["type", "@type"] + or last_path in ["id", "@id"] + or (last_term is not None and last_term.type == "@id") ): if f.operator == "__eq__": statements.append(f"{property_path} {_box_value_as_full_iri(f.value)}") @@ -112,14 +113,18 @@ def build( if f.operator not in ["__eq__", "__ne__"]: raise NotImplementedError("supported operators are '==' and '!=' when filtering with a str.") statements.append(f"{property_path} ?v{index}") - sparql_filters.append(f"FILTER(?v{index} = {_box_value_as_full_iri(value)})") + sparql_filters.append( + f"FILTER(?v{index} = {_box_value_as_full_iri(value)})") else: statements.append(f"{property_path} ?v{index}") sparql_filters.append( f"FILTER(?v{index} {sparql_operator_map[f.operator]} {_box_value_as_full_iri(value)})" ) except NotImplementedError as nie: - raise ValueError(f"Operator '{sparql_operator_map[f.operator]}' is not supported with the value '{f.value}': {str(nie)}") + raise ValueError( + f"Operator '{sparql_operator_map[f.operator]}' " + f"is not supported with the value '{f.value}': {str(nie)}" + ) return statements, sparql_filters @staticmethod @@ 
-164,17 +169,23 @@ def triples_to_resource(iri, triples): else: # SELECT QUERY results = response["results"]["bindings"] + + def process_v(v): + if v['type'] == 'literal' and 'datatype' in v and v['datatype'] == \ + 'http://www.w3.org/2001/XMLSchema#boolean': + + return json.loads(str(v["value"]).lower()) + + elif v['type'] == 'literal' and 'datatype' in v and v['datatype'] == \ + 'http://www.w3.org/2001/XMLSchema#integer': + + return int(v["value"]) + + else: + return v["value"] + return [ - Resource(**{ - k: json.loads(str(v["value"]).lower()) - if v['type'] == 'literal' and - ('datatype' in v and v['datatype'] == 'http://www.w3.org/2001/XMLSchema#boolean') - else ( - int(v["value"]) - if v['type'] == 'literal' and ('datatype' in v and v['datatype'] == 'http://www.w3.org/2001/XMLSchema#integer') - else v["value"] - ) - for k, v in x.items()}) + Resource(**{k: process_v(v) for k, v in x.items()}) for x in results ] diff --git a/kgforge/core/commons/sparql_query_rewriter.py b/kgforge/core/commons/sparql_query_rewriter.py index 2c334232..866cb5a2 100644 --- a/kgforge/core/commons/sparql_query_rewriter.py +++ b/kgforge/core/commons/sparql_query_rewriter.py @@ -1,5 +1,5 @@ import re -from typing import Any, Dict, List, Match, Optional, Tuple, Union, Type +from typing import Any, Dict, List, Match, Optional, Tuple, Union, Type, Pattern from kgforge.core.commons.context import Context from kgforge.core.commons.exceptions import QueryingError @@ -64,15 +64,17 @@ def rewrite_sparql(query: str, context: Context, metadata_context: Context) -> s In the case of non-available contexts and vocab then the query is returned unchanged. """ ctx = {} - if metadata_context and metadata_context.document: - ctx.update({ + + def _context_to_dict(c: Context): + return { k: v["@id"] if isinstance(v, Dict) and "@id" in v else v - for k, v in metadata_context.document["@context"].items() - }) - ctx.update({ - k: v["@id"] if isinstance(v, Dict) and "@id" in v else v - for k, v in context.document["@context"].items() - }) + for k, v in c.document["@context"].items() + } + if metadata_context and metadata_context.document: + ctx.update(_context_to_dict(metadata_context)) + + ctx.update(_context_to_dict(context)) + prefixes = context.prefixes has_prefixes = prefixes is not None and len(prefixes.keys()) > 0 if ctx.get("type") == "@type": @@ -120,22 +122,33 @@ def replace(match: Match) -> str: return f"{pfx}\n{qr}" -def _replace_in_sparql(qr, what, value, default_value, search_regex, replace_if_in_query=True): +def _replace_in_sparql( + qr: str, + what: str, + value: Optional[int], + default_value: int, + search_regex: Pattern, + replace_if_in_query=True +) -> str: + + is_what_in_query = bool(re.search(pattern=search_regex, string=qr)) - is_what_in_query = bool(re.search(f"{search_regex}", qr, flags=re.IGNORECASE)) - if is_what_in_query and value and not replace_if_in_query: - raise QueryingError( - f"Value for '{what}' is present in the provided query and set as argument: " - f"set 'replace_if_in_query' to True to replace '{what}' when present in the query." 
- ) replace_value = f" {what} {value}" if value else \ (f" {what} {default_value}" if default_value else None) - if is_what_in_query and replace_if_in_query and replace_value: - qr = re.sub(f"{search_regex}", replace_value, qr, flags=re.IGNORECASE) + if is_what_in_query: + if not replace_if_in_query and value: + raise QueryingError( + f"Value for '{what}' is present in the provided query and set as argument: " + f"set 'replace_if_in_query' to True to replace '{what}' when present in the query." + ) + + if replace_if_in_query and replace_value: + qr = re.sub(pattern=search_regex, repl=replace_value, string=qr) + else: + if replace_value: + qr = f"{qr} {replace_value}" # Added to the end of the query (not very general) - if not is_what_in_query and replace_value: - qr = f"{qr} {replace_value}" return qr @@ -155,9 +168,15 @@ def handle_sparql_query( else query ) if limit: - qr = _replace_in_sparql(qr, "LIMIT", limit, default_limit, r" LIMIT \d+") + qr = _replace_in_sparql( + qr, "LIMIT", limit, default_limit, + re.compile(r" LIMIT \d+", flags=re.IGNORECASE) + ) if offset: - qr = _replace_in_sparql(qr, "OFFSET", offset, default_offset, r" OFFSET \d+") + qr = _replace_in_sparql( + qr, "OFFSET", offset, default_offset, + re.compile(r" OFFSET \d+", flags=re.IGNORECASE) + ) if debug: _debug_query(qr) @@ -170,4 +189,3 @@ def _debug_query(query): print("Submitted query:", query) else: print(*["Submitted query:", *query.splitlines()], sep="\n ") - print() diff --git a/kgforge/specializations/stores/bluebrain_nexus.py b/kgforge/specializations/stores/bluebrain_nexus.py index 273c7b61..8a216dca 100644 --- a/kgforge/specializations/stores/bluebrain_nexus.py +++ b/kgforge/specializations/stores/bluebrain_nexus.py @@ -33,7 +33,7 @@ from aiohttp import ClientSession, MultipartWriter from aiohttp.hdrs import CONTENT_DISPOSITION, CONTENT_TYPE - +from kgforge.core.archetypes.store import DEFAULT_LIMIT from kgforge.core.commons.dictionaries import update_dict from kgforge.core.commons.es_query_builder import ESQueryBuilder from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder @@ -687,7 +687,7 @@ def search( raise ValueError("context model missing") debug = params.get("debug", False) - limit = params.get("limit", 100) + limit = params.get("limit", DEFAULT_LIMIT) offset = params.get("offset", None) deprecated = params.get("deprecated", False) cross_bucket = params.get("cross_bucket", False) @@ -699,24 +699,26 @@ def search( search_endpoint = params.get( "search_endpoint", self.service.sparql_endpoint["type"] ) - if search_endpoint not in [ - self.service.sparql_endpoint["type"], - self.service.elastic_endpoint["type"], - ]: + + supported_search_endpoints = [ + self.service.sparql_endpoint["type"], self.service.elastic_endpoint["type"], + ] + if search_endpoint not in supported_search_endpoints: raise ValueError( - f"The provided search_endpoint value '{search_endpoint}' is not supported. Supported " - f"search_endpoint values are: '{self.service.sparql_endpoint['type'], self.service.elastic_endpoint['type']}'" + f"The provided search_endpoint value '{search_endpoint}' is not supported. " + f"Supported search_endpoint values are: {supported_search_endpoints}" ) if "filters" in params: raise ValueError( - "A 'filters' key was provided as params. Filters should be provided as iterable to be unpacked.") + "A 'filters' key was provided as params. " + "Filters should be provided as iterable to be unpacked." 
+ ) if bucket and not cross_bucket: not_supported(("bucket", True)) if filters and isinstance(filters[0], dict): - filters = create_filters_from_dict(filters[0]) - filters = list(filters) if not isinstance(filters, list) else filters + filters: List[Filter] = create_filters_from_dict(filters[0]) if search_endpoint == self.service.sparql_endpoint["type"]: if includes or excludes: diff --git a/tests/specializations/stores/test_bluebrain_nexus.py b/tests/specializations/stores/test_bluebrain_nexus.py index 14af735f..408d91a8 100644 --- a/tests/specializations/stores/test_bluebrain_nexus.py +++ b/tests/specializations/stores/test_bluebrain_nexus.py @@ -433,7 +433,7 @@ def context(self): ], ) def test_filter_to_query_statements(self, context, filters, expected): - statements = SPARQLQueryBuilder.build(None, None, context, *list(filters)) + statements = SPARQLQueryBuilder.build(None, None, context, list(filters)) assert statements == expected @pytest.mark.parametrize( @@ -447,7 +447,7 @@ def test_filter_to_query_statements(self, context, filters, expected): ) def test_filter_to_query_statements_exceptions(self, context, filters): with pytest.raises(ValueError): - SPARQLQueryBuilder.build(None, None, context, *list(filters)) + SPARQLQueryBuilder.build(None, None, context, list(filters)) def test_create_select_query(self): statements = f"?id type " From 285baeff86f6c31db5ccc2d51b317eabc4bbe31e Mon Sep 17 00:00:00 2001 From: mouffok Date: Wed, 25 Oct 2023 11:27:01 +0200 Subject: [PATCH 11/26] model sparql query returned as resource --- kgforge/core/archetypes/model.py | 3 +- kgforge/core/commons/sparql_query_builder.py | 33 +++++++++++-------- .../models/rdf/rdf_model_service.py | 12 ++++--- kgforge/specializations/models/rdf_model.py | 4 +-- .../specializations/models/test_rdf_model.py | 14 +++----- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index 0f8e97fa..205e151c 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -20,7 +20,6 @@ import hjson from pandas import DataFrame from rdflib import URIRef -from rdflib.plugins.sparql.processor import SPARQLResult from kgforge.core import Resource @@ -145,7 +144,7 @@ def sparql( return self._sparql(qr) - def _sparql(self, query: str) -> SPARQLResult: + def _sparql(self, query: str) -> List[Resource]: # POLICY Should notify of failures with exception QueryingError including a message. # POLICY Resource _store_metadata should not be set (default is None). # POLICY Resource _synchronized should not be set (default is False). 
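With this patch, Model._sparql is typed to return kgforge Resource objects instead of an rdflib SPARQLResult. A minimal caller-side sketch, assuming a configured RdfModel instance named `model` (the query mirrors the updated test further below):

    results = model.sparql(
        "SELECT ?id ?label WHERE { ?id a owl:Class ; rdfs:label ?label }",
        debug=False,
    )
    for r in results:
        # each r is a kgforge Resource built from one SELECT binding row,
        # not an rdflib result row
        print(r.id, r.label)
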
diff --git a/kgforge/core/commons/sparql_query_builder.py b/kgforge/core/commons/sparql_query_builder.py index 0fb28792..08119245 100644 --- a/kgforge/core/commons/sparql_query_builder.py +++ b/kgforge/core/commons/sparql_query_builder.py @@ -168,26 +168,31 @@ def triples_to_resource(iri, triples): return [triples_to_resource(s, t) for s, t in subject_triples.items()] else: # SELECT QUERY - results = response["results"]["bindings"] + return SPARQLQueryBuilder.build_resource_from_select_query( + response["results"]["bindings"] + ) - def process_v(v): - if v['type'] == 'literal' and 'datatype' in v and v['datatype'] == \ - 'http://www.w3.org/2001/XMLSchema#boolean': + @staticmethod + def build_resource_from_select_query(results: List): - return json.loads(str(v["value"]).lower()) + def process_v(v): + if v['type'] == 'literal' and 'datatype' in v and v['datatype'] == \ + 'http://www.w3.org/2001/XMLSchema#boolean': - elif v['type'] == 'literal' and 'datatype' in v and v['datatype'] == \ - 'http://www.w3.org/2001/XMLSchema#integer': + return json.loads(str(v["value"]).lower()) - return int(v["value"]) + elif v['type'] == 'literal' and 'datatype' in v and v['datatype'] == \ + 'http://www.w3.org/2001/XMLSchema#integer': - else: - return v["value"] + return int(v["value"]) + + else: + return v["value"] - return [ - Resource(**{k: process_v(v) for k, v in x.items()}) - for x in results - ] + return [ + Resource(**{k: process_v(v) for k, v in x.items()}) + for x in results + ] def _box_value_as_full_iri(value): diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py index 734a626c..4f0f8072 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -11,16 +11,18 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Blue Brain Nexus Forge. If not, see . 
- +import json import types +from io import StringIO from typing import List, Dict, Tuple, Set, Optional from abc import abstractmethod from typing import List, Dict, Tuple, Set, Optional from pyshacl.shape import Shape from pyshacl.shapes_graph import ShapesGraph from rdflib import Graph, URIRef, RDF, XSD -from rdflib.plugins.sparql.processor import SPARQLResult +from rdflib.plugins.sparql.results.jsonresults import JSONResultSerializer +from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder from kgforge.core import Resource from kgforge.core.commons.context import Context from kgforge.core.commons.exceptions import ConfigurationError @@ -47,8 +49,10 @@ def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: def schema_source(self, schema_iri: URIRef) -> str: return self.schema_to_source[schema_iri] - def sparql(self, query: str) -> SPARQLResult: - return self._graph.query(query) + def sparql(self, query: str) -> List[Resource]: + e = self._graph.query(query) + results = json.loads(e.serialize(format="json")) + return SPARQLQueryBuilder.build_resource_from_select_query(results["results"]["bindings"]) @abstractmethod def materialize(self, iri: URIRef) -> NodeProperties: diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index 528f0f77..4ab06c69 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -19,8 +19,6 @@ from pyshacl.consts import SH from rdflib import URIRef, Literal from rdflib.namespace import XSD -from rdflib.query import Result -from rdflib.plugins.sparql.processor import SPARQLResult from kgforge.core import Resource from kgforge.core.archetypes import Model, Store @@ -89,7 +87,7 @@ def _generate_context(self) -> Context: if document: return Context(document) - def _sparql(self, query) -> SPARQLResult: + def _sparql(self, query) -> List[Resource]: return self.service.sparql(query) # Templates. 
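The process_v helper inside the new build_resource_from_select_query decodes typed literals from the SPARQL JSON results format. A small sketch of the expected mapping, with illustrative binding rows:

    from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder

    bindings = [{
        "id": {"type": "uri", "value": "http://example.org/1"},
        "deprecated": {"type": "literal", "value": "false",
                       "datatype": "http://www.w3.org/2001/XMLSchema#boolean"},
        "count": {"type": "literal", "value": "3",
                  "datatype": "http://www.w3.org/2001/XMLSchema#integer"},
    }]
    r = SPARQLQueryBuilder.build_resource_from_select_query(bindings)[0]
    # boolean and integer literals are decoded; other values stay strings
    assert r.deprecated is False and r.count == 3 and r.id == "http://example.org/1"
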
diff --git a/tests/specializations/models/test_rdf_model.py b/tests/specializations/models/test_rdf_model.py index 7e8cbced..7207c5a5 100644 --- a/tests/specializations/models/test_rdf_model.py +++ b/tests/specializations/models/test_rdf_model.py @@ -14,6 +14,7 @@ import json import pytest from rdflib import URIRef +from rdflib.plugins.sparql import prepareQuery from kgforge.core import Resource from kgforge.core.commons.exceptions import ValidationError @@ -125,15 +126,8 @@ def test_validate_many(self, rdf_model: RdfModel, valid_activity_resource, invalid_activity_resource._last_action.operation == rdf_model._validate_many.__name__) - def test_query_model(self, rdf_model: RdfModel): - query = """ - PREFIX rdfs: - PREFIX sh: - SELECT ?id ?label WHERE { - ?id a owl:Class ; - rdfs:label ?label - } - """ - res = rdf_model.sparql(query, debug=True) + + q = "SELECT ?id ?label WHERE { ?id a owl:Class ; rdfs:label ?label }" + res = rdf_model.sparql(q, debug=True) # TODO assertion \ No newline at end of file From 1821b90a36ebd3babe795db7c2ca001984914b39 Mon Sep 17 00:00:00 2001 From: mouffok Date: Wed, 25 Oct 2023 11:37:24 +0200 Subject: [PATCH 12/26] merge sparql query builder and rewriter --- kgforge/core/archetypes/model.py | 4 +- kgforge/core/archetypes/store.py | 7 +- kgforge/core/commons/query_builder.py | 8 + kgforge/core/commons/sparql_query_builder.py | 187 ++++++++++++++++- kgforge/core/commons/sparql_query_rewriter.py | 191 ------------------ tests/core/archetypes/test_store.py | 84 +++++--- 6 files changed, 249 insertions(+), 232 deletions(-) delete mode 100644 kgforge/core/commons/sparql_query_rewriter.py diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index 205e151c..5da6ba50 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -29,7 +29,7 @@ from kgforge.core.commons.exceptions import ConfigurationError, ValidationError from kgforge.core.commons.execution import not_supported, run from kgforge.core.commons.imports import import_class -from kgforge.core.commons.sparql_query_rewriter import handle_sparql_query +from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder DEFAULT_LIMIT = 100 @@ -130,7 +130,7 @@ def sparql( ) -> List[Resource]: rewrite = params.get("rewrite", True) - qr = handle_sparql_query( + qr = SPARQLQueryBuilder.handle_sparql_query( query=query, model_context=self.context(), metadata_context=None, # TODO something else? diff --git a/kgforge/core/archetypes/store.py b/kgforge/core/archetypes/store.py index c1bf8a26..ee21a997 100644 --- a/kgforge/core/archetypes/store.py +++ b/kgforge/core/archetypes/store.py @@ -23,6 +23,7 @@ from kgforge.core.archetypes import Mapping, Mapper from kgforge.core.commons.attributes import repr_class from kgforge.core.commons.context import Context +from kgforge.core.commons.es_query_builder import ESQueryBuilder from kgforge.core.commons.exceptions import ( DeprecationError, DownloadingError, @@ -34,7 +35,7 @@ QueryingError, ) from kgforge.core.commons.execution import not_supported, run -from kgforge.core.commons.sparql_query_rewriter import handle_sparql_query, _debug_query +from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder from kgforge.core.reshaping import collect_values # NB: Do not 'from kgforge.core.archetypes import Resolver' to avoid cyclic dependency. 
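The limit/offset replacement helper now lives on SPARQLQueryBuilder and takes a pre-compiled, case-insensitive pattern. A sketch of its behavior, consistent with the test data further below (the query string is illustrative):

    import re
    from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder

    q = "SELECT ?agent WHERE { prov:agent ?agent } LIMIT 10"
    out = SPARQLQueryBuilder._replace_in_sparql(
        q, "LIMIT", 3, 100, re.compile(r" LIMIT \d+", flags=re.IGNORECASE)
    )
    # an existing LIMIT clause is replaced when a value is passed explicitly
    assert out == "SELECT ?agent WHERE { prov:agent ?agent } LIMIT 3"
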
@@ -387,7 +388,7 @@ def sparql( ) -> List[Resource]: rewrite = params.get("rewrite", True) - qr = handle_sparql_query( + qr = SPARQLQueryBuilder.handle_sparql_query( query=query, model_context=self.model_context, metadata_context=self.service.metadata_context, @@ -416,7 +417,7 @@ def elastic( if offset: query_dict["from"] = offset if debug: - self._debug_query(query_dict) + ESQueryBuilder.debug_query(query_dict) return self._elastic(json.dumps(query_dict)) def _elastic(self, query: str) -> List[Resource]: diff --git a/kgforge/core/commons/query_builder.py b/kgforge/core/commons/query_builder.py index 72c2cc25..e40c9071 100644 --- a/kgforge/core/commons/query_builder.py +++ b/kgforge/core/commons/query_builder.py @@ -40,3 +40,11 @@ def build( @abstractmethod def build_resource_from_response(query: str, response: Dict, context: Context, *args, **params) -> List[Resource]: pass + + @staticmethod + def debug_query(query): + if isinstance(query, Dict): + print("Submitted query:", query) + else: + print(*["Submitted query:", *query.splitlines()], sep="\n ") + diff --git a/kgforge/core/commons/sparql_query_builder.py b/kgforge/core/commons/sparql_query_builder.py index 08119245..7b2cfa62 100644 --- a/kgforge/core/commons/sparql_query_builder.py +++ b/kgforge/core/commons/sparql_query_builder.py @@ -14,14 +14,17 @@ from datetime import datetime from enum import Enum -from typing import Tuple, List, Dict, Optional, Any import json from pyld import jsonld import rdflib +import re +from rdflib import Graph from rdflib.plugins.sparql.parser import Query +from typing import Any, Dict, List, Match, Optional, Tuple, Union, Type, Pattern -from kgforge.core.conversions.rdf import from_jsonld +from kgforge.core.commons.exceptions import QueryingError from kgforge.core.resource import Resource +from kgforge.core.conversions.rdf import from_jsonld from kgforge.core.archetypes.resolver import Resolver from kgforge.core.commons.context import Context from kgforge.core.commons.files import is_valid_url @@ -36,6 +39,54 @@ class CategoryDataType(Enum): BOOLEAN = "boolean" LITERAL = "literal" +# FIXME: need to find a comprehensive way (different than list) to get all SPARQL reserved clauses +SPARQL_CLAUSES = [ + "where", + "filter", + "select", + "union", + "limit", + "construct", + "optional", + "bind", + "values", + "offset", + "order by", + "prefix", + "graph", + "distinct", + "in", + "as", + "base", + "prefix", + "reduced", + "describe", + "ask", + "named", + "asc", + "desc", + "from", + "optional", + "graph", + "regex", + "union", + "str", + "lang", + "langmatches", + "datatype", + "bound", + "sameTerm", + "isIRI", + "isURI", + "isBLANK", + "isLITERAL", + "group", + "by", + "order", + "minus", + "not", + "exists" +] type_map = { datetime: CategoryDataType.DATETIME, @@ -151,7 +202,7 @@ def build_resource_from_response( subject_triples[subject] = f"{s} {p} {o} . " def triples_to_resource(iri, triples): - graph = rdflib.Graph().parse(data=triples, format="nt") + graph = Graph().parse(data=triples, format="nt") data_expanded = json.loads(graph.serialize(format="json-ld")) data_expanded = json.loads(graph.serialize(format="json-ld")) frame = {"@id": iri} @@ -195,5 +246,135 @@ def process_v(v): ] + def rewrite_sparql(query: str, context: Context, metadata_context: Context) -> str: + """Rewrite local property and type names from Model.template() as IRIs. + + Local names are mapped to IRIs by using a JSON-LD context, i.e. { "@context": { ... }} + from a kgforge.core.commons.Context. 
+ In the case of contexts using prefixed names, prefixes are added to the SPARQL query prologue. + In the case of non-available contexts and vocab then the query is returned unchanged. + """ + ctx = {} + + def _context_to_dict(c: Context): + return { + k: v["@id"] if isinstance(v, Dict) and "@id" in v else v + for k, v in c.document["@context"].items() + } + + if metadata_context and metadata_context.document: + ctx.update(_context_to_dict(metadata_context)) + + ctx.update(_context_to_dict(context)) + + prefixes = context.prefixes + has_prefixes = prefixes is not None and len(prefixes.keys()) > 0 + if ctx.get("type") == "@type": + if "rdf" in prefixes: + ctx["type"] = "rdf:type" + else: + ctx["type"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + + def replace(match: Match) -> str: + m4 = match.group(4) + if m4 is None: + return match.group(0) + else: + v = ( + ctx.get(m4, ":" + m4 if context.has_vocab() else None) + if str(m4).lower() not in SPARQL_CLAUSES + and not str(m4).startswith("https") + else m4 + ) + if v is None: + raise QueryingError( + f"Failed to construct a valid SPARQL query: add '{m4}'" + f", define an @vocab in the configured JSON-LD context or " + f"provide a fully correct SPARQL query." + ) + m5 = match.group(5) + if "//" in v: + return f"<{v}>{m5}" + else: + return f"{v}{m5}" + + g4 = r"([a-zA-Z_]+)" + g5 = r"([.;]?)" + g0 = rf"((?<=[\s,[(/|!^])((a|true|false)|{g4}){g5}(?=[\s,\])/|?*+]))" + g6 = r"(('[^']+')|('''[^\n\r]+''')|(\"[^\"]+\")|(\"\"\"[^\n\r]+\"\"\"))" + rx = rf"{g0}|{g6}|(?<=< )(.*)(?= >)" + qr = re.sub(rx, replace, query, flags=re.VERBOSE | re.MULTILINE) + + if not has_prefixes or "prefix" in str(qr).lower(): + return qr + else: + pfx = "\n".join(f"PREFIX {k}: <{v}>" for k, v in prefixes.items()) + if context.has_vocab(): + pfx = "\n".join([pfx, f"PREFIX : <{context.vocab}>"]) + return f"{pfx}\n{qr}" + + @staticmethod + def _replace_in_sparql( + qr: str, + what: str, + value: Optional[int], + default_value: int, + search_regex: Pattern, + replace_if_in_query=True + ) -> str: + + is_what_in_query = bool(re.search(pattern=search_regex, string=qr)) + + replace_value = f" {what} {value}" if value else \ + (f" {what} {default_value}" if default_value else None) + + if is_what_in_query: + if not replace_if_in_query and value: + raise QueryingError( + f"Value for '{what}' is present in the provided query and set as argument: " + f"set 'replace_if_in_query' to True to replace '{what}' when present in the query." 
+ ) + + if replace_if_in_query and replace_value: + qr = re.sub(pattern=search_regex, repl=replace_value, string=qr) + else: + if replace_value: + qr = f"{qr} {replace_value}" # Added to the end of the query (not very general) + + return qr + + @staticmethod + def handle_sparql_query( + query: str, rewrite: bool, + limit: Optional[int], + offset: Optional[int], + default_limit: int, + default_offset: int, + model_context: Context, + metadata_context: Optional[Context], + debug: bool + ): + qr = ( + SPARQLQueryBuilder.rewrite_sparql(query, model_context, metadata_context) + if model_context is not None and rewrite + else query + ) + if limit: + qr = SPARQLQueryBuilder._replace_in_sparql( + qr, "LIMIT", limit, default_limit, + re.compile(r" LIMIT \d+", flags=re.IGNORECASE) + ) + if offset: + qr = SPARQLQueryBuilder._replace_in_sparql( + qr, "OFFSET", offset, default_offset, + re.compile(r" OFFSET \d+", flags=re.IGNORECASE) + ) + + if debug: + SPARQLQueryBuilder.debug_query(qr) + + return qr + + def _box_value_as_full_iri(value): return f"<{value}>" if is_valid_url(value) else value diff --git a/kgforge/core/commons/sparql_query_rewriter.py b/kgforge/core/commons/sparql_query_rewriter.py deleted file mode 100644 index 866cb5a2..00000000 --- a/kgforge/core/commons/sparql_query_rewriter.py +++ /dev/null @@ -1,191 +0,0 @@ -import re -from typing import Any, Dict, List, Match, Optional, Tuple, Union, Type, Pattern - -from kgforge.core.commons.context import Context -from kgforge.core.commons.exceptions import QueryingError - - -# FIXME: need to find a comprehensive way (different than list) to get all SPARQL reserved clauses -SPARQL_CLAUSES = [ - "where", - "filter", - "select", - "union", - "limit", - "construct", - "optional", - "bind", - "values", - "offset", - "order by", - "prefix", - "graph", - "distinct", - "in", - "as", - "base", - "prefix", - "reduced", - "describe", - "ask", - "named", - "asc", - "desc", - "from", - "optional", - "graph", - "regex", - "union", - "str", - "lang", - "langmatches", - "datatype", - "bound", - "sameTerm", - "isIRI", - "isURI", - "isBLANK", - "isLITERAL", - "group", - "by", - "order", - "minus", - "not", - "exists" -] - - -def rewrite_sparql(query: str, context: Context, metadata_context: Context) -> str: - """Rewrite local property and type names from Model.template() as IRIs. - - Local names are mapped to IRIs by using a JSON-LD context, i.e. { "@context": { ... }} - from a kgforge.core.commons.Context. - In the case of contexts using prefixed names, prefixes are added to the SPARQL query prologue. - In the case of non-available contexts and vocab then the query is returned unchanged. 
- """ - ctx = {} - - def _context_to_dict(c: Context): - return { - k: v["@id"] if isinstance(v, Dict) and "@id" in v else v - for k, v in c.document["@context"].items() - } - if metadata_context and metadata_context.document: - ctx.update(_context_to_dict(metadata_context)) - - ctx.update(_context_to_dict(context)) - - prefixes = context.prefixes - has_prefixes = prefixes is not None and len(prefixes.keys()) > 0 - if ctx.get("type") == "@type": - if "rdf" in prefixes: - ctx["type"] = "rdf:type" - else: - ctx["type"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" - - def replace(match: Match) -> str: - m4 = match.group(4) - if m4 is None: - return match.group(0) - else: - v = ( - ctx.get(m4, ":" + m4 if context.has_vocab() else None) - if str(m4).lower() not in SPARQL_CLAUSES - and not str(m4).startswith("https") - else m4 - ) - if v is None: - raise QueryingError( - f"Failed to construct a valid SPARQL query: add '{m4}'" - f", define an @vocab in the configured JSON-LD context or " - f"provide a fully correct SPARQL query." - ) - m5 = match.group(5) - if "//" in v: - return f"<{v}>{m5}" - else: - return f"{v}{m5}" - - g4 = r"([a-zA-Z_]+)" - g5 = r"([.;]?)" - g0 = rf"((?<=[\s,[(/|!^])((a|true|false)|{g4}){g5}(?=[\s,\])/|?*+]))" - g6 = r"(('[^']+')|('''[^\n\r]+''')|(\"[^\"]+\")|(\"\"\"[^\n\r]+\"\"\"))" - rx = rf"{g0}|{g6}|(?<=< )(.*)(?= >)" - qr = re.sub(rx, replace, query, flags=re.VERBOSE | re.MULTILINE) - - if not has_prefixes or "prefix" in str(qr).lower(): - return qr - else: - pfx = "\n".join(f"PREFIX {k}: <{v}>" for k, v in prefixes.items()) - if context.has_vocab(): - pfx = "\n".join([pfx, f"PREFIX : <{context.vocab}>"]) - return f"{pfx}\n{qr}" - - -def _replace_in_sparql( - qr: str, - what: str, - value: Optional[int], - default_value: int, - search_regex: Pattern, - replace_if_in_query=True -) -> str: - - is_what_in_query = bool(re.search(pattern=search_regex, string=qr)) - - replace_value = f" {what} {value}" if value else \ - (f" {what} {default_value}" if default_value else None) - - if is_what_in_query: - if not replace_if_in_query and value: - raise QueryingError( - f"Value for '{what}' is present in the provided query and set as argument: " - f"set 'replace_if_in_query' to True to replace '{what}' when present in the query." 
- ) - - if replace_if_in_query and replace_value: - qr = re.sub(pattern=search_regex, repl=replace_value, string=qr) - else: - if replace_value: - qr = f"{qr} {replace_value}" # Added to the end of the query (not very general) - - return qr - - -def handle_sparql_query( - query: str, rewrite: bool, - limit: Optional[int], - offset: Optional[int], - default_limit: int, - default_offset: int, - model_context: Context, - metadata_context: Optional[Context], - debug: bool -): - qr = ( - rewrite_sparql(query, model_context, metadata_context) - if model_context is not None and rewrite - else query - ) - if limit: - qr = _replace_in_sparql( - qr, "LIMIT", limit, default_limit, - re.compile(r" LIMIT \d+", flags=re.IGNORECASE) - ) - if offset: - qr = _replace_in_sparql( - qr, "OFFSET", offset, default_offset, - re.compile(r" OFFSET \d+", flags=re.IGNORECASE) - ) - - if debug: - _debug_query(qr) - - return qr - - -def _debug_query(query): - if isinstance(query, Dict): - print("Submitted query:", query) - else: - print(*["Submitted query:", *query.splitlines()], sep="\n ") diff --git a/tests/core/archetypes/test_store.py b/tests/core/archetypes/test_store.py index 02edbc8c..b3397e8d 100644 --- a/tests/core/archetypes/test_store.py +++ b/tests/core/archetypes/test_store.py @@ -11,12 +11,13 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Blue Brain Nexus Forge. If not, see . +import re # Placeholder for the test suite for actions. import pytest from kgforge.core import Resource, KnowledgeGraphForge -from kgforge.core.commons.sparql_query_rewriter import rewrite_sparql, _replace_in_sparql +from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder from kgforge.core.commons.context import Context from kgforge.core.commons.exceptions import DownloadingError, FreezingError, QueryingError from kgforge.specializations.resources import Dataset @@ -25,7 +26,7 @@ context = { "@context": { - "@vocab":"http://example.org/vocab/", + "@vocab": "http://example.org/vocab/", "type": { "@id": "rdf:type", "@type": "@id" @@ -44,14 +45,12 @@ } } - prefixes = { "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "prov": "http://www.w3.org/ns/prov#", "schema": "http://schema.org/", } - prefixes_string = "\n".join([f"PREFIX {k}: <{v}>" for k, v in prefixes.items()]) form_store_metadata_combinations = [ @@ -69,9 +68,10 @@ "\nSELECT ?x WHERE { ?x }"), ("SELECT ?x WHERE { a TypeNotInContext }", "\nSELECT ?x WHERE { a :TypeNotInContext }"), - ("SELECT ?x WHERE { a TypeNotInContext, AnotherNotTypeInContext, Person }", - "\nSELECT ?x WHERE { a :TypeNotInContext, :AnotherNotTypeInContext," - " schema:Person }"), + ( + "SELECT ?x WHERE { a TypeNotInContext, AnotherNotTypeInContext, Person }", + "\nSELECT ?x WHERE { a :TypeNotInContext, :AnotherNotTypeInContext," + " schema:Person }"), ("SELECT ?x WHERE { ?id propertyNotInContext ?x }", "\nSELECT ?x WHERE { ?id :propertyNotInContext ?x }"), ("SELECT ?x WHERE { ?id propertyNotInContext/name/anotherPropertyNotInContext ?x }", @@ -80,8 +80,9 @@ "\nSELECT DISTINCT ?x WHERE { ?id :propertyNotInContext/schema:name/:anotherPropertyNotInContext ?x }"), ("SELECT ?x WHERE { Graph ?g { ?id propertyNotInContext/name/anotherPropertyNotInContext ?x }}", "\nSELECT ?x WHERE { Graph ?g { ?id :propertyNotInContext/schema:name/:anotherPropertyNotInContext ?x }}"), - ("SELECT * WHERE { a TypeNotInContext, AnotherNotTypeInContext, Person; deprecated false.}", - "\nSELECT * WHERE { a :TypeNotInContext, :AnotherNotTypeInContext, schema:Person; 
false.}") + ( + "SELECT * WHERE { a TypeNotInContext, AnotherNotTypeInContext, Person; deprecated false.}", + "\nSELECT * WHERE { a :TypeNotInContext, :AnotherNotTypeInContext, schema:Person; false.}") ] @@ -89,7 +90,9 @@ def test_rewrite_sparql(query, expected, metadata_context): prefixes_string_vocab = "\n".join([prefixes_string, f"PREFIX : "]) context_object = Context(document=context) - result = rewrite_sparql(query, context_object, metadata_context=metadata_context) + result = SPARQLQueryBuilder.rewrite_sparql( + query, context_object, metadata_context=metadata_context + ) assert result == prefixes_string_vocab + expected @@ -98,17 +101,18 @@ def test_rewrite_sparql_unknownterm_missing_vocab(custom_context, metadata_conte assert not context_object.has_vocab() with pytest.raises(QueryingError): query = "SELECT ?x WHERE { Graph ?g { ?id propertyNotInContext/name/anotherPropertyNotInContext ?x }}" - rewrite_sparql(query, context_object, metadata_context) + SPARQLQueryBuilder.rewrite_sparql(query, context_object, metadata_context) def test_rewrite_sparql_missingvocab(custom_context, metadata_context): query = "SELECT ?name WHERE { name ?name }" - expected = "PREFIX foaf: \nPREFIX skos: \nPREFIX schema: \n"\ + expected = "PREFIX foaf: \nPREFIX skos: \nPREFIX schema: \n" \ "PREFIX owl: \nPREFIX rdfs: \nPREFIX mba: \nPREFIX nsg: \nPREFIX obo: \nSELECT ?name WHERE { foaf:name ?name }" context_object = Context(document=custom_context) - result = rewrite_sparql(query, context_object, metadata_context) + result = SPARQLQueryBuilder.rewrite_sparql(query, context_object, metadata_context) assert result == expected + replace_in_sparql_combinations = [ ("SELECT ?agent WHERE { prov:agent ?agent }", "LIMIT", 3, 100, r" LIMIT \d+", True, @@ -129,31 +133,40 @@ def test_rewrite_sparql_missingvocab(custom_context, metadata_context): "LIMIT", None, 100, r" LIMIT \d+", False, "SELECT ?agent WHERE { prov:agent ?agent } LIMIT 10"), ("SELECT ?agent WHERE { prov:agent ?agent }", - "OFFSET", 1, 0, r" OFFSET \d+", True, - "SELECT ?agent WHERE { prov:agent ?agent } OFFSET 1"), + "OFFSET", 1, 0, r" OFFSET \d+", True, + "SELECT ?agent WHERE { prov:agent ?agent } OFFSET 1"), ("SELECT ?agent WHERE { prov:agent ?agent }", - "OFFSET", None, 0, r" OFFSET \d+", True, - "SELECT ?agent WHERE { prov:agent ?agent }"), + "OFFSET", None, 0, r" OFFSET \d+", True, + "SELECT ?agent WHERE { prov:agent ?agent }"), ("SELECT ?agent WHERE { prov:agent ?agent } OFFSET 3", - "OFFSET", None, 20, r" OFFSET \d+", True, - "SELECT ?agent WHERE { prov:agent ?agent } OFFSET 20"), + "OFFSET", None, 20, r" OFFSET \d+", True, + "SELECT ?agent WHERE { prov:agent ?agent } OFFSET 20"), ("SELECT ?agent WHERE { prov:agent ?agent } LIMIT 10 OFFSET 3", - "OFFSET", 5, None, r" OFFSET \d+", True, - "SELECT ?agent WHERE { prov:agent ?agent } LIMIT 10 OFFSET 5") + "OFFSET", 5, None, r" OFFSET \d+", True, + "SELECT ?agent WHERE { prov:agent ?agent } LIMIT 10 OFFSET 5") ] -@pytest.mark.parametrize("query, what, value, default_value, search_regex, replace_if_in_query, expected", - replace_in_sparql_combinations) -def test__replace_in_sparql(query, what, value, default_value, search_regex, replace_if_in_query, expected): - result = _replace_in_sparql(query, what, value, default_value, search_regex, replace_if_in_query) + +@pytest.mark.parametrize( + "query, what, value, default_value, search_regex, replace_if_in_query, expected", + replace_in_sparql_combinations) +def test__replace_in_sparql(query, what, value, default_value, search_regex, replace_if_in_query, + 
expected): + result = SPARQLQueryBuilder._replace_in_sparql( + query, what, value, default_value, re.compile(search_regex, flags=re.IGNORECASE), + replace_if_in_query + ) assert result == expected def test__replace_in_sparql_exception(): with pytest.raises(QueryingError): query = "SELECT ?agent WHERE { prov:agent ?agent } LIMIT 10" - _replace_in_sparql(query, what="LIMIT", value=10, default_value=None, search_regex=r" LIMIT \d+", - replace_if_in_query=False) + SPARQLQueryBuilder._replace_in_sparql( + query, what="LIMIT", value=10, default_value=None, + search_regex=re.compile(r"LIMIT \d+", flags=re.IGNORECASE), + replace_if_in_query=False + ) def test_download(config): @@ -162,8 +175,8 @@ def test_download(config): forge = KnowledgeGraphForge(config) forge._store.download(simple, "fake.path", "./", overwrite=False, cross_bucket=False) -def test_freeze(config, store_metadata_value): +def test_freeze(config, store_metadata_value): forge = KnowledgeGraphForge(config, debug=True) derivation1 = Dataset(forge, type="Dataset", name="A derivation dataset") derivation1.id = "http://derivation1" @@ -188,20 +201,25 @@ def test_freeze(config, store_metadata_value): dataset.add_invalidation(invalidation1, versioned=False) dataset.add_contribution(contribution1, versioned=False) - expected_derivation = json.loads(json.dumps({"type":"Derivation", "entity":{"id": "http://derivation1", - "type":"Dataset", "name":"A derivation dataset"}})) + expected_derivation = json.loads( + json.dumps({"type": "Derivation", "entity": {"id": "http://derivation1", + "type": "Dataset", + "name": "A derivation dataset"}})) assert forge.as_json(dataset.derivation) == expected_derivation expected_generation = json.loads(json.dumps({"type": "Generation", - "activity": {"id": "http://generation1", "type": "Dataset"}})) + "activity": {"id": "http://generation1", + "type": "Dataset"}})) assert forge.as_json(dataset.generation) == expected_generation expected_contribution = json.loads(json.dumps({"type": "Contribution", - "agent": {"id": "http://contribution1", "type": "Person"}})) + "agent": {"id": "http://contribution1", + "type": "Person"}})) assert forge.as_json(dataset.contribution) == expected_contribution expected_invalidation = json.loads(json.dumps({"type": "Invalidation", - "activity": {"id": "http://invalidation1", "type": "Activity"}})) + "activity": {"id": "http://invalidation1", + "type": "Activity"}})) assert forge.as_json(dataset.invalidation) == expected_invalidation dataset.id = "http://dataset" From 394382bbecfa3d5c502013300aa34451c8a860f4 Mon Sep 17 00:00:00 2001 From: mouffok Date: Wed, 25 Oct 2023 11:52:35 +0200 Subject: [PATCH 13/26] apply limit and offset in query builder --- kgforge/core/archetypes/model.py | 23 +++++++++---- kgforge/core/archetypes/store.py | 35 ++++++++++++++------ kgforge/core/commons/es_query_builder.py | 10 ++++++ kgforge/core/commons/query_builder.py | 4 +++ kgforge/core/commons/sparql_query_builder.py | 32 +++++------------- 5 files changed, 62 insertions(+), 42 deletions(-) diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py index 5da6ba50..2972b4f3 100644 --- a/kgforge/core/archetypes/model.py +++ b/kgforge/core/archetypes/model.py @@ -130,18 +130,27 @@ def sparql( ) -> List[Resource]: rewrite = params.get("rewrite", True) - qr = SPARQLQueryBuilder.handle_sparql_query( - query=query, - model_context=self.context(), - metadata_context=None, # TODO something else? 
- rewrite=rewrite, + qr = ( + SPARQLQueryBuilder.rewrite_sparql( + query, + self.context(), + metadata_context=None # TODO smth else? + ) + if self.context() is not None and rewrite + else query + ) + + qr = SPARQLQueryBuilder.apply_limit_and_offset_to_query( + query=qr, limit=limit, offset=offset, default_limit=DEFAULT_LIMIT, - default_offset=DEFAULT_OFFSET, - debug=debug + default_offset=DEFAULT_OFFSET ) + if debug: + SPARQLQueryBuilder.debug_query(qr) + return self._sparql(qr) def _sparql(self, query: str) -> List[Resource]: diff --git a/kgforge/core/archetypes/store.py b/kgforge/core/archetypes/store.py index ee21a997..cb6677f6 100644 --- a/kgforge/core/archetypes/store.py +++ b/kgforge/core/archetypes/store.py @@ -388,18 +388,27 @@ def sparql( ) -> List[Resource]: rewrite = params.get("rewrite", True) - qr = SPARQLQueryBuilder.handle_sparql_query( - query=query, - model_context=self.model_context, - metadata_context=self.service.metadata_context, - rewrite=rewrite, + qr = ( + SPARQLQueryBuilder.rewrite_sparql( + query, + context=self.model_context, + metadata_context=self.service.metadata_context, + ) + if self.model_context is not None and rewrite + else query + ) + + qr = SPARQLQueryBuilder.apply_limit_and_offset_to_query( + qr, limit=limit, offset=offset, default_limit=DEFAULT_LIMIT, - default_offset=DEFAULT_OFFSET, - debug=debug + default_offset=DEFAULT_OFFSET ) + if debug: + SPARQLQueryBuilder.debug_query(qr) + return self._sparql(qr) def _sparql(self, query: str) -> List[Resource]: @@ -412,12 +421,16 @@ def elastic( self, query: str, debug: bool, limit: int = DEFAULT_LIMIT, offset: int = DEFAULT_OFFSET ) -> List[Resource]: query_dict = json.loads(query) - if limit: - query_dict["size"] = limit - if offset: - query_dict["from"] = offset + + query_dict = ESQueryBuilder.apply_limit_and_offset_to_query( + query_dict, + limit=limit, default_limit=None, + offset=offset, default_offset=None + ) + if debug: ESQueryBuilder.debug_query(query_dict) + return self._elastic(json.dumps(query_dict)) def _elastic(self, query: str) -> List[Resource]: diff --git a/kgforge/core/commons/es_query_builder.py b/kgforge/core/commons/es_query_builder.py index c2402bf4..4bdb2cba 100644 --- a/kgforge/core/commons/es_query_builder.py +++ b/kgforge/core/commons/es_query_builder.py @@ -184,6 +184,16 @@ def build( def build_resource_from_response(query: str, response: Dict, context: Context, *args, **params) -> List[Resource]: not_supported() + @staticmethod + def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default_offset): + # TODO should there be an elastic search default limit? 
+ if limit: + query["size"] = limit + if offset: + query["from"] = offset + + return query + def _look_up_known_parent_paths(f, last_path, property_path, m): if ( diff --git a/kgforge/core/commons/query_builder.py b/kgforge/core/commons/query_builder.py index e40c9071..ae419853 100644 --- a/kgforge/core/commons/query_builder.py +++ b/kgforge/core/commons/query_builder.py @@ -48,3 +48,7 @@ def debug_query(query): else: print(*["Submitted query:", *query.splitlines()], sep="\n ") + @staticmethod + @abstractmethod + def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default_offset): + pass diff --git a/kgforge/core/commons/sparql_query_builder.py b/kgforge/core/commons/sparql_query_builder.py index 7b2cfa62..bbd30ade 100644 --- a/kgforge/core/commons/sparql_query_builder.py +++ b/kgforge/core/commons/sparql_query_builder.py @@ -116,6 +116,7 @@ class CategoryDataType(Enum): class SPARQLQueryBuilder(QueryBuilder): + @staticmethod def build( schema: Dict, @@ -343,38 +344,21 @@ def _replace_in_sparql( return qr + @staticmethod - def handle_sparql_query( - query: str, rewrite: bool, - limit: Optional[int], - offset: Optional[int], - default_limit: int, - default_offset: int, - model_context: Context, - metadata_context: Optional[Context], - debug: bool - ): - qr = ( - SPARQLQueryBuilder.rewrite_sparql(query, model_context, metadata_context) - if model_context is not None and rewrite - else query - ) + def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default_offset): if limit: - qr = SPARQLQueryBuilder._replace_in_sparql( - qr, "LIMIT", limit, default_limit, + query = SPARQLQueryBuilder._replace_in_sparql( + query, "LIMIT", limit, default_limit, re.compile(r" LIMIT \d+", flags=re.IGNORECASE) ) if offset: - qr = SPARQLQueryBuilder._replace_in_sparql( - qr, "OFFSET", offset, default_offset, + query = SPARQLQueryBuilder._replace_in_sparql( + query, "OFFSET", offset, default_offset, re.compile(r" OFFSET \d+", flags=re.IGNORECASE) ) - if debug: - SPARQLQueryBuilder.debug_query(qr) - - return qr - + return query def _box_value_as_full_iri(value): return f"<{value}>" if is_valid_url(value) else value From 495ed22e994d3286a2114c952c22dc1d373d9aa4 Mon Sep 17 00:00:00 2001 From: mouffok Date: Wed, 25 Oct 2023 12:16:05 +0200 Subject: [PATCH 14/26] renaming --- .../models/rdf/rdf_model_service.py | 18 +++++++++--------- .../rdf/rdf_model_service_from_directory.py | 10 ++++------ .../models/rdf/rdf_model_service_from_store.py | 14 +++++++------- kgforge/specializations/models/rdf_model.py | 2 +- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py index 4f0f8072..143ee27e 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -33,21 +33,21 @@ class RdfModelService: - schema_to_source: Dict[URIRef, str] - classes_to_shapes: Dict[str, URIRef] + shape_to_source: Dict[URIRef, str] + class_to_shape: Dict[str, URIRef] def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None: if context_iri is None: raise ConfigurationError("RdfModel requires a context") self._graph = graph - self._context_cache = {} - self.schema_to_source, self.classes_to_shapes = self._build_shapes_map() + self._context_cache = dict() + self.shape_to_source, self.class_to_shape = self._build_shapes_map() self.context = Context(self.resolve_context(context_iri), context_iri) 
self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes() - def schema_source(self, schema_iri: URIRef) -> str: - return self.schema_to_source[schema_iri] + def shape_source(self, schema_iri: URIRef) -> str: + return self.shape_to_source[schema_iri] def sparql(self, query: str) -> List[Resource]: e = self._graph.query(query) @@ -109,7 +109,7 @@ def _build_types_to_shapes(self) -> Dict[str, URIRef]: """Iterates the classes_to_shapes dictionary to create a term to shape dictionary filtering the terms available in the context """ types_to_shapes: Dict = {} - for k, v in self.classes_to_shapes.items(): + for k, v in self.class_to_shape.items(): term = self.context.find_term(str(k)) if term: if term.name not in types_to_shapes: @@ -165,7 +165,7 @@ def traverse_properties(properties) -> Tuple[Dict, Dict]: return l_prefixes, l_terms target_classes = [] - for k in self.classes_to_shapes.keys(): + for k in self.class_to_shape.keys(): key = as_term(k) if key not in target_classes: target_classes.append(key) @@ -173,7 +173,7 @@ def traverse_properties(properties) -> Tuple[Dict, Dict]: # TODO: should this raise an error? print("duplicated term", key, k) - for type_, shape in self.classes_to_shapes.items(): + for type_, shape in self.class_to_shape.items(): t_prefix, t_namespace, t_name = self._graph.compute_qname(type_) prefixes.update({t_prefix: str(t_namespace)}) types_.update({t_name: {"@id": ":".join((t_prefix, t_name))}}) diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py index 8248fb19..5b377dd9 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py @@ -58,8 +58,6 @@ def resolve_context(self, iri: str) -> Dict: def generate_context(self) -> Dict: return self._generate_context() - - def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: query = """ PREFIX rdfs: @@ -76,18 +74,18 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: """ res = self._graph.query(query) - class_being_shaped_id_to_shape_uri: Dict[str, URIRef] = { + class_to_shape: Dict[str, URIRef] = { row["type"]: URIRef(row["shape"]) for row in res } # FIXME should return the file path where the schema is in - schema_to_file = dict( + shape_to_file = dict( (e, "") # TODO file source - for e in class_being_shaped_id_to_shape_uri.values() + for e in class_to_shape.values() ) - return schema_to_file, class_being_shaped_id_to_shape_uri + return shape_to_file, class_to_shape def load_rdf_files_into_graph(path: Path, memory_graph: Graph) -> Graph: diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py index ce535f01..db155241 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py @@ -70,7 +70,7 @@ def resolve_context(self, iri: str) -> Dict: return self._context_cache[iri] def generate_context(self) -> Dict: - for v in self.schema_to_source.values(): + for v in self.shape_to_source.values(): self._load_shape_and_reload_shapes_graph(v) return self._generate_context() @@ -95,19 +95,19 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: limit = 100 offset = 0 count = limit - class_being_shaped_id_to_shape_uri = {} - schema_to_resource: Dict[URIRef, URIRef] = {} 
+        class_to_shape = dict()
+        shape_to_resource: Dict[URIRef, URIRef] = dict()

         while count == limit:
             resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset)
             for r in resources:
                 shape_uri = URIRef(r.shape)
-                class_being_shaped_id_to_shape_uri[r.type] = shape_uri
-                schema_to_resource[shape_uri] = URIRef(r.resource_id)
+                class_to_shape[r.type] = shape_uri
+                shape_to_resource[shape_uri] = URIRef(r.resource_id)
             count = len(resources)
             offset += count

-        return schema_to_resource, class_being_shaped_id_to_shape_uri
+        return shape_to_resource, class_to_shape

     def recursive_resolve(self, context: Union[Dict, List, str]) -> Dict:
         document = {}
diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py
index 4ab06c69..418e8521 100644
--- a/kgforge/specializations/models/rdf_model.py
+++ b/kgforge/specializations/models/rdf_model.py
@@ -105,7 +105,7 @@ def get_shape_from_type(self, type: str) -> URIRef:

     def schema_id(self, type: str) -> URIRef:
         shape_iri: URIRef = self.get_shape_from_type(type)
-        return self.service.schema_source(shape_iri)
+        return self.service.shape_source(shape_iri)

 # Validation.

From ff9e624b55d91415ad3f7778cab3eb2c5d9ffeba Mon Sep 17 00:00:00 2001
From: mouffok
Date: Wed, 25 Oct 2023 14:38:42 +0200
Subject: [PATCH 15/26] resource id as str instead of URIRef

---
 .../models/rdf/rdf_model_service_from_store.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py
index db155241..f99464c2 100644
--- a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py
+++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py
@@ -136,7 +136,7 @@ def recursive_resolve(self, context: Union[Dict, List, str]) -> Dict:
             document.update(context)
         return document

-    def _load_shape(self, resource_id: URIRef):
+    def _load_shape(self, resource_id: str):
         if resource_id not in self._imported:
             try:
                 shape = self.context_store.retrieve(resource_id, version=None, cross_bucket=False)
@@ -162,12 +162,12 @@ def _load_and_get_type_shape(self, iri: URIRef) -> ShapeWrapper:
         try:
             return self._shapes_graph.lookup_shape_from_node(iri)
         except KeyError:
-            shape_resource_id = self.shape_to_source[iri]
+            shape_resource_id: str = self.shape_to_source[iri]
             self._load_shape_and_reload_shapes_graph(shape_resource_id)
             return self._shapes_graph.lookup_shape_from_node(iri)

-    def _load_shape_and_reload_shapes_graph(self, iri: URIRef):
-        self._load_shape(iri)
+    def _load_shape_and_reload_shapes_graph(self, resource_id: str):
+        self._load_shape(resource_id)
         # reloads the shapes graph
         self._shapes_graph = ShapesGraphWrapper(self._graph)

From 91c93ab21bd5cc669ba9dc91b2143942cb66069c Mon Sep 17 00:00:00 2001
From: mouffok
Date: Wed, 25 Oct 2023 14:53:42 +0200
Subject: [PATCH 16/26] sparql query builder separate method for construct
 queries

---
 kgforge/core/commons/sparql_query_builder.py | 80 ++++++++++----------
 1 file changed, 42 insertions(+), 38 deletions(-)

diff --git a/kgforge/core/commons/sparql_query_builder.py
b/kgforge/core/commons/sparql_query_builder.py index bbd30ade..c41a4ac0 100644 --- a/kgforge/core/commons/sparql_query_builder.py +++ b/kgforge/core/commons/sparql_query_builder.py @@ -184,48 +184,52 @@ def build_resource_from_response( query: str, response: Dict, context: Context, *args, **params ) -> List[Resource]: _, q_comp = Query.parseString(query) - if q_comp.name == "ConstructQuery": - subject_triples = {} - for r in response["results"]["bindings"]: - subject = r["subject"]["value"] - s = f"<{r['subject']['value']}>" - p = f"<{r['predicate']['value']}>" - if r["object"]["type"] == "uri": - o = f"<{r['object']['value']}>" - else: - if "datatype" in r["object"]: - o = f"\"{r['object']['value']}\"^^<{r['object']['datatype']}>" - else: - o = f"\"{r['object']['value']}\"" - if subject in subject_triples: - subject_triples[subject] += f"\n{s} {p} {o} . " - else: - subject_triples[subject] = f"{s} {p} {o} . " - - def triples_to_resource(iri, triples): - graph = Graph().parse(data=triples, format="nt") - data_expanded = json.loads(graph.serialize(format="json-ld")) - data_expanded = json.loads(graph.serialize(format="json-ld")) - frame = {"@id": iri} - data_framed = jsonld.frame(data_expanded, frame) - compacted = jsonld.compact(data_framed, context.document) - resource = from_jsonld(compacted) - resource.context = ( - context.iri - if context.is_http_iri() - else context.document["@context"] - ) - return resource + bindings = response["results"]["bindings"] - return [triples_to_resource(s, t) for s, t in subject_triples.items()] + if q_comp.name == "ConstructQuery": + return SPARQLQueryBuilder.build_resource_from_construct_query(bindings, context) else: # SELECT QUERY - return SPARQLQueryBuilder.build_resource_from_select_query( - response["results"]["bindings"] + return SPARQLQueryBuilder.build_resource_from_select_query(bindings) + + @staticmethod + def build_resource_from_construct_query(results: List, context: Context) -> List[Resource]: + + subject_triples = {} + + for r in results: + subject = r["subject"]["value"] + s = f"<{r['subject']['value']}>" + p = f"<{r['predicate']['value']}>" + if r["object"]["type"] == "uri": + o = f"<{r['object']['value']}>" + else: + if "datatype" in r["object"]: + o = f"\"{r['object']['value']}\"^^<{r['object']['datatype']}>" + else: + o = f"\"{r['object']['value']}\"" + if subject in subject_triples: + subject_triples[subject] += f"\n{s} {p} {o} . " + else: + subject_triples[subject] = f"{s} {p} {o} . " + + def triples_to_resource(iri, triples): + graph = Graph().parse(data=triples, format="nt") + data_expanded = json.loads(graph.serialize(format="json-ld")) + data_framed = jsonld.frame(data_expanded, {"@id": iri}) + compacted = jsonld.compact(data_framed, context.document) + resource = from_jsonld(compacted) + resource.context = ( + context.iri + if context.is_http_iri() + else context.document["@context"] ) + return resource + + return [triples_to_resource(s, t) for s, t in subject_triples.items()] @staticmethod - def build_resource_from_select_query(results: List): + def build_resource_from_select_query(results: List) -> List[Resource]: def process_v(v): if v['type'] == 'literal' and 'datatype' in v and v['datatype'] == \ @@ -246,7 +250,7 @@ def process_v(v): for x in results ] - + @staticmethod def rewrite_sparql(query: str, context: Context, metadata_context: Context) -> str: """Rewrite local property and type names from Model.template() as IRIs. 
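For the CONSTRUCT branch split out above, each binding row carries subject/predicate/object entries. A sketch of the expected input shape, with illustrative rows and `context` assumed to be a configured kgforge Context:

    rows = [
        {"subject": {"type": "uri", "value": "http://example.org/1"},
         "predicate": {"type": "uri", "value": "http://schema.org/name"},
         "object": {"type": "literal", "value": "Jane"}},
    ]
    # rows sharing a subject are merged into one N-Triples snippet, framed on
    # the subject IRI and compacted against the context into a Resource
    resources = SPARQLQueryBuilder.build_resource_from_construct_query(rows, context)
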
@@ -344,7 +348,6 @@ def _replace_in_sparql(
 
         return qr
 
-
     @staticmethod
     def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default_offset):
         if limit:
@@ -360,5 +363,6 @@ def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default
 
         return query
 
+
 def _box_value_as_full_iri(value):
     return f"<{value}>" if is_valid_url(value) else value

From 0986e3dc4422c7dbf2ba707b2fd96dd14744c62c Mon Sep 17 00:00:00 2001
From: mouffok
Date: Wed, 25 Oct 2023 20:53:50 +0200
Subject: [PATCH 17/26] get file from which a shape originates in rdf model service from directory

---
 kgforge/core/archetypes/model.py              |  4 +-
 .../models/rdf/rdf_model_service.py           | 19 ++---
 .../rdf/rdf_model_service_from_directory.py   | 84 ++++++++++++-------
 .../rdf/rdf_model_service_from_store.py       | 18 ++--
 kgforge/specializations/models/rdf_model.py   |  4 +-
 tests/specializations/models/data.py          | 13 ++-
 .../specializations/models/test_rdf_model.py  | 32 ++++++-
 7 files changed, 118 insertions(+), 56 deletions(-)

diff --git a/kgforge/core/archetypes/model.py b/kgforge/core/archetypes/model.py
index 2972b4f3..950252cc 100644
--- a/kgforge/core/archetypes/model.py
+++ b/kgforge/core/archetypes/model.py
@@ -196,8 +196,8 @@ def mapping(self, entity: str, source: str, type: Callable) -> Mapping:
 
     # Validation.
 
-    def schema_id(self, type: str) -> URIRef:
-        # POLICY Should retrieve the schema id of the given type.
+    def schema_source(self, type: str) -> str:
+        # POLICY Should retrieve the schema source of the given type.
         not_supported()
 
     def validate(self, data: Union[Resource, List[Resource]],
diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py
index 143ee27e..ba4849f5 100644
--- a/kgforge/specializations/models/rdf/rdf_model_service.py
+++ b/kgforge/specializations/models/rdf/rdf_model_service.py
@@ -33,20 +33,24 @@
 
 class RdfModelService:
 
-    shape_to_source: Dict[URIRef, str]
-    class_to_shape: Dict[str, URIRef]
 
-    def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None:
+    def __init__(
+            self, graph: Graph,
+            shape_to_source: Dict[URIRef, str],
+            class_to_shape: Dict[str, URIRef],
+            context_iri: Optional[str] = None,
+    ) -> None:
 
         if context_iri is None:
             raise ConfigurationError("RdfModel requires a context")
         self._graph = graph
         self._context_cache = dict()
-        self.shape_to_source, self.class_to_shape = self._build_shapes_map()
+        self.shape_to_source = shape_to_source
+        self.class_to_shape = class_to_shape
         self.context = Context(self.resolve_context(context_iri), context_iri)
         self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes()
 
-    def shape_source(self, schema_iri: URIRef) -> str:
+    def get_shape_source(self, schema_iri: URIRef) -> str:
         return self.shape_to_source[schema_iri]
 
     def sparql(self, query: str) -> List[Resource]:
@@ -100,11 +104,6 @@ def generate_context(self) -> Dict:
         """Generates a JSON-LD context with the classes and terms present in the SHACL graph."""
         raise NotImplementedError()
 
-    @abstractmethod
-    def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]:
-        """Queries the source and returns a map of owl:Class to sh:NodeShape"""
-        raise NotImplementedError()
-
     def _build_types_to_shapes(self) -> Dict[str, URIRef]:
         """Iterates the classes_to_shapes dictionary to create a term to shape dictionary filtering
          the terms available in the context """
diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py
b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py index 5b377dd9..a8452885 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py @@ -28,9 +28,14 @@ class RdfModelServiceFromDirectory(RdfModelService): def __init__(self, dir_path: Path, context_iri: str) -> None: - self._graph = load_rdf_files_into_graph(dir_path, Graph()) - self._shapes_graph = ShapesGraphWrapper(self._graph) - super().__init__(self._graph, context_iri) + + graph, shape_to_source, class_to_shape = self._build_shapes_map(dir_path=dir_path) + self._shapes_graph = ShapesGraphWrapper(graph) + + super().__init__( + graph=graph, context_iri=context_iri, shape_to_source=shape_to_source, + class_to_shape=class_to_shape + ) def materialize(self, iri: URIRef) -> NodeProperties: sh = self._shapes_graph.lookup_shape_from_node(iri) @@ -58,34 +63,53 @@ def resolve_context(self, iri: str) -> Dict: def generate_context(self) -> Dict: return self._generate_context() - def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: - query = """ - PREFIX rdfs: - PREFIX sh: - SELECT ?type ?shape WHERE { - { ?shape sh:targetClass ?type .} - UNION { - SELECT (?shape as ?type) ?shape WHERE { - ?shape a sh:NodeShape . - ?shape a rdfs:Class - } - } - } ORDER BY ?type - """ - res = self._graph.query(query) - - class_to_shape: Dict[str, URIRef] = { - row["type"]: URIRef(row["shape"]) - for row in res - } - - # FIXME should return the file path where the schema is in - shape_to_file = dict( - (e, "") # TODO file source - for e in class_to_shape.values() - ) + def _build_shapes_map( + self, dir_path: Path + ) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]]: - return shape_to_file, class_to_shape + query = """ + PREFIX rdfs: + PREFIX sh: + SELECT ?type ?shape WHERE { + { ?shape sh:targetClass ?type .} + UNION { + SELECT (?shape as ?type) ?shape WHERE { + ?shape a sh:NodeShape . 
+ ?shape a rdfs:Class + } + } + } ORDER BY ?type + """ + + class_to_shape: Dict[str, URIRef] = dict() + shape_to_file: Dict[URIRef, str] = dict() + graph = Graph() + + extensions = [".ttl", ".n3", ".json", ".rdf"] + for f in dir_path.rglob(os.path.join("*.*")): + graph_i = Graph() + if f.suffix in extensions: + file_format = guess_format(f.name) + if file_format is None: + file_format = "json-ld" + graph_i.parse(f.as_posix(), format=file_format) + + res = graph_i.query(query) + + class_to_shape_i = dict( + (row["type"], URIRef(row["shape"])) + for row in res + ) + class_to_shape.update(class_to_shape_i) + + shape_to_file.update(dict( + (e, f.as_posix()) + for e in class_to_shape_i.values() + )) + + graph += graph_i + + return graph, shape_to_file, class_to_shape def load_rdf_files_into_graph(path: Path, memory_graph: Graph) -> Graph: diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py index f99464c2..36480e17 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py @@ -46,9 +46,13 @@ def __init__(self, default_store: Store, context_iri: Optional[str] = None, self._imported = [] - g = Graph() - self._shapes_graph = ShapesGraphWrapper(g) - super().__init__(g, context_iri) + graph, shape_to_resource, class_to_shape = self._build_shapes_map() + self._shapes_graph = ShapesGraphWrapper(graph) + + super().__init__( + graph=graph, context_iri=context_iri, shape_to_source=shape_to_resource, + class_to_shape=class_to_shape + ) def materialize(self, iri: URIRef) -> NodeProperties: shape: ShapeWrapper = self._load_and_get_type_shape(iri) @@ -75,7 +79,7 @@ def generate_context(self) -> Dict: return self._generate_context() - def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: + def _build_shapes_map(self) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]]: query = f""" PREFIX rdfs: PREFIX sh: @@ -95,8 +99,8 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: limit = 100 offset = 0 count = limit - class_to_shape = dict() - shape_to_resource: Dict[URIRef, URIRef] = dict() + class_to_shape: Dict[str, URIRef] = dict() + shape_to_resource: Dict[URIRef, str] = dict() while count == limit: resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset) @@ -107,7 +111,7 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]: count = len(resources) offset += count - return shape_to_resource, class_to_shape + return Graph(), shape_to_resource, class_to_shape def recursive_resolve(self, context: Union[Dict, List, str]) -> Dict: document = {} diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index 418e8521..71ec8cf4 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -103,9 +103,9 @@ def get_shape_from_type(self, type: str) -> URIRef: raise ValueError(f"Type {type} not found") return self.service.types_to_shapes[type] - def schema_id(self, type: str) -> URIRef: + def schema_source(self, type: str) -> str: shape_iri: URIRef = self.get_shape_from_type(type) - return self.service.shape_source(shape_iri) + return self.service.get_shape_source(shape_iri) # Validation. 
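The directory variant of _build_shapes_map above is what lets schema_source(type) answer with a file path: each file is first parsed into its own graph and queried for shapes, and only then merged, so the origin of every shape is known before the graphs are combined. A rough standalone sketch of the same idea (rdflib only; the simplified query and names here are illustrative, not the patch's exact code):

    from pathlib import Path
    from rdflib import Graph, URIRef
    from rdflib.util import guess_format

    def shapes_by_file(dir_path: Path):
        # Record, per sh:NodeShape, the file it was parsed from.
        query = """
            PREFIX sh: <http://www.w3.org/ns/shacl#>
            SELECT ?shape WHERE { ?shape sh:targetClass ?type . }
        """
        merged, shape_to_file = Graph(), {}
        for f in dir_path.rglob("*.ttl"):
            g = Graph().parse(f.as_posix(), format=guess_format(f.name))
            for row in g.query(query):
                shape_to_file[URIRef(row["shape"])] = f.as_posix()
            merged += g  # merge only after the origin has been recorded
        return merged, shape_to_file
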
diff --git a/tests/specializations/models/data.py b/tests/specializations/models/data.py index e3f2b1e6..6742e643 100644 --- a/tests/specializations/models/data.py +++ b/tests/specializations/models/data.py @@ -13,6 +13,7 @@ # along with Blue Brain Nexus Forge. If not, see . from copy import deepcopy +from utils import full_path_relative_to_root ORGANIZATION = { "id": "", @@ -134,4 +135,14 @@ "Organization": "http://www.example.com/OrganizationShape", "Person": "http://www.example.com/PersonShape", "PostalAddress": "http://schema.org/PostalAddress", -} \ No newline at end of file +} + +SCHEMA_SOURCE_MAP = { + "Activity": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-2.json'), + "Association": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'), + "Building": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-3.json'), + "Employee": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'), + "Organization": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'), + "Person": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'), + "PostalAddress": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'), +} diff --git a/tests/specializations/models/test_rdf_model.py b/tests/specializations/models/test_rdf_model.py index 7207c5a5..4d6b6846 100644 --- a/tests/specializations/models/test_rdf_model.py +++ b/tests/specializations/models/test_rdf_model.py @@ -99,11 +99,35 @@ def valid_activity_resource(self, activity_json): return resource @pytest.mark.parametrize("type_,", TYPES_SCHEMAS_MAP.keys()) - @pytest.mark.xfail def test_type_to_schema(self, rdf_model: RdfModel, type_): - # FIXME TYPES_SCHEMAS_MAP should be a type to file dictionary - # see _build_shapes_map from RdfModelServiceFromDirectory - assert rdf_model.schema_id(type_) == URIRef(TYPES_SCHEMAS_MAP[type_]) + + # The problem: + # For DirectoryService, + # the best way to track the file from which a schema originates is + # - before building the shapes map + # - on service initialisation, when graph loading (gets all schemas), + + # For StoreService, + # the best way to track the file from which a schema originates is + # - when building the shapes map, querying the store to get resource_id + # - not on service initialisation, no graph loading + # (empty graph provided, lazily loaded on request) + + # Solution: graph loading should happen in building the shapes map. 
Shape loading returns: + # the graph with the shapes (empty for Store, full for Directory) + # shape_to_source + # class_to_shape + + # Would mean that in + # - RdfModelServiceFromStore g = Graph() would happen in the implementation of + # _build_shapes_map, and not in constructor + # - RdfModelFromDirectory load_rdf_files_into_graph() would happen in the implementation of + # _build_shapes_map, and not in constructor + + # - RdfModelService: self.shape_to_source, self.class_to_shape can be parameters of the + # constructor of this abstract class, and they'd be passed to super by the implementations + + assert rdf_model.schema_source(type_) == SCHEMA_SOURCE_MAP[type_] def test_validate_one(self, rdf_model: RdfModel, valid_activity_resource): rdf_model.validate(valid_activity_resource, False, type_="Activity") From fd64e55402f400895082a8d82cb84a274b69fbc7 Mon Sep 17 00:00:00 2001 From: mouffok Date: Thu, 26 Oct 2023 16:38:48 +0200 Subject: [PATCH 18/26] rm extra method from rebase --- .../models/rdf/pyshacl_shape_wrapper.py | 181 ------------------ 1 file changed, 181 deletions(-) diff --git a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py index 09fcf8f1..d1a64b10 100644 --- a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py +++ b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py @@ -62,187 +62,6 @@ def traverse(self, predecessors: Set[URIRef]) -> Tuple[List, Dict]: return properties, attributes - def validate( - self, - target_graph: GraphLike, - focus: Optional[ - Union[ - Tuple[Union[URIRef, BNode]], - List[Union[URIRef, BNode]], - Set[Union[URIRef, BNode]], - Union[URIRef, BNode], - ] - ] = None, - abort_on_first: Optional[bool] = False, - allow_infos: Optional[bool] = False, - allow_warnings: Optional[bool] = False, - _evaluation_path: Optional[List] = None, - ): - if self.deactivated: - if self.sg.debug: - self.logger.debug(f"Skipping shape because it is deactivated: {str(self)}") - return True, [] - if focus is not None: - lh_shape = False - rh_shape = True - self.logger.debug(f"Running evaluation of Shape {str(self)}") - if not isinstance(focus, (tuple, list, set)): - focus = [focus] - self.logger.debug(f"Shape was passed {len(focus)} Focus Node/s to evaluate.") - if len(focus) < 1: - return True, [] - else: - lh_shape = True - rh_shape = False - self.logger.debug(f"Checking if Shape {str(self)} defines its own targets.") - self.logger.debug("Identifying targets to find focus nodes.") - focus = self.focus_nodes(target_graph) - self.logger.debug(f"Found {len(focus)} Focus Nodes to evaluate.") - if len(focus) < 1: - # It's possible for shapes to have _no_ focus nodes - # (they are called in other ways) - if self.sg.debug: - self.logger.debug( - f"Skipping shape {str(self)} because it found no focus nodes.") - return True, [] - else: - self.logger.debug(f"Running evaluation of Shape {str(self)}") - if _evaluation_path is None: - _evaluation_path = [] - print(len(_evaluation_path)) - # elif len(_evaluation_path) >= 30: - # # 27 is the depth required to successfully do the meta-shacl test on shacl.ttl - # path_str = " -> ".join((str(e) for e in _evaluation_path)) - # raise ReportableRuntimeError("Evaluation path too deep!\n{}".format(path_str)) - t1 = perf_counter() - # Lazy import here to avoid an import loop - CONSTRAINT_PARAMETERS, PARAMETER_MAP = getattr( - pyshacl.module, 'CONSTRAINT_PARAMS', (None, None)) - if not CONSTRAINT_PARAMETERS or not PARAMETER_MAP: - from pyshacl.constraints 
import ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP - - setattr(pyshacl.shape, 'CONSTRAINT_PARAMS', - (ALL_CONSTRAINT_PARAMETERS, CONSTRAINT_PARAMETERS_MAP)) - - CONSTRAINT_PARAMETERS = ALL_CONSTRAINT_PARAMETERS - PARAMETER_MAP = CONSTRAINT_PARAMETERS_MAP - if self.sg.js_enabled or self._advanced: - search_parameters = CONSTRAINT_PARAMETERS.copy() - constraint_map = PARAMETER_MAP.copy() - if self._advanced: - from pyshacl.constraints.advanced import ExpressionConstraint, SH_expression - - search_parameters.append(SH_expression) - constraint_map[SH_expression] = ExpressionConstraint - if self.sg.js_enabled: - from pyshacl.extras.js.constraint import JSConstraint, SH_js - - search_parameters.append(SH_js) - constraint_map[SH_js] = JSConstraint - else: - search_parameters = CONSTRAINT_PARAMETERS - constraint_map = PARAMETER_MAP - parameters = (p for p, v in self.sg.predicate_objects(self.node) if p in search_parameters) - reports = [] - focus_value_nodes = self.value_nodes(target_graph, focus) - filter_reports: bool = False - allow_conform: bool = False - allowed_severities: Set[URIRef] = set() - if allow_infos: - allowed_severities.add(SH_Info) - if allow_warnings: - allowed_severities.add(SH_Info) - allowed_severities.add(SH_Warning) - if allow_infos or allow_warnings: - if self.severity in allowed_severities: - allow_conform = True - else: - filter_reports = True - - non_conformant = False - done_constraints = set() - run_count = 0 - _evaluation_path.append(self) - if self.sg.debug: - path_str = " -> ".join((str(e) for e in _evaluation_path)) - self.logger.debug(f"Current shape evaluation path: {path_str}") - constraint_components = [constraint_map[p] for p in iter(parameters)] - constraint_component: Type['ConstraintComponent'] - for constraint_component in constraint_components: - if constraint_component in done_constraints: - continue - try: - # if self.sg.debug: - # self.logger.debug(f"Constructing Constraint Component: {repr(constraint_component)}") - c = constraint_component(self) - except ConstraintLoadWarning as w: - self.logger.warning(repr(w)) - continue - except ConstraintLoadError as e: - self.logger.error(repr(e)) - raise e - _e_p_copy = _evaluation_path[:] - _e_p_copy.append(c) - if self.sg.debug: - self.logger.debug(f"Checking conformance for constraint: {str(c)}") - ct1 = perf_counter() - if self.sg.debug: - path_str = " -> ".join((str(e) for e in _e_p_copy)) - self.logger.debug(f"Current constraint evaluation path: {path_str}") - _is_conform, _reports = c.evaluate(target_graph, focus_value_nodes, _e_p_copy) - ct2 = perf_counter() - if self.sg.debug: - elapsed = ct2 - ct1 - self.logger.debug( - f"Milliseconds to check constraint {str(c)}: {elapsed * 1000.0:.3f}ms") - if _is_conform: - self.logger.debug(f"DataGraph conforms to constraint {c}.") - elif allow_conform: - self.logger.debug( - f"Focus nodes do _not_ conform to constraint {c} but given severity is allowed.") - else: - self.logger.debug(f"Focus nodes do _not_ conform to constraint {c}.") - if lh_shape or (not rh_shape): - for v_str, v_node, v_parts in _reports: - self.logger.debug(v_str) - - if _is_conform or allow_conform: - ... 
- elif filter_reports: - all_allow = True - for v_str, v_node, v_parts in _reports: - severity_bits = list( - filter(lambda p: p[0] == v_node and p[1] == SH_resultSeverity, v_parts)) - if severity_bits: - all_allow = all_allow and (severity_bits[0][2] in allowed_severities) - non_conformant = non_conformant or (not all_allow) - else: - non_conformant = non_conformant or (not _is_conform) - reports.extend(_reports) - run_count += 1 - done_constraints.add(constraint_component) - if non_conformant and abort_on_first: - break - applicable_custom_constraints = self.find_custom_constraints() - for a in applicable_custom_constraints: - if non_conformant and abort_on_first: - break - _e_p_copy2 = _evaluation_path[:] - validator = a.make_validator_for_shape(self) - _e_p_copy2.append(validator) - _is_conform, _r = validator.evaluate(target_graph, focus_value_nodes, _e_p_copy2) - non_conformant = non_conformant or (not _is_conform) - reports.extend(_r) - run_count += 1 - t2 = perf_counter() - if self.sg.debug: - elapsed = t2 - t1 - self.logger.debug( - f"Milliseconds to evaluate shape {str(self)}: {elapsed * 1000.0:.3f}ms") - # print(_evaluation_path, "Passes" if not non_conformant else "Fails") - return (not non_conformant), reports - - class ShapesGraphWrapper(ShapesGraph): From 642f3405175bdabd3177b3bcc711c24504e26ba5 Mon Sep 17 00:00:00 2001 From: mouffok Date: Thu, 26 Oct 2023 16:40:26 +0200 Subject: [PATCH 19/26] fix pycodestyle --- kgforge/core/archetypes/resolver.py | 2 +- kgforge/core/commons/sparql_query_builder.py | 6 +++--- kgforge/specializations/models/demo_model.py | 2 +- kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py | 2 +- .../models/rdf/rdf_model_service_from_store.py | 1 - 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/kgforge/core/archetypes/resolver.py b/kgforge/core/archetypes/resolver.py index c37b81d9..5dc6d2b7 100644 --- a/kgforge/core/archetypes/resolver.py +++ b/kgforge/core/archetypes/resolver.py @@ -229,7 +229,7 @@ def _build_resolving_query(text, query_template, deprecated_property, filters, s target_query_statements, target_query_filters = query_builder.build( None, None, resolving_context, configured_target_filters ) - + target_query_statements = ";\n ".join(target_query_statements) target_query_filters = "\n ".join(target_query_filters) first_filters = f"{first_filters} ; \n {target_query_statements}" diff --git a/kgforge/core/commons/sparql_query_builder.py b/kgforge/core/commons/sparql_query_builder.py index c41a4ac0..45b37865 100644 --- a/kgforge/core/commons/sparql_query_builder.py +++ b/kgforge/core/commons/sparql_query_builder.py @@ -40,6 +40,8 @@ class CategoryDataType(Enum): LITERAL = "literal" # FIXME: need to find a comprehensive way (different than list) to get all SPARQL reserved clauses + + SPARQL_CLAUSES = [ "where", "filter", @@ -116,7 +118,6 @@ class CategoryDataType(Enum): class SPARQLQueryBuilder(QueryBuilder): - @staticmethod def build( schema: Dict, @@ -287,8 +288,7 @@ def replace(match: Match) -> str: else: v = ( ctx.get(m4, ":" + m4 if context.has_vocab() else None) - if str(m4).lower() not in SPARQL_CLAUSES - and not str(m4).startswith("https") + if str(m4).lower() not in SPARQL_CLAUSES and not str(m4).startswith("https") else m4 ) if v is None: diff --git a/kgforge/specializations/models/demo_model.py b/kgforge/specializations/models/demo_model.py index ac03d518..cd651c61 100644 --- a/kgforge/specializations/models/demo_model.py +++ b/kgforge/specializations/models/demo_model.py @@ -75,7 +75,7 @@ def mapping(self, 
entity: str, source: str, type: Type[Mapping]) -> Mapping: filename = f"{entity}.hjson" filepath = Path(self.source, "mappings", source, type.__name__, filename) if filepath.is_file(): - return type.load(filepath) # TODO should be str + return type.load(filepath) # TODO should be str raise ValueError("unrecognized entity type or source") diff --git a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py index d1a64b10..bc6af033 100644 --- a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py +++ b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py @@ -85,4 +85,4 @@ def lookup_shape_from_node(self, node: URIRef) -> Optional[ShapeWrapper]: # if not hasattr(shape_wrapper, "traverse"): # shape_wrapper.traverse = types.MethodType(traverse, shape_wrapper) # return shape_wrapper - return None \ No newline at end of file + return None diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py index 36480e17..8a65b0c2 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py @@ -174,4 +174,3 @@ def _load_shape_and_reload_shapes_graph(self, resource_id: str): self._load_shape(resource_id) # reloads the shapes graph self._shapes_graph = ShapesGraphWrapper(self._graph) - From a8a79435837be4c2448d9855b4d0ec45084fb7b4 Mon Sep 17 00:00:00 2001 From: mouffok Date: Mon, 27 Nov 2023 15:56:43 +0100 Subject: [PATCH 20/26] rm formatting --- kgforge/core/commons/sparql_query_builder.py | 1 + kgforge/core/forge.py | 151 +++++++++---------- 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/kgforge/core/commons/sparql_query_builder.py b/kgforge/core/commons/sparql_query_builder.py index 562ec887..b02b9f17 100644 --- a/kgforge/core/commons/sparql_query_builder.py +++ b/kgforge/core/commons/sparql_query_builder.py @@ -191,6 +191,7 @@ def build_resource_from_response( if q_comp.name == "ConstructQuery": return SPARQLQueryBuilder.build_resource_from_construct_query(bindings, context) + # SELECT QUERY return SPARQLQueryBuilder.build_resource_from_select_query(bindings) @staticmethod diff --git a/kgforge/core/forge.py b/kgforge/core/forge.py index 95ea95bb..5e72909a 100644 --- a/kgforge/core/forge.py +++ b/kgforge/core/forge.py @@ -304,7 +304,7 @@ def types(self, pretty: bool = True) -> Optional[List[str]]: @catch def template( - self, type: str, only_required: bool = False, output: str = "hjson" + self, type: str, only_required: bool = False, output: str = "hjson" ) -> Optional[Dict]: """ Print the schema associated with a given resource type (must be listed in forge.types(...)) in hjson (output='hjson') or JSON (output='json') format. 
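As context for the signature being re-wrapped here, a typical call looks like the following (illustrative values only; "Person" is an assumed type from the configured model, not part of the patch):

    # Print the template for a type known to the configured model.
    forge.template("Person", only_required=True)   # hjson rendering
    forge.template("Person", output="json")        # JSON rendering
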
@@ -388,18 +388,18 @@ def resolvers(self, output: str = "print") -> Optional[Dict]: @catch def resolve( - self, - text: Union[str, List[str], Resource], - scope: Optional[str] = None, - resolver: Optional[str] = None, - target: Optional[str] = None, - type: Optional[str] = None, - strategy: Union[ResolvingStrategy, str] = ResolvingStrategy.BEST_MATCH, - resolving_context: Optional[Any] = None, - property_to_resolve: Optional[str] = None, - merge_inplace_as: Optional[str] = None, - limit: Optional[int] = 10, - threshold: Optional[float] = 0.5, + self, + text: Union[str, List[str], Resource], + scope: Optional[str] = None, + resolver: Optional[str] = None, + target: Optional[str] = None, + type: Optional[str] = None, + strategy: Union[ResolvingStrategy, str] = ResolvingStrategy.BEST_MATCH, + resolving_context: Optional[Any] = None, + property_to_resolve: Optional[str] = None, + merge_inplace_as: Optional[str] = None, + limit: Optional[int] = 10, + threshold: Optional[float] = 0.5, ) -> Optional[Union[Resource, List[Resource], Dict[str, List[Resource]]]]: """ Resolve text(s) or a resource into existing resources (from the configured Store) depending on the resolving strategy. @@ -484,8 +484,7 @@ def resolve( # Formatting User Interface. @catch - def format(self, what: str = None, *args, formatter: Union[Formatter, str] = Formatter.STR, - uri: str = None, **kwargs) -> str: + def format(self, what: str = None, *args, formatter: Union[Formatter, str] = Formatter.STR, uri: str = None, **kwargs) -> str: """ Select a configured formatter (see https://nexus-forge.readthedocs.io/en/latest/interaction.html#formatting) string (identified by 'what') and format it using provided '*args' :param what: a configured str format name. Required formatter:str = Formatter.STR @@ -543,7 +542,7 @@ def sources(self, pretty: bool = True) -> Optional[List[str]]: @catch def mappings( - self, source: str, pretty: bool = True + self, source: str, pretty: bool = True ) -> Optional[Dict[str, List[str]]]: """ Print(pretty=True) or return (pretty=False) configured mappings for a given source. @@ -557,7 +556,7 @@ def mappings( @catch def mapping( - self, entity: str, source: str, type: Callable = DictionaryMapping + self, entity: str, source: str, type: Callable = DictionaryMapping ) -> Mapping: """ Return a Mapping object of type 'type' for a resource type 'entity' and a source. @@ -571,11 +570,11 @@ def mapping( @catch def map( - self, - data: Any, - mapping: Union[Mapping, List[Mapping]], - mapper: Callable = DictionaryMapper, - na: Union[Any, List[Any]] = None, + self, + data: Any, + mapping: Union[Mapping, List[Mapping]], + mapper: Callable = DictionaryMapper, + na: Union[Any, List[Any]] = None, ) -> Union[Resource, List[Resource]]: """ Transform data to resources using transformations rules provided as mappings. The format of the data to transform @@ -593,10 +592,10 @@ def map( @catch def reshape( - self, - data: Union[Resource, List[Resource]], - keep: List[str], - versioned: bool = False, + self, + data: Union[Resource, List[Resource]], + keep: List[str], + versioned: bool = False, ) -> Union[Resource, List[Resource]]: """ Keep only a provided list of properties ('keep') from a resource of list of resources. 
@@ -614,11 +613,11 @@ def reshape( @catch def retrieve( - self, - id: str, - version: Optional[Union[int, str]] = None, - cross_bucket: bool = False, - **params + self, + id: str, + version: Optional[Union[int, str]] = None, + cross_bucket: bool = False, + **params ) -> Resource: """ Retrieve a resource by its identifier from the configured store and possibly at a given version. @@ -659,12 +658,12 @@ def search(self, *filters: Union[Dict, Filter], **params) -> List[Resource]: @catch def sparql( - self, - query: str, - debug: bool = False, - limit: Optional[int] = None, - offset: Optional[int] = None, - **params + self, + query: str, + debug: bool = False, + limit: Optional[int] = None, + offset: Optional[int] = None, + **params ) -> List[Resource]: """ Search for resources using a SPARQL query. See SPARQL docs: https://www.w3.org/TR/sparql11-query. @@ -680,11 +679,11 @@ def sparql( @catch def elastic( - self, - query: str, - debug: bool = False, - limit: Optional[int] = None, - offset: Optional[int] = None, + self, + query: str, + debug: bool = False, + limit: Optional[int] = None, + offset: Optional[int] = None, ) -> List[Resource]: """ Search for resources using an ElasticSearch DSL query. See ElasticSearch DSL docs: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html. @@ -699,13 +698,13 @@ def elastic( @catch def download( - self, - data: Union[Resource, List[Resource]], - follow: str = "distribution.contentUrl", - path: str = ".", - overwrite: bool = False, - cross_bucket: bool = False, - content_type: str = None + self, + data: Union[Resource, List[Resource]], + follow: str = "distribution.contentUrl", + path: str = ".", + overwrite: bool = False, + cross_bucket: bool = False, + content_type: str = None ) -> None: """ Download files attached to a resource or a list of resources. @@ -724,7 +723,7 @@ def download( # No @catch because the error handling is done by execution.run(). def register( - self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None + self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None ) -> None: """ Store a resource or list of resources in the configured Store. @@ -737,7 +736,7 @@ def register( # No @catch because the error handling is done by execution.run(). def update( - self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None + self, data: Union[Resource, List[Resource]], schema_id: Optional[str] = None ) -> None: """ Update a resource or a list of resources in the configured Store. @@ -797,10 +796,10 @@ def attach(self, path: str, content_type: str = None) -> LazyAction: @catch def as_json( - self, - data: Union[Resource, List[Resource]], - expanded: bool = False, - store_metadata: bool = False, + self, + data: Union[Resource, List[Resource]], + expanded: bool = False, + store_metadata: bool = False, ) -> Union[Dict, List[Dict]]: """ Convert a resource or a list of resources to JSON. @@ -821,11 +820,11 @@ def as_json( @catch def as_jsonld( - self, - data: Union[Resource, List[Resource]], - form: str = Form.COMPACTED.value, - store_metadata: bool = False, - **params + self, + data: Union[Resource, List[Resource]], + form: str = Form.COMPACTED.value, + store_metadata: bool = False, + **params ) -> Union[Dict, List[Dict]]: """ Convert a resource or a list of resources to JSON-LD. 
@@ -848,7 +847,7 @@ def as_jsonld( @catch def as_graph( - self, data: Union[Resource, List[Resource]], store_metadata: bool = False + self, data: Union[Resource, List[Resource]], store_metadata: bool = False ) -> Graph: """ Convert a resource or a list of resources to a RDFLib Graph object: https://rdflib.readthedocs.io/en/stable/intro_to_graphs.html. @@ -867,12 +866,12 @@ def as_graph( @catch def as_dataframe( - self, - data: Union[Resource, List[Resource]], - na: Union[Any, List[Any]] = [None], - nesting: str = ".", - expanded: bool = False, - store_metadata: bool = False, + self, + data: Union[Resource, List[Resource]], + na: Union[Any, List[Any]] = [None], + nesting: str = ".", + expanded: bool = False, + store_metadata: bool = False, ) -> DataFrame: """ Convert a resource or a list of resources to pandas.DataFrame. @@ -897,7 +896,7 @@ def as_dataframe( @catch def from_json( - self, data: Union[Dict, List[Dict]], na: Union[Any, List[Any]] = None + self, data: Union[Dict, List[Dict]], na: Union[Any, List[Any]] = None ) -> Union[Resource, List[Resource]]: """ Convert a JSON document or a list of JSON documents to a resource or a list of resources. @@ -910,7 +909,7 @@ def from_json( @catch def from_jsonld( - self, data: Union[Dict, List[Dict]] + self, data: Union[Dict, List[Dict]] ) -> Union[Resource, List[Resource]]: """ Convert a JSON-LD document or a list of JSON-LD documents to a resource or a list of resources. @@ -922,11 +921,11 @@ def from_jsonld( @catch def from_graph( - self, - data: Graph, - type: Union[str, List[str]] = None, - frame: Dict = None, - use_model_context=False, + self, + data: Graph, + type: Union[str, List[str]] = None, + frame: Dict = None, + use_model_context=False, ) -> Union[Resource, List[Resource]]: """ Convert a RDFLib.Graph object to a resource or a list of resources. What to convert from the RDFLib.Graph can be @@ -943,7 +942,7 @@ def from_graph( @catch def from_dataframe( - self, data: DataFrame, na: Union[Any, List[Any]] = np.nan, nesting: str = "." + self, data: DataFrame, na: Union[Any, List[Any]] = np.nan, nesting: str = "." ) -> Union[Resource, List[Resource]]: """ Convert a pandas.DataFrame to a resource or a list of resources. @@ -965,7 +964,7 @@ def get_model_context(self): def prepare_resolvers( - config: Dict, store_config: Dict + config: Dict, store_config: Dict ) -> Dict[str, Dict[str, Resolver]]: return { scope: dict(prepare_resolver(x, store_config) for x in configs) From d031570e153e54dabc67cde1f2f44743f5cf2831 Mon Sep 17 00:00:00 2001 From: mouffok Date: Mon, 27 Nov 2023 19:41:12 +0100 Subject: [PATCH 21/26] reorganise init of rdf model service --- .../models/rdf/rdf_model_service.py | 38 +++++++++---------- .../rdf/rdf_model_service_from_directory.py | 21 +++------- .../rdf/rdf_model_service_from_store.py | 8 +--- 3 files changed, 24 insertions(+), 43 deletions(-) diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py index 2faf15ac..9756e0cc 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -12,16 +12,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with Blue Brain Nexus Forge. If not, see . 
import json -import types -from io import StringIO +from abc import abstractmethod, ABC from typing import List, Dict, Tuple, Set, Optional -from abc import abstractmethod -from typing import List, Dict, Tuple, Set, Optional -from pyshacl.shape import Shape -from pyshacl.shapes_graph import ShapesGraph -from rdflib import Graph, URIRef, RDF, XSD -from rdflib.plugins.sparql.results.jsonresults import JSONResultSerializer +from rdflib import Graph, URIRef, RDF, XSD from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder from kgforge.core.resource import Resource from kgforge.core.commons.context import Context @@ -30,23 +24,21 @@ from kgforge.specializations.models.rdf.node_properties import NodeProperties from kgforge.specializations.models.rdf.utils import as_term +from kgforge.specializations.models.rdf.pyshacl_shape_wrapper import ShapesGraphWrapper -class RdfModelService: +class RdfModelService(ABC): - def __init__( - self, graph: Graph, - shape_to_source: Dict[URIRef, str], - class_to_shape: Dict[str, URIRef], - context_iri: Optional[str] = None, - ) -> None: + def __init__(self, context_iri: Optional[str] = None): if context_iri is None: raise ConfigurationError("RdfModel requires a context") - self._graph = graph + + self._graph, self.shape_to_source, self.class_to_shape = self._build_shapes_map() + self._shapes_graph = ShapesGraphWrapper(self._graph) + self._context_cache = dict() - self.shape_to_source = shape_to_source - self.class_to_shape = class_to_shape + self.context = Context(self.resolve_context(context_iri), context_iri) self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes() @@ -92,17 +84,21 @@ def validate(self, resource: Resource, type_: str): @abstractmethod def _validate(self, iri: str, data_graph: Graph) -> Tuple[bool, Graph, str]: - raise NotImplementedError() + ... @abstractmethod def resolve_context(self, iri: str) -> Dict: """For a given IRI return its resolved context recursively""" - raise NotImplementedError() + ... @abstractmethod def generate_context(self) -> Dict: """Generates a JSON-LD context with the classes and terms present in the SHACL graph.""" - raise NotImplementedError() + ... + + @abstractmethod + def _build_shapes_map(self) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]]: + ... def _build_types_to_shapes(self) -> Dict[str, URIRef]: """Iterates the classes_to_shapes dictionary to create a term to shape dictionary filtering diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py index a8452885..0f45a6ad 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py @@ -13,7 +13,7 @@ # along with Blue Brain Nexus Forge. If not, see . 
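The reorganisation above is a textbook template method: the abstract _build_shapes_map hook now runs inside the base __init__, so each service only supplies its own way of producing the graph and the two maps. A compressed sketch of the pattern and its one ordering caveat (simplified, illustrative names, not the patch's exact classes):

    from abc import ABC, abstractmethod
    from typing import Dict, Tuple

    class Base(ABC):
        def __init__(self):
            # The hook fires during construction, so any state it needs
            # must be set by the subclass *before* super().__init__().
            self.graph, self.shape_to_source, self.class_to_shape = self._build_shapes_map()

        @abstractmethod
        def _build_shapes_map(self) -> Tuple[object, Dict, Dict]:
            ...

    class FromDirectory(Base):
        def __init__(self, dir_path: str):
            self.dir_path = dir_path  # needed by the hook below
            super().__init__()

        def _build_shapes_map(self):
            return f"graph({self.dir_path})", {}, {}

    assert FromDirectory("/shapes").graph == "graph(/shapes)"

This is why the directory service below assigns self.dir_path before delegating to the base constructor.
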
import os from pathlib import Path -from typing import Dict, Tuple, Optional +from typing import Dict, Tuple from pyshacl import validate from rdflib import Graph, URIRef @@ -22,20 +22,13 @@ from kgforge.core.commons.context import Context from kgforge.specializations.models.rdf.node_properties import NodeProperties from kgforge.specializations.models.rdf.rdf_model_service import RdfModelService -from kgforge.specializations.models.rdf.pyshacl_shape_wrapper import ShapesGraphWrapper class RdfModelServiceFromDirectory(RdfModelService): def __init__(self, dir_path: Path, context_iri: str) -> None: - - graph, shape_to_source, class_to_shape = self._build_shapes_map(dir_path=dir_path) - self._shapes_graph = ShapesGraphWrapper(graph) - - super().__init__( - graph=graph, context_iri=context_iri, shape_to_source=shape_to_source, - class_to_shape=class_to_shape - ) + self.dir_path = dir_path + super().__init__(context_iri=context_iri) def materialize(self, iri: URIRef) -> NodeProperties: sh = self._shapes_graph.lookup_shape_from_node(iri) @@ -55,7 +48,7 @@ def resolve_context(self, iri: str) -> Dict: try: context = Context(iri) except FileNotFoundError as e: - raise ValueError(e) + raise ValueError(e) from e self._context_cache.update({iri: context.document}) return context.document @@ -63,9 +56,7 @@ def resolve_context(self, iri: str) -> Dict: def generate_context(self) -> Dict: return self._generate_context() - def _build_shapes_map( - self, dir_path: Path - ) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]]: + def _build_shapes_map(self) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]]: query = """ PREFIX rdfs: @@ -86,7 +77,7 @@ def _build_shapes_map( graph = Graph() extensions = [".ttl", ".n3", ".json", ".rdf"] - for f in dir_path.rglob(os.path.join("*.*")): + for f in self.dir_path.rglob(os.path.join("*.*")): graph_i = Graph() if f.suffix in extensions: file_format = guess_format(f.name) diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py index 0cee79b8..c8c611ce 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py +++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py @@ -46,13 +46,7 @@ def __init__(self, default_store: Store, context_iri: Optional[str] = None, self._imported = [] - graph, shape_to_resource, class_to_shape = self._build_shapes_map() - self._shapes_graph = ShapesGraphWrapper(graph) - - super().__init__( - graph=graph, context_iri=context_iri, shape_to_source=shape_to_resource, - class_to_shape=class_to_shape - ) + super().__init__(context_iri=context_iri) def materialize(self, iri: URIRef) -> NodeProperties: shape: ShapeWrapper = self._load_and_get_type_shape(iri) From 3a96e452398dcfee79f44078fb0c618716cce506 Mon Sep 17 00:00:00 2001 From: mouffok Date: Mon, 27 Nov 2023 19:42:41 +0100 Subject: [PATCH 22/26] rm empty directory service file --- kgforge/specializations/models/rdf/directory_service.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 kgforge/specializations/models/rdf/directory_service.py diff --git a/kgforge/specializations/models/rdf/directory_service.py b/kgforge/specializations/models/rdf/directory_service.py deleted file mode 100644 index e69de29b..00000000 From fe4a8bdedc20295349c490b6940e3253a0fd3ad8 Mon Sep 17 00:00:00 2001 From: mouffok Date: Mon, 27 Nov 2023 19:49:12 +0100 Subject: [PATCH 23/26] rm not_supported and NotImplemented when abstract --- 
kgforge/specializations/models/rdf/collectors.py | 4 ++-- kgforge/specializations/models/rdf/rdf_model_service.py | 2 +- kgforge/specializations/models/rdf_model.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kgforge/specializations/models/rdf/collectors.py b/kgforge/specializations/models/rdf/collectors.py index efedef71..e76f0bec 100644 --- a/kgforge/specializations/models/rdf/collectors.py +++ b/kgforge/specializations/models/rdf/collectors.py @@ -47,7 +47,7 @@ def __init__(self, shape: Shape) -> None: @abstractmethod def constraint(cls) -> URIRef: """Returns the Shacl constraint URI of the collector""" - raise NotImplementedError() + ... @abstractmethod def collect( @@ -64,7 +64,7 @@ def collect( properties, attributes: Tuple(list,dict), the collected properties and attributes respectively """ - raise NotImplementedError() + ... def get_shape_target_classes(self) -> List: """Returns a list of target and implicit classes if any of the shape diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py index 9756e0cc..f923eb75 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -60,7 +60,7 @@ def materialize(self, iri: URIRef) -> NodeProperties: Returns: A NodeProperty object with the collected properties """ - raise NotImplementedError() + ... def validate(self, resource: Resource, type_: str): diff --git a/kgforge/specializations/models/rdf_model.py b/kgforge/specializations/models/rdf_model.py index 53f8f49b..39114497 100644 --- a/kgforge/specializations/models/rdf_model.py +++ b/kgforge/specializations/models/rdf_model.py @@ -149,7 +149,7 @@ def _validate_one(self, resource: Resource, type_: str) -> None: # Utils. @staticmethod - def _service_from_directory(dirpath: Path, context_iri: str, **dir_config) -> RdfModelService: + def _service_from_directory(dirpath: Path, context_iri: str) -> RdfModelService: return RdfModelServiceFromDirectory(dirpath, context_iri) @staticmethod From f7638a71408420f82d0e9b1b391d163724e37c40 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 28 Nov 2023 10:08:10 +0100 Subject: [PATCH 24/26] smaller test ontology for model querying --- tests/data/shacl-model/commons/ontology-1.ttl | 301 +----------------- .../specializations/models/test_rdf_model.py | 11 +- 2 files changed, 13 insertions(+), 299 deletions(-) diff --git a/tests/data/shacl-model/commons/ontology-1.ttl b/tests/data/shacl-model/commons/ontology-1.ttl index a416005a..56849df8 100644 --- a/tests/data/shacl-model/commons/ontology-1.ttl +++ b/tests/data/shacl-model/commons/ontology-1.ttl @@ -1,47 +1,25 @@ @prefix bmo: . -@prefix ns1: . +@prefix vann: . @prefix nsg: . @prefix owl: . -@prefix rdf: . -@prefix xsd: . +@prefix parms: . @prefix prov: . @prefix rdfs: . -@prefix skos: . -@prefix parms: . @prefix schema: . - - - rdf:type owl:Ontology ; - "parms"^^xsd:string ; - schema:title "Brain Modeling Parameter Ontology"^^xsd:string ; - rdfs:label "Brain Modeling Parameter Ontology"^^xsd:string ; - owl:versionInfo "R4"^^xsd:string . - -ns1:preferredNamespacePrefix a owl:AnnotationProperty . - -schema:Dataset a owl:Class . +@prefix skos: . +@prefix xsd: . schema:name a owl:AnnotationProperty ; rdfs:label "name"@en ; skos:altLabel "name"@en . -schema:sameAs a owl:ObjectProperty ; - rdfs:label "sameAs"@en ; - rdfs:subPropertyOf owl:topObjectProperty . 
- schema:unitCode a owl:AnnotationProperty ; rdfs:label "unitCode"@en ; skos:altLabel "units"@en . -rdfs:isDefinedBy rdfs:label "isDefinedBy"@en . - -rdfs:label rdfs:label "label"@en . - owl:equivalentClass a owl:AnnotationProperty ; rdfs:label "equivalentClass"@en . -owl:topDataProperty rdfs:label "Attributes"@en . - skos:altLabel a owl:AnnotationProperty ; skos:altLabel "altLabel"@en . @@ -94,282 +72,13 @@ bmo:EModelParameterConstraint a owl:Class ; rdfs:label "EModel Parameter Constraint"@en ; rdfs:subClassOf bmo:ModelBrainParameterConstraint . -bmo:ETypeRatio a owl:Class ; - rdfs:label "E-TypeRatio"@en ; - rdfs:seeAlso ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "The E-type ratio is the ratio of an e-type for a given m-type, brain region and layer. For a given m-type, all e-type ratios add up to 1."^^xsd:string ; - skos:prefLabel "E-Type Ratio"^^xsd:string . - -bmo:ElectricalStimulus a owl:Class ; - rdfs:label "Electrical Stimulus"@en ; - rdfs:isDefinedBy ; - rdfs:seeAlso ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:editorialNote "How is this related to 'Protocol' in BluePyEModel"^^xsd:string . - bmo:METypeRatio a owl:Class ; rdfs:label "ME-Type Ratio"@en ; rdfs:subClassOf bmo:ModelBrainParameter ; skos:prefLabel "ME-Type Ratio"^^xsd:string . -bmo:ModelBiochemicalReactionParameter a owl:Class ; - rdfs:label "Model Biochemical Reaction Parameter"@en ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Parameter used to represent a specific biological or theoretical value that is used in the modeling of biochemical reactions."^^xsd:string . - a owl:Ontology ; rdfs:label "Brain Modeling Parameter Ontology"^^xsd:string ; - ns1:preferredNamespacePrefix "parms"^^xsd:string ; + vann:preferredNamespacePrefix "parms"^^xsd:string ; schema:title "Brain Modeling Parameter Ontology"^^xsd:string ; owl:versionInfo "R4"^^xsd:string . - -parms:Dep a owl:Class ; - rdfs:label "Dep"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Relaxation time constant for depression, as measured by fitting the TM model to electrophysiological traces."@en . - -parms:Fac a owl:Class ; - rdfs:label "Fac"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Relaxation time constant for facilitation, as measured by fitting the TM model to electrophysiological traces."@en . - -parms:GABAB_ratio a owl:Class ; - rdfs:label "GABAB_ratio"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Synaptic conductance of GABAB proportional to the value for GABAA."@en . - -parms:NMDA_ratio a owl:Class ; - rdfs:label "NMDA_ratio"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Synaptic conductance of NMDA proportional to the value for AMPA."@en . - -parms:Nrrp a owl:Class ; - rdfs:label "Nrrp"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Number of tital release sites for given contact."@en . - -parms:Use a owl:Class ; - rdfs:label "Use"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Utilization of synaptic efficiency as measured by fitting the TM model to electrophysiological traces."@en . - -parms:conductance a owl:Class ; - rdfs:label "conductance"@en ; - schema:unitCode "uS" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Synaptic conductance."@en . 
- -parms:TimeConstantInMsForRecoveryFromDepression a owl:Class ; - rdfs:label "d"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "time constant (in ms) for recovery from depression, following a Gamma distribution"@en . - -parms:DecayTimeConstant a owl:Class ; - rdfs:label "dtc"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "decay time constant (in ms), following a truncated Normal distribution"@en . - -parms:SynapticReversalPotential a owl:Class ; - rdfs:label "e"@en ; - schema:unitCode "mV" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Synaptic reversal potential"@en . - -parms:e_GABAA a owl:Class ; - rdfs:label "e_GABAA"@en ; - schema:unitCode "mV" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "GABAA reversal potential"@en . - -parms:e_GABAB a owl:Class ; - rdfs:label "e_GABAB"@en ; - schema:unitCode "mV" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "GABAB reversal potential"@en . - -parms:TimeConstantInMsForRecoveryFromFacilitation a owl:Class ; - rdfs:label "f"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "time constant (in ms) for recovery from facilitation, following a Gamma distribution"@en . - -parms:gmax a owl:Class ; - rdfs:label "gmax"@en ; - schema:unitCode "uS" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "weight conversion factor (from nS to uS)"@en . - -parms:gsyn a owl:Class ; - rdfs:label "gsyn"@en ; - schema:unitCode "nS" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "the peak conductance (in nS) for a single synaptic contact, following a Gamma distribution"@en . - -parms:gsynSRSF a owl:Class ; - rdfs:label "gsynSRSF"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "the scale factor for the conductance; SRSF: 'synaptic receptor scaling factor'"@en . - -parms:InitialConcentrationOfMg a owl:Class ; - rdfs:label "mg"@en ; - schema:unitCode "mM" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Initial concentration of Mg2+"@en . - -parms:nrrp a owl:Class ; - rdfs:label "nrrp"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "number of vesicles in readily releasable pool, following a Poisson distribution"@en . - -parms:scale_mg a owl:Class ; - rdfs:label "scale_mg"@en ; - schema:unitCode "mM" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Scale parameter for determining mg concentration"@en . - -parms:slope_mg a owl:Class ; - rdfs:label "slope_mg"@en ; - schema:unitCode "1/mV" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Slope parameter for determining mg concentration"@en . - -parms:tau_d_AMPA a owl:Class ; - rdfs:label "tau_d_AMPA"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Decay time for AMPA currents."@en . - -parms:tau_d_GABAA a owl:Class ; - rdfs:label "tau_d_GABAA"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "GABAA-R synaptic conductance decay time constant"@en . - -parms:tau_d_GABAB a owl:Class ; - rdfs:label "tau_d_GABAB"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "GABAB-R synaptic conductance decay time constant"@en . 
- -parms:tau_d_NMDA a owl:Class ; - rdfs:label "tau_d_NMDA"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Decay time for NMDA currents."@en . - -parms:tau_r_AMPA a owl:Class ; - rdfs:label "tau_r_AMPA"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Rise time for AMPA currents."@en . - -parms:tau_r_GABAA a owl:Class ; - rdfs:label "tau_r_GABAA"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "GABAA-R synaptic conductance rise time constant"@en . - -parms:tau_r_GABAB a owl:Class ; - rdfs:label "tau_r_GABAB"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "GABAB-R synaptic conductance rise time constant"@en . - -parms:tau_r_NMDA a owl:Class ; - rdfs:label "tau_r_NMDA"@en ; - schema:unitCode "ms" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Rise time for NMDA currents."@en . - -parms:u a owl:Class ; - rdfs:label "u"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "utilization of synaptic efficacy, following a truncated Normal distribution"@en . - -parms:u0 a owl:Class ; - rdfs:label "u0"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Initial value of u, which is the running value of Use"@en . - -parms:uHillCoefficient a owl:Class ; - rdfs:label "uHillCoefficient"@en ; - schema:unitCode "Unitless" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "A coefficient describing the scaling of u to be done by the simulator"@en . - -parms:SynapticConductanceWeight a owl:Class ; - rdfs:label "weight"@en ; - schema:unitCode "uS" ; - rdfs:subClassOf bmo:ModelBrainParameter ; - skos:definition "Synaptic conductance."@en . - -nsg:hasBaselDendriteFeature rdfs:label "hasBaselDendriteFeature"@en . - -nsg:hasConstraint a owl:ObjectProperty ; - rdfs:label "hasConstraint"@en . - -nsg:hasLayerLocation rdfs:label "hasLayerLocation"@en . - -nsg:hasObliqDendrite rdfs:label "hasObliqDendrite"@en . - -nsg:hasTuftDendriteFeature rdfs:label "hasTuftDendriteFeature"@en . - -schema:QuantitativeValue a owl:Class ; - rdfs:label "Quantitative Value"@en ; - rdfs:subClassOf prov:Entity . - -schema:value a owl:ObjectProperty ; - rdfs:label "value"@en ; - rdfs:subPropertyOf owl:topObjectProperty ; - skos:altLabel "value"@en . - -bmo:EModelParameter a owl:Class ; - rdfs:label "EModel Parameter"^^xsd:string ; - rdfs:subClassOf bmo:ModelBrainParameter . - -bmo:Ion a owl:Class . - -bmo:ModelBrainParameterConstraint a owl:Class ; - rdfs:label "Model Brain Parameter Constraint"@en ; - rdfs:subClassOf [ a owl:Restriction ; - owl:onProperty bmo:constraints ; - owl:someValuesFrom bmo:ModelBrainParameter ], - prov:Entity . - -bmo:ModelConnectivityParameter a owl:Class ; - rdfs:label "Model Connectivity Parameter"@en ; - rdfs:subClassOf bmo:ModelBrainParameter . - -bmo:NeuronPart a owl:Class . - -bmo:NeuronPartFeature a owl:Class . - -bmo:constraints a owl:ObjectProperty ; - rdfs:label "constraints"@en ; - skos:altLabel "constraint"@en . - -prov:Entity a owl:Class ; - rdfs:label "Entity"@en ; - skos:prefLabel "Entity"^^xsd:string . 
- -bmo:ModelBrainParameter a owl:Class ; - rdfs:label "Model Brain Parameter"@en ; - rdfs:subClassOf [ a owl:Restriction ; - owl:onProperty schema:value ; - owl:someValuesFrom schema:QuantitativeValue ], - prov:Entity ; - skos:definition "A brain parameter is a parameter used to represent a specific biological or theoretical value that is used in the construction of a brain model."^^xsd:string . - diff --git a/tests/specializations/models/test_rdf_model.py b/tests/specializations/models/test_rdf_model.py index 4d6b6846..e1ef7ce0 100644 --- a/tests/specializations/models/test_rdf_model.py +++ b/tests/specializations/models/test_rdf_model.py @@ -152,6 +152,11 @@ def test_validate_many(self, rdf_model: RdfModel, valid_activity_resource, def test_query_model(self, rdf_model: RdfModel): - q = "SELECT ?id ?label WHERE { ?id a owl:Class ; rdfs:label ?label }" - res = rdf_model.sparql(q, debug=True) - # TODO assertion \ No newline at end of file + q_template = "SELECT ?id WHERE { ?id a %s }" + res1 = rdf_model.sparql(q_template % "owl:Ontology", debug=True) + res2 = rdf_model.sparql(q_template % "owl:AnnotationProperty", debug=True) + res3 = rdf_model.sparql(q_template % "owl:Class", debug=True) + + assert len(res1) == 1 + assert len(res2) == 5 + assert len(res3) == 7 From 9d3cfeeaf4c3fd8a5ec4b2f19beb188ff2c31da6 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 28 Nov 2023 10:10:54 +0100 Subject: [PATCH 25/26] fix pycodestyle --- kgforge/specializations/models/rdf/rdf_model_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py index f923eb75..d3d1c023 100644 --- a/kgforge/specializations/models/rdf/rdf_model_service.py +++ b/kgforge/specializations/models/rdf/rdf_model_service.py @@ -34,7 +34,7 @@ def __init__(self, context_iri: Optional[str] = None): if context_iri is None: raise ConfigurationError("RdfModel requires a context") - self._graph, self.shape_to_source, self.class_to_shape = self._build_shapes_map() + self._graph, self.shape_to_source, self.class_to_shape = self._build_shapes_map() self._shapes_graph = ShapesGraphWrapper(self._graph) self._context_cache = dict() From b05de6f594a45e422c57b6056deded0a4390a137 Mon Sep 17 00:00:00 2001 From: mouffok Date: Tue, 28 Nov 2023 10:21:57 +0100 Subject: [PATCH 26/26] linting and test of querying model --- kgforge/core/archetypes/dataset_store.py | 3 +-- kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py | 4 ++-- kgforge/specializations/models/rdf/rdf_model_service.py | 2 +- .../models/rdf/rdf_model_service_from_directory.py | 4 ++-- .../models/rdf/rdf_model_service_from_store.py | 4 ++-- kgforge/specializations/stores/bluebrain_nexus.py | 3 +-- kgforge/specializations/stores/demo_store.py | 2 +- tests/specializations/models/test_rdf_model.py | 6 ++---- 8 files changed, 12 insertions(+), 16 deletions(-) diff --git a/kgforge/core/archetypes/dataset_store.py b/kgforge/core/archetypes/dataset_store.py index 2dac7fcf..a7a3836b 100644 --- a/kgforge/core/archetypes/dataset_store.py +++ b/kgforge/core/archetypes/dataset_store.py @@ -79,8 +79,7 @@ def types(self) -> Optional[List[str]]: return list(self.model.mappings(self.model.source, False).keys()) def search( - self, filters: List[Union[Dict, Filter]], resolvers: Optional[List[Resolver]] = None, - **params + self, resolvers: Optional[List[Resolver]], filters: List[Union[Dict, Filter]], **params ) -> Optional[List[Resource]]: """Search within the database. 
         :param map: bool

diff --git a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py
index bc6af033..9a2037fa 100644
--- a/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py
+++ b/kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py
@@ -38,8 +38,8 @@ def traverse(self, predecessors: Set[URIRef]) -> Tuple[List, Dict]:
         """
         parameters = self.parameters()
-        properties = list()
-        attributes = dict()
+        properties = []
+        attributes = {}
         done_collectors = set()
         for param in iter(parameters):
             if param in ALL_COLLECTORS_MAP:

diff --git a/kgforge/specializations/models/rdf/rdf_model_service.py b/kgforge/specializations/models/rdf/rdf_model_service.py
index d3d1c023..e0a49f9f 100644
--- a/kgforge/specializations/models/rdf/rdf_model_service.py
+++ b/kgforge/specializations/models/rdf/rdf_model_service.py
@@ -37,7 +37,7 @@ def __init__(self, context_iri: Optional[str] = None):
         self._graph, self.shape_to_source, self.class_to_shape = self._build_shapes_map()
         self._shapes_graph = ShapesGraphWrapper(self._graph)
-        self._context_cache = dict()
+        self._context_cache = {}
 
         self.context = Context(self.resolve_context(context_iri), context_iri)
         self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes()

diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py
index 0f45a6ad..28fe5e1a 100644
--- a/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py
+++ b/kgforge/specializations/models/rdf/rdf_model_service_from_directory.py
@@ -72,8 +72,8 @@ def _build_shapes_map(self) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]
             }
             ORDER BY ?type
         """
-        class_to_shape: Dict[str, URIRef] = dict()
-        shape_to_file: Dict[URIRef, str] = dict()
+        class_to_shape: Dict[str, URIRef] = {}
+        shape_to_file: Dict[URIRef, str] = {}
 
         graph = Graph()
         extensions = [".ttl", ".n3", ".json", ".rdf"]

diff --git a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py
index c8c611ce..e90801f6 100644
--- a/kgforge/specializations/models/rdf/rdf_model_service_from_store.py
+++ b/kgforge/specializations/models/rdf/rdf_model_service_from_store.py
@@ -93,8 +93,8 @@ def _build_shapes_map(self) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]
         limit = 100
         offset = 0
         count = limit
-        class_to_shape: Dict[str, URIRef] = dict()
-        shape_to_resource: Dict[URIRef, str] = dict()
+        class_to_shape: Dict[str, URIRef] = {}
+        shape_to_resource: Dict[URIRef, str] = {}
 
         while count == limit:
             resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset)

diff --git a/kgforge/specializations/stores/bluebrain_nexus.py b/kgforge/specializations/stores/bluebrain_nexus.py
index 5089654b..bb291f27 100644
--- a/kgforge/specializations/stores/bluebrain_nexus.py
+++ b/kgforge/specializations/stores/bluebrain_nexus.py
@@ -687,8 +687,7 @@ def _deprecate_one(self, resource: Resource) -> None:
 
     # Querying.
     def search(
-        self, filters: List[Union[Dict, Filter]], resolvers: Optional[List[Resolver]],
-        **params
+        self, resolvers: Optional[List[Resolver]], filters: List[Union[Dict, Filter]], **params
     ) -> List[Resource]:
 
         if self.model_context() is None:

diff --git a/kgforge/specializations/stores/demo_store.py b/kgforge/specializations/stores/demo_store.py
index 281b6925..a3ed85c5 100644
--- a/kgforge/specializations/stores/demo_store.py
+++ b/kgforge/specializations/stores/demo_store.py
@@ -137,7 +137,7 @@ def _deprecate_one(self, resource: Resource) -> None:
     # Querying.
 
     def search(
-        self, filters: List[Union[Dict, Filter]], resolvers: Optional[List[Resolver]], **params
+        self, resolvers: Optional[List[Resolver]], filters: List[Union[Dict, Filter]], **params
     ) -> List[Resource]:
 
         cross_bucket = params.get("cross_bucket", None)

diff --git a/tests/specializations/models/test_rdf_model.py b/tests/specializations/models/test_rdf_model.py
index e1ef7ce0..d71ed921 100644
--- a/tests/specializations/models/test_rdf_model.py
+++ b/tests/specializations/models/test_rdf_model.py
@@ -13,8 +13,6 @@
 # along with Blue Brain Nexus Forge. If not, see <https://choosealicense.com/licenses/lgpl-3.0/>.
 import json
 import pytest
-from rdflib import URIRef
-from rdflib.plugins.sparql import prepareQuery
 
 from kgforge.core import Resource
 from kgforge.core.commons.exceptions import ValidationError
@@ -158,5 +156,5 @@ def test_query_model(self, rdf_model: RdfModel):
         res3 = rdf_model.sparql(q_template % "owl:Class", debug=True)
 
         assert len(res1) == 1
-        assert len(res2) == 5
-        assert len(res3) == 7
+        assert len(res2) == 9
+        assert len(res3) == 8
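
The store diffs above, together with the dataset_store.py change earlier in this patch, all make the same signature change: resolvers now precedes filters in every search() overload. A minimal call-site sketch; the find_datasets helper and the pre-configured store instance are illustrative assumptions, not part of the patch:

    from typing import List, Optional

    from kgforge.core import Resource

    def find_datasets(store, limit: int = 10) -> Optional[List[Resource]]:
        # Keyword arguments keep this call site valid on both sides of the
        # resolvers/filters parameter swap.
        return store.search(
            resolvers=None,                  # no resolver-backed filtering here
            filters=[{"type": "Dataset"}],   # plain dicts are allowed by the type hint
            limit=limit,                     # forwarded via **params; support is store-dependent
        )

Passing both arguments by keyword means existing callers cannot silently exchange resolvers and filters when upgrading across this reorder, which positional calls would do without raising an error.
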