diff --git a/backend/maelstro/metadata/__init__.py b/backend/maelstro/metadata/__init__.py
new file mode 100644
index 0000000..7814cb3
--- /dev/null
+++ b/backend/maelstro/metadata/__init__.py
@@ -0,0 +1,3 @@
+from .meta import MetaZip as Meta
+
+__all__ = ["Meta"]
diff --git a/backend/maelstro/metadata/meta.py b/backend/maelstro/metadata/meta.py
new file mode 100644
index 0000000..fd1336e
--- /dev/null
+++ b/backend/maelstro/metadata/meta.py
@@ -0,0 +1,149 @@
+"""Extract OGC layer descriptions from ISO metadata records.
+
+``MetaXml`` wraps a bare XML document; ``MetaZip`` reads the document and
+its properties from a zip archive.
+"""
+
+from io import BytesIO, StringIO
+from zipfile import ZipFile
+from csv import DictReader
+from lxml import etree
+
+
+# Namespace prefix of the CI_OnlineResource element, per metadata schema.
+NS_PREFIXES = {
+    "iso19139": "gmd",
+    "iso19115-3.2018": "cit",
+}
+
+# Prefix -> namespace URI maps passed to the lxml path lookups, per schema.
+NS_REGISTRIES = {
+    "iso19139": {
+        "gmd": "http://www.isotc211.org/2005/gmd",
+    },
+    "iso19115-3.2018": {
+        "cit": "http://standards.iso.org/iso/19115/-3/cit/2.0",
+    },
+}
+
+# Lower-case protocol prefixes that mark a link as an OGC service layer.
+OGC_PROTOCOL_PREFIXES = ("ogc:wms", "ogc:wfs", "ogc:wcs")
+
+
+class MetaXml:
+    """Metadata record backed by a single XML document."""
+
+    def __init__(self, xml_bytes: bytes, schema: str = "iso19139"):
+        """Keep the raw document and pick the namespaces for ``schema``.
+
+        An unknown schema is accepted here; it is only rejected when the
+        document is queried, so other attributes stay usable.
+        """
+        self.xml_bytes = xml_bytes
+        self.schema = schema
+        self.namespaces = NS_REGISTRIES.get(schema)
+        self.prefix = NS_PREFIXES.get(schema)
+
+    def get_ogc_geoserver_layers(self) -> list[dict[str, str | None]]:
+        """Return the link properties of every OGC WMS/WFS/WCS resource.
+
+        Raises:
+            ValueError: if the schema has no registered namespaces.
+        """
+        if self.prefix is None or self.namespaces is None:
+            # Fail early: lxml would otherwise raise an opaque SyntaxError
+            # about the unknown prefix "None".
+            raise ValueError(f"Unsupported metadata schema: {self.schema!r}")
+
+        xml_root = etree.parse(BytesIO(self.xml_bytes))
+
+        return [
+            self.layerproperties_from_link(link_node)
+            for link_node in xml_root.findall(
+                f".//{self.prefix}:CI_OnlineResource", self.namespaces
+            )
+            if self.is_ogc_layer(link_node)
+        ]
+
+    def is_ogc_layer(self, link_node: etree._Element) -> bool:
+        """Tell whether the link protocol starts with OGC:WMS, WFS or WCS."""
+        link_protocol = self.protocol_from_link(link_node)
+        if link_protocol is None:
+            return False
+        return link_protocol.lower().startswith(OGC_PROTOCOL_PREFIXES)
+
+    def layerproperties_from_link(
+        self, link_node: etree._Element
+    ) -> dict[str, str | None]:
+        """Collect URL, name, description and protocol of one link node."""
+        return {
+            "server_url": self.url_from_link(link_node),
+            "name": self.name_from_link(link_node),
+            "description": self.desc_from_link(link_node),
+            "protocol": self.protocol_from_link(link_node),
+        }
+
+    def url_from_link(self, link_node: etree._Element) -> str | None:
+        """Return the text of the link's ``linkage`` property."""
+        return self.property_from_link(link_node, f"{self.prefix}:linkage")
+
+    def name_from_link(self, link_node: etree._Element) -> str | None:
+        """Return the text of the link's ``name`` property."""
+        return self.property_from_link(link_node, f"{self.prefix}:name")
+
+    def desc_from_link(self, link_node: etree._Element) -> str | None:
+        """Return the text of the link's ``description`` property."""
+        return self.property_from_link(link_node, f"{self.prefix}:description")
+
+    def protocol_from_link(self, link_node: etree._Element) -> str | None:
+        """Return the text of the link's ``protocol`` property."""
+        return self.property_from_link(link_node, f"{self.prefix}:protocol")
+
+    def property_from_link(self, link_node: etree._Element, tag: str) -> str | None:
+        """Return the text of the first child element of ``tag``.
+
+        Returns None when ``tag`` is absent, has no child element, or the
+        child element is empty.
+        """
+        property_node = link_node.find(tag, self.namespaces)
+        if property_node is None:
+            return None
+        text_node = property_node.find("./*")
+        if text_node is None:
+            return None
+        # ``text`` is None for an empty element; passing that to str() would
+        # yield the literal string "None".
+        text = text_node.text
+        return None if text is None else str(text)
+
+
+class MetaZip(MetaXml):
+    """Metadata record read from a zip archive.
+
+    The archive provides ``index.csv`` (``;``-separated, first data row
+    used) and the document at ``<uuid>/metadata/metadata.xml``.
+    """
+
+    def __init__(self, zipfile: bytes):
+        """Load the record properties and XML document from ``zipfile``.
+
+        Raises:
+            ValueError: if ``index.csv`` has no data row.
+            KeyError: if an expected archive member or the ``uuid``
+                column is missing.
+        """
+        self.zipfile = zipfile
+        with ZipFile(BytesIO(zipfile)) as zf:
+            zip_properties = zf.read("index.csv").decode()
+            dr = DictReader(StringIO(zip_properties), delimiter=";")
+            properties = next(dr, None)
+            if properties is None:
+                # A bare next() would leak StopIteration out of __init__.
+                raise ValueError("index.csv contains no metadata record")
+            self.properties = properties
+
+            xml_bytes = zf.read(f"{self.properties['uuid']}/metadata/metadata.xml")
+
+        schema = self.properties.get("schema", "iso19139")
+
+        super().__init__(xml_bytes, schema)
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index fa86fa0..816288e 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -10,6 +10,9 @@ requires-python = ">=3.12"
 dependencies = [
     "fastapi[standard] (>=0.115.6,<0.116.0)",
     "requests (>=2.32.3,<3.0.0)",
+    "lxml (>=5.3.0,<6.0.0)",
+    "types-pyyaml (>=6.0.12.20241230,<7.0.0.0)",
+    "lxml-stubs (>=0.5.1,<0.6.0)",
 ]
 
 [tool.poetry.group.check]
diff --git a/backend/tests/demo_iso19139.zip b/backend/tests/demo_iso19139.zip
new file mode 100644
index 0000000..952328b
Binary files /dev/null and b/backend/tests/demo_iso19139.zip differ
diff --git a/backend/tests/lille_iso19115-3.zip b/backend/tests/lille_iso19115-3.zip
new file mode 100644
index 0000000..f3fcbdd
Binary files /dev/null and b/backend/tests/lille_iso19115-3.zip differ
diff --git a/backend/tests/test_meta.py b/backend/tests/test_meta.py
new file mode 100644
index 0000000..ca631f6
--- /dev/null
+++ b/backend/tests/test_meta.py
@@ -0,0 +1,66 @@
+import os
+from maelstro.metadata import Meta
+from maelstro.metadata.meta import MetaXml
+
+
+def test_iso19139():
+    with open(os.path.join(os.path.dirname(__file__), 'demo_iso19139.zip'), 'rb') as zf:
+        mm = Meta(zf.read())
+    assert mm.properties["schema"] == "iso19139"
+
+    assert mm.get_ogc_geoserver_layers() == [
+        {
+            'server_url': 'https://public.sig.rennesmetropole.fr/geoserver/ows?service=wms&request=GetCapabilities',
+            'name': 'trp_doux:reparation_velo',
+            'description': 'Stations de réparation et gonflage pour vélo sur Rennes Métropole',
+            'protocol': 'OGC:WMS'
+        },
+        {
+            'server_url': 'https://public.sig.rennesmetropole.fr/geoserver/ows?service=wfs&request=GetCapabilities',
+            'name': 'trp_doux:reparation_velo',
+            'description': 'Stations de réparation et gonflage pour vélo sur Rennes Métropole',
+            'protocol': 'OGC:WFS'
+        }
+    ]
+
+
+def test_iso19115():
+    with open(os.path.join(os.path.dirname(__file__), 'lille_iso19115-3.zip'), 'rb') as zf:
+        mm = Meta(zf.read())
+    assert mm.properties["schema"] == "iso19115-3.2018"
+
+    assert mm.get_ogc_geoserver_layers() == [
+        {
+            'server_url': 'https://data.lillemetropole.fr/geoserver/ows',
+            'name': 'mel_espacepublic:voies_vertes_chemins',
+            'description': 'mel_espacepublic:voies_vertes_chemins',
+            'protocol': 'OGC:WMS'
+        },
+        {
+            'server_url': 'https://data.lillemetropole.fr/geoserver/ows',
+            'name': 'mel_espacepublic:voies_vertes_chemins',
+            'description': 'mel_espacepublic:voies_vertes_chemins',
+            'protocol': 'OGC:WFS'
+        }
+    ]
+
+
+def test_empty_property_is_none():
+    xml_bytes = (
+        b'<root xmlns:gmd="http://www.isotc211.org/2005/gmd"'
+        b' xmlns:gco="http://www.isotc211.org/2005/gco">'
+        b'<gmd:CI_OnlineResource>'
+        b'<gmd:protocol><gco:CharacterString>OGC:WMS</gco:CharacterString></gmd:protocol>'
+        b'<gmd:name><gco:CharacterString/></gmd:name>'
+        b'</gmd:CI_OnlineResource>'
+        b'</root>'
+    )
+
+    assert MetaXml(xml_bytes).get_ogc_geoserver_layers() == [
+        {
+            'server_url': None,
+            'name': None,
+            'description': None,
+            'protocol': 'OGC:WMS'
+        }
+    ]
diff --git a/docker-compose.yml b/docker-compose.yml
index 1f02089..bcc5ec1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -32,6 +32,8 @@ services:
     volumes:
       - ./backend:/app
       - georchestra_datadir:/etc/georchestra
+    environment:
+      - LOCAL_LOGIN=admin
     healthcheck:
       test: "health_check"
       interval: 10s