Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parse xml metadata to identify ogc layers #12

Merged
merged 2 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/maelstro/metadata/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Public entry point of the metadata package: the zip-archive reader
# ``MetaZip`` is re-exported under the shorter name ``Meta``.
from .meta import MetaZip as Meta

# Only ``Meta`` is part of the package's declared public API.
__all__ = ["Meta"]
88 changes: 88 additions & 0 deletions backend/maelstro/metadata/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from io import BytesIO, StringIO
from zipfile import ZipFile
from csv import DictReader
from lxml import etree


# Namespace prefix used for the online-resource elements of each supported
# metadata schema (keyed by schema identifier).
NS_PREFIXES = {
    "iso19139": "gmd",
    "iso19115-3.2018": "cit",
}

# Prefix -> namespace URI mapping handed to the XML path lookups, one
# mapping per supported metadata schema.
NS_REGISTRIES = {
    "iso19139": {"gmd": "http://www.isotc211.org/2005/gmd"},
    "iso19115-3.2018": {
        "cit": "http://standards.iso.org/iso/19115/-3/cit/2.0",
    },
}


class MetaXml:
def __init__(self, xml_bytes: bytes, schema: str = "iso19139"):
self.xml_bytes = xml_bytes
self.namespaces = NS_REGISTRIES.get(schema)
self.prefix = NS_PREFIXES.get(schema)

def get_ogc_geoserver_layers(self) -> list[dict[str, str | None]]:
xml_root = etree.parse(BytesIO(self.xml_bytes))

return [
self.layerproperties_from_link(link_node)
for link_node in xml_root.findall(
f".//{self.prefix}:CI_OnlineResource", self.namespaces
)
if self.is_ogc_layer(link_node)
]

def is_ogc_layer(self, link_node: etree._Element) -> bool:
link_protocol = self.protocol_from_link(link_node)
if link_protocol is None:
return False
return link_protocol[:7].lower() in ["ogc:wms", "ogc:wfs", "ogc:wcs"]

def layerproperties_from_link(
self, link_node: etree._Element
) -> dict[str, str | None]:
return {
"server_url": self.url_from_link(link_node),
"name": self.name_from_link(link_node),
"description": self.desc_from_link(link_node),
"protocol": self.protocol_from_link(link_node),
}

def url_from_link(self, link_node: etree._Element) -> str | None:
return self.property_from_link(link_node, f"{self.prefix}:linkage")

def name_from_link(self, link_node: etree._Element) -> str | None:
return self.property_from_link(link_node, f"{self.prefix}:name")

def desc_from_link(self, link_node: etree._Element) -> str | None:
return self.property_from_link(link_node, f"{self.prefix}:description")

def protocol_from_link(self, link_node: etree._Element) -> str | None:
return self.property_from_link(link_node, f"{self.prefix}:protocol")

def property_from_link(self, link_node: etree._Element, tag: str) -> str | None:
property_node = link_node.find(tag, self.namespaces)
if property_node is not None:
text_node = property_node.find("./")
if text_node is not None:
return str(text_node.text)
return None


class MetaZip(MetaXml):
    """Metadata reader for a zip archive holding an index and an XML record.

    The archive must contain a ``;``-separated ``index.csv`` whose first data
    row has a ``uuid`` column (and optionally a ``schema`` column), and the
    record itself at ``<uuid>/metadata/metadata.xml``.
    """

    def __init__(self, zipfile: bytes):
        """Read the index row and the metadata XML out of the archive.

        Args:
            zipfile: The raw bytes of the zip archive.

        Raises:
            ValueError: If ``index.csv`` has no data row.
            KeyError: If ``index.csv`` or the metadata XML is missing from
                the archive, or the index row has no ``uuid`` column.
        """
        self.zipfile = zipfile
        with ZipFile(BytesIO(zipfile)) as zf:
            zip_properties = zf.read("index.csv").decode()
            dr = DictReader(StringIO(zip_properties), delimiter=";")
            # A bare next(dr) would leak StopIteration out of the constructor
            # when the index has a header only (or is empty).
            first_row = next(dr, None)
            if first_row is None:
                raise ValueError("index.csv of the metadata zip has no data row")
            # Only the first data row is used as the archive's properties.
            self.properties = first_row

            xml_bytes = zf.read(f"{self.properties['uuid']}/metadata/metadata.xml")

        # Archives without a "schema" column are treated as iso19139.
        schema = self.properties.get("schema", "iso19139")

        super().__init__(xml_bytes, schema)
3 changes: 3 additions & 0 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ requires-python = ">=3.12"
dependencies = [
"fastapi[standard] (>=0.115.6,<0.116.0)",
"requests (>=2.32.3,<3.0.0)",
"lxml (>=5.3.0,<6.0.0)",
"types-pyyaml (>=6.0.12.20241230,<7.0.0.0)",
"lxml-stubs (>=0.5.1,<0.6.0)",
]

[tool.poetry.group.check]
Expand Down
Binary file added backend/tests/demo_iso19139.zip
Binary file not shown.
Binary file added backend/tests/lille_iso19115-3.zip
Binary file not shown.
44 changes: 44 additions & 0 deletions backend/tests/test_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
from maelstro.metadata import Meta


def test_iso19139():
    """The ISO 19139 demo archive yields its WMS and WFS layer links."""
    archive_path = os.path.join(os.path.dirname(__file__), 'demo_iso19139.zip')
    with open(archive_path, 'rb') as archive:
        meta = Meta(archive.read())

    expected_layers = [
        {
            'server_url': 'https://public.sig.rennesmetropole.fr/geoserver/ows?service=wms&request=GetCapabilities',
            'name': 'trp_doux:reparation_velo',
            'description': 'Stations de réparation et gonflage pour vélo sur Rennes Métropole',
            'protocol': 'OGC:WMS',
        },
        {
            'server_url': 'https://public.sig.rennesmetropole.fr/geoserver/ows?service=wfs&request=GetCapabilities',
            'name': 'trp_doux:reparation_velo',
            'description': 'Stations de réparation et gonflage pour vélo sur Rennes Métropole',
            'protocol': 'OGC:WFS',
        },
    ]

    # The schema is read from the archive's index before any XML lookup.
    assert meta.properties["schema"] == "iso19139"
    assert meta.get_ogc_geoserver_layers() == expected_layers


def test_iso19115():
    """The ISO 19115-3 Lille archive yields its WMS and WFS layer links."""
    archive_path = os.path.join(os.path.dirname(__file__), 'lille_iso19115-3.zip')
    with open(archive_path, 'rb') as archive:
        meta = Meta(archive.read())

    expected_layers = [
        {
            'server_url': 'https://data.lillemetropole.fr/geoserver/ows',
            'name': 'mel_espacepublic:voies_vertes_chemins',
            'description': 'mel_espacepublic:voies_vertes_chemins',
            'protocol': 'OGC:WMS',
        },
        {
            'server_url': 'https://data.lillemetropole.fr/geoserver/ows',
            'name': 'mel_espacepublic:voies_vertes_chemins',
            'description': 'mel_espacepublic:voies_vertes_chemins',
            'protocol': 'OGC:WFS',
        },
    ]

    # The schema is read from the archive's index before any XML lookup.
    assert meta.properties["schema"] == "iso19115-3.2018"
    assert meta.get_ogc_geoserver_layers() == expected_layers
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ services:
volumes:
- ./backend:/app
- georchestra_datadir:/etc/georchestra
environment:
- LOCAL_LOGIN=admin
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see it used anywhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think this goes with the other PR.

healthcheck:
test: "health_check"
interval: 10s
Expand Down