-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
parse xml metadata to identify ogc layers #12
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .meta import Meta as Meta | ||
|
||
__all__ = ["Meta"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from io import BytesIO, StringIO | ||
from zipfile import ZipFile | ||
from csv import DictReader | ||
from lxml import etree | ||
|
||
|
||
# Namespace prefix used to address CI_OnlineResource (and its children) for
# each supported metadata schema identifier.
NS_PREFIXES = {"iso19139": "gmd", "iso19115-3.2018": "cit"}

# Prefix -> namespace URI mapping passed to find()/findall() for each schema.
NS_REGISTRIES = {
    "iso19139": {"gmd": "http://www.isotc211.org/2005/gmd"},
    "iso19115-3.2018": {"cit": "http://standards.iso.org/iso/19115/-3/cit/2.0"},
}
|
||
|
||
class Meta: | ||
def __init__(self, zipfile: bytes): | ||
self.zipfile = zipfile | ||
with ZipFile(BytesIO(zipfile)) as zf: | ||
zip_properties = zf.read("index.csv").decode() | ||
dr = DictReader(StringIO(zip_properties), delimiter=";") | ||
self.properties = next(dr) | ||
|
||
self.xml_bytes = zf.read(f"{self.properties['uuid']}/metadata/metadata.xml") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. the default metadata can be found at this (static) path; additionally, the other formats may be added as a suffix There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. actually it depends on the default format, for example There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. as far as I understand, the default schema is defined in the schema column of index.csv. The metadata corresponding to this default format will be called metadata.xml; there may be additional XML files with a suffix in other formats, and these can be ignored. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we should ignore the other formats if we want to make a real copy There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. everything will be copied; the default format is just used for parsing the layers, since we can be sure that it exists |
||
|
||
schema = self.properties.get("schema", "iso19139") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. schema is not "static" There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. it is read from the csv file, which should have a 'schema' column. Do you think this is not reliable? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess yes, it is reliable There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
self.namespaces = NS_REGISTRIES.get(schema) | ||
self.prefix = NS_PREFIXES.get(schema) | ||
|
||
def get_ogc_geoserver_layers(self) -> list[dict[str, str | None]]:
    """Return the layer properties of every OGC link found in the metadata.

    The stored XML bytes are parsed, all ``CI_OnlineResource`` elements in
    the schema's namespace are collected, and each one accepted by
    ``is_ogc_layer`` is converted with ``layerproperties_from_link``.
    """
    document = etree.parse(BytesIO(self.xml_bytes))
    online_resources = document.findall(
        f".//{self.prefix}:CI_OnlineResource", self.namespaces
    )
    return [
        self.layerproperties_from_link(resource)
        for resource in online_resources
        if self.is_ogc_layer(resource)
    ]
|
||
def is_ogc_layer(self, link_node: etree._Element) -> bool:
    """Tell whether the link's protocol denotes an OGC WMS, WFS or WCS service.

    Only the first seven characters of the protocol are compared, ignoring
    case, so longer protocol strings sharing that prefix also match. A link
    without a protocol is never an OGC layer.
    """
    protocol = self.protocol_from_link(link_node)
    if protocol is None:
        return False
    service_prefix = protocol[:7].lower()
    return service_prefix in ("ogc:wms", "ogc:wfs", "ogc:wcs")
|
||
def layerproperties_from_link(
    self, link_node: etree._Element
) -> dict[str, str | None]:
    """Collect the URL, name, description and protocol of a link node.

    Each value comes from the matching ``*_from_link`` accessor and may be
    None when that accessor finds nothing.
    """
    # (output key, accessor) pairs, in the order the keys appear in the result.
    accessors = (
        ("server_url", self.url_from_link),
        ("name", self.name_from_link),
        ("description", self.desc_from_link),
        ("protocol", self.protocol_from_link),
    )
    return {key: accessor(link_node) for key, accessor in accessors}
|
||
def url_from_link(self, link_node: etree._Element) -> str | None:
    """Return the value of the link's ``linkage`` property, if any."""
    linkage_tag = f"{self.prefix}:linkage"
    return self.property_from_link(link_node, linkage_tag)
|
||
def name_from_link(self, link_node: etree._Element) -> str | None:
    """Return the value of the link's ``name`` property, if any."""
    name_tag = f"{self.prefix}:name"
    return self.property_from_link(link_node, name_tag)
|
||
def desc_from_link(self, link_node: etree._Element) -> str | None:
    """Return the value of the link's ``description`` property, if any."""
    description_tag = f"{self.prefix}:description"
    return self.property_from_link(link_node, description_tag)
|
||
def protocol_from_link(self, link_node: etree._Element) -> str | None:
    """Return the value of the link's ``protocol`` property, if any."""
    protocol_tag = f"{self.prefix}:protocol"
    return self.property_from_link(link_node, protocol_tag)
|
||
def property_from_link(self, link_node: etree._Element, tag: str) -> str | None:
    """Return the text of the first child element of ``tag`` under ``link_node``.

    Args:
        link_node: Element searched for the property.
        tag: Prefixed tag name of the property (e.g. ``"gmd:name"``); the
            prefix is resolved through ``self.namespaces``.

    Returns:
        The text of the property's first child element, or None when the
        property is absent, has no child element, or that child has no text.
    """
    property_node = link_node.find(tag, self.namespaces)
    if property_node is None:
        return None
    # The value is read from the property's first child element, not from the
    # property element itself; "./*" selects that child explicitly.
    text_node = property_node.find("./*")
    if text_node is None or text_node.text is None:
        # An empty value element must yield None, not the string "None".
        return None
    return str(text_node.text)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import os | ||
from maelstro.metadata import Meta | ||
|
||
|
||
def test_iso19139():
    """Check schema detection and OGC layer extraction on the ISO 19139 sample."""
    archive_path = os.path.join(os.path.dirname(__file__), 'demo_iso19139.zip')
    with open(archive_path, 'rb') as zf:
        mm = Meta(zf.read())
    assert mm.properties["schema"] == "iso19139"

    expected_layers = [
        {
            'server_url': 'https://public.sig.rennesmetropole.fr/geoserver/ows?service=wms&request=GetCapabilities',
            'name': 'trp_doux:reparation_velo',
            'description': 'Stations de réparation et gonflage pour vélo sur Rennes Métropole',
            'protocol': 'OGC:WMS',
        },
        {
            'server_url': 'https://public.sig.rennesmetropole.fr/geoserver/ows?service=wfs&request=GetCapabilities',
            'name': 'trp_doux:reparation_velo',
            'description': 'Stations de réparation et gonflage pour vélo sur Rennes Métropole',
            'protocol': 'OGC:WFS',
        },
    ]
    assert mm.get_ogc_geoserver_layers() == expected_layers
|
||
|
||
def test_iso19115():
    """Check schema detection and OGC layer extraction on the ISO 19115-3 sample."""
    archive_path = os.path.join(os.path.dirname(__file__), 'lille_iso19115-3.zip')
    with open(archive_path, 'rb') as zf:
        mm = Meta(zf.read())
    assert mm.properties["schema"] == "iso19115-3.2018"

    expected_layers = [
        {
            'server_url': 'https://data.lillemetropole.fr/geoserver/ows',
            'name': 'mel_espacepublic:voies_vertes_chemins',
            'description': 'mel_espacepublic:voies_vertes_chemins',
            'protocol': 'OGC:WMS',
        },
        {
            'server_url': 'https://data.lillemetropole.fr/geoserver/ows',
            'name': 'mel_espacepublic:voies_vertes_chemins',
            'description': 'mel_espacepublic:voies_vertes_chemins',
            'protocol': 'OGC:WFS',
        },
    ]
    assert mm.get_ogc_geoserver_layers() == expected_layers
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,8 @@ services: | |
volumes: | ||
- ./backend:/app | ||
- georchestra_datadir:/etc/georchestra | ||
environment: | ||
- LOCAL_LOGIN=admin | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't see it used anywhere There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yes, I think this goes with the other PR |
||
healthcheck: | ||
test: "health_check" | ||
interval: 10s | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would make 2 init classes depending on which format we use (from XML or from MEF zip)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, good idea.
there will be more functionalities with the zip file, but there is also common stuff like layer parsing