From 546a68c2dfd382ef1469b733bf56a1379f5422fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20Michael=20O=2E=20Hegg=C3=B8?= Date: Wed, 8 Nov 2017 18:13:20 +0100 Subject: [PATCH] [#42] Let 010 and 016 take precedence over 001 This fixes the NALT URIs. --- README.rst | 8 +++++-- examples/{nalt-142.ttl => nalt-1396.ttl} | 4 ++-- examples/{nalt-142.xml => nalt-1396.xml} | 0 mc2skos/record.py | 27 +++++++++++++++++++----- mc2skos/vocabularies.yml | 2 +- 5 files changed, 31 insertions(+), 10 deletions(-) rename examples/{nalt-142.ttl => nalt-1396.ttl} (93%) rename examples/{nalt-142.xml => nalt-1396.xml} (100%) diff --git a/README.rst b/README.rst index 9dd9dd2..dcad116 100644 --- a/README.rst +++ b/README.rst @@ -87,7 +87,9 @@ Pull requests for adding more vocabularies are very welcome! URIs can be also be generated on the fly from an URI template specified with option ``--uri``. The following template parameters are recognized: -* ``{control_number}`` is the 001 value +* ``{control_number}`` is the control number from 001, 010 or 016. The current approach + is to use 016 if defined, otherwise 010, otherwise 001. If you find examples where this approach + fails, please add them to `#42 <https://github.com/scriptotek/mc2skos/issues/42>`_. * ``{collection}`` is "class", "table" or "scheme" * ``{object}`` is a member of the classification scheme (with spaces replaced by hyphens) and part of a ``{collection}``, such as a specific class or table. @@ -123,10 +125,12 @@ the 7XX fields to skos:altLabel. 
========================================================== ===================================== MARC21XML RDF ========================================================== ===================================== -``001`` Control Number ``dcterms:identifier`` +``001`` Control Number (see note above on 001, 010 & 016) ``dcterms:identifier`` ``005`` Date and time of latest transaction ``dcterms:modified`` ``008[0:6]`` Date entered on file ``dcterms:created`` ``008[8]="d" or "e"`` Classification validity ``owl:deprecated`` +``010`` Control Number (see note above on 001, 010 & 016) ``dcterms:identifier`` +``016`` Control Number (see note above on 001, 010 & 016) ``dcterms:identifier`` ``153 $a``, ``$c``, ``$z`` Classification number ``skos:notation`` ``153 $j`` Caption ``skos:prefLabel`` ``153 $e``, ``$f``, ``$z`` Classification number hierarchy ``skos:broader`` diff --git a/examples/nalt-142.ttl b/examples/nalt-1396.ttl similarity index 93% rename from examples/nalt-142.ttl rename to examples/nalt-1396.ttl index 142849c..146a6ee 100644 --- a/examples/nalt-142.ttl +++ b/examples/nalt-1396.ttl @@ -6,9 +6,9 @@ @prefix xml: <http://www.w3.org/XML/1998/namespace> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . 
-<http://lod.nal.usda.gov/nalt/142> a skos:Concept ; +<http://lod.nal.usda.gov/nalt/1396> a skos:Concept ; dcterms:created "2016-12-08"^^xsd:date ; - dcterms:identifier "142" ; + dcterms:identifier "nalt00001396" ; dcterms:modified "2016-12-08"^^xsd:date ; skos:altLabel "2-oxoisocaproate dehydrogenase"@en, "2-oxoisovalerate (lipoate) dehydrogenase"@en, diff --git a/examples/nalt-142.xml b/examples/nalt-1396.xml similarity index 100% rename from examples/nalt-142.xml rename to examples/nalt-1396.xml diff --git a/mc2skos/record.py b/mc2skos/record.py index 5ebc396..966c5d3 100644 --- a/mc2skos/record.py +++ b/mc2skos/record.py @@ -137,11 +137,16 @@ def process_formatter(matches): start = int(matches.group('start')) if matches.group('start') else None end = int(matches.group('end')) if matches.group('end') else None value = kwargs[matches.group('param')][start:end] - formatter_str = '{0' + matches.group('formatter') + '}' if matches.group('formatter') else '{0}' - if 'd' in formatter_str: - value = int(value) - elif 'f' in formatter_str: - value = float(value) + if len(value) == 0: + # Empty string can be used for the scheme URI. + # Trying to convert this to decimal or float will fail! + formatter_str = '{0}' + else: + formatter_str = '{0' + matches.group('formatter') + '}' if matches.group('formatter') else '{0}' + if 'd' in formatter_str: + value = int(value) + elif 'f' in formatter_str: + value = float(value) return formatter_str.format(value) @@ -231,6 +236,18 @@ def parse(self, options): # 001 self.control_number = self.record.text('mx:controlfield[@tag="001"]') + # 010 : If present, it takes precedence over 001. 
+ # + value = self.record.text('mx:datafield[@tag="010"]/mx:subfield[@code="a"]') + if value is not None: + self.control_number = value + + # 016 : If present, it takes precedence over both 001 and 010 + # + value = self.record.text('mx:datafield[@tag="016"]/mx:subfield[@code="a"]') + if value is not None: + self.control_number = value + # 003 self.control_number_identifier = self.record.text('mx:controlfield[@tag="003"]') diff --git a/mc2skos/vocabularies.yml b/mc2skos/vocabularies.yml index 509d2d3..fa765e4 100644 --- a/mc2skos/vocabularies.yml +++ b/mc2skos/vocabularies.yml @@ -10,7 +10,7 @@ subject_schemes: a: concept: http://id.loc.gov/authorities/subjects/{control_number} scheme: http://id.loc.gov/authorities/subjects - d: http://lod.nal.usda.gov/nalt/{control_number} + d: http://lod.nal.usda.gov/nalt/{control_number[4:]:d} usvd: concept: http://data.ub.uio.no/usvd/c{control_number[4:]} scheme: http://data.ub.uio.no/usvd/