diff --git a/README.rst b/README.rst index 22d9b11..e6a3852 100644 --- a/README.rst +++ b/README.rst @@ -240,9 +240,9 @@ Mapping schema for MARC21 Authority Only a small part of the MARC21 Authority data model is converted. -========================================================== ===================================== +========================================================== =================================================================== MARC21XML RDF -========================================================== ===================================== +========================================================== =================================================================== ``001`` Control Number ``dcterms:identifier`` ``005`` Date and time of latest transaction ``dcterms:modified`` ``008[0:6]`` Date entered on file ``dcterms:created`` @@ -251,7 +251,7 @@ MARC21XML RDF ``083`` Dewey Decimal Classification Number ``skos:exactMatch`` (see below) ``1XX`` Headings ``skos:prefLabel`` ``4XX`` See From Tracings ``skos:altLabel`` -``5XX`` See Also From Tracings ``skos:related`` or `skos:broader`` (see below) +``5XX`` See Also From Tracings ``skos:related``, ``skos:broader`` or ``skos:narrower`` (see below) ``667`` Nonpublic General Note ``skos:editorialNote`` ``670`` Source Data Found ``skos:note`` ``677`` Definition ``skos:definition`` @@ -260,13 +260,15 @@ MARC21XML RDF ``681`` Subject Example Tracing Note ``skos:example`` ``682`` Deleted Heading Information ``skos:changeNote`` ``688`` Application History Note ``skos:historyNote`` -========================================================== ===================================== +========================================================== =================================================================== Notes: * Mappings are generated for 065, 080 and 083 only if an URI pattern for the classification scheme has been defined in the config. 
-* ``skos:related`` and ``skos:broader`` is currently only generated from 5XX fields - if the fields contain a ``$0`` subfield containing either the control number or the - URI of the related record. +* SKOS relations are generated from 5XX fields if the fields contain a ``$0`` + subfield containing either the control number or the URI of the related record. + The relationship type is ``skos:broader`` if ``$w=g``, ``skos:narrower`` if ``$w=h``, + and ``skos:related`` otherwise. + If ``$w=r`` and ``$4`` contains a URI, that URI is used as the relationship type. diff --git a/mc2skos/mc2skos.py b/mc2skos/mc2skos.py index 3d94735..aaf88ea 100755 --- a/mc2skos/mc2skos.py +++ b/mc2skos/mc2skos.py @@ -84,18 +84,10 @@ def add_record_to_graph(graph, record, options): for label in record.altLabel: graph.add((record_uri, SKOS.altLabel, Literal(label['term'], lang=record.lang))) - # Add skos:broader - for uri in record.broader: - graph.add((record_uri, SKOS.broader, URIRef(uri))) - - # Add skos:related - for uri in record.related: - graph.add((record_uri, SKOS.related, URIRef(uri))) - - # Add mappings - for mapping in record.mappings: - if mapping.get('uri') is not None: - graph.add((record_uri, mapping.get('relation'), URIRef(mapping['uri']))) + # Add relations (SKOS:broader, SKOS:narrower, SKOS:xxxMatch, etc.) 
+ for relation in record.relations: + if relation.get('uri') is not None: + graph.add((record_uri, URIRef(relation['relation']), URIRef(relation['uri']))) # Add notes if options.get('include_notes'): diff --git a/mc2skos/record.py b/mc2skos/record.py index 68d9791..eaf6f4f 100644 --- a/mc2skos/record.py +++ b/mc2skos/record.py @@ -50,6 +50,10 @@ } +def is_uri(value): + return bool(value) and value.startswith(('http://', 'https://')) + + class InvalidRecordError(RuntimeError): pass @@ -159,8 +163,6 @@ def __init__(self, record, options=None): self.control_number_identifier = None self.created = None self.modified = None - self.broader = [] - self.related = [] self.lang = None self.prefLabel = None self.altLabel = [] @@ -172,7 +174,7 @@ def __init__(self, record, options=None): self.historyNote = [] self.changeNote = [] self.example = [] - self.mappings = [] + self.relations = [] self.webDeweyExtras = {} self.deprecated = False self.is_top_concept = False @@ -288,7 +290,10 @@ def parse(self, options): if parent_notation is not None: parent_uri = self.scheme.get_uri(collection='class', object=parent_notation) if parent_uri is not None: - self.broader.append(parent_uri) + self.relations.append({ + 'uri': parent_uri, + 'relation': SKOS.broader + }) # 253 : Complex See Reference (R) # Example: @@ -617,10 +622,10 @@ def append_class_uri(class_obj): return class_obj - def append_mapping(self, scheme, relation, **kwargs): + def append_relation(self, scheme, relation, **kwargs): uri = scheme.get_uri(**kwargs) if uri: - self.mappings.append({ + self.relations.append({ 'uri': uri, 'relation': relation, }) @@ -644,7 +649,7 @@ def parse(self, options): # 065: Other Classification Number el = self.record.first('mx:datafield[@tag="065"]') if el is not None: - self.append_mapping( + self.append_relation( ConceptScheme(el.text('mx:subfield[@code="2"]'), ClassificationRecord), SKOS.exactMatch, object=self.get_class_number(el) @@ -653,7 +658,7 @@ def parse(self, options): # 080: 
Universal Decimal Classification Number el = self.record.first('mx:datafield[@tag="080"]') if el is not None: - self.append_mapping( + self.append_relation( ConceptScheme('udc', ClassificationRecord), SKOS.exactMatch, object=self.get_class_number(el) @@ -662,7 +667,7 @@ def parse(self, options): # 083: Dewey Decimal Classification Number el = self.record.first('mx:datafield[@tag="083"]') if el is not None: - self.append_mapping( + self.append_relation( ConceptScheme('ddc', ClassificationRecord, edition=el.text('mx:subfield[@code="2"]')), SKOS.exactMatch, object=self.get_class_number(el) @@ -682,18 +687,30 @@ def parse(self, options): for heading in self.get_terms('5'): local_id = heading['node'].text('mx:subfield[@code="0"]') if local_id: - m = re.match('^\(.+\)(.+)$', local_id) - if m: - local_id = m.group(1) - if local_id.startswith('http'): - uri = local_id - else: - uri = self.scheme.get_uri(control_number=local_id) if local_id: - if heading['node'].text('mx:subfield[@code="w"]') == 'g': - self.broader.append(uri) + sf_w = heading['node'].text('mx:subfield[@code="w"]') + sf_4 = heading['node'].text('mx:subfield[@code="4"]') + + if sf_w == 'g': + relation = SKOS.broader + elif sf_w == 'h': + relation = SKOS.narrower + elif sf_w == 'r' and is_uri(sf_4): + relation = sf_4 + else: + relation = SKOS.related + + if is_uri(local_id): + self.relations.append({ + 'uri': local_id, + 'relation': relation, + }) else: - self.related.append(uri) + self.append_relation( + self.scheme, + relation, + control_number=local_id + ) # 667 : Nonpublic General Note # madsrdf:editorialNote diff --git a/tests/test_process_record.py b/tests/test_process_record.py index 7b9c789..928fb21 100644 --- a/tests/test_process_record.py +++ b/tests/test_process_record.py @@ -58,8 +58,9 @@ def testAddTableNumber(self): assert rec.display is True assert rec.synthesized is False assert rec.notation == '811-818:2;4' - assert len(rec.broader) == 1 - assert rec.broader[0] == 
'http://dewey.info/class/811-818/e23/' + assert len(rec.relations) == 1 + assert rec.relations[0]['uri'] == 'http://dewey.info/class/811-818/e23/' + assert rec.relations[0]['relation'] == SKOS.broader def testHistoricalAddTableNumber(self): rec = ClassificationRecord('''