Skip to content

Commit

Permalink
Refactor: generalize relations and add SKOS.narrower
Browse files Browse the repository at this point in the history
- Generalized "broader", "related" and "mappings" as "relations".
- [#31] Added mapping of 5XX $w=h to skos:narrower
  • Loading branch information
danmichaelo committed Jul 6, 2017
1 parent 4e4692e commit 16b2531
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 40 deletions.
16 changes: 9 additions & 7 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,9 @@ Mapping schema for MARC21 Authority

Only a small part of the MARC21 Authority data model is converted.

========================================================== =====================================
========================================================== ===================================================================
MARC21XML RDF
========================================================== =====================================
========================================================== ===================================================================
``001`` Control Number ``dcterms:identifier``
``005`` Date and time of latest transaction ``dcterms:modified``
``008[0:6]`` Date entered on file ``dcterms:created``
Expand All @@ -251,7 +251,7 @@ MARC21XML RDF
``083`` Dewey Decimal Classification Number ``skos:exactMatch`` (see below)
``1XX`` Headings ``skos:prefLabel``
``4XX`` See From Tracings ``skos:altLabel``
``5XX`` See Also From Tracings ``skos:related`` or `skos:broader`` (see below)
``5XX`` See Also From Tracings ``skos:related``, ``skos:broader`` or ``skos:narrower`` (see below)
``667`` Nonpublic General Note ``skos:editorialNote``
``670`` Source Data Found ``skos:note``
``677`` Definition ``skos:definition``
Expand All @@ -260,13 +260,15 @@ MARC21XML RDF
``681`` Subject Example Tracing Note ``skos:example``
``682`` Deleted Heading Information ``skos:changeNote``
``688`` Application History Note ``skos:historyNote``
========================================================== =====================================
========================================================== ===================================================================

Notes:

* Mappings are generated for 065, 080 and 083 only if a URI pattern for the
classification scheme has been defined in the config.

* ``skos:related`` and ``skos:broader`` is currently only generated from 5XX fields
if the fields contain a ``$0`` subfield containing either the control number or the
URI of the related record.
* SKOS relations are generated from 5XX fields if the fields contain a ``$0``
subfield containing either the control number or the URI of the related record.
The relationship type is ``skos:broader`` if ``$w=g``, ``skos:narrower`` if ``$w=h``,
and ``skos:related`` otherwise.
If ``$w=r`` and ``$4`` contains a URI, that URI is used as the relationship type.
16 changes: 4 additions & 12 deletions mc2skos/mc2skos.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,10 @@ def add_record_to_graph(graph, record, options):
for label in record.altLabel:
graph.add((record_uri, SKOS.altLabel, Literal(label['term'], lang=record.lang)))

# Add skos:broader
for uri in record.broader:
graph.add((record_uri, SKOS.broader, URIRef(uri)))

# Add skos:related
for uri in record.related:
graph.add((record_uri, SKOS.related, URIRef(uri)))

# Add mappings
for mapping in record.mappings:
if mapping.get('uri') is not None:
graph.add((record_uri, mapping.get('relation'), URIRef(mapping['uri'])))
# Add relations (SKOS:broader, SKOS:narrower, SKOS:xxxMatch, etc.)
for relation in record.relations:
if relation.get('uri') is not None:
graph.add((record_uri, relation.get('relation'), URIRef(relation['uri'])))

# Add notes
if options.get('include_notes'):
Expand Down
55 changes: 36 additions & 19 deletions mc2skos/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@
}


def is_uri(value):
    """Return True if ``value`` looks like an HTTP or HTTPS URI.

    The check is purely a prefix test; no further URI validation is done.

    Falsy input (``None`` or an empty string) yields ``False`` instead of
    raising ``AttributeError``. Callers pass subfield text to this function,
    and that text is presumably ``None`` when the subfield is absent.

    :param value: candidate string, or ``None``.
    :returns: ``True`` when ``value`` starts with ``http://`` or
        ``https://``, otherwise ``False``.
    """
    if not value:
        return False
    # str.startswith accepts a tuple of prefixes, so one call covers both.
    return value.startswith(('http://', 'https://'))


class InvalidRecordError(RuntimeError):
    """Error type for invalid records; adds nothing to ``RuntimeError``."""
    pass

Expand Down Expand Up @@ -159,8 +163,6 @@ def __init__(self, record, options=None):
self.control_number_identifier = None
self.created = None
self.modified = None
self.broader = []
self.related = []
self.lang = None
self.prefLabel = None
self.altLabel = []
Expand All @@ -172,7 +174,7 @@ def __init__(self, record, options=None):
self.historyNote = []
self.changeNote = []
self.example = []
self.mappings = []
self.relations = []
self.webDeweyExtras = {}
self.deprecated = False
self.is_top_concept = False
Expand Down Expand Up @@ -288,7 +290,10 @@ def parse(self, options):
if parent_notation is not None:
parent_uri = self.scheme.get_uri(collection='class', object=parent_notation)
if parent_uri is not None:
self.broader.append(parent_uri)
self.relations.append({
'uri': parent_uri,
'relation': SKOS.broader
})

# 253 : Complex See Reference (R)
# Example:
Expand Down Expand Up @@ -617,10 +622,10 @@ def append_class_uri(class_obj):

return class_obj

def append_mapping(self, scheme, relation, **kwargs):
def append_relation(self, scheme, relation, **kwargs):
uri = scheme.get_uri(**kwargs)
if uri:
self.mappings.append({
self.relations.append({
'uri': uri,
'relation': relation,
})
Expand All @@ -644,7 +649,7 @@ def parse(self, options):
# 065: Other Classification Number
el = self.record.first('mx:datafield[@tag="065"]')
if el is not None:
self.append_mapping(
self.append_relation(
ConceptScheme(el.text('mx:subfield[@code="2"]'), ClassificationRecord),
SKOS.exactMatch,
object=self.get_class_number(el)
Expand All @@ -653,7 +658,7 @@ def parse(self, options):
# 080: Universal Decimal Classification Number
el = self.record.first('mx:datafield[@tag="080"]')
if el is not None:
self.append_mapping(
self.append_relation(
ConceptScheme('udc', ClassificationRecord),
SKOS.exactMatch,
object=self.get_class_number(el)
Expand All @@ -662,7 +667,7 @@ def parse(self, options):
# 083: Dewey Decimal Classification Number
el = self.record.first('mx:datafield[@tag="083"]')
if el is not None:
self.append_mapping(
self.append_relation(
ConceptScheme('ddc', ClassificationRecord, edition=el.text('mx:subfield[@code="2"]')),
SKOS.exactMatch,
object=self.get_class_number(el)
Expand All @@ -682,18 +687,30 @@ def parse(self, options):
for heading in self.get_terms('5'):
local_id = heading['node'].text('mx:subfield[@code="0"]')
if local_id:
m = re.match('^\(.+\)(.+)$', local_id)
if m:
local_id = m.group(1)
if local_id.startswith('http'):
uri = local_id
else:
uri = self.scheme.get_uri(control_number=local_id)
if local_id:
if heading['node'].text('mx:subfield[@code="w"]') == 'g':
self.broader.append(uri)
sf_w = heading['node'].text('mx:subfield[@code="w"]')
sf_4 = heading['node'].text('mx:subfield[@code="4"]')

if sf_w == 'g':
relation = SKOS.broader
elif sf_w == 'h':
relation = SKOS.narrower
elif sf_w == 'r' and is_uri(sf_4):
relation = sf_4
else:
relation = SKOS.related

if is_uri(local_id):
self.relations.append({
'uri': uri,
'relation': relation,
})
else:
self.related.append(uri)
self.append_relation(
self.scheme,
relation,
control_number=local_id
)

# 667 : Nonpublic General Note
# madsrdf:editorialNote
Expand Down
5 changes: 3 additions & 2 deletions tests/test_process_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ def testAddTableNumber(self):
assert rec.display is True
assert rec.synthesized is False
assert rec.notation == '811-818:2;4'
assert len(rec.broader) == 1
assert rec.broader[0] == 'http://dewey.info/class/811-818/e23/'
assert len(rec.relations) == 1
assert rec.relations[0]['uri'] == 'http://dewey.info/class/811-818/e23/'
assert rec.relations[0]['relation'] == SKOS.broader

def testHistoricalAddTableNumber(self):
rec = ClassificationRecord('''
Expand Down

0 comments on commit 16b2531

Please sign in to comment.