Merge pull request #34 from OpenLXP/json-ld

add support for JSON-LD
OpenLXP · Mar 4, 2024 · 3f595ef · 3f595ef
2 parents 8769617 + 20c08cf
commit 3f595ef
Show file tree

Hide file tree

Showing 4 changed files with 175 additions and 13 deletions.
diff --git a/app/api/serializers.py b/app/api/serializers.py
@@ -2,7 +2,7 @@
 
 from rest_framework import serializers
 
-from core.models import SchemaLedger, TermSet, TransformationLedger
+from core.models import SchemaLedger, Term, TermSet, TransformationLedger
 
 logger = logging.getLogger('dict_config_logger')
 
@@ -27,6 +27,26 @@ class Meta:
         fields = ('iri', 'name', 'version', 'schema')
 
 
+class TermSetJSONLDSerializer(serializers.ModelSerializer):
+    """Serializes the JSON-LD representation of a TermSet"""
+    graph = serializers.DictField(source='json_ld')
+
+    class Meta:
+        model = TermSet
+
+        fields = ('graph',)
+
+
+class TermJSONLDSerializer(serializers.ModelSerializer):
+    """Serializes the JSON-LD representation of a Term"""
+    graph = serializers.DictField(source='json_ld')
+
+    class Meta:
+        model = Term
+
+        fields = ('graph',)
+
+
 class TransformationLedgerSerializer(serializers.ModelSerializer):
     """Serializes the SchemaLedger Model"""
 

diff --git a/app/api/urls.py b/app/api/urls.py
@@ -8,8 +8,10 @@
 app_name = 'api'
 
 urlpatterns = [
-     path('schemas/', views.SchemaLedgerDataView.as_view(),
-          name='schemaledger'),
-     path('mappings/', views.TransformationLedgerDataView.as_view(),
-          name='transformationledger'),
+    path('schemas/', views.SchemaLedgerDataView.as_view(),
+         name='schemaledger'),
+    path('mappings/', views.TransformationLedgerDataView.as_view(),
+         name='transformationledger'),
+    path('json-ld/<path:pk>', views.JSONLDDataView.as_view(),
+         name='json-ld'),
 ]
diff --git a/app/api/views.py b/app/api/views.py
@@ -1,14 +1,19 @@
 import logging
 
+from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
+from django.urls import reverse
 from requests.exceptions import HTTPError
 from rest_framework import status
-from rest_framework.generics import GenericAPIView
+from rest_framework.generics import GenericAPIView, RetrieveAPIView
+from rest_framework.renderers import JSONRenderer
 from rest_framework.response import Response
+from rest_framework.settings import api_settings
 
-from api.serializers import TermSetSerializer
+from api.serializers import (TermJSONLDSerializer, TermSetJSONLDSerializer,
+                             TermSetSerializer)
 from core.management.utils.xss_helper import sort_version
-from core.models import TermSet
+from core.models import Term, TermSet
 
 logger = logging.getLogger('dict_config_logger')
 
@@ -24,6 +29,65 @@ def check_status(messages, queryset):
     return queryset
 
 
+class JSONLDRenderer(JSONRenderer):
+    """JSON renderer advertising the 'application/ld+json' media type"""
+    media_type = 'application/ld+json'
+    format = 'jsonld'
+
+
+class JSONLDDataView(RetrieveAPIView):
+    """Handles HTTP requests for JSON-LD schemas"""
+    renderer_classes = [JSONLDRenderer, *api_settings.DEFAULT_RENDERER_CLASSES]
+
+    def get_queryset(self):
+        """
+        Returns published Terms when a valueless query param is present
+        (a Term IRI contains '?'), otherwise published TermSets
+        """
+        # Due to the IRI a term has a '?' so check for a param without a value
+        if self.request.query_params:
+            for _, v in self.request.query_params.items():
+                if len(v) == 0:
+                    return Term.objects.all().filter(status='published')
+        return TermSet.objects.all().filter(status='published')
+
+    def get_serializer_class(self):
+        """
+        Returns the Term serializer when a valueless query param is present
+        (a Term IRI contains '?'), otherwise the TermSet serializer
+        """
+        # Must mirror get_queryset: valueless param => Term, else TermSet
+        if self.request.query_params:
+            for _, v in self.request.query_params.items():
+                if len(v) == 0:
+                    return TermJSONLDSerializer
+        return TermSetJSONLDSerializer
+
+    def retrieve(self, request, *args, **kwargs):
+        """
+        Return a JSON-LD representation of the requested object
+        """
+        # A Term IRI's '?suffix' arrives as a valueless param; re-attach it
+        if self.request.query_params:
+            for k, v in self.request.query_params.items():
+                if len(v) == 0:
+                    self.kwargs['pk'] = self.kwargs['pk'] + \
+                        '?' + k
+                    break
+        # get the specific object and serializer
+        instance = self.get_object()
+        serializer = self.get_serializer(instance)
+        # generated JSON-LD is stored as a python dict labeled 'graph'
+        ld_dict = serializer.data['graph']
+        # build this API's base URL (dummy pk '1' is stripped) for the context
+        ldss = request.build_absolute_uri(
+            reverse('api:json-ld', args=[1]))[:-1]
+        if hasattr(settings, 'BAD_HOST') and hasattr(settings, 'OVERIDE_HOST'):
+            ldss = ldss.replace(settings.BAD_HOST, settings.OVERIDE_HOST)
+        ld_dict['@context']['ldss'] = ldss
+        return Response(ld_dict)
+
+
 class SchemaLedgerDataView(GenericAPIView):
     """Handles HTTP requests to the Schema Ledger"""
 
@@ -89,14 +153,11 @@ def get(self, request):
             logger.error(messages)
             return Response(errorMsg, status.HTTP_400_BAD_REQUEST)
         try:
-            serializer_class = TermSetSerializer(queryset[0])
-            logger.info(queryset[0])
             # only way messages gets sent is if there was
             # an error serializing or in the response process.
             messages.append(
                 "Error fetching records please check the logs.")
-            return Response(serializer_class.data,
-                            status.HTTP_200_OK)
+            return self.handle_response(queryset)
         except ObjectDoesNotExist:
             errorMsg = {
                 "message": messages
@@ -111,6 +172,19 @@ def get(self, request):
             return Response(errorMsg,
                             status.HTTP_500_INTERNAL_SERVER_ERROR)
 
+    def handle_response(self, queryset):
+        serializer_class = TermSetSerializer(queryset[0])
+        logger.info(queryset[0])
+        # could be used to add link header if needed
+        # if 'format' in request.query_params:
+        #     link = '<%s>;' % request.get_full_path().replace(
+        #         request.query_params.get('format'), 'jsonld')
+        # else:
+        #     link = f'<{request.get_full_path()}>;'
+        # link += ' rel="alternate"; type="application/ld+json"'
+        return Response(serializer_class.data,
+                        status.HTTP_200_OK)
+
 
 class TransformationLedgerDataView(GenericAPIView):
     """Handles HTTP requests to the Transformation Ledger"""

diff --git a/app/core/models.py b/app/core/models.py
@@ -19,6 +19,12 @@
 logger = logging.getLogger('dict_config_logger')
 
 
+data_type_matching = {
+    'str': 'schema:Text',
+    'int': 'schema:Number',
+    'bool': 'schema:Boolean',
+    'datetime': 'schema:DateTime'
+}
 regex_check = (r'(?!(\A( \x09\x0A\x0D\x20-\x7E # ASCII '
                r'| \xC2-\xDF # non-overlong 2-byte '
                r'| \xE0\xA0-\xBF # excluding overlongs '
@@ -67,6 +73,40 @@ def export(self):
                  for term in self.terms.filter(status='published')}
         return {**children, **terms}
 
+    def json_ld(self):
+        """Build the JSON-LD dict ('@context', '@graph') for this TermSet"""
+        # create graph and context dicts
+        graph = {}
+        context = {}
+        # add elements to graph and context
+        graph['@id'] = 'ldss:' + self.iri
+        graph['@type'] = 'rdfs:Class'
+        graph['rdfs:label'] = self.name
+        context['rdfs'] = 'http://www.w3.org/2000/01/rdf-schema#'
+        if hasattr(self, 'childtermset'):
+            graph['schema:domainIncludes'] = {
+                '@id': 'ldss:' +
+                self.childtermset.parent_term_set.iri}
+            context['schema'] = 'https://schema.org/'
+        # iterate over child term sets and collect their graphs and contexts
+        children = []
+        for kid in self.children.filter(status='published'):
+            kid_ld = kid.json_ld()
+            children.extend(kid_ld['@graph'])
+            # add children's context to current context, but current has
+            # higher priority
+            context = {**kid_ld['@context'], **context}
+        # iterate over terms and collect their graphs and contexts
+        terms = []
+        for term in self.terms.filter(status='published'):
+            term_ld = term.json_ld()
+            terms.extend(term_ld['@graph'])
+            # add terms' context to current context, but current has higher
+            # priority
+            context = {**term_ld['@context'], **context}
+        # return the graph and context
+        return {'@context': context, '@graph': [graph, *children, *terms]}
+
     def mapped_to(self, target_root):
         """Return dict of Terms mapped to anything in target_root string"""
 
@@ -152,6 +192,32 @@ def export(self):
             attrs['description'] = self.description
         return {**attrs}
 
+    def json_ld(self):
+        """Build the JSON-LD dict ('@context', '@graph') for this Term"""
+        # create graph and context dicts
+        graph = {}
+        context = {}
+        # add elements to graph and context
+        graph['@id'] = 'ldss:' + self.iri
+        graph['@type'] = 'rdf:Property'
+        if self.description is not None and len(self.description.strip()) > 0:
+            graph['rdfs:comment'] = self.description
+        if self.data_type is not None and len(self.data_type.strip()) > 0 and\
+                self.data_type in data_type_matching:
+            graph['schema:rangeIncludes'] = {
+                '@id': data_type_matching[self.data_type]}
+        if self.mapping.exists():
+            graph['owl:equivalentProperty'] = [
+                {'@id': 'ldss:' + alt.iri} for alt in self.mapping.all()]
+            context['owl'] = 'http://www.w3.org/2002/07/owl#'
+        graph['rdfs:label'] = self.name
+        graph['schema:domainIncludes'] = {'@id': 'ldss:' + self.term_set.iri}
+        context['schema'] = 'https://schema.org/'
+        context['rdfs'] = 'http://www.w3.org/2000/01/rdf-schema#'
+        context['rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+        # return the graph and context
+        return {'@context': context, '@graph': [graph, ]}
+
     def path(self):
         """Get the path of the Term"""
         path = self.name
@@ -231,7 +297,7 @@ def clean(self):
                     logger.error(
                         '%s %s in xss:%s@%s',
                         issue_type, issue, self.version, self.schema_name
-                        )
+                    )
             # only load json if no issues found
             else:
                 # rewind buffer