Skip to content

Commit

Permalink
workflows: support storage of original source
Browse files Browse the repository at this point in the history
* Implements easy API functions:
  store_root_json() and retrieve_root_json() allowing to preserve and
  later retrieve the original JSON source corresponding to a given
  publisher. (closes inspirehep#1666)

Signed-off-by: Samuele Kaplun <[email protected]>
  • Loading branch information
kaplun committed May 26, 2017
1 parent 576e20a commit d7b746b
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 1 deletion.
26 changes: 26 additions & 0 deletions inspirehep/modules/workflows/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@

from datetime import datetime

from sqlalchemy.dialects import postgresql
from sqlalchemy_utils.types import JSONType, UUIDType

from invenio_db import db


Expand Down Expand Up @@ -74,3 +77,26 @@ class WorkflowsPendingRecord(db.Model):
nullable=False,
)
record_id = db.Column(db.Integer, nullable=False)


class WorkflowsRecordSources(db.Model):

__tablename__ = "workflows_record_sources"
__table_args__ = (
db.PrimaryKeyConstraint('record_id', 'source'),
)
source = db.Column(db.Text, default="", nullable=False)
record_id = db.Column(
UUIDType,
db.ForeignKey("records_metadata.id", ondelete='CASCADE'),
primary_key=True,
nullable=False,
)
json = db.Column(
JSONType().with_variant(
postgresql.JSON(none_as_null=True),
'postgresql',
),
default=lambda: dict(),
nullable=True
)
40 changes: 39 additions & 1 deletion inspirehep/modules/workflows/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@
import requests
import urllib3
from flask import current_app
from invenio_db import db

from .models import WorkflowsAudit
from .models import WorkflowsAudit, WorkflowsRecordSources


LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -176,3 +177,40 @@ def download_file_to_workflow(workflow, name, url):
if req.status_code == 200:
workflow.files[name] = req.raw
return workflow.files[name]


def store_root_json(record_uuid, source, json):
"""Store the root json for a given source.
Given the ``record_uuid``, the ``source`` information (e.g. `arXiv` or
`Elsevier`) and the actual record in ``data`` store this information so
that it can be retrieved later.
"""
with db.session.begin_nested():
record_source = WorkflowsRecordSources.query.filter(
WorkflowsRecordSources.record_id == record_uuid,
WorkflowsRecordSources.source == source
).one_or_none()
if record_source is None:
record_source = WorkflowsRecordSources(
source=source,
json=json,
record_id=record_uuid
)
else:
record_source.json = json
db.session.add(record_source)


def retrieve_root_json(record_uuid, source):
"""Retrieve the root json for a given source.
Given a previously matched ``record_uuid``, the ``source`` information
(e.g. `arXiv` or `Elsevier`) returns the original record if existing or
empty json object otherwise.
"""
entry = WorkflowsRecordSources.query.filter(
WorkflowsRecordSources.record_id == record_uuid,
WorkflowsRecordSources.source == source
).one_or_none()
return entry.json if entry else {}

0 comments on commit d7b746b

Please sign in to comment.