Skip to content

Commit

Permalink
workflows: support storage of original source
Browse files Browse the repository at this point in the history
* Implements two simple API functions,
  store_root_json() and retrieve_root_json(), which make it possible to
  preserve and later retrieve the original JSON source corresponding to a
  given publisher. (closes inspirehep#1666)

Signed-off-by: Samuele Kaplun <[email protected]>
  • Loading branch information
kaplun authored and rikirenz committed Jun 19, 2017
1 parent a6654e3 commit b9c7bb5
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 1 deletion.
26 changes: 26 additions & 0 deletions inspirehep/modules/workflows/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@

from datetime import datetime

from sqlalchemy.dialects import postgresql
from sqlalchemy_utils.types import JSONType, UUIDType

from invenio_db import db


Expand Down Expand Up @@ -76,3 +79,26 @@ class WorkflowsPendingRecord(db.Model):
nullable=False,
)
record_id = db.Column(db.Integer, nullable=False)


class WorkflowsRecordSources(db.Model):
    """JSON payload stored for a record, one row per source.

    Rows are identified by the ``(record_id, source)`` pair, as declared by
    the composite primary key constraint in ``__table_args__``.
    """

    __tablename__ = "workflows_record_sources"
    __table_args__ = (
        db.PrimaryKeyConstraint('record_id', 'source'),
    )

    # Name of the source the JSON belongs to; defaults to the empty string.
    source = db.Column(db.Text, default="", nullable=False)

    # Points at ``records_metadata.id``; the foreign key is declared with
    # ON DELETE CASCADE.
    record_id = db.Column(
        UUIDType,
        db.ForeignKey("records_metadata.id", ondelete='CASCADE'),
        primary_key=True,
        nullable=False,
    )

    # Generic JSON column type, swapped for the native PostgreSQL JSON type
    # on that dialect. The default is produced by a callable so that every
    # row gets its own fresh dict.
    json = db.Column(
        JSONType().with_variant(
            postgresql.JSON(none_as_null=True),
            'postgresql',
        ),
        default=lambda: {},
        nullable=True,
    )
40 changes: 39 additions & 1 deletion inspirehep/modules/workflows/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@
import requests
import urllib3
from flask import current_app
from invenio_db import db

from .models import WorkflowsAudit
from .models import WorkflowsAudit, WorkflowsRecordSources


LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -177,3 +178,40 @@ def download_file_to_workflow(workflow, name, url):
req.raw.decode_content = True
workflow.files[name] = req.raw
return workflow.files[name]


def store_root_json(record_uuid, source, json):
    """Store the root JSON of a record for a given source.

    Looks up the ``WorkflowsRecordSources`` row matching ``record_uuid`` and
    ``source`` (e.g. `arXiv` or `Elsevier`). If there is none, a new row
    holding ``json`` is created; otherwise the ``json`` of the existing row
    is overwritten. The row is added to the session inside a nested
    transaction; this function does not commit the session itself.
    """
    with db.session.begin_nested():
        stored = WorkflowsRecordSources.query.filter_by(
            record_id=record_uuid,
            source=source,
        ).one_or_none()

        if stored is not None:
            # A row already exists for this pair: replace its payload.
            stored.json = json
        else:
            stored = WorkflowsRecordSources(
                source=source,
                json=json,
                record_id=record_uuid,
            )

        db.session.add(stored)


def retrieve_root_json(record_uuid, source):
    """Return the root JSON stored for a record and a given source.

    Queries ``WorkflowsRecordSources`` for the row matching ``record_uuid``
    and ``source`` (e.g. `arXiv` or `Elsevier`). Returns the ``json`` value
    of that row, or an empty dict when no such row is found.
    """
    stored = WorkflowsRecordSources.query.filter_by(
        record_id=record_uuid,
        source=source,
    ).one_or_none()

    if not stored:
        return {}
    return stored.json
65 changes: 65 additions & 0 deletions tests/integration/workflows/test_workflows_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

from __future__ import absolute_import, division, print_function

import pytest

from invenio_db import db

from inspirehep.modules.records.api import InspireRecord
from inspirehep.modules.workflows.utils import store_root_json, retrieve_root_json


@pytest.fixture()
def dummy_record(small_app):
    """Yield a minimal record and force-delete it after the test."""
    record_data = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "titles": [
            {"title": "foo"},
        ],
        "document_type": ["thesis"],
    }
    created = InspireRecord.create(record_data)

    yield created

    # Teardown: runs once the test using this fixture has finished.
    created._delete(force=True)


def test_store_and_retrieve_of_root_json(dummy_record):
    """Check the store/retrieve round trip for root JSON.

    Covers three cases for the same record: the first store for a source,
    an update of the payload for that source, and retrieval for a source
    that was never stored, which must give an empty dict.
    """
    record_uuid = dummy_record.id

    # First store: the retrieved payload equals what was stored.
    original_root = {"title": "baz"}
    store_root_json(record_uuid=record_uuid, source='arXiv', json=original_root)
    db.session.commit()
    retrieved_root = retrieve_root_json(record_uuid=record_uuid, source='arXiv')

    assert original_root == retrieved_root

    # Second store for the same (record, source): the payload is replaced.
    updated_root = {"title": "bar"}
    store_root_json(record_uuid=record_uuid, source='arXiv', json=updated_root)
    db.session.commit()
    retrieved_root = retrieve_root_json(record_uuid=record_uuid, source='arXiv')

    assert updated_root == retrieved_root

    # Nothing was stored for this source. Assert on the retrieved value;
    # the previous version compared the expected literal with itself and
    # therefore could never fail.
    retrieved_root = retrieve_root_json(record_uuid=record_uuid, source='Elsevier')
    expected_root = {}

    assert expected_root == retrieved_root

0 comments on commit b9c7bb5

Please sign in to comment.