diff --git a/inspirehep/modules/workflows/actions/hep_approval.py b/inspirehep/modules/workflows/actions/hep_approval.py index 29288e1a71..20da305482 100644 --- a/inspirehep/modules/workflows/actions/hep_approval.py +++ b/inspirehep/modules/workflows/actions/hep_approval.py @@ -63,7 +63,6 @@ def resolve(obj, *args, **kwargs): obj.extra_data["approved"] = approved obj.remove_action() - obj.extra_data["user_action"] = value obj.extra_data["upload_pdf"] = upload_pdf obj.extra_data["core"] = value == "accept_core" diff --git a/inspirehep/modules/workflows/actions/merge_approval.py b/inspirehep/modules/workflows/actions/merge_approval.py index 5bc410fd4c..432920eb19 100644 --- a/inspirehep/modules/workflows/actions/merge_approval.py +++ b/inspirehep/modules/workflows/actions/merge_approval.py @@ -32,4 +32,9 @@ class MergeApproval(object): @staticmethod def resolve(obj, *args, **kwargs): """Resolve the action taken in the approval action.""" - pass + obj.extra_data["approved"] = True + obj.extra_data["conflicts_solved_by_curator"] = True + obj.remove_action() + obj.save() + obj.continue_workflow(delayed=True) + return True diff --git a/inspirehep/modules/workflows/workflows/article.py b/inspirehep/modules/workflows/workflows/article.py index 99b14e572e..5785323fc5 100644 --- a/inspirehep/modules/workflows/workflows/article.py +++ b/inspirehep/modules/workflows/workflows/article.py @@ -27,7 +27,6 @@ from workflow.patterns.controlflow import ( IF, IF_ELSE, - IF_NOT, ) from inspire_dojson.hep import hep2marc @@ -43,7 +42,6 @@ add_core, halt_record, is_record_relevant, - in_production_mode, is_record_accepted, reject_record, is_experimental_paper, @@ -54,6 +52,12 @@ refextract, submission_fulltext_download, ) +from inspirehep.modules.workflows.tasks.merging import ( + merge_articles, + put_root_in_extra_data, + update_record, + store_root, +) from inspirehep.modules.workflows.tasks.classifier import ( classify_paper, filter_core_keywords, @@ -200,21 +204,6 @@ ] -CHECK_IF_SUBMISSION_AND_ASK_FOR_APPROVAL = [ - IF_ELSE( - is_record_relevant, - [halt_record( - action="hep_approval", - message="Submission halted for curator approval.", - )], - [ - reject_record("Article automatically rejected"), - stop_processing - ] - ), -] - - NOTIFY_NOT_ACCEPTED = [ IF( is_submission, @@ -234,6 +223,7 @@ context_factory=reply_ticket_context ), close_ticket(ticket_id_key="ticket_id"), + mark('stop', True) ] @@ -287,22 +277,6 @@ ), ] -CHECK_IF_MERGE_AND_STOP_IF_SO = [ - IF( - is_marked('is-update'), - [ - IF_ELSE( - is_submission, - NOTIFY_ALREADY_EXISTING, - [ - # halt_record(action="merge_approval"), - delete_self_and_stop_processing, - ] - ), - ] - ) -] - ADD_MARKS = [ IF( @@ -327,6 +301,65 @@ ] +MERGE_IF_UPDATE = [ + put_root_in_extra_data, + IF( + is_marked('is-update'), + [ + merge_articles, + mark('merged', True) + # TODO: save record with new non-conflicting merged fields + ], + ), +] + + +STOP_IF_EXISTING_SUBMISSION = [ + IF( + is_submission, + IF( + is_marked('is-update'), + NOTIFY_ALREADY_EXISTING + ) + ) +] + + +HALT_FOR_APPROVAL = [ + IF_ELSE( + is_record_relevant, + [ + IF_ELSE( + article_exists, + halt_record( + action="merge_approval", + message="Submission halted for curator approval.", + ), + halt_record( + action="hep_approval", + message="Submission halted for curator approval.", + ), + ) + ], + # record not relevant + [ + reject_record("Article automatically rejected"), + stop_processing + ] + ) +] + + +STORE_RECORD_AND_ROOT = [ + IF_ELSE( + is_marked('is-update'), + update_record, + store_record + ), + store_root, +] + + class Article(object): """Article ingestion workflow for Literature collection.""" name = "HEP" @@ -340,23 +373,17 @@ class Article(object): ADD_MARKS + DELETE_AND_STOP_IF_NEEDED + ENHANCE_RECORD + - # TODO: Once we have a way to resolve merges, we should - # use that instead of stopping - CHECK_IF_MERGE_AND_STOP_IF_SO + - CHECK_IF_SUBMISSION_AND_ASK_FOR_APPROVAL + + STOP_IF_EXISTING_SUBMISSION + + MERGE_IF_UPDATE + + HALT_FOR_APPROVAL + [ IF_ELSE( is_record_accepted, ( POSTENHANCE_RECORD + + STORE_RECORD_AND_ROOT + SEND_TO_LEGACY_AND_WAIT + - NOTIFY_USER_OR_CURATOR + - [ - # TODO: once legacy is out, this should become - # unconditional, and remove the SEND_TO_LEGACY_AND_WAIT - # steps - IF_NOT(in_production_mode, [store_record]), - ] + NOTIFY_USER_OR_CURATOR ), NOTIFY_NOT_ACCEPTED, ), diff --git a/tests/integration/workflows/conftest.py b/tests/integration/workflows/conftest.py index b7da47e227..7d4efc02cf 100644 --- a/tests/integration/workflows/conftest.py +++ b/tests/integration/workflows/conftest.py @@ -23,49 +23,23 @@ from __future__ import absolute_import, division, print_function import os -import sys - +import re import pytest +import requests_mock +import sys from invenio_db import db -from invenio_workflows import workflow_object_class from inspirehep.factory import create_app -from inspirehep.modules.workflows.models import ( - WorkflowsAudit, - WorkflowsPendingRecord, -) - +from inspirehep.modules.fixtures.collections import init_collections +from inspirehep.modules.fixtures.files import init_all_storage_paths +from inspirehep.modules.fixtures.users import init_users_and_permissions # Use the helpers folder to store test helpers. # See: http://stackoverflow.com/a/33515264/374865 sys.path.append(os.path.join(os.path.dirname(__file__), 'helpers')) -@pytest.fixture(autouse=True) -def cleanup_workflows_tables(small_app): - """Delete the contents of the workflow tables after each test. - - .. deprecated:: 2017-09-18 - Tests that need to clean up should do so explicitly. - """ - with small_app.app_context(): - obj_types = ( - WorkflowsAudit.query.all(), - WorkflowsPendingRecord.query.all(), - workflow_object_class.query(), - ) - for obj_type in obj_types: - for obj in obj_type: - obj.delete() - - db.session.commit() - - _es = small_app.extensions['invenio-search'] - list(_es.delete(ignore=[404])) - list(_es.create(ignore=[404])) - - @pytest.fixture def workflow_app(): """Flask application with no records and function scope. @@ -85,8 +59,71 @@ def workflow_app(): 'http://localhost:1234' ), MAGPIE_API_URL="http://example.com/magpie", + WORKFLOWS_MATCH_REMOTE_SERVER_URL="http://legacy_search.endpoint/", WTF_CSRF_ENABLED=False, ) with app.app_context(): yield app + + +def drop_all(app): + db.drop_all() + _es = app.extensions['invenio-search'] + list(_es.delete(ignore=[404])) + + +def create_all(app): + db.create_all() + _es = app.extensions['invenio-search'] + list(_es.create(ignore=[400])) + + init_all_storage_paths() + init_users_and_permissions() + init_collections() + + +@pytest.fixture(autouse=True) +def cleanup_workflows(workflow_app): + db.session.close_all() + drop_all(app=workflow_app) + create_all(app=workflow_app) + + +@pytest.fixture +def mocked_external_services(workflow_app): + with requests_mock.Mocker() as requests_mocker: + requests_mocker.register_uri( + requests_mock.ANY, + re.compile('.*(indexer|localhost).*'), + real_http=True, + ) + requests_mocker.register_uri( + 'POST', + re.compile( + 'https?://localhost:1234.*', + ), + text=u'[INFO]', + status_code=200, + ) + requests_mocker.register_uri( + requests_mock.ANY, + re.compile( + '.*' + + workflow_app.config['WORKFLOWS_MATCH_REMOTE_SERVER_URL'] + + '.*' + ), + status_code=200, + json=[], + ) + requests_mocker.register_uri( + requests_mock.ANY, + re.compile( + '.*' + + workflow_app.config['BEARD_API_URL'] + + '/text/phonetic_blocks.*' + ), + status_code=200, + json={'phonetic_blocks': {}}, + ) + yield diff --git a/tests/integration/workflows/fixtures/1511.01097 b/tests/integration/workflows/fixtures/1511.01097 deleted file mode 100644 index 1ebdf5c952..0000000000 Binary files a/tests/integration/workflows/fixtures/1511.01097 and /dev/null differ diff --git a/tests/integration/workflows/fixtures/1511.01097v1.pdf b/tests/integration/workflows/fixtures/1511.01097v1.pdf deleted file mode 100644 index df7db56582..0000000000 Binary files a/tests/integration/workflows/fixtures/1511.01097v1.pdf and /dev/null differ diff --git a/tests/integration/workflows/fixtures/1705.02541 b/tests/integration/workflows/fixtures/1705.02541 new file mode 100644 index 0000000000..8244cfa7c6 Binary files /dev/null and b/tests/integration/workflows/fixtures/1705.02541 differ diff --git a/tests/integration/workflows/fixtures/1705.02541.pdf b/tests/integration/workflows/fixtures/1705.02541.pdf new file mode 100644 index 0000000000..d28435203a Binary files /dev/null and b/tests/integration/workflows/fixtures/1705.02541.pdf differ diff --git a/tests/integration/workflows/fixtures/merger_head.json b/tests/integration/workflows/fixtures/merger_head.json new file mode 100644 index 0000000000..dd4fd60b58 --- /dev/null +++ b/tests/integration/workflows/fixtures/merger_head.json @@ -0,0 +1,348 @@ +{ + "$schema": "http://localhost:5000/schemas/records/hep.json", + "_collections": [ + "Literature" + ], + "_files": [ + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:52cb0a0ed6e662eb823327a42c5d6e9b", + "key": "1705.02541.pdf", + "size": 2806666, + "version_id": "1043bcee-dd8a-4047-b60b-7952de1c1cac" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:b225eb2916d88b261cb5f4538e5ced55", + "key": "1705.02541.tar.gz", + "size": 2213267, + "version_id": "42fd3671-c31f-4543-af99-17d794098526" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:16775d1d276f4afa1743bf39a91fc142", + "key": "LuWu_Density", + "size": 140, + "version_id": "51ad5938-f9f1-4f10-8816-1523c526afbc" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:41ddcc333d7ea991f18619183eb4944b", + "key": "sq_BK_density", + "size": 141, + "version_id": "5841885a-c9be-48fe-8ad3-1b7786ba0a55" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:cd0d7854d644663b84ca7faa46bb4e38", + "key": "sq_PB_w16_roots", + "size": 143, + "version_id": "b8f89ff8-9e69-4eb6-b80a-427f218c9719" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:2aec26e18fb162931f984fad57305cba", + "key": "8bis-no-titles", + "size": 142, + "version_id": "a3c68e57-91a3-4afb-9ada-15ae13352dbf" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:00cc4ef2b90133f075bb0efb4b8c365d", + "key": "equmod_L8", + "size": 137, + "version_id": "2481cdd6-c622-4927-8bc2-0a1e8b1cca76" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:02ac4e4d5ab376b141a6b327ea520730", + "key": "sq_BK_w22_inner_dens", + "size": 148, + "version_id": "9afda232-d1a9-4f85-bc22-6737a544ee34" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:b2dd5c2ed3ce0a02fde2bad06277d2a3", + "key": "equmod_P0_L12", + "size": 141, + "version_id": "6ce586a5-a89f-4360-ad39-417fa5ba19a0" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:5b5f0d8de1ec99a9e54c39fc0fb9ec1f", + "key": "sq_BK_w22_inner", + "size": 143, + "version_id": "e79efbf3-f350-445e-90aa-16485fbbb094" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:e9026732a8d25639f9eeda6854f2ce4c", + "key": "sq_BK_w20_roots", + "size": 143, + "version_id": "2d933753-ca11-4465-b32e-8909cc2f579e" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:f4821dce9b9db66d2c0e083049eeb5d6", + "key": "6bis-no-titles", + "size": 142, + "version_id": "fbe57fd9-ff85-458d-98d5-038d0c38e149" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:d30334cc2af846cc9209169b3829be06", + "key": "equmod_P0_L8", + "size": 140, + "version_id": "444bcc4d-d7c6-442f-a6fe-895f97110a84" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:89834c873f7307d2a66ef0d20bbb1342", + "key": "sq_BK_w22_roots_field", + "size": 149, + "version_id": "df127a9a-2cc4-47c6-a23e-f5b92a6ce81b" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:c4acf2e5f7ad3b05f4f18fca5234ac0d", + "key": "sq_CF_w20_roots", + "size": 143, + "version_id": "c6659840-a149-4403-ae8c-8050c5e0d3b1" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:616677d92283017707f401a76966490c", + "key": "equmod_P0_L10", + "size": 141, + "version_id": "fe2a3a6c-b53d-4e95-ad8e-48eab984926a" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:4f5b38f45a3b1eeec08983f47465b6dd", + "key": "HS_CF_w40_roots", + "size": 143, + "version_id": "05a0a5cf-ca59-480e-9101-26f69f5edeb8" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:fd0ffabb89ae61628f5839cb68ccd268", + "key": "6bis_zoom", + "size": 137, + "version_id": "967579b0-db22-404d-8c2b-dd5c039a9bb7" + }, + { + "bucket": "c2658e45-6d9c-41b7-bd6d-b16cddb561d6", + "checksum": "md5:b947af7c21c65bf4153cd4f4be1eb571", + "key": "HH_CF_w39_roots", + "size": 143, + "version_id": "ac57f1d6-d962-42cc-bb0f-2c6fb9d2ad3a" + } + ], + "abstracts": [ + { + "source": "arXiv", + "value": "We discuss the implications of studies of partition function zeros and equimodular curves for the analytic properties of the Ising model on a square lattice in a magnetic field. In particular we consider the dense set of singularities in the susceptibility of the Ising model at $H=0$ found by Nickel and its relation to the analyticity of the field theory computations of Fonseca and Zamolodchikov. Curator change.\n" + } + ], + "accelerator_experiments": [ + { + "legacy_name": "CERN-LHC-ALICE" + } + ], + "acquisition_source": { + "datetime": "2017-05-11T08:50:25.184741", + "method": "hepcrawl", + "source": "arXiv", + "submission_number": "db9325b2362611e78bfd0242ac12000b" + }, + "arxiv_eprints": [ + { + "categories": [ + "math-ph", + "cond-mat.stat-mech", + "math.MP" + ], + "value": "1705.02541" + } + ], + "authors": [ + { + "affiliations": [ + { + "curated_relation": true, + "record": { + "$ref": "http://localhost:5000/api/institutions/902725" + }, + "value": "CERN" + } + ], + "full_name": "Assis, Mathieu", + "signature_block": "ASm", + "uuid": "e3b0a6e6-5950-41c4-ba8a-76cd597cb0d5" + }, + { + "affiliations": [ + { + "curated_relation": true, + "record": { + "$ref": "http://localhost:5000/api/institutions/902725" + }, + "value": "CERN" + } + ], + "full_name": "Jacobsen, J.L.", + "signature_block": "JACABSANj", + "uuid": "3f4cc9b7-ade5-4b9c-885c-f20bba88f5e2" + }, + { + "affiliations": [ + { + "curated_relation": true, + "record": { + "$ref": "http://localhost:5000/api/institutions/902725" + }, + "value": "C" + } + ], + "full_name": "Jensen, I.", + "signature_block": "JANSANi", + "uuid": "7b11df9d-6ac8-44f4-93bd-1003b3dff38e" + }, + { + "affiliations": [ + { + "curated_relation": true, + "record": { + "$ref": "http://localhost:5000/api/institutions/902725" + }, + "value": "CER" + } + ], + "full_name": "Maillard, J-M.", + "signature_block": "MALADj", + "uuid": "1d5ed399-9577-4d53-be4b-1e2279a3a493" + }, + { + "affiliations": [ + { + "curated_relation": true, + "record": { + "$ref": "http://localhost:5000/api/institutions/902725" + }, + "value": "CERN" + } + ], + "full_name": "McCoy, B.M.", + "signature_block": "MCYb", + "uuid": "3a75404c-664c-4369-8ced-3fd9e9050c19" + } + ], + "citeable": true, + "collaborations": [ + { + "value": "ALICE" + } + ], + "control_number": 1, + "core": true, + "document_type": [ + "article" + ], + "dois": [ + { + "value": "10.0001/test_doi" + } + ], + "inspire_categories": [ + { + "source": "arxiv", + "term": "Math and Math Physics" + }, + { + "source": "arxiv", + "term": "General Physics" + }, + { + "term": "Instrumentation" + } + ], + "keywords": [ + { + "source": "magpie", + "value": "Ising model" + }, + { + "source": "magpie", + "value": "partition function" + }, + { + "source": "magpie", + "value": "lattice field theory" + }, + { + "source": "magpie", + "value": "analytic properties" + }, + { + "source": "magpie", + "value": "lattice" + }, + { + "source": "magpie", + "value": "singularity" + }, + { + "source": "magpie", + "value": "dimension: 2" + }, + { + "schema": "INSPIRE", + "source": "", + "value": "whatever\n" + } + ], + "license": [ + { + "license": "arXiv-1.0", + "url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/" + } + ], + "number_of_pages": 21, + "preprint_date": "2017-05-06", + "public_notes": [ + { + "source": "arXiv", + "value": "21 pages, 13 figures" + }, + { + "value": "*Temporary entry*" + }, + { + "value": "This is a test public note by the curator\n" + } + ], + "refereed": true, + "report_numbers": [ + { + "source": "hepcrawl", + "value": "LPTENS/17/12" + }, + { + "value": "CURATOR-001" + } + ], + "titles": [ + { + "source": "arXiv", + "title": "Analyticity of the Ising curation: An interpretation" + } + ], + "documents": [ + { + "key": "1_1705.02541.pdf", + "original_url": "http://export.arxiv.org/pdf/1705.02541", + "source": "arxiv", + "fulltext": true + } + ] +} diff --git a/tests/integration/workflows/fixtures/merger_root.json b/tests/integration/workflows/fixtures/merger_root.json new file mode 100644 index 0000000000..b9f2db0158 --- /dev/null +++ b/tests/integration/workflows/fixtures/merger_root.json @@ -0,0 +1,241 @@ +{ + "$schema": "http://localhost:5000/schemas/records/hep.json", + "_collections": ["Literature"], + "_files": [ + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:52cb0a0ed6e662eb823327a42c5d6e9b", + "key": "1705.02541.pdf", + "size": 2806666, + "version_id": "72f78ea9-84d8-4908-8227-0f500feb609b" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:b225eb2916d88b261cb5f4538e5ced55", + "key": "1705.02541.tar.gz", + "size": 2213267, + "version_id": "b65d63b8-190c-40aa-ac65-5e1c7734b562" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:164fc5e2686d91d096a4c872c2de064c", + "description": "00000 Partition function zeros for the $22\\times 22$ lattice with Brascamp-Kunz boundary conditions on the inner loop in the plane $y=u^2x^{1/2}$ for $x=1.0,~0.99,~0.98,~0.95.~0.90,~0.80$", + "key": "sq_BK_w22_inner", + "size": 143, + "version_id": "6a553746-ff9f-4ceb-a04a-cec285b0de27" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:2c84bddce537eca1a70b99a69b60a8c6", + "description": "00001 The nearest neighbor density of zeros (\\ref{bkdensity}) of the $22\\times 22$ lattice with Brascamp-Kunz boundary conditions in the plane $y=u^2x^{1/2}$ for $x=0.94,~0.90,~0.80,~0.50,~0.10,~0.01$ versus the the index $j$ .", + "key": "sq_BK_w22_inner_dens", + "size": 148, + "version_id": "3cb965f6-ebce-4651-8130-c53b4fb9e784" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:cc94a7188ea307a28b137df0bea5cf19", + "description": "00002 Equimodular curves in the $u$ plane for $x=0.99$ of $T_C(L_h)$ for $L_h=6$ on the left and $L_h=8$ on the right. Red is for singlet-singlet crossings, green is for singlet-doublet and blue is for doublet-doublet", + "key": "6bis-no-titles", + "size": 142, + "version_id": "289bac58-207a-43cc-89a1-03e47c9b283e" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:7c6315f3edaf03e6a6866109e9e3b0d6", + "description": "00003 Equimodular curves in the $u$ plane for $x=0.99$ of $T_C(L_h)$ for $L_h=6$ on the left and $L_h=8$ on the right. Red is for singlet-singlet crossings, green is for singlet-doublet and blue is for doublet-doublet", + "key": "8bis-no-titles", + "size": 142, + "version_id": "4f993d5b-1e8b-414a-b1ca-ad74fc98e303" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:b7e4074df17aafdaab321d98b8c22ef5", + "description": "00004 The equimodular curves in the $u$ plane for $T_C(L_h)$ for $L_h=8$. On the left all eigenvalues are considered and on the right the restriction to the momentum sector $P=0$ is made. The sectors where $\\lambda_{+}$ is dominant is marked by $+$ and the sector where $\\lambda_{-}$ is dominant is marked by a circle. The multiplicity of the crossings on the curves are indicated by colors. On left panel:red=2, green=3, black=4, blue=8, yellow=16, purple=32, brown=64 On right panel: red=2, green=4, blue=8, brown =3, black=9.", + "key": "equmod_L8", + "size": 137, + "version_id": "0a6b366f-6809-4235-8fef-739113c6a7a4" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:5fc8807d6308fdaf8dd216c0a5074841", + "description": "00005 Plots of the scale dependent density $g(\\alpha;a)_N$ for the Brascamp-Kunz zeros as a function of the angle $\\alpha/\\pi$ for the $20\\times 20$ lattice on the left and the $100\\times 100$ lattice on the right. In the first row $a=1,$ in the second row $a=[L^{1/2}]$ and in the third row $a=L=N^{1/2}$. This limiting density (\\ref{lwdensity}) of \\cite{luwu} is shown in red.", + "key": "sq_BK_density", + "size": 141, + "version_id": "03eb4d4e-508c-4c3e-8c32-0b749b224dd9" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:55dc88aeecf9776013791a26cb335bce", + "description": "00006 Equimodular curves in the $u$ plane for $x=0.99$ expanded near $u=i$ for $T_c(L_h)$ with $L_h=8$. Red is for singlet-singlet crossings, green is for singlet-doublet and blue is for doublet-doublet", + "key": "6bis_zoom", + "size": 137, + "version_id": "33ef5f17-78d7-4160-b284-e0a74e2cb9f6" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:5abb9a717bec1c9c371f60a26b5698f0", + "description": "00007 The density $g(\\alpha)$ of Lu and Wu \\cite{luwu}.", + "key": "LuWu_Density", + "size": 140, + "version_id": "631871ca-faf2-4a22-8115-e24b1d911244" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:a22dd5e86c6829cfd3dfd8e90cbf3eac", + "description": "00008 Brascamp-Kunz zeros in the plane $y=u^2x^{1/2}$ on the $22\\times 22$ lattice for values of $x=0.99,~0.90,~0.50,~0.10,~0.01,~0.0001$.", + "key": "sq_BK_w22_roots_field", + "size": 149, + "version_id": "4b824772-161c-479b-8189-19fc5b26e100" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:973cdbd515213bebf0f45041bcceeffd", + "description": "00009 The equimodular curves in the $u$ plane for $T_C(L_h)$ for $L_h=8$. On the left all eigenvalues are considered and on the right the restriction to the momentum sector $P=0$ is made. The sectors where $\\lambda_{+}$ is dominant is marked by $+$ and the sector where $\\lambda_{-}$ is dominant is marked by a circle. The multiplicity of the crossings on the curves are indicated by colors. On left panel:red=2, green=3, black=4, blue=8, yellow=16, purple=32, brown=64 On right panel: red=2, green=4, blue=8, brown =3, black=9.", + "key": "equmod_P0_L8", + "size": 140, + "version_id": "c70a0d91-b597-457d-b501-73fe989286ae" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:0ff8ba5245b2266503bd552bb3c2d42e", + "description": "00010 The zeros in the plane of $y=ux^{1/4}$ for the $16\\times 17$ lattice with toroidal boundary conditions for $x=1.0,~0.9,~0.5,~0.1,~0.01,~0.001.$", + "key": "sq_PB_w16_roots", + "size": 143, + "version_id": "7d79d73c-5b5d-47c3-86c2-bffba88f937c" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:c93dac1ab364373502ac9f121ac983a6", + "description": "00011 Comparison in the complex fugacity plane $z$ of the zeros of the partition function with cylindrical boundary of hard squares on the $40\\times 40$ lattice to hard hexagons on the $39\\times 39$ lattice taken from Figure 2 of ref. \\cite{mccoy1}.", + "key": "HS_CF_w40_roots", + "size": 143, + "version_id": "d676e67b-c0e3-4c21-b6c8-995d3e4b5ff2" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:137bf00011891dca5658507b7e694f2b", + "description": "00012 The equimodular curves in the $u$ plane for $T_C(L_h)$ at $P=0$ for $L_h=10$ on the left and 12 on the right. Red indicates a multiplicity of 2, green of 4 and blue of 8. For $L=10$ the sequence of multiplicities on the upper (antiferromagnetic) sequence (increasing towards $u=i$) is 2,4,8,4,18,24 and the lower (ferromagnetic) sequence 2,2,4,4,8,8,18,28. For $L=12$ the upper sequence 2,4,8,2,18,18,52,84 and the lower sequence is 2,2,4,4,8,8,18,26,52,88", + "key": "equmod_P0_L12", + "size": 141, + "version_id": "09da8dd7-ae48-43ac-8b67-3e7adcf715b9" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:9aba2c07af823bf1bd9455c48592206c", + "description": "00013 Zeros of the isotropic Ising model partition function at $H=0~(x=1)$ with Brascamp-Kunz boundary conditions for the $20\\times 20$ lattice. The full $s$ plane is plotted on the left. On the right the zeros are plotted in the $u$ plane; the zeros are on the two circles $u=\\pm 1+2^{1/2}e^{i\\theta}$ and only the first quadrant is shown.", + "key": "sq_BK_w20_roots", + "size": 143, + "version_id": "0dd9f7af-02d2-4fd0-b152-a3a195bf3554" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:55425363806822c1fc5ef37416bbeb8b", + "description": "00014 The zeros in the $y=ux^{1/4}$ plane for the $20\\times 20$ lattice with cylindrical boundary conditions for $x=1.0,~0.5,~0.1, ~0.01,~0.001,~0.0001,~0.00001,~0.000001.$", + "key": "sq_CF_w20_roots", + "size": 143, + "version_id": "9b61737a-34e2-441f-842f-4178a7176ddf" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:c913ce3bcc62ef43a88e603cef96b7f5", + "description": "00015 The equimodular curves in the $u$ plane for $T_C(L_h)$ at $P=0$ for $L_h=10$ on the left and 12 on the right. Red indicates a multiplicity of 2, green of 4 and blue of 8. For $L=10$ the sequence of multiplicities on the upper (antiferromagnetic) sequence (increasing towards $u=i$) is 2,4,8,4,18,24 and the lower (ferromagnetic) sequence 2,2,4,4,8,8,18,28. For $L=12$ the upper sequence 2,4,8,2,18,18,52,84 and the lower sequence is 2,2,4,4,8,8,18,26,52,88", + "key": "equmod_P0_L10", + "size": 141, + "version_id": "e5f2c336-1f55-4318-aea6-069224f3d008" + }, + { + "bucket": "81e83c90-8f6d-4db4-9571-4b01bbb8a5f9", + "checksum": "md5:32ac6df8ac4e4be53ed79ae6ec72bedc", + "description": "00016 Comparison in the complex fugacity plane $z$ of the zeros of the partition function with cylindrical boundary of hard squares on the $40\\times 40$ lattice to hard hexagons on the $39\\times 39$ lattice taken from Figure 2 of ref. \\cite{mccoy1}.", + "key": "HH_CF_w39_roots", + "size": 143, + "version_id": "6f33ab15-29a0-447e-af90-19b0165cf081" + } + ], + "abstracts": [ + { + "source": "arXiv", + "value": "We discuss the implications of studies of partition function zeros and equimodular curves for the analytic properties of the Ising model on a square lattice in a magnetic field. In particular we consider the dense set of singularities in the susceptibility of the Ising model at $H=0$ found by Nickel and its relation to the analyticity of the field theory computations of Fonseca and Zamolodchikov." + } + ], + "acquisition_source": { + "datetime": "2017-05-11T08:50:25.184741", + "method": "hepcrawl", + "source": "arXiv", + "submission_number": "db9325b2362611e78bfd0242ac12000b" + }, + "arxiv_eprints": [ + { + "categories": [ + "math-ph", + "cond-mat.stat-mech", + "math.MP" + ], + "value": "1705.02541" + } + ], + "authors": [ + { + "affiliations": [], + "full_name": "Assis, M." + }, + { + "affiliations": [], + "full_name": "Jacobsen, J.L." + }, + { + "affiliations": [], + "full_name": "Jensen, I." + }, + { + "affiliations": [], + "full_name": "Maillard, J-M." + }, + { + "affiliations": [], + "full_name": "McCoy, B.M." + } + ], + "citeable": true, + "document_type": [ + "article" + ], + "inspire_categories": [ + { + "source": "arxiv", + "term": "Math and Math Physics" + }, + { + "source": "arxiv", + "term": "General Physics" + } + ], + "license": [ + { + "license": "arXiv-1.0", + "url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/" + } + ], + "number_of_pages": 20, + "preprint_date": "2017-05-06", + "public_notes": [ + { + "source": "arXiv", + "value": "21 pages, 13 figures" + } + ], + "report_numbers": [ + { + "source": "hepcrawl", + "value": "LPTENS/17/12" + } + ], + "titles": [ + { + "source": "arXiv", + "title": "Analyticity of the Ising susceptibility: An interpretation" + } + ], + "control_number": 1 +} diff --git a/tests/integration/workflows/fixtures/merger_update.json b/tests/integration/workflows/fixtures/merger_update.json new file mode 100644 index 0000000000..a47df50507 --- /dev/null +++ b/tests/integration/workflows/fixtures/merger_update.json @@ -0,0 +1,183 @@ +{ + "preprint_date": "2017-05-06", + "report_numbers": [ + { + "source": "hepcrawl", + "value": "LPTENS/17/12" + } + ], + "acquisition_source": { + "source": "arXiv", + "datetime": "2017-05-11T08:50:25.184741", + "method": "hepcrawl", + "submission_number": "db9325b2362611e78bfd0242ac12000b" + }, + "license": [ + { + "url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/", + "license": "arXiv-1.0" + } + ], + "public_notes": [ + { + "source": "arXiv", + "value": "21 pages, 13 figures" + } + ], + "number_of_pages": 23, + "_files": [ + { + "key": "1705.02541.pdf", + "size": 2806666 + }, + { + "key": "1705.02541.tar.gz", + "size": 2213267 + }, + { + "description": "00000 Partition function zeros for the $22\\times 22$ lattice with Brascamp-Kunz boundary conditions on the inner loop in the plane $y=u^2x^{1/2}$ for $x=1.0,~0.99,~0.98,~0.95.~0.90,~0.80$", + "key": "sq_BK_w22_inner", + "size": 143 + }, + { + "description": "00001 The nearest neighbor density of zeros (\\ref{bkdensity}) of the $22\\times 22$ lattice with Brascamp-Kunz boundary conditions in the plane $y=u^2x^{1/2}$ for $x=0.94,~0.90,~0.80,~0.50,~0.10,~0.01$ versus the the index $j$ .", + "key": "sq_BK_w22_inner_dens", + "size": 148 + }, + { + "description": "00002 Equimodular curves in the $u$ plane for $x=0.99$ of $T_C(L_h)$ for $L_h=6$ on the left and $L_h=8$ on the right. Red is for singlet-singlet crossings, green is for singlet-doublet and blue is for doublet-doublet", + "key": "6bis-no-titles", + "size": 142 + }, + { + "description": "00003 Equimodular curves in the $u$ plane for $x=0.99$ of $T_C(L_h)$ for $L_h=6$ on the left and $L_h=8$ on the right. Red is for singlet-singlet crossings, green is for singlet-doublet and blue is for doublet-doublet", + "key": "8bis-no-titles", + "size": 142 + }, + { + "description": "00004 The equimodular curves in the $u$ plane for $T_C(L_h)$ for $L_h=8$. On the left all eigenvalues are considered and on the right the restriction to the momentum sector $P=0$ is made. The sectors where $\\lambda_{+}$ is dominant is marked by $+$ and the sector where $\\lambda_{-}$ is dominant is marked by a circle. The multiplicity of the crossings on the curves are indicated by colors. On left panel:red=2, green=3, black=4, blue=8, yellow=16, purple=32, brown=64 On right panel: red=2, green=4, blue=8, brown =3, black=9.", + "key": "equmod_L8", + "size": 137 + }, + { + "description": "00005 Plots of the scale dependent density $g(\\alpha;a)_N$ for the Brascamp-Kunz zeros as a function of the angle $\\alpha/\\pi$ for the $20\\times 20$ lattice on the left and the $100\\times 100$ lattice on the right. In the first row $a=1,$ in the second row $a=[L^{1/2}]$ and in the third row $a=L=N^{1/2}$. This limiting density (\\ref{lwdensity}) of \\cite{luwu} is shown in red.", + "key": "sq_BK_density", + "size": 141 + }, + { + "description": "00006 Equimodular curves in the $u$ plane for $x=0.99$ expanded near $u=i$ for $T_c(L_h)$ with $L_h=8$. Red is for singlet-singlet crossings, green is for singlet-doublet and blue is for doublet-doublet", + "key": "6bis_zoom", + "size": 137 + }, + { + "description": "00007 The density $g(\\alpha)$ of Lu and Wu \\cite{luwu}.", + "key": "LuWu_Density", + "size": 140 + }, + { + "description": "00008 Brascamp-Kunz zeros in the plane $y=u^2x^{1/2}$ on the $22\\times 22$ lattice for values of $x=0.99,~0.90,~0.50,~0.10,~0.01,~0.0001$.", + "key": "sq_BK_w22_roots_field", + "size": 149 + }, + { + "description": "00009 The equimodular curves in the $u$ plane for $T_C(L_h)$ for $L_h=8$. On the left all eigenvalues are considered and on the right the restriction to the momentum sector $P=0$ is made. The sectors where $\\lambda_{+}$ is dominant is marked by $+$ and the sector where $\\lambda_{-}$ is dominant is marked by a circle. The multiplicity of the crossings on the curves are indicated by colors. On left panel:red=2, green=3, black=4, blue=8, yellow=16, purple=32, brown=64 On right panel: red=2, green=4, blue=8, brown =3, black=9.", + "key": "equmod_P0_L8", + "size": 140 + }, + { + "description": "00010 The zeros in the plane of $y=ux^{1/4}$ for the $16\\times 17$ lattice with toroidal boundary conditions for $x=1.0,~0.9,~0.5,~0.1,~0.01,~0.001.$", + "key": "sq_PB_w16_roots", + "size": 143 + }, + { + "description": "00011 Comparison in the complex fugacity plane $z$ of the zeros of the partition function with cylindrical boundary of hard squares on the $40\\times 40$ lattice to hard hexagons on the $39\\times 39$ lattice taken from Figure 2 of ref. \\cite{mccoy1}.", + "key": "HS_CF_w40_roots", + "size": 143 + }, + { + "description": "00012 The equimodular curves in the $u$ plane for $T_C(L_h)$ at $P=0$ for $L_h=10$ on the left and 12 on the right. Red indicates a multiplicity of 2, green of 4 and blue of 8. For $L=10$ the sequence of multiplicities on the upper (antiferromagnetic) sequence (increasing towards $u=i$) is 2,4,8,4,18,24 and the lower (ferromagnetic) sequence 2,2,4,4,8,8,18,28. For $L=12$ the upper sequence 2,4,8,2,18,18,52,84 and the lower sequence is 2,2,4,4,8,8,18,26,52,88", + "key": "equmod_P0_L12", + "size": 141 + }, + { + "description": "00013 Zeros of the isotropic Ising model partition function at $H=0~(x=1)$ with Brascamp-Kunz boundary conditions for the $20\\times 20$ lattice. The full $s$ plane is plotted on the left. On the right the zeros are plotted in the $u$ plane; the zeros are on the two circles $u=\\pm 1+2^{1/2}e^{i\\theta}$ and only the first quadrant is shown.", + "key": "sq_BK_w20_roots", + "size": 143 + }, + { + "description": "00014 The zeros in the $y=ux^{1/4}$ plane for the $20\\times 20$ lattice with cylindrical boundary conditions for $x=1.0,~0.5,~0.1, ~0.01,~0.001,~0.0001,~0.00001,~0.000001.$", + "key": "sq_CF_w20_roots", + "size": 143 + }, + { + "description": "00015 The equimodular curves in the $u$ plane for $T_C(L_h)$ at $P=0$ for $L_h=10$ on the left and 12 on the right. Red indicates a multiplicity of 2, green of 4 and blue of 8. For $L=10$ the sequence of multiplicities on the upper (antiferromagnetic) sequence (increasing towards $u=i$) is 2,4,8,4,18,24 and the lower (ferromagnetic) sequence 2,2,4,4,8,8,18,28. For $L=12$ the upper sequence 2,4,8,2,18,18,52,84 and the lower sequence is 2,2,4,4,8,8,18,26,52,88", + "key": "equmod_P0_L10", + "size": 141 + }, + { + "description": "00016 Comparison in the complex fugacity plane $z$ of the zeros of the partition function with cylindrical boundary of hard squares on the $40\\times 40$ lattice to hard hexagons on the $39\\times 39$ lattice taken from Figure 2 of ref. \\cite{mccoy1}.", + "key": "HH_CF_w39_roots", + "size": 143 + } + ], + "inspire_categories": [ + { + "source": "arxiv", + "term": "Math and Math Physics" + }, + { + "source": "arxiv", + "term": "General Physics" + } + ], + "authors": [ + { + "affiliations": [], + "full_name": "Assis, M." + }, + { + "affiliations": [], + "full_name": "Jacobsen, J.L." + }, + { + "affiliations": [], + "full_name": "Jensen, I." + }, + { + "affiliations": [], + "full_name": "Maillard, J-M." + }, + { + "affiliations": [], + "full_name": "McCoy, B.M." + } + ], + "titles": [ + { + "source": "arXiv", + "title": "Analyticity of the Ising susceptibility: An interpretation" + } + ], + "arxiv_eprints": [ + { + "categories": [ + "math-ph", + "cond-mat.stat-mech", + "math.MP" + ], + "value": "1705.02541" + } + ], + "$schema": "http://localhost:5000/schemas/records/hep.json", + "_collections": ["Literature"], + "document_type": [ + "article" + ], + "abstracts": [ + { + "source": "arXiv", + "value": "We discuss the implications of studies of partition function zeros and equimodular curves for the analytic properties of the Ising model on a square lattice in a magnetic field. In particular we consider the dense set of singularities in the susceptibility of the Ising model at $H=0$ found by Nickel and its relation to the analyticity of the field theory computations of Fonseca and Zamolodchikov." + } + ], + "citeable": true +} diff --git a/tests/integration/workflows/helpers/mocks.py b/tests/integration/workflows/helpers/mocks.py index 6b5c47c456..fc337672e2 100644 --- a/tests/integration/workflows/helpers/mocks.py +++ b/tests/integration/workflows/helpers/mocks.py @@ -34,7 +34,8 @@ def fake_download_file(workflow, name, url): workflow.files[name] = pkg_resources.resource_stream( __name__, os.path.join( - '../fixtures', + os.pardir, + 'fixtures', '1407.7587v1' ) ) @@ -43,11 +44,32 @@ def fake_download_file(workflow, name, url): workflow.files[name] = pkg_resources.resource_stream( __name__, os.path.join( - '../fixtures', + os.pardir, + 'fixtures', '1407.7587v1.pdf', ) ) return workflow.files[name] + if url == 'http://export.arxiv.org/e-print/1705.02541': + workflow.files[name] = pkg_resources.resource_stream( + __name__, + os.path.join( + os.pardir, + 'fixtures', + '1705.02541' + ) + ) + return workflow.files[name] + elif url == 'http://export.arxiv.org/pdf/1705.02541': + workflow.files[name] = pkg_resources.resource_stream( + __name__, + os.path.join( + os.pardir, + 'fixtures', + '1705.02541.pdf', + ) + ) + return workflow.files[name] raise Exception("Download file not mocked!") diff --git a/tests/integration/workflows/test_arxiv_workflow.py b/tests/integration/workflows/test_arxiv_workflow.py index 5bf01f72d1..e19cb0c192 100644 --- a/tests/integration/workflows/test_arxiv_workflow.py +++ b/tests/integration/workflows/test_arxiv_workflow.py @@ -24,11 +24,18 @@ from __future__ import absolute_import, division, print_function +import datetime +import json import mock -import re -import requests_mock +import os +import pkg_resources +import pytest +from dojson.contrib.marc21.utils import create_record + +from invenio_search import current_search_client as es from invenio_db import db +from invenio_records.models import RecordMetadata from invenio_workflows import ( ObjectStatus, WorkflowEngine, @@ -48,13 +55,105 @@ fake_beard_api_request, fake_magpie_api_request, ) + +from inspire_dojson.hep import hep + +from inspirehep.modules.pidstore.minters import inspire_recid_minter + +from inspirehep.modules.records.api import InspireRecord + +from inspirehep.modules.workflows.utils import convert + + from utils import get_halted_workflow +from inspirehep.modules.workflows.tasks.merging import ( + _insert_wf_record_source, + _get_match_recid +) + +from inspirehep.modules.migrator.tasks import record_insert_or_replace + + +def read_file(test_dir, file_name): + base_dir = os.path.dirname(os.path.realpath(__file__)) + with open(os.path.join(base_dir, test_dir, file_name)) as f: + return json.loads(f.read()) + + +def create_head_record(): + head = read_file('fixtures', 'merger_head.json') + record = InspireRecord.create(head) + inspire_recid_minter(str(record.id), record) + return record.id + + +@pytest.fixture +def record(): + """Provide record fixture.""" + record_oai_arxiv_plots = pkg_resources.resource_string( + __name__, + os.path.join( + 'fixtures', + 'oai_arxiv_record_with_plots.xml' + ) + ) + # Convert to MARCXML, then dict, then HEP JSON + record_oai_arxiv_plots_marcxml = convert( + record_oai_arxiv_plots, + "oaiarXiv2marcxml.xsl" + ) + record_marc = create_record(record_oai_arxiv_plots_marcxml) + json_data = hep.do(record_marc) + + if 'preprint_date' in json_data: + json_data['preprint_date'] = datetime.date.today().isoformat() + + return json_data + + +@pytest.fixture +def to_accept_record(): + """Provide record fixture.""" + record_oai_arxiv_plots = pkg_resources.resource_string( + __name__, + os.path.join( + 'fixtures', + 'oai_arxiv_record_to_accept.xml' + ) + ) + # Convert to MARCXML, then dict, then HEP JSON + record_oai_arxiv_plots_marcxml = convert( + record_oai_arxiv_plots, + "oaiarXiv2marcxml.xsl" + ) + record_marc = create_record(record_oai_arxiv_plots_marcxml) + json_data = hep.do(record_marc) + + return json_data + + +@mock.patch( + 'inspirehep.modules.workflows.tasks.arxiv.is_pdf_link' +) +def run_workflow(cls, app, record, extra_config=None): + extra_config = extra_config or {} + with mock.patch.dict(app.config, extra_config): + workflow_uuid = start('article', [record]) + + eng = WorkflowEngine.from_uuid(workflow_uuid) + obj = eng.processed_objects[0] + + return obj.id @mock.patch( 'inspirehep.modules.workflows.utils.download_file_to_workflow', side_effect=fake_download_file, ) +@mock.patch( + 'inspirehep.modules.workflows.tasks.arxiv.download_file_to_workflow', + side_effect=fake_download_file, +) @mock.patch( 'inspirehep.modules.workflows.tasks.beard.json_api_request', side_effect=fake_beard_api_request, @@ -68,50 +167,56 @@ return_value=[], ) def test_harvesting_arxiv_workflow_manual_rejected( - mocked_refextract_extract_refs, - mocked_api_request_magpie, - mocked_api_request_beard, mocked_download, - small_app, + mocked_arxiv_download, + mocked_api_request_beard, + mocked_api_request_magpie, + mocked_refextract_extract_refs, + workflow_app, + mocked_external_services, ): """Test a full harvesting workflow.""" + record = generate_record() extra_config = { "BEARD_API_URL": "http://example.com/beard", "MAGPIE_API_URL": "http://example.com/magpie", } - workflow_uuid = None - with small_app.app_context(): - workflow_uuid, eng, obj = get_halted_workflow( - app=small_app, - extra_config=extra_config, - record=record, - ) + workflow_uuid, eng, obj = get_halted_workflow( + app=workflow_app, + extra_config=extra_config, + record=record, + ) - # Now let's resolve it as accepted and continue - # FIXME Should be accept, but record validation prevents us. - obj.remove_action() - obj.extra_data["approved"] = False - # obj.extra_data["core"] = True - obj.save() + obj.remove_action() + obj.extra_data["approved"] = False + obj.save() - db.session.commit() + db.session.commit() - eng = WorkflowEngine.from_uuid(workflow_uuid) - obj = eng.processed_objects[0] - obj_id = obj.id - obj.continue_workflow() + eng = WorkflowEngine.from_uuid(workflow_uuid) + obj = eng.processed_objects[0] + obj_id = obj.id + obj.continue_workflow() - obj = workflow_object_class.get(obj_id) - # It was rejected - assert obj.status == ObjectStatus.COMPLETED + obj = workflow_object_class.get(obj_id) + # It was rejected + assert obj.status == ObjectStatus.COMPLETED + + +def fake_is_pdf_url(url): + return True @mock.patch( - 'inspirehep.modules.workflows.utils.download_file_to_workflow', + 'inspirehep.modules.workflows.tasks.arxiv.download_file_to_workflow', side_effect=fake_download_file, ) +@mock.patch( + 'inspirehep.modules.workflows.tasks.arxiv.is_pdf_link', + side_effect=fake_is_pdf_url +) @mock.patch( 'inspirehep.modules.workflows.tasks.beard.json_api_request', side_effect=fake_beard_api_request, @@ -125,11 +230,13 @@ def test_harvesting_arxiv_workflow_manual_rejected( return_value=[], ) def test_harvesting_arxiv_workflow_already_on_legacy( - mocked_refextract_extract_refs, - mocked_api_request_magpie, - mocked_api_request_beard, mocked_download, - small_app + mocked_is_pdf, + mocked_api_request_beard, + mocked_api_request_magpie, + mocked_refextract_extract_refs, + workflow_app, + mocked_external_services ): """Test a full harvesting workflow.""" extra_config = { @@ -137,8 +244,8 @@ def test_harvesting_arxiv_workflow_already_on_legacy( "MAGPIE_API_URL": "http://example.com/magpie", } - with small_app.app_context(): - with mock.patch.dict(small_app.config, extra_config): + with workflow_app.app_context(): + with mock.patch.dict(workflow_app.config, extra_config): workflow_uuid = start( 'article', [ @@ -186,53 +293,98 @@ def test_harvesting_arxiv_workflow_manual_accepted( mocked_download_utils, mocked_download_arxiv, workflow_app, + mocked_external_services, ): record = generate_record() """Test a full harvesting workflow.""" - with requests_mock.Mocker() as requests_mocker: - requests_mocker.register_uri( - requests_mock.ANY, - re.compile('.*(indexer|localhost).*'), - real_http=True, - ) - requests_mocker.register_uri( - 'POST', - re.compile( - 'https?://localhost:1234.*', - ), - text=u'[INFO]', - status_code=200, - ) - workflow_uuid, eng, obj = get_halted_workflow( - app=workflow_app, - extra_config={'PRODUCTION_MODE': False}, - record=record, - ) + workflow_uuid, eng, obj = get_halted_workflow( + app=workflow_app, + extra_config={'PRODUCTION_MODE': False}, + record=record, + ) - do_accept_core( - app=workflow_app, - workflow_id=obj.id, - ) + do_accept_core( + app=workflow_app, + workflow_id=obj.id, + ) - eng = WorkflowEngine.from_uuid(workflow_uuid) - obj = eng.processed_objects[0] - assert obj.status == ObjectStatus.WAITING + eng = WorkflowEngine.from_uuid(workflow_uuid) + obj = eng.processed_objects[0] + assert obj.status == ObjectStatus.WAITING - response = do_robotupload_callback( - app=workflow_app, - workflow_id=obj.id, - recids=[12345], - ) - assert response.status_code == 200 + response = do_robotupload_callback( + app=workflow_app, + workflow_id=obj.id, + recids=[12345], + ) + assert response.status_code == 200 - obj = workflow_object_class.get(obj.id) - assert obj.status == ObjectStatus.WAITING + obj = workflow_object_class.get(obj.id) + assert obj.status == ObjectStatus.WAITING - response = do_webcoll_callback(app=workflow_app, recids=[12345]) - assert response.status_code == 200 + response = do_webcoll_callback(app=workflow_app, recids=[12345]) + assert response.status_code == 200 - eng = WorkflowEngine.from_uuid(workflow_uuid) - obj = eng.processed_objects[0] - # It was accepted - assert obj.status == ObjectStatus.COMPLETED + eng = WorkflowEngine.from_uuid(workflow_uuid) + obj = eng.processed_objects[0] + # It was accepted + assert obj.status == ObjectStatus.COMPLETED + + +@mock.patch( + 'inspirehep.modules.workflows.tasks.arxiv.download_file_to_workflow', + side_effect=fake_download_file, +) +@mock.patch( + 'inspirehep.modules.workflows.tasks.beard.json_api_request', + side_effect=fake_beard_api_request, +) +@mock.patch( + 'inspirehep.modules.workflows.tasks.magpie.json_api_request', + side_effect=fake_magpie_api_request, +) +def test_merge_with_already_existing_article_in_the_db( + mocked_download_arxiv, + mocked_api_request_beard, + mocked_api_request_magpie, + workflow_app, + mocked_external_services, +): + head = record_insert_or_replace(read_file('fixtures', 'merger_head.json')) + es.indices.refresh('records-hep') + + _insert_wf_record_source( + record_uuid=head.id, + source='arXiv', + json=read_file('fixtures', 'merger_root.json'), + ) + es.indices.refresh('records-hep') + update = read_file('fixtures', 'merger_update.json') + + obj_id = run_workflow( + app=workflow_app, + extra_config={ + 'ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY': [], + 'PRODUCTION_MODE': False, + }, + record=update, + ) + + do_accept_core( + app=workflow_app, + workflow_id=obj_id, + ) + + obj = workflow_object_class.get(obj_id) + + response = do_robotupload_callback( + app=workflow_app, + workflow_id=obj_id, + recids=[_get_match_recid(obj)], + ) + + assert response.status_code == 200 + assert obj.status == ObjectStatus.COMPLETED + assert obj.extra_data['is-update'] is True + assert obj.extra_data['merged'] is True diff --git a/tests/integration/workflows/test_audit.py b/tests/integration/workflows/test_audit.py index 076af75c58..91b321eedf 100644 --- a/tests/integration/workflows/test_audit.py +++ b/tests/integration/workflows/test_audit.py @@ -33,10 +33,10 @@ from inspirehep.modules.workflows.utils import log_workflows_action -def test_audit(small_app): +def test_audit(workflow_app): user_id = None workflow_id = None - with small_app.app_context(): + with workflow_app.app_context(): user = User(email="test@example.com", active=True) user.password = "test" db.session.add(user) @@ -47,7 +47,7 @@ def test_audit(small_app): user_id = user.id workflow_id = workflows_object.id - with small_app.app_context(): + with workflow_app.app_context(): logging_info = { 'object_id': workflow_id, 'user_id': user_id, @@ -73,7 +73,7 @@ def test_audit(small_app): relevance_prediction = dict( max_score=0.222113, decision="Rejected" ) - with small_app.app_context(): + with workflow_app.app_context(): log_workflows_action( action="accept_core", relevance_prediction=relevance_prediction, diff --git a/tests/integration/workflows/test_hep_approval.py b/tests/integration/workflows/test_hep_approval.py index 3dc76e90f3..53807273f2 100644 --- a/tests/integration/workflows/test_hep_approval.py +++ b/tests/integration/workflows/test_hep_approval.py @@ -51,7 +51,7 @@ def workflow(): db.session.commit() -def test_resolve_accept(small_app, workflow): +def test_resolve_accept(workflow_app, workflow): args = { 'request_data': { 'value': 'accept', @@ -70,7 +70,7 @@ def test_resolve_accept(small_app, workflow): assert workflow.extra_data == expected -def test_resolve_accept_core(small_app, workflow): +def test_resolve_accept_core(workflow_app, workflow): args = { 'request_data': { 'value': 'accept_core' @@ -89,7 +89,7 @@ def test_resolve_accept_core(small_app, workflow): assert workflow.extra_data == expected -def test_resolve_rejected(small_app, workflow): +def test_resolve_rejected(workflow_app, workflow): args = { 'request_data': { 'value': 'rejected', @@ -109,7 +109,7 @@ def test_resolve_rejected(small_app, workflow): assert workflow.extra_data == expected -def test_resolve_attach_pdf(small_app, workflow): +def test_resolve_attach_pdf(workflow_app, workflow): args = { 'request_data': { 'value': 'accept', @@ -138,7 +138,7 @@ def test_resolve_attach_pdf(small_app, workflow): assert 'fulltext.pdf' in [doc['key'] for doc in workflow.data['documents']] -def test_resolve_remove_pdf(small_app, workflow): +def test_resolve_remove_pdf(workflow_app, workflow): args = { 'request_data': { 'value': 'accept',