From 5fb53116c9ee83a5c76b4cf9d0793adbd0230e12 Mon Sep 17 00:00:00 2001 From: Victor Balbuena Date: Wed, 13 Feb 2019 10:23:30 +0100 Subject: [PATCH 1/2] tohep: remove validating Signed-off-by: Victor Balbuena --- hepcrawl/tohep.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/hepcrawl/tohep.py b/hepcrawl/tohep.py index adfac13d..2c4dd52f 100644 --- a/hepcrawl/tohep.py +++ b/hepcrawl/tohep.py @@ -172,11 +172,7 @@ def hepcrawl_to_hep(crawler_record): Returns: - dict: The hep formatted (and validated) record. - - Raises: - Exception: if there was a validation error (the exact class depends on - :class:`inspire_schemas.api.validate`). + dict: The hep formatted record. """ def _filter_affiliation(affiliations): @@ -355,6 +351,4 @@ def _filter_affiliation(affiliations): url=document['url'], ) - builder.validate_record() - return builder.record From 7f7b62dfdf1a0c598820eb084b8789d44a1c2b8e Mon Sep 17 00:00:00 2001 From: Victor Balbuena Date: Wed, 13 Feb 2019 15:23:57 +0100 Subject: [PATCH 2/2] APS: handle page number 0 in APS Signed-off-by: Victor Balbuena --- hepcrawl/spiders/aps_spider.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hepcrawl/spiders/aps_spider.py b/hepcrawl/spiders/aps_spider.py index 681b94ce..2d6e4650 100644 --- a/hepcrawl/spiders/aps_spider.py +++ b/hepcrawl/spiders/aps_spider.py @@ -138,7 +138,8 @@ def _parse_json_on_failure(self, failure): doi = get_value(article, 'identifiers.doi', default='') record.add_dois(dois_values=[doi]) - record.add_value('page_nr', str(article.get('numPages', ''))) + if article.get('numPages', -1) > 0: + record.add_value('page_nr', str(article.get('numPages', ''))) record.add_value('abstract', get_value(article, 'abstract.value', default='')) record.add_value('title', get_value(article, 'title.value', default=''))