diff --git a/hepcrawl/scrapy.cfg b/hepcrawl/scrapy.cfg index 1ec7711e..adffa153 100644 --- a/hepcrawl/scrapy.cfg +++ b/hepcrawl/scrapy.cfg @@ -14,7 +14,7 @@ default = hepcrawl.settings [deploy] -url = http://localhost:6800/ +url = http://scrapyd:6800/ project = hepcrawl #username = scrapy #password = secret diff --git a/hepcrawl/settings.py b/hepcrawl/settings.py index c5c11c4a..025e7186 100644 --- a/hepcrawl/settings.py +++ b/hepcrawl/settings.py @@ -73,11 +73,12 @@ 'hepcrawl.middlewares.HepcrawlCrawlOnceMiddleware': 100, } -DOWNLOAD_HANDLERS_BASE = dict(default_settings.DOWNLOAD_HANDLERS_BASE) -DOWNLOAD_HANDLERS_BASE.update({ +# Configure custom downloaders +# See https://doc.scrapy.org/en/0.20/topics/settings.html#download-handlers +DOWNLOAD_HANDLERS = { 'oaipmh+http': 'hepcrawl.downloaders.DummyDownloadHandler', 'oaipmh+https': 'hepcrawl.downloaders.DummyDownloadHandler', -}) +} # Enable or disable downloader middlewares # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html diff --git a/hepcrawl/spiders/arxiv_spider.py b/hepcrawl/spiders/arxiv_spider.py index 82e086fb..64d076dc 100644 --- a/hepcrawl/spiders/arxiv_spider.py +++ b/hepcrawl/spiders/arxiv_spider.py @@ -46,7 +46,7 @@ class ArxivSpider(StatefulSpider, XMLFeedSpider): """ name = 'arXiv' - iterator = 'iternodes' + iterator = 'xml' itertag = 'OAI-PMH:record' namespaces = [ ("OAI-PMH", "http://www.openarchives.org/OAI/2.0/") diff --git a/hepcrawl/spiders/cds_spider.py b/hepcrawl/spiders/cds_spider.py index 25ff0071..c11d3d77 100644 --- a/hepcrawl/spiders/cds_spider.py +++ b/hepcrawl/spiders/cds_spider.py @@ -34,7 +34,7 @@ class CDSSpider(OAIPMHSpider): Using OAI-PMH XML files:: $ scrapy crawl CDS \\ - -a "set=forINSPIRE" -a "from_date=2017-10-10" + -a "oai_set=forINSPIRE" -a "from_date=2017-10-10" It uses `HarvestingKit `_ to translate from CDS's MARCXML into INSPIRE Legacy's MARCXML flavor. It then @@ -44,8 +44,13 @@ class CDSSpider(OAIPMHSpider): name = 'CDS' - def __init__(self, from_date=None, set="forINSPIRE", *args, **kwargs): - super(CDSSpider, self).__init__(url='http://cds.cern.ch/oai2d', metadata_prefix='marcxml', set=set, from_date=from_date, **kwargs) + def __init__(self, from_date=None, oai_set="forINSPIRE", *args, **kwargs): + super(CDSSpider, self).__init__( + url='http://cds.cern.ch/oai2d', + metadata_prefix='marcxml', + oai_set=oai_set, + from_date=from_date, + **kwargs) def parse_record(self, record): response = XmlResponse(self.url, encoding='utf-8', body=record.raw) @@ -65,6 +70,8 @@ def parse_record(self, record): ) with app.app_context(): json_record = hep.do(record) + base_uri = self.settings['SCHEMA_BASE_URI'] + json_record['$schema'] = base_uri + 'hep.json' return ParsedItem(record=json_record, record_format='hep') except Exception: logger.exception("Error when parsing record") diff --git a/hepcrawl/spiders/oaipmh_spider.py b/hepcrawl/spiders/oaipmh_spider.py index 375799e9..3bd429e1 100644 --- a/hepcrawl/spiders/oaipmh_spider.py +++ b/hepcrawl/spiders/oaipmh_spider.py @@ -10,7 +10,7 @@ """Generic spider for OAI-PMH servers.""" import logging -import sickle +from enum import Enum from datetime import datetime from sickle import Sickle @@ -22,6 +22,19 @@ logger = logging.getLogger(__name__) + +class _Granularity(Enum): + DATE = 'YYYY-MM-DD' + SECOND = 'YYYY-MM-DDThh:mm:ssZ' + + def format(self, datetime_object): + if self == self.DATE: + return datetime_object.strftime('%Y-%m-%d') + if self == self.SECOND: + return datetime_object.strftime('%Y-%m-%dT%H:%M:%SZ') + raise ValueError("Invalid granularity: %s" % self.granularity) + + class OAIPMHSpider(Spider): """ Implements a spider for the OAI-PMH protocol by using the Python sickle library. @@ -31,12 +44,15 @@ class OAIPMHSpider(Spider): """ name = 'OAI-PMH' state = {} + granularity = _Granularity.DATE - def __init__(self, url, metadata_prefix='marcxml', set=None, alias=None, from_date=None, until_date=None, granularity='YYYY-MM-DD', record_class=Record, *args, **kwargs): + def __init__(self, url, metadata_prefix='marcxml', oai_set=None, alias=None, + from_date=None, until_date=None, granularity='', + record_class=Record, *args, **kwargs): super(OAIPMHSpider, self).__init__(*args, **kwargs) self.url = url self.metadata_prefix = metadata_prefix - self.set = set + self.set = oai_set self.granularity = granularity self.alias = alias or self._make_alias() self.from_date = from_date @@ -47,7 +63,9 @@ def __init__(self, url, metadata_prefix='marcxml', set=None, alias=None, from_da def start_requests(self): self.from_date = self.from_date or self.state.get(self.alias) logger.info("Current state 2:{}".format(self.state)) - logger.info("Starting harvesting of {url} with set={set} and metadataPrefix={metadata_prefix}, from={from_date}, until={until_date}".format( + logger.info("Starting harvesting of {url} with set={set} and " + "metadataPrefix={metadata_prefix}, from={from_date}, " + "until={until_date}".format( url=self.url, set=self.set, metadata_prefix=self.metadata_prefix, @@ -57,7 +75,7 @@ def start_requests(self): now = datetime.utcnow() request = Request('oaipmh+{}'.format(self.url), self.parse) yield request - self.state[self.alias] = self._format_date(now) + self.state[self.alias] = self.granularity.format(now) logger.info("Harvesting completed. Next harvesting will resume from {}".format(self.state[self.alias])) def parse_record(self, record): @@ -84,14 +102,6 @@ def parse(self, response): for record in records: yield self.parse_record(record) - def _format_date(self, datetime_object): - if self.granularity == 'YYYY-MM-DD': - return datetime_object.strftime('%Y-%m-%d') - elif self.granularity == 'YYYY-MM-DDThh:mm:ssZ': - return datetime_object.strftime('%Y-%m-%dT%H:%M:%SZ') - else: - raise RuntimeError("Invalid granularity: %s" % self.granularity) - def _make_alias(self): return '{url}-{metadata_prefix}-{set}'.format( url=self.url, diff --git a/tests/functional/cds/fixtures/cds.xml b/tests/functional/cds/fixtures/cds.xml new file mode 100644 index 00000000..9bec8576 --- /dev/null +++ b/tests/functional/cds/fixtures/cds.xml @@ -0,0 +1,1480 @@ + + + +2017-12-07T15:05:26Zhttp://cds.cern.ch/oai2d +
oai:cds.cern.ch:12007522017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1200752 + SzGeCERN + 20171116090930.0 + + oai:cds.cern.ch:1200752 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509577 + + + eng + + + Dubus, G + Grenoble Observ. + + + High and very high energy gamma-ray emission from binaries + + + 2009 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS MQW7-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + 018 + PoS + MQW7 + 2008 + + + http://cds.cern.ch/record/1200752/files/MQW7_018.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090817 + + + 1129423 + 018 + izmir20080901 + + + PUBLIC + + + 002842486CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12007532017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1200753 + SzGeCERN + 20171116090930.0 + + oai:cds.cern.ch:1200753 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509578 + + + eng + + + Dubois, R + SLAC + + + GLAST: Launched and Being Commissioned - Status and Prospects for Microquasars + + + Fermi: Launched and Being Commissioned - Status and Prospects for Microquasars + Other title + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS MQW7-2009 + + + No authors + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + for the Fermi LAT Collaboration + + + 019 + PoS + MQW7 + 2008 + + + http://cds.cern.ch/record/1200753/files/MQW7_019.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090817 + + + 1129423 + 019 + izmir20080901 + + + PUBLIC + + + 002842487CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12007542017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1200754 + SzGeCERN + 20171116090930.0 + + oai:cds.cern.ch:1200754 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509579 + + + eng + + + Romero, G E + Villa Elisa, Inst. Argentino Radioastron. + La Plata U. + + + Hadronic models of high-energy radiation from microquasars: recent developments + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS MQW7-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + 020 + PoS + MQW7 + 2008 + + + http://cds.cern.ch/record/1200754/files/MQW7_020.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090817 + + + 1129423 + 020 + izmir20080901 + + + PUBLIC + + + 002842488CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12032802017-11-16T08:09:52Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203280 + SzGeCERN + 20171116090952.0 + + oai:cds.cern.ch:1203280 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509595 + + + eng + + + Guess, C J + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Studying matrix elements for the neutrinoless double beta decay of 150Nd via the 150Sm(t,3He)150Pm* and 150Nd(3He,t)150Pm* reactions + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Austin, S M + Michigan State U., NSCL + Michigan State U., JINA + + + Bazin, D + Michigan State U., NSCL + + + Brown, B A + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Caesar, C + Michigan State U., NSCL + Mainz U. + + + Deaven, J M + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Herlitzius, C + Michigan State U., NSCL + Mainz U. + + + Hitt, G W + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Meharchand, R T + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Perdikakis, G + Michigan State U., NSCL + Michigan State U., JINA + + + Shimbara, Y + Niigata U., Grad. Sch. Sci. Tech. + + + Tur, C + Michigan State U., NSCL + Michigan State U., JINA + + + Zegers, R G T + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + 104 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203280/files/NIC20X_104.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 104 + mackinacisland20080727 + + + PUBLIC + + + 002844587CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12032812017-11-16T08:09:55Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203281 + SzGeCERN + 20171116090955.0 + + oai:cds.cern.ch:1203281 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509596 + + + eng + + + Jachowicz, N + Ghent U. + + + Untangling supernova-neutrino oscillations with beta-beam data + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + McLaughlin, G C + North Carolina State U. + + + Volpe, C + Orsay, IPN + + + 107 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203281/files/NIC20X_107.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 107 + mackinacisland20080727 + + + PUBLIC + + + 002844588CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033612017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203361 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203361 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509597 + + + eng + + + Kawagoe, S + Tokyo U. + + + Neutrino oscillations in non-spherical supernova explosions + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Takiwaki, T + Tokyo U. + + + Kotake, K + Natl. Astron. Observ. of Japan + + + 109 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203361/files/NIC20X_109.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 109 + mackinacisland20080727 + + + PUBLIC + + + 002844668CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033622017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203362 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203362 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509598 + + + eng + + + Nakazato, K + Waseda U. + + + Neutrino Emission from Stellar Collapse including Hadron-Quark Mixed Phase + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Sumiyoshi, K + Numazu Coll. Tech. + + + Yamada, s + Waseda U. + + + 116 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203362/files/NIC20X_116.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 116 + mackinacisland20080727 + + + PUBLIC + + + 002844669CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033632017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203363 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203363 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509599 + + + eng + + + Sumiyoshi, K + Numazu Coll. Tech. + + + Short neutrino burst from failed supernovae as a probe of dense matter with hyperon mixture + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Ishizuka, C + Hokkaido U. + + + Ohnishi, A + Kyoto U., Yukawa Inst., Kyoto + + + Yamada, S + Waseda U. + + + Suzuki, H + Tokyo U. of Sci. + + + 122 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203363/files/NICX_122.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 122 + mackinacisland20080727 + + + PUBLIC + + + 002844670CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033642017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203364 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203364 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509600 + + + eng + + + Suzuki, T + Tokyo U. + + + Neutrino Nucleus Reactions and Nucleosynthesis in Stars + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Yoshida, T + Natl. Astron. Observ. of Japan + + + Chiba, S + JAEA, Ibaraki + + + Honma, M + Aizu U. + + + Higashiyama, K + Chiba Inst. Tech. + + + Umeda, H + Tokyo U. + + + Nomoto, K + Tokyo U. + + + Kajino, T + Tokyo U. + Natl. Astron. Observ. of Japan + + + Otsuka, T + Tokyo U. + + + 123 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203364/files/NICX_123.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 123 + mackinacisland20080727 + + + PUBLIC + + + 002844671CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033652017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203365 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203365 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509601 + + + eng + + + Whitehouse, S + Basel U. + + + Neutrino transport in 3D simulations of core-collapse supernovae + + + A new approach to neutrino transport in 3D simulations of core-collapse supernovae + Other title + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Liebendörfer, M + Basel U. + + + 243 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203365/files/NICX_243.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 243 + mackinacisland20080727 + + + PUBLIC + + + 002844672CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033662017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203366 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203366 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509602 + + + eng + + + Arcones, A + Damstadt, Tech. Hochsch. + Darmstadt, GSI + + + Neutrino-driven winds and nucleosynthesis + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Martínez-Pinedo, G + Darmstadt, GSI + + + Schwenk, A + TRIUMF + + + O’Connor, E + TRIUMF + Caltech + + + Langanke, K + Damstadt, Tech. Hochsch. + Darmstadt, GSI + + + Horowitz, C J + Indiana U. + + + Janka, H T + Garching, Max Planck Inst. + + + 128 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203366/files/NIC20X_128.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 128 + mackinacisland20080727 + + + PUBLIC + + + 002844673CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033672017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203367 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203367 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509603 + + + eng + + + Roberts, L + UC, Santa Cruz, Astron. Astrophys. + + + Nucleosynthesis in the Neutrino Driven Wind of Protoneutron Stars + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Woosley, S + UC, Santa Cruz, Astron. Astrophys. + + + Heger, A + Minnesota U. + + + Hoffman, R + LLNL, Livermore + + + 146 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203367/files/NICX_146.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 146 + mackinacisland20080727 + + + PUBLIC + + + 002844674CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033692017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203369 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203369 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509604 + + + eng + + + Kojima, K + Tokyo U. + Natl. Astron. Observ. of Japan + + + Neutrino effect in cosmology with the primordial magnetic field + + + Neutrino effects in cosmology with A primordial magnetic field + Other title + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Ichiki, K + Nagoya U. + + + Kajino, T + Tokyo U. + Natl. Astron. Observ. of Japan + + + Mathews, G J + Notre Dame U. + Natl. Astron. Observ. of Japan + + + 226 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203369/files/NICX_226.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 226 + mackinacisland20080727 + + + PUBLIC + + + 002844676CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033702017-11-16T08:09:47Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203370 + SzGeCERN + 20171116090947.0 + + oai:cds.cern.ch:1203370 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509605 + + + eng + + + Yamazaki, D G + Natl. Astron. Observ. of Japan + + + A Strong Constraint on the Neutrino Mass from the Formation of Large Scale Structure in the Presence of the Primordial Magnetic Field + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Ichiki, K + Tokyo U. + + + Kajino, T + Natl. Astron. Observ. of Japan + + + Mathews, G J + Notre Dame U. + + + 239 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203370/files/NICX_239.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 239 + mackinacisland20080727 + + + PUBLIC + + + 002844677CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
+
+ diff --git a/tests/functional/cds/fixtures/cds_expected.json b/tests/functional/cds/fixtures/cds_expected.json new file mode 100644 index 00000000..cfb94ced --- /dev/null +++ b/tests/functional/cds/fixtures/cds_expected.json @@ -0,0 +1,1369 @@ +[ + { + "refereed": true, + "core": true, + "preprint_date": "2009", + "documents": [ + { + "url": "http://cds.cern.ch/record/1200752/files/MQW7_018.pdf", + "key": "document" + } + ], + "citeable": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:16.980315", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "High and very high energy gamma-ray emission from binaries" + } + ], + "publication_info": [ + { + "journal_volume": "MQW7", + "page_start": "018", + "year": 2008, + "artid": "018", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Grenoble Observ." + } + ], + "full_name": "Dubus, G." + } + ], + "external_system_identifiers": [ + { + "value": "1200752", + "schema": "CDS" + }, + { + "value": "1509577", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "curated": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1200753/files/MQW7_019.pdf", + "key": "document" + } + ], + "citeable": true, + "_collections": [ + "Literature" + ], + "collaborations": [ + { + "value": "Fermi LAT" + } + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.101983", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "GLAST: Launched and Being Commissioned - Status and Prospects for Microquasars" + }, + { + "title": "Fermi: Launched and Being Commissioned - Status and Prospects for Microquasars" + } + ], + "publication_info": [ + { + "journal_volume": "MQW7", + "page_start": "019", + "year": 2008, + "artid": "019", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "SLAC" + } + ], + "full_name": "Dubois, R." + } + ], + "external_system_identifiers": [ + { + "value": "1200753", + "schema": "CDS" + }, + { + "value": "1509578", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "curated": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1200754/files/MQW7_020.pdf", + "key": "document" + } + ], + "citeable": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.153496", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Hadronic models of high-energy radiation from microquasars: recent developments" + } + ], + "publication_info": [ + { + "journal_volume": "MQW7", + "page_start": "020", + "year": 2008, + "artid": "020", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Villa Elisa, Inst. Argentino Radioastron." + }, + { + "value": "La Plata U." + } + ], + "full_name": "Romero, G. E." + } + ], + "external_system_identifiers": [ + { + "value": "1200754", + "schema": "CDS" + }, + { + "value": "1509579", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "curated": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203280/files/NIC20X_104.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.209845", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Studying matrix elements for the neutrinoless double beta decay of 150Nd via the 150Sm(t,3He)150Pm* and 150Nd(3He,t)150Pm* reactions" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "104", + "year": 2008, + "artid": "104", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan U." + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Guess, C. J." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Austin, S. M." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + } + ], + "full_name": "Bazin, D." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan U." + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Brown, B. A." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Mainz U." + } + ], + "full_name": "Caesar, C." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan U." + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Deaven, J. M." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Mainz U." + } + ], + "full_name": "Herlitzius, C." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan U." + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Hitt, G. W." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan U." + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Meharchand, R. T." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Perdikakis, G." + }, + { + "affiliations": [ + { + "value": "Niigata U., Grad. Sch. Sci. Tech." + } + ], + "full_name": "Shimbara, Y." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Tur, C." + }, + { + "affiliations": [ + { + "value": "Michigan State U., NSCL" + }, + { + "value": "Michigan U." + }, + { + "value": "Michigan State U., JINA" + } + ], + "full_name": "Zegers, R. G. T." + } + ], + "external_system_identifiers": [ + { + "value": "1203280", + "schema": "CDS" + }, + { + "value": "1509595", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203281/files/NIC20X_107.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.271385", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Untangling supernova-neutrino oscillations with beta-beam data" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "107", + "year": 2008, + "artid": "107", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Ghent U." + } + ], + "full_name": "Jachowicz, N." + }, + { + "affiliations": [ + { + "value": "North Carolina State U." + } + ], + "full_name": "McLaughlin, G. C." + }, + { + "affiliations": [ + { + "value": "Orsay, IPN" + } + ], + "full_name": "Volpe, C." + } + ], + "external_system_identifiers": [ + { + "value": "1203281", + "schema": "CDS" + }, + { + "value": "1509596", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203361/files/NIC20X_109.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.324204", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Neutrino oscillations in non-spherical supernova explosions" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "109", + "year": 2008, + "artid": "109", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Tokyo U." + } + ], + "full_name": "Kawagoe, S." + }, + { + "affiliations": [ + { + "value": "Tokyo U." + } + ], + "full_name": "Takiwaki, T." + }, + { + "affiliations": [ + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Kotake, K." + } + ], + "external_system_identifiers": [ + { + "value": "1203361", + "schema": "CDS" + }, + { + "value": "1509597", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203362/files/NIC20X_116.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.378354", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Neutrino Emission from Stellar Collapse including Hadron-Quark Mixed Phase" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "116", + "year": 2008, + "artid": "116", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Waseda U." + } + ], + "full_name": "Nakazato, K." + }, + { + "affiliations": [ + { + "value": "Numazu Coll. Tech." + } + ], + "full_name": "Sumiyoshi, K." + }, + { + "affiliations": [ + { + "value": "Waseda U." + } + ], + "full_name": "Yamada, s." + } + ], + "external_system_identifiers": [ + { + "value": "1203362", + "schema": "CDS" + }, + { + "value": "1509598", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203363/files/NICX_122.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.431432", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Short neutrino burst from failed supernovae as a probe of dense matter with hyperon mixture" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "122", + "year": 2008, + "artid": "122", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Numazu Coll. Tech." + } + ], + "full_name": "Sumiyoshi, K." + }, + { + "affiliations": [ + { + "value": "Hokkaido U." + } + ], + "full_name": "Ishizuka, C." + }, + { + "affiliations": [ + { + "value": "Kyoto U., Yukawa Inst., Kyoto" + } + ], + "full_name": "Ohnishi, A." + }, + { + "affiliations": [ + { + "value": "Waseda U." + } + ], + "full_name": "Yamada, S." + }, + { + "affiliations": [ + { + "value": "Tokyo U. of Sci." + } + ], + "full_name": "Suzuki, H." + } + ], + "external_system_identifiers": [ + { + "value": "1203363", + "schema": "CDS" + }, + { + "value": "1509599", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203364/files/NICX_123.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.486814", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Neutrino Nucleus Reactions and Nucleosynthesis in Stars" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "123", + "year": 2008, + "artid": "123", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Tokyo U." + } + ], + "full_name": "Suzuki, T." + }, + { + "affiliations": [ + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Yoshida, T." + }, + { + "affiliations": [ + { + "value": "JAEA, Ibaraki" + } + ], + "full_name": "Chiba, S." + }, + { + "affiliations": [ + { + "value": "Aizu U." + } + ], + "full_name": "Honma, M." + }, + { + "affiliations": [ + { + "value": "Chiba Inst. Tech." + } + ], + "full_name": "Higashiyama, K." + }, + { + "affiliations": [ + { + "value": "Tokyo U." + } + ], + "full_name": "Umeda, H." + }, + { + "affiliations": [ + { + "value": "Tokyo U." + } + ], + "full_name": "Nomoto, K." + }, + { + "affiliations": [ + { + "value": "Tokyo U." + }, + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Kajino, T." + }, + { + "affiliations": [ + { + "value": "Tokyo U." + } + ], + "full_name": "Otsuka, T." + } + ], + "external_system_identifiers": [ + { + "value": "1203364", + "schema": "CDS" + }, + { + "value": "1509600", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203365/files/NICX_243.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.541467", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Neutrino transport in 3D simulations of core-collapse supernovae" + }, + { + "title": "A new approach to neutrino transport in 3D simulations of core-collapse supernovae" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "243", + "year": 2008, + "artid": "243", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Basel U." + } + ], + "full_name": "Whitehouse, S." + }, + { + "affiliations": [ + { + "value": "Basel U." + } + ], + "full_name": "Liebendörfer, M." + } + ], + "external_system_identifiers": [ + { + "value": "1203365", + "schema": "CDS" + }, + { + "value": "1509601", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203366/files/NIC20X_128.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.595372", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Neutrino-driven winds and nucleosynthesis" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "128", + "year": 2008, + "artid": "128", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Damstadt, Tech. Hochsch." + }, + { + "value": "Darmstadt, GSI" + } + ], + "full_name": "Arcones, A." + }, + { + "affiliations": [ + { + "value": "Darmstadt, GSI" + } + ], + "full_name": "Martínez-Pinedo, G." + }, + { + "affiliations": [ + { + "value": "TRIUMF" + } + ], + "full_name": "Schwenk, A." + }, + { + "affiliations": [ + { + "value": "TRIUMF" + }, + { + "value": "Caltech" + } + ], + "full_name": "O’Connor, E." + }, + { + "affiliations": [ + { + "value": "Damstadt, Tech. Hochsch." + }, + { + "value": "Darmstadt, GSI" + } + ], + "full_name": "Langanke, K." + }, + { + "affiliations": [ + { + "value": "Indiana U." + } + ], + "full_name": "Horowitz, C. J." + }, + { + "affiliations": [ + { + "value": "Garching, Max Planck Inst." + } + ], + "full_name": "Janka, H. T." + } + ], + "external_system_identifiers": [ + { + "value": "1203366", + "schema": "CDS" + }, + { + "value": "1509602", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203367/files/NICX_146.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.650283", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Nucleosynthesis in the Neutrino Driven Wind of Protoneutron Stars" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "146", + "year": 2008, + "artid": "146", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "UC, Santa Cruz, Astron. Astrophys." + } + ], + "full_name": "Roberts, L." + }, + { + "affiliations": [ + { + "value": "UC, Santa Cruz, Astron. Astrophys." + } + ], + "full_name": "Woosley, S." + }, + { + "affiliations": [ + { + "value": "Minnesota U." + } + ], + "full_name": "Heger, A." + }, + { + "affiliations": [ + { + "value": "LLNL, Livermore" + } + ], + "full_name": "Hoffman, R." + } + ], + "external_system_identifiers": [ + { + "value": "1203367", + "schema": "CDS" + }, + { + "value": "1509603", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203369/files/NICX_226.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.704338", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "Neutrino effect in cosmology with the primordial magnetic field" + }, + { + "title": "Neutrino effects in cosmology with A primordial magnetic field" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "226", + "year": 2008, + "artid": "226", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Tokyo U." + }, + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Kojima, K." + }, + { + "affiliations": [ + { + "value": "Nagoya U." + } + ], + "full_name": "Ichiki, K." + }, + { + "affiliations": [ + { + "value": "Tokyo U." + }, + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Kajino, T." + }, + { + "affiliations": [ + { + "value": "Notre Dame U." + }, + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Mathews, G. J." + } + ], + "external_system_identifiers": [ + { + "value": "1203369", + "schema": "CDS" + }, + { + "value": "1509604", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + }, + { + "refereed": true, + "core": true, + "preprint_date": "2008", + "documents": [ + { + "url": "http://cds.cern.ch/record/1203370/files/NICX_239.pdf", + "key": "document" + } + ], + "curated": true, + "_collections": [ + "Literature" + ], + "acquisition_source": { + "source": "CDS", + "datetime": "2017-12-07T15:54:17.758624", + "method": "hepcrawl", + "submission_number": "None" + }, + "inspire_categories": [ + { + "term": "Astrophysics" + } + ], + "titles": [ + { + "title": "A Strong Constraint on the Neutrino Mass from the Formation of Large Scale Structure in the Presence of the Primordial Magnetic Field" + } + ], + "publication_info": [ + { + "journal_volume": "NIC X", + "page_start": "239", + "year": 2008, + "artid": "239", + "journal_title": "PoS" + } + ], + "authors": [ + { + "affiliations": [ + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Yamazaki, D. G." + }, + { + "affiliations": [ + { + "value": "Tokyo U." + } + ], + "full_name": "Ichiki, K." + }, + { + "affiliations": [ + { + "value": "Natl. Astron. Observ. of Japan" + } + ], + "full_name": "Kajino, T." + }, + { + "affiliations": [ + { + "value": "Notre Dame U." + } + ], + "full_name": "Mathews, G. J." + } + ], + "external_system_identifiers": [ + { + "value": "1203370", + "schema": "CDS" + }, + { + "value": "1509605", + "schema": "Inspire" + } + ], + "$schema": "http://localhost/schemas/records/hep.json", + "document_type": [ + "conference paper" + ], + "citeable": true + } +] diff --git a/tests/functional/cds/fixtures/cds_smoke_records_expected.json b/tests/functional/cds/fixtures/cds_smoke_records_expected.json deleted file mode 100644 index f6f6a7f8..00000000 --- a/tests/functional/cds/fixtures/cds_smoke_records_expected.json +++ /dev/null @@ -1,153 +0,0 @@ -[ - { - "$schema": "http://localhost/schemas/records/hep.json", - "_collections": [ - "Literature" - ], - "accelerator_experiments": [ - { - "legacy_name": "CERN-SPS---" - } - ], - "acquisition_source": { - "datetime": "2017-10-04T14:07:59.746165", - "method": "hepcrawl", - "source": "CDS", - "submission_number": "None" - }, - "core": true, - "curated": true, - "corporate_author": [ - "European Organization for Nuclear Research" - ], - "documents": [ - { - "url": "http://cds.cern.ch/record/21099/files/CM-P00077286-e.pdf", - "key": "document" - }, - { - "url": "http://cds.cern.ch/record/21099/files/CM-P00078235-f.pdf", - "key": "1_document" - } - ], - "document_type": [ - "article" - ], - "external_system_identifiers": [ - { - "schema": "Inspire", - "value": "1614043" - }, - { - "schema": "ADMADM", - "value": "0003711" - }, - { - "schema": "CDS", - "value": "21099" - } - ], - "inspire_categories": [ - { - "term": "Accelerators" - } - ], - "languages": [ - "fr" - ], - "preprint_date": "1967-05-30", - "report_numbers": [ - { - "value": "CERN/0702" - }, - { - "value": "CM-P00077286-e" - }, - { - "value": "CM-P00078235-f" - } - ], - "titles": [ - { - "title": "Addendum to the Report on the Design Study of a 300 GeV Proton Synchrotron (CERN/563) (AR/Int. SG/64-15)" - }, - { - "title": "Suppl\u00e9ment au Rapport sur le projet du synchrotron \u00e0 prontons de 300 GeV (CERN/563) (Ar/Int. SG/64-15)" - } - ] - }, - { - "$schema": "http://localhost/schemas/records/hep.json", - "_collections": [ - "Literature" - ], - "accelerator_experiments": [ - { - "legacy_name": "CERN-LEP---" - } - ], - "acquisition_source": { - "datetime": "2017-10-04T14:07:59.783028", - "method": "hepcrawl", - "source": "CDS", - "submission_number": "None" - }, - "core": true, - "curated": true, - "documents": [ - { - "url": "http://cds.cern.ch/record/60936/files/CM-P00098683-f.pdf", - "key": "document" - }, - { - "url": "http://cds.cern.ch/record/60936/files/CERN-SPC-426.pdf", - "key": "1_document" - } - ], - "document_type": [ - "article" - ], - "external_system_identifiers": [ - { - "schema": "ADMADM", - "value": "0009846" - }, - { - "schema": "Inspire", - "value": "1614044" - }, - { - "schema": "CDS", - "value": "60936" - } - ], - "inspire_categories": [ - { - "term": "Accelerators" - } - ], - "languages": [ - "fr" - ], - "preprint_date": "1978-10-06", - "report_numbers": [ - { - "value": "CERN/SPC/0426" - }, - { - "value": "CM-P00095369-e" - }, - { - "value": "CM-P00098683-f" - } - ], - "titles": [ - { - "title": "LEP Studies 1979 to 1981" - }, - { - "title": "Les Etudes sur le LEP de 1979 -1981" - } - ] - } -] diff --git a/tests/functional/cds/test_cds.py b/tests/functional/cds/test_cds.py index 3b825a31..02ec2cd4 100644 --- a/tests/functional/cds/test_cds.py +++ b/tests/functional/cds/test_cds.py @@ -12,23 +12,82 @@ import pytest import requests_mock +import copy +import json from scrapy.crawler import CrawlerProcess from scrapy.utils.project import get_project_settings +from tempfile import NamedTemporaryFile +from twisted.internet import reactor -from hepcrawl.testlib.fixtures import get_test_suite_path +from hepcrawl.testlib.fixtures import ( + get_test_suite_path, + expected_json_results_from_file, +) @pytest.fixture def cds_oai_server(): with requests_mock.Mocker() as m: - m.get('http://cds.cern.ch/oai2d?from=2017-10-10&verb=ListRecords&set=forINSPIRE&metadataPrefix=marcxml', - text=open(get_test_suite_path('cds', 'fixtures', 'cds1.xml', test_suite='functional')).read()) - m.get('http://cds.cern.ch/oai2d?from=2017-10-10&verb=ListRecords&&resumptionToken=___kuYtYs', - text=open(get_test_suite_path('cds', 'fixtures', 'cds2.xml', test_suite='functional')).read()) + m.get('http://cds.cern.ch/oai2d?from=2017-11-15&verb=ListRecords&set=forINSPIRE&metadataPrefix=marcxml', + text=open(get_test_suite_path('cds', 'fixtures', 'cds.xml', test_suite='functional')).read()) yield m +def override_dynamic_fields_on_records(records): + clean_records = [] + for record in records: + clean_record = override_dynamic_fields_on_record(record) + clean_records.append(clean_record) + + return clean_records + + +def override_dynamic_fields_on_record(record): + def _override(field_key, original_dict, backup_dict, new_value): + backup_dict[field_key] = original_dict[field_key] + original_dict[field_key] = new_value + + clean_record = copy.deepcopy(record) + overriden_fields = {} + dummy_random_date = u'2017-04-03T10:26:40.365216' + + overriden_fields['acquisition_source'] = {} + _override( + field_key='datetime', + original_dict=clean_record['acquisition_source'], + backup_dict=overriden_fields['acquisition_source'], + new_value=dummy_random_date, + ) + _override( + field_key='submission_number', + original_dict=clean_record['acquisition_source'], + backup_dict=overriden_fields['acquisition_source'], + new_value=u'5652c7f6190f11e79e8000224dabeaad', + ) + + return clean_record + + def test_cds(cds_oai_server): - process = CrawlerProcess(get_project_settings()) - process.crawl('CDS', from_date='2017-10-10') + f = NamedTemporaryFile('rw') + + settings = get_project_settings() + settings.set('FEED_FORMAT', 'json') + settings.set('FEED_URI', f.name) + + process = CrawlerProcess(settings) + process.crawl('CDS', from_date='2017-11-15', oai_set='forINSPIRE') process.start() + + result = json.load(f) + + expected = expected_json_results_from_file( + 'cds', 'fixtures', 'cds_expected.json' + ) + + expected = override_dynamic_fields_on_records(expected) + result = override_dynamic_fields_on_records(result) + + assert result == expected + + f.close()