diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..4b068a74 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +source = dojson diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..e6c4ad96 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.git +*.pyc +__pycache__/ +.tox +.cache diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..b372cdac --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +bin/ +build/ +develop-eggs/ +dist/ +.eggs +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +# Sphinx documentation +docs/_build/ \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..186815db --- /dev/null +++ b/.travis.yml @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# This file is part of DoJSON +# Copyright (C) 2015 CERN. +# +# DoJSON is free software; you can redistribute it and/or modify +# it under the terms of the Revised BSD License; see LICENSE file for +# more details. 
+ +sudo: false + +language: python + +python: + - "2.6" + - "2.7" +# FIXME esmre is not Python 3 compatible +# - "3.3" +# - "3.4" + +install: + - pip install --upgrade pip + - pip install coveralls pep257 + - pip install pytest pytest-pep8 pytest-cov pytest-cache + - pip install -e .[docs,tests] + +script: + - pep257 --match-dir='dojson' + - "sphinx-build -qnNW docs docs/_build/html" + - python setup.py test + +after_success: + - coveralls + +notifications: + email: false diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 00000000..e9bb1c79 --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,8 @@ +Authors +======= + +Active contributors: + +* Esteban J. G. Gabancho +* Harris Tzovanakis +* Leonardo Rossi diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 00000000..e69de29b diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 00000000..37a42c20 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,30 @@ +Contributing +============ + +Bug reports, feature requests, and other contributions are welcome. +If you find a demonstrable problem that is caused by the code of this +library, please: + +1. Search for `already reported problems + `_. +2. Check if the issue has been fixed or is still reproducible on the + latest `master` branch. +3. Create an issue with **a test case**. + +If you create a feature branch, you can run the tests to ensure everything is +operating correctly: + +.. code-block:: console + + $ ./run-tests.sh + + ... + + ====== 31 passed, 23 skipped in 1.37 seconds ====== + +You can also test your feature branch using Docker:: + + $ docker-compose build + $ docker-compose run web python setup.py test + $ docker-compose run web python setup.py build_sphinx + $ docker-compose run web pep257 --match-dir='dojson' diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..72e20f4c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +# This file is part of DoJSON +# Copyright (C) 2015 CERN. 
+# +# DoJSON is free software; you can redistribute it and/or +# modify it under the terms of the Revised BSD License; see LICENSE +# file for more details. + +# Use Python-2.7: +FROM python:2.7 + +# Install some prerequisites ahead of `setup.py` in order to profit +# from the docker build cache: +RUN pip install coveralls \ + esmre \ + ipython \ + lxml \ + mock \ + pep257 \ + pytest \ + pytest-cache \ + pytest-cov \ + pytest-pep8 \ + six \ + sphinx_rtd_theme + +# Add sources to `code` and work there: +WORKDIR /code +ADD . /code + +# Install dojson: +RUN pip install -e .[docs] + +# Run container as user `dojson` with UID `1000`, which should match +# current host user in most situations: +RUN adduser --uid 1000 --disabled-password --gecos '' dojson && \ + chown -R dojson:dojson /code + +# Run test suite instead of starting the application: +USER dojson +CMD ["python", "setup.py", "test"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..150da32a --- /dev/null +++ b/LICENSE @@ -0,0 +1,38 @@ +DoJSON is free software; you can redistribute it and/or modify it +under the terms of the Revised BSD License quoted below. + +Copyright (C) 2015 CERN. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. + +In applying this license, CERN does not waive the privileges and +immunities granted to it by virtue of its status as an +Intergovernmental Organization or submit itself to any jurisdiction. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..42f9a7f9 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,23 @@ +# This file is part of DoJSON +# Copyright (C) 2015 CERN. +# +# DoJSON is free software; you can redistribute it and/or +# modify it under the terms of the Revised BSD License; see LICENSE +# file for more details. + +include *.rst +include *.sh +include *.yml +include .coveragerc +include .travis.yml +include .dockerignore +include Dockerfile +include LICENSE +include pytest.ini +include tox.ini +recursive-include docs *.py +recursive-include docs *.rst +recursive-include docs Makefile +recursive-include dojson *.py +recursive-include dojson *.json +recursive-include tests *.py diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..9dc79ccd --- /dev/null +++ b/README.rst @@ -0,0 +1,43 @@ +======== + DoJSON +======== + +.. image:: https://img.shields.io/travis/CERNDocumentServer/cds_dojson.svg + :target: https://travis-ci.org/CERNDocumentServer/cds_dojson + +.. 
image:: https://img.shields.io/coveralls/CERNDocumentServer/cds_dojson.svg + :target: https://coveralls.io/r/CERNDocumentServer/cds_dojson + +.. image:: https://img.shields.io/github/tag/CERNDocumentServer/cds_dojson.svg + :target: https://github.com/CERNDocumentServer/cds_dojson/releases + +.. image:: https://img.shields.io/pypi/dm/dojson.svg + :target: https://pypi.python.org/pypi/dojson + +.. image:: https://img.shields.io/github/license/CERNDocumentServer/cds_dojson.svg + :target: https://github.com/CERNDocumentServer/cds_dojson/blob/master/LICENSE + + +About +===== + + + +Installation +============ + + +Documentation +============= + +Documentation can be built using Sphinx: :: + + pip install cds_dojson[docs] + python setup.py build_sphinx + +Testing +======= + +Running the test suite is as simple as: :: + + python setup.py test diff --git a/RELEASE-NOTES.rst b/RELEASE-NOTES.rst new file mode 100644 index 00000000..e69de29b diff --git a/cds_dojson/__init__.py b/cds_dojson/__init__.py new file mode 100644 index 00000000..41ff2beb --- /dev/null +++ b/cds_dojson/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
diff --git a/cds_dojson/marc21/__init__.py b/cds_dojson/marc21/__init__.py new file mode 100644 index 00000000..804dc1eb --- /dev/null +++ b/cds_dojson/marc21/__init__.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""Marc21 init.""" + +from __future__ import absolute_import + +import pkg_resources +import logging + +from invenio_utils.datastructures import SmartDict + +from ..query import Query +from .models.default import model as marc21_default_model + + +def convert_cdsmarcxml(source): + """Convert CDS to JSON.""" + from dojson.contrib.marc21.utils import create_record, split_blob + + for data in split_blob(source.read()): + record = create_record(data) + yield query_matcher(record).do(record) + + +def query_matcher(record): + """Record query matcher. + + :param record: :func:`dojson.contrib.marc21.utils.create_record` object. 
+ + :returns: a model instance + :rtype: :class:`~cds_dojson.marc21.translations.default.CDSMarc21` + """ + logger = logging.getLogger(__name__ + ".query_matcher") + + _smart_dict_record = SmartDict(dict(record)) + _matches = [] + for entry_point in pkg_resources.iter_entry_points( + 'cds_dojson.marc21.models'): + name = entry_point.name + model = entry_point.load() + query = Query(model.__query__) + + if query.match(_smart_dict_record): + logger.info("Model `{0}` found matching the query {1}.".format( + name, model + )) + _matches.append([name, model]) + + try: + if len(_matches) > 1: + logger.error( + ("Found more than one matches `{0}`, now it'll fallback to {1}" + " for record {2}.").format( + _matches, _matches[0], _smart_dict_record + ) + ) + return _matches[0][1] + except IndexError: + logger.warning( + "Model *not* found, fallback to default {0} for record {1}".format( + marc21_default_model, _smart_dict_record + ) + ) + return marc21_default_model diff --git a/cds_dojson/marc21/fields/__init__.py b/cds_dojson/marc21/fields/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cds_dojson/marc21/fields/album.py b/cds_dojson/marc21/fields/album.py new file mode 100644 index 00000000..39193d10 --- /dev/null +++ b/cds_dojson/marc21/fields/album.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""CDS Album MARC 21 field definitions.""" + +from cds_dojson.marc21.models.album import model as marc21 + +from dojson import utils + + +@marc21.over('images', '^774[10_][8_]', override=True) +@utils.for_each_value +@utils.filter_values +def images(self, key, value): + """Images contained in this album""" + reference = None + if value.get('r'): + reference = 'http://cds.cern.ch/record/' + value['r'] + return { + '$ref': reference, + 'record_type': value.get('a'), + 'relation': value.get('n') + } + + +@marc21.over('place_of_photo', '^923..') +@utils.for_each_value +@utils.filter_values +def place_of_photo(self, key, value): + """Place of photo where it was taken and requester info""" + return { + 'place': value.get('p'), + 'requester': value.get('r') + } + + +@marc21.over('photolab', '^924..') +@utils.for_each_value +@utils.filter_values +def photolab(self, key, value): + """Photolab""" + return { + 'tirage': value.get('a'), + 'photolab_1': value.get('b'), + 'photolab_2': value.get('t'), + } diff --git a/cds_dojson/marc21/fields/default/__init__.py b/cds_dojson/marc21/fields/default/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cds_dojson/marc21/fields/default/bd01x09x.py b/cds_dojson/marc21/fields/default/bd01x09x.py new file mode 100644 index 00000000..96f4dd8d --- /dev/null +++ b/cds_dojson/marc21/fields/default/bd01x09x.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. 
+# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""CDS special/custom tags.""" + +from cds_dojson.marc21.models.default import model as marc21 + +from dojson import utils + + +@marc21.over('international_standard_number', '^021..') +@utils.for_each_value +def international_standard_number(self, key, value): + """Report Number.""" + return value.get('a') + + +@marc21.over('system_control_number', '^035..', override=True) +@utils.for_each_value +@utils.filter_values +def system_control_number(self, key, value): + """System Control Number.""" + return { + 'system_control_number': value.get('a'), + 'field_link_and_sequence_number': utils.force_list( + value.get('8') + ), + 'canceled_invalid_control_number': utils.force_list( + value.get('z') + ), + 'linkage': value.get('6'), + 'inst': value.get('9'), + } + + +@marc21.over('report_number', '^088..', override=True) +@utils.for_each_value +@utils.filter_values +def report_number(self, key, value): + """Report Number.""" + return { + 'report_number': value.get('a'), + 'field_link_and_sequence_number': utils.force_list( + value.get('8') + ), + 'canceled_invalid_report_number': utils.force_list( + value.get('z') + ), + 'linkage': value.get('6'), + '_report_number': value.get('9'), # not displayed but searchable + } diff --git a/cds_dojson/marc21/fields/default/bd2xx.py b/cds_dojson/marc21/fields/default/bd2xx.py new file mode 100644 index 00000000..5737b4fb --- /dev/null +++ b/cds_dojson/marc21/fields/default/bd2xx.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. 
+# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""CDS special/custom tags.""" + +from cds_dojson import utils as cds_utils +from cds_dojson.marc21.models.default import model as marc21 + +from dojson import utils + + +@marc21.over('title_statement', '^245[10_][_1032547698]', override=True) +@cds_utils.for_each_squash +@utils.filter_values +def title_statement(self, key, value): + """Title Statement.""" + indicator_map1 = {"0": "No added entry", "1": "Added entry"} + indicator_map2 = { + "0": "No nonfiling characters", + "1": "Number of nonfiling characters", + "2": "Number of nonfiling characters", + "3": "Number of nonfiling characters", + "4": "Number of nonfiling characters", + "5": "Number of nonfiling characters", + "6": "Number of nonfiling characters", + "7": "Number of nonfiling characters", + "8": "Number of nonfiling characters", + "9": "Number of nonfiling characters"} + return { + 'title': value.get('a'), + 'statement_of_responsibility': value.get('c'), + 'remainder_of_title': value.get('b'), + 'bulk_dates': value.get('g'), + 'inclusive_dates': value.get('f'), + 'medium': value.get('h'), + 'form': utils.force_list( + value.get('k') + ), + 'number_of_part_section_of_a_work': utils.force_list( + value.get('n') + ), + 'name_of_part_section_of_a_work': utils.force_list( + value.get('p') 
+ ), + 'version': value.get('s'), + 'linkage': value.get('6'), + 'field_link_and_sequence_number': utils.force_list( + value.get('8') + ), + 'title_added_entry': indicator_map1.get(key[3]), + 'nonfiling_characters': indicator_map2.get(key[4]), + } + + +@marc21.over('imprint', '^269__') +@utils.for_each_value +@utils.filter_values +def imprint(self, key, value): + """Pre-publication, distribution, etc. + + NOTE: Don't use the following lines for CER base=14,2n,41-45 + NOTE: Don't use for THESES + """ + return { + 'place_of_publication': value.get('a'), + 'name_of_publication': value.get('b'), + 'complete_date': value.get('c'), + } diff --git a/cds_dojson/marc21/fields/default/bd5xx.py b/cds_dojson/marc21/fields/default/bd5xx.py new file mode 100644 index 00000000..513e18b8 --- /dev/null +++ b/cds_dojson/marc21/fields/default/bd5xx.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
+ +"""CDS special/custom tags.""" + +from cds_dojson.marc21.models.default import model as marc21 + +from dojson import utils + + +@marc21.over('french_summary_note', '^590__') +@utils.for_each_value +@utils.filter_values +def french_summary_note(self, key, value): + """French summary note.""" + return { + 'smuary': value.get('a'), + 'expansion_of_summary_note': value.get('b') + } + + +@marc21.over('field_591', '^591__') +@utils.for_each_value +@utils.filter_values +def field_591(self, key, value): + """Type of Document.""" + return { + 'subfield_a': value.get('a'), + 'subfield_b': value.get('b') + } + + +@marc21.over('type_of_document', '^594__') +@utils.for_each_value +def type_of_document(self, key, value): + """Type of Document.""" + return value.get('a') + + +@marc21.over('internal_note', '^595__') +@utils.for_each_value +@utils.filter_values +def internal_note(self, key, value): + """Internal NOTE.""" + return { + 'internal_note': value.get('a'), + 'control_field': value.get('d'), + 'inspec_number': value.get('i'), + 'subject_note': value.get('s'), + 'additional_note': value.get('9') + } + + +@marc21.over('slac_note', '^596.') +@utils.for_each_value +@utils.filter_values +def slac_note(self, key, value): + """Slac note - some kind of internal note""" + return { + 'slac_note': value.get('a'), + 'dump': value.get('b'), + } diff --git a/cds_dojson/marc21/fields/default/bd69x.py b/cds_dojson/marc21/fields/default/bd69x.py new file mode 100644 index 00000000..f3779cb3 --- /dev/null +++ b/cds_dojson/marc21/fields/default/bd69x.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. 
+# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""CDS special/custom tags.""" + +from cds_dojson.marc21.models.default import model as marc21 + +from dojson import utils + + +@marc21.over('subject_indicator', '^69[07]C_') +@utils.for_each_value +def subject_indicator(self, key, value): + """Subject Indicator.""" + return value.get('a') + + +@marc21.over('observation', '^691__') +def observation(self, key, value): + """Observation.""" + return value.get('a') + + +@marc21.over('accelerator_experiment', '^693__') +@utils.for_each_value +@utils.filter_values +def accelerator_experiment(self, key, value): + """Experiment.""" + return { + 'acelerator': value.get('a'), + 'experiment': value.get('e'), + 'facility': value.get('f'), + 'subfield_s': value.get('s'), + } + + +@marc21.over('classification_terms', '^694__') +@utils.for_each_value +@utils.filter_values +def classification_terms(self, key, value): + """Classification terms.""" + return { + 'uncontrolled_term': value.get('a'), + 'institute': value.get('9'), + } + + +@marc21.over('thesaurus_terms', '^695__') +@utils.for_each_value +@utils.filter_values +def thesaurus_terms(self, key, value): + """Thesaurus term.""" + return { + 'uncontrolled_term': value.get('a'), + 'institute': value.get('9'), + } diff --git a/cds_dojson/marc21/fields/default/bd7xx.py b/cds_dojson/marc21/fields/default/bd7xx.py new file mode 100644 index 00000000..004cbfb0 --- /dev/null +++ b/cds_dojson/marc21/fields/default/bd7xx.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. 
+# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""CDS special/custom tags.""" + +from cds_dojson.marc21.models.default import model as marc21 + +from dojson import utils + + +@marc21.over('added_entry_corporate_name', '^710[10_2][_2]', override=True) +@utils.for_each_value +@utils.filter_values +def added_entry_corporate_name(self, key, value): + """Added Entry-Corporate Name.""" + indicator_map1 = { + "0": "Inverted name", + "1": "Jurisdiction name", + "2": "Name in direct order"} + indicator_map2 = {"#": "No information provided", "2": "Analytical entry"} + return { + 'authority_record_control_number': utils.force_list( + value.get('0') + ), + 'materials_specified': value.get('3'), + 'institution_to_which_field_applies': value.get('5'), + 'relator_code': utils.force_list( + value.get('4') + ), + 'linkage': value.get('6'), + 'field_link_and_sequence_number': utils.force_list( + value.get('8') + ), + 'corporate_name_or_jurisdiction_name_as_entry_element': value.get('a'), + 'location_of_meeting': value.get('c'), + 'subordinate_unit': utils.force_list( + value.get('b') + ), + 'relator_term': utils.force_list( + value.get('e') + ), + 'date_of_meeting_or_treaty_signing': utils.force_list( + value.get('d') + ), + 'miscellaneous_information': value.get('g'), + 'date_of_a_work': value.get('f'), + 'relationship_information': 
utils.force_list( + value.get('i') + ), + 'medium': value.get('h'), + 'form_subheading': utils.force_list( + value.get('k') + ), + 'medium_of_performance_for_music': utils.force_list( + value.get('m') + ), + 'language_of_a_work': value.get('l'), + 'arranged_statement_for_music': value.get('o'), + 'number_of_part_section_meeting': utils.force_list( + value.get('n') + ), + 'name_of_part_section_of_a_work': utils.force_list( + value.get('p') + ), + 'version': value.get('s'), + 'key_for_music': value.get('r'), + 'affiliation': value.get('u'), + 'title_of_a_work': value.get('t'), + 'cern_work': value.get('9'), + 'international_standard_serial_number': value.get('x'), + 'type_of_corporate_name_entry_element': indicator_map1.get(key[3]), + 'type_of_added_entry': indicator_map2.get(key[4]), + } + + +@marc21.over('translator', '^721__') +@utils.for_each_value +@utils.filter_values +def translator(self, key, value): + """Translator.""" + return { + 'personal_name': value.get('a'), + 'words_translated': value.get('1'), + } diff --git a/cds_dojson/marc21/fields/default/bd8xx.py b/cds_dojson/marc21/fields/default/bd8xx.py new file mode 100644 index 00000000..587c333c --- /dev/null +++ b/cds_dojson/marc21/fields/default/bd8xx.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""CDS special/custom tags.""" + +from cds_dojson.marc21.models.default import model as marc21 + +from dojson import utils + + +@marc21.over('electronic_mail_message', '^859__') +@utils.for_each_value +@utils.filter_values +def electronic_mail_message(self, key, value): + """Electronic mail message.""" + return { + 'contact': value.get('a'), + 'e-mail_address': value.get('f'), + 'date': value.get('x'), + } diff --git a/cds_dojson/marc21/fields/default/bd9xx.py b/cds_dojson/marc21/fields/default/bd9xx.py new file mode 100644 index 00000000..59c7e0ba --- /dev/null +++ b/cds_dojson/marc21/fields/default/bd9xx.py @@ -0,0 +1,303 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
+ +"""CDS special/custom tags.""" + +from cds_dojson import utils as cds_utils +from cds_dojson.marc21.models.default import model as marc21 + +from dojson import utils + + +@marc21.over('affiliation_at_conversion', '^901__') +@utils.for_each_value +def affiliation_at_conversion(self, key, value): + """Affiliation at conversion.""" + return { + 'name_of_institute': value.get('u'), + } + + +@marc21.over('grey_book', '^903__') +@utils.for_each_value +@utils.filter_values +def grey_book(self, key, value): + """Grey book.""" + return { + 'approval': value.get('a'), + 'beam': value.get('b'), + 'status_date': value.get('d'), + 'status': value.get('s'), + } + + +@marc21.over('approval_status_history', '^9031_') +@utils.for_each_value +@utils.filter_values +def approval_status_history(self, key, value): + """Approval status history.""" + return { + 'description': value.get('a'), + 'report_number': value.get('b'), + 'category': value.get('c'), + 'date': value.get('d'), + 'deadline': value.get('e'), + 'e-mail': value.get('f'), + 'status': value.get('s'), + } + + +@marc21.over('spokesman', '^905__') +@utils.for_each_value +@utils.filter_values +def spokesman(self, key, value): + """Spokesman.""" + return { + 'address': value.get('a'), + 'telephone': value.get('k'), + 'fax': value.get('l'), + 'e-mail': value.get('m'), + 'personal_name': value.get('p'), + 'private_address': value.get('q'), + } + + +@marc21.over('referee', '^906__') +@utils.for_each_value +@utils.filter_values +def referee(self, key, value): + """Referee.""" + return { + 'address': value.get('a'), + 'telephone': value.get('k'), + 'fax': value.get('l'), + 'e-mail': value.get('m'), + 'personal_name': value.get('p'), + 'private_address': value.get('q'), + 'affiliation': value.get('u'), + } + + +@marc21.over('fsgo', '^910__') +@utils.for_each_value +@utils.filter_values +def fsgo(self, key, value): + """FSGO.""" + return { + 'personal_name': value.get('f'), + 'alternate_abbreviated_title': value.get('9'), + } + + 
@marc21.over('citation', '^913__')
@utils.for_each_value
@utils.filter_values
def citation(self, key, value):
    """Name the subfields of a ``913__`` (citation) field."""
    return {
        'citation': value.get('c'),
        'unformatted_reference': value.get('p'),
        'title_abbreviation': value.get('t'),
        'uniform_resource_identifier': value.get('u'),
        'volume': value.get('v'),
        'year': value.get('y'),
    }


@marc21.over('status_week', '^916__')
@utils.for_each_value
@utils.filter_values
def status_week(self, key, value):
    """Name the subfields of a ``916__`` (status week) field."""
    return {
        'acquisition_of_proceedings_code': value.get('a'),
        'display_period_for_books': value.get('d'),
        'number_of_copies_bought_by_cern': value.get('e'),
        'status_of_record': value.get('s'),
        'status_week': value.get('w'),
        'year_for_annual_list': value.get('y'),
    }


@marc21.over('dates', '^925__')
@utils.for_each_value
@utils.filter_values
def dates(self, key, value):
    """Name the opening/closing dates of a ``925__`` field."""
    return {
        'opening': value.get('a'),
        'closing': value.get('b'),
    }


@marc21.over('file_number', '^927__')
@utils.for_each_value
def file_number(self, key, value):
    """Return subfield ``a`` of a ``927__`` (file number) field."""
    return value.get('a')


@marc21.over('peri_internal_note', '^937__')
@utils.for_each_value
@utils.filter_values
def peri_internal_note(self, key, value):
    """Name the subfields of a ``937__`` (peri: internal note) field."""
    return {
        'internal_note': value.get('a'),
        'modification_date': value.get('c'),
        'responsible_of_the_modification': value.get('s'),
    }


@marc21.over('base', '^960__')
@utils.for_each_value
def base(self, key, value):
    """Return subfield ``a`` of a ``960__`` (base) field."""
    return value.get('a')


@marc21.over('cat', '^961__')
@utils.for_each_value
@utils.filter_values
def cat(self, key, value):
    """Name the subfields of a ``961__`` (CAT) field."""
    return {
        'cataloger': value.get('a'),
        'cataloger_level': value.get('b'),
        'modification_date': value.get('c'),
        'library': value.get('l'),
        'hour': value.get('h'),
        'creation_date': value.get('x'),
    }


@marc21.over('aleph_linking_field', '^962__')
@utils.for_each_value
@utils.filter_values
def aleph_linking_field(self, key, value):
    """Name the subfields of a ``962__`` (ALEPH linking) field."""
    return {
        'link_type': value.get('a'),
        'sysno': value.get('b'),
        'library': value.get('l'),
        # The original looked up 'ln', which can never match because MARC
        # subfield codes are a single character, so this key was always
        # empty.
        # NOTE(review): 'n' is assumed to be the intended code (it pairs
        # with 'm' for the up-link note) -- confirm against the ALEPH LKR
        # field definition.
        'down_record_link_note': value.get('n'),
        'up_record_link_note': value.get('m'),
        'year_link': value.get('y'),
        'volume_link': value.get('v'),
        'part_link': value.get('p'),
        'issue_link': value.get('i'),
        'pages_link': value.get('k'),
        'base': value.get('t'),
    }


# We are squashing this field, because it might contain duplicates
# (even though it shouldn't) and we don't want to lose data
@marc21.over('owner', '^963__')
@cds_utils.for_each_squash
@utils.filter_values
def owner(self, key, value):
    """Name the subfields of a ``963__`` (owner) field."""
    return {
        'owner': value.get('a'),
        'status': value.get('b'),
    }


@marc21.over('item', '^964__')
def item(self, key, value):
    """Expose subfield ``a`` of a ``964__`` (item) field as its owner.

    Unlike the neighbouring rules this one is registered without
    ``for_each_value``/``filter_values``, so ``value`` is used as received.
    """
    return {
        'owner': value.get('a'),
    }


# We are squashing this field, because it might contain duplicates
# (even though it shouldn't) and we don't want to lose data
@marc21.over('sysno', '^970__')
@cds_utils.for_each_squash
@utils.filter_values
def sysno(self, key, value):
    """System number taken from AL500 SYS."""
    return {
        'sysno': value.get('a'),
        'surviver': value.get('d'),
    }


@marc21.over('system_number_of_deleted_double_records', '^981__')
@utils.for_each_value
def system_number_of_deleted_double_records(self, key, value):
    """Return subfield ``a`` of a ``981__`` field.

    The field holds the system number of deleted double records.
    """
    return value.get('a')


@marc21.over('additional_subject_added_entry_topical_term', '^993__')
@utils.for_each_value
@utils.filter_values
def additional_subject_added_entry_topical_term(self, key, value):
    """Name the subfields of a ``993__`` field.

    The field is an additional subject added entry (topical term).
    """
    return {
        'processes': value.get('q'),
        'accelerator_physics': value.get('r'),
        'technology': value.get('t'),
    }


@marc21.over('references', '^999C5')
@utils.for_each_value
@utils.filter_values
def references(self, key, value):
    """Name the subfields of a ``999C5`` (references) field.

    Subfield ``m`` is passed through ``utils.force_list``; every other
    subfield is copied as found.
    """
    return {
        'doi': value.get('a'),
        'authors': value.get('h'),
        'miscellaneous': utils.force_list(
            value.get('m')
        ),
        'issue_number': value.get('n'),
        'order_number': value.get('o'),
        'page': value.get('p'),
        'report_number': value.get('r'),
        'journal_publication_note': value.get('s'),
        'journal_title_abbreviation': value.get('t'),
        'uniform_resource_identifier': value.get('u'),
        'volume': value.get('v'),
        'year': value.get('y'),
    }


@marc21.over('refextract_references', '^999C6')
@utils.for_each_value
def refexctract_references(self, key, value):
    """Expose subfield ``a`` of a ``999C6`` field as refextract info."""
    # The function name keeps its historical spelling; the rule itself is
    # registered under the correctly spelled 'refextract_references'.
    return {
        'refextract_info': value.get('a'),
    }


@marc21.over('record_type', '^999..')
@utils.for_each_value
@utils.filter_values
def record_type(self, key, value):
    """Record type - mostly IMAGE."""
    return {
        'record_type': value.get('a'),
        'dump': value.get('9'),
    }
"""CDS Image MARC 21 field definitions."""

from cds_dojson.marc21.models.image import model as marc21

from dojson import utils


@marc21.over('album_parent', '^774[10_][8_]', override=True)
@utils.for_each_value
def album_parent(self, key, value):
    """Return the album that contains this photo.

    Subfield ``a`` is exposed as ``dump_album`` and subfield ``r`` as
    ``album_id``.
    """
    dump_album = value.get('a')
    album_id = value.get('r')
    return {'dump_album': dump_album, 'album_id': album_id}


@marc21.over('image_url', '^856.[10_28]', override=True)
@utils.for_each_value
@utils.filter_values
def image_url(self, key, value):
    """Describe the concrete image file referenced by an ``856`` field.

    Besides the named subfields, the fourth character of ``key`` (the first
    indicator) is translated into ``access_method``; unknown indicators
    yield ``None``.
    """
    # (output key, MARC subfield code)
    subfields = (
        ('size', 's'),
        ('path', 'd'),
        ('electronic_format_type', 'q'),
        ('uri', 'u'),
        ('link_text', 'y'),
        ('public_note', 'z'),
        ('subformat', 'x'),
        ('photo_id', '8'),
        ('access_method_subfield', '2'),
    )
    image = dict((name, value.get(code)) for name, code in subfields)

    first_indicator = key[3]
    if first_indicator == "4":
        image['access_method'] = "http"
    elif first_indicator == "7":
        image['access_method'] = "method_in_subfield"
    else:
        image['access_method'] = None
    return image
"""Default CDS MARC21 model."""

from pkg_resources import iter_entry_points

from dojson.contrib.marc21 import marc21
from dojson.overdo import Overdo


class CDSMarc21(Overdo):

    """Translation Index for CDS specific MARC21."""

    __query__ = '690C_.a:CERN'

    def over(self, name, *source_tags, **kwargs):
        """Register creator rule, optionally replacing existing ones.

        :param name: name under which the rule is registered.
        :param source_tags: regular expressions of the tags the rule handles.
        :param kwargs:
            * override: boolean; when true, every rule already registered
              under the same `name`, or with an expression equal to one of
              `source_tags`, is dropped before the new one is added.
        :returns: whatever ``Overdo.over`` returns for ``name`` and
            ``source_tags``.
        """
        if kwargs.get('override', False):
            # Rules are indexed as rule[0] (tag expression) and rule[1][0]
            # (rule name).  The list is filtered in place so that existing
            # references to ``self.rules`` see the change.
            self.rules[:] = [
                rule for rule in self.rules
                if not (name == rule[1][0] or
                        any(tag == rule[0] for tag in source_tags))
            ]

        return super(CDSMarc21, self).over(name, *source_tags)


model = CDSMarc21(bases=(marc21, ),
                  entry_point_group='cds_dojson.marc21.default')
"""Video model."""

from .default import CDSMarc21, model as cds_marc21


class CDSVideo(CDSMarc21):

    """Translation Index for CDS Videos."""

    # Query string associated with this model.
    # NOTE(review): presumably evaluated against a record to decide whether
    # this model applies (field 980__, subfield a) -- confirm with the caller.
    __query__ = '980__.a:PUBLVIDEOMOVIE'

# Built on top of the default CDS model; no entry point group is passed.
model = CDSVideo(bases=(cds_marc21, ))
"""Query parser."""

import re

import pypeg2
import six

from invenio_query_parser.walkers.pypeg_to_ast import PypegConverter
from invenio_query_parser.parser import Main as parser
from invenio_query_parser.visitor import make_visitor
from invenio_query_parser.ast import AndOp, DoubleQuotedValue, EmptyQuery, \
    Keyword, KeywordOp, NotOp, OrOp, RangeOp, RegexValue, SingleQuotedValue, \
    Value, ValueQuery

try:
    # The container ABCs live in ``collections.abc``; the aliases in
    # ``collections`` were removed in Python 3.10.
    from collections.abc import MutableMapping, MutableSequence
except ImportError:  # Python 2
    from collections import MutableMapping, MutableSequence


def match_unit(record, p, f=None, m='a', wl=None):
    """Match record to basic match unit.

    :param record: value to inspect; mappings and sequences are searched
        recursively, anything else is compared as text.
    :param p: pattern to look for (string or compiled regular expression).
    :param f: optional key; when given only ``record.get(f)`` is inspected.
    :param m: match type; ``'e'`` requires the text to equal ``p``, any
        other value performs a regular expression search.
    :param wl: not used by the matching itself; forwarded to nested calls.
    :returns: ``True`` when at least one inspected value matches.
    """
    if record is None:
        return p is None

    if f is not None:
        return match_unit(record.get(f), p, f=None, m=m, wl=wl)

    # compile search value only once for non exact search
    if m != 'e' and isinstance(p, six.string_types):
        p = re.compile(p)

    if isinstance(record, MutableSequence):
        return any(match_unit(field, p, f=f, m=m, wl=wl)
                   for field in record)
    elif isinstance(record, MutableMapping):
        return any(match_unit(field, p, f=f, m=m, wl=wl)
                   for field in record.values())

    if m == 'e':
        return six.text_type(record) == p

    return p.search(six.text_type(record)) is not None


class MatchUnit(object):

    """Implement visitor using ``match_unit`` API."""

    visitor = make_visitor()

    def __init__(self, record):
        """Store the record every visited node is matched against."""
        self.record = record

    # Every handler below is deliberately called ``visit``; the ``visitor``
    # decorator registers each one for the node type it is given.
    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        return left & right

    @visitor(OrOp)
    def visit(self, node, left, right):
        return left | right

    @visitor(NotOp)
    def visit(self, node, op):
        return not op

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        if isinstance(right, bool):  # second level operator
            left.update(dict(p=right))
        else:
            left.update(right)
        return match_unit(self.record, **left)

    @visitor(ValueQuery)
    def visit(self, node, op):
        return match_unit(self.record, **op)

    # The leaf handlers return keyword arguments for ``match_unit``.

    @visitor(Keyword)
    def visit(self, node):
        return dict(f=node.value)

    @visitor(Value)
    def visit(self, node):
        return dict(p=node.value)

    @visitor(SingleQuotedValue)
    def visit(self, node):
        return dict(p=node.value, m='p')

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        return dict(p=node.value, m='e')

    @visitor(RegexValue)
    def visit(self, node):
        return dict(p=node.value, m='r')

    @visitor(RangeOp)
    def visit(self, node, left, right):
        return dict(p="%s->%s" % (left, right))

    @visitor(EmptyQuery)
    def visit(self, node):
        return True

    # pylint: enable=W0613,E0102


class Query(object):

    """Query object."""

    def __init__(self, query):
        """Keep the raw query string; it is parsed on access to ``query``."""
        self._query = query

    @property
    def query(self):
        """Parse query string using given grammar."""
        tree = pypeg2.parse(self._query, parser, whitespace="")
        return tree.accept(PypegConverter())

    def match(self, record, user_info=None):
        """Return True if record match the query.

        ``user_info`` is accepted but not used.
        """
        return self.query.accept(MatchUnit(record))
import functools
from collections import defaultdict


def for_each_squash(f):
    """Squash the results of a repeated, non repeatable field into one dict.

    When the decorated rule receives a list of values, it is applied to each
    of them and the resulting dictionaries are merged: a key produced once
    keeps its single value, a key produced several times maps to the list of
    all its values.  Anything that is not a list is passed to the rule
    unchanged.

    .. example::
        [{'a': 'foo'}, {'b': 'bar'}] -> {'a': 'foo', 'b': 'bar'}
        [{'a': 'foo'}, {'a': 'bar'}] -> {'a': ['foo', 'bar']}

    :param f: rule taking ``(self, key, value, **kwargs)`` and returning a
        dictionary.
    :returns: the wrapped rule.
    """
    @functools.wraps(f)
    def wrapper(self, key, values, **kwargs):
        if not isinstance(values, list):
            return f(self, key, values, **kwargs)

        merged = defaultdict(list)
        for value in values:
            # ``items()`` exists on both Python 2 and 3, and the distinct
            # loop name keeps the ``key`` argument intact for later calls
            # of ``f``.
            for name, element in f(self, key, value, **kwargs).items():
                merged[name].append(element)

        # Collapse one-element lists back to the bare element.
        return dict(
            (name, elements if len(elements) > 1 else elements[0])
            for name, elements in merged.items()
        )
    return wrapper
+ command: python setup.py test + volumes: + - .:/code diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..2d2dd45c --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

# Print the list of available build targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

# Remove everything below the build directory.
clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+ +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/DoJSON.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/DoJSON.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/DoJSON" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/DoJSON" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." 
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..b9efebcd --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,279 @@ +# -*- coding: utf-8 -*- +# +# This file is part of DoJSON +# Copyright (C) 2015 CERN. +# +# DoJSON is free software; you can redistribute it and/or +# modify it under the terms of the Revised BSD License; see LICENSE +# file for more details. + +"""Sphinx configuration.""" + +from __future__ import print_function + +import os +import re +import sys + + +_html_theme = "sphinx_rtd_theme" +_html_theme_path = [] +try: + import sphinx_rtd_theme + _html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +except ImportError: + print("`sphinx_rtd_theme` not found, pip install it", file=sys.stderr) + _html_theme = "default" + + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.todo', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. 
project = u'DoJSON'
copyright = u'2014, Invenio collaboration'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.

# The version is parsed out of cds_dojson/version.py instead of importing the
# package.  The named group ``version`` is what ``.group('version')`` reads;
# the pattern is a raw string so that ``\s`` reaches the regex engine intact.
with open(os.path.join('..', 'cds_dojson', 'version.py'), 'rt') as f:
    version = re.search(
        r'__version__\s*=\s*"(?P<version>.*)"\n',
        f.read()
    ).group('version')

# The full version, including alpha/beta/rc tags.
release = version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
+html_theme = _html_theme + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] +html_theme_path = _html_theme_path + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +#html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. 
+#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'DoJSONdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'DoJSON.tex', u'DoJSON Documentation', + u'Invenio collaboration', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. 
+#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'invenioqueryparser', u'DoJSON Documentation', + [u'Invenio collaboration'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'DoJSON', u'DoJSON Documentation', + u'Invenio collaboration', 'DoJSON', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..1025120a --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,72 @@ +.. + This file is part of cds_dojson + Copyright (C) 2015 CERN. + + cds_dojson is free software; you can redistribute it and/or + modify it under the terms of the Revised BSD License; see LICENSE + file for more details. + +============ + cds_dojson +============ +.. currentmodule:: cds_dojson + +.. raw:: html + +

+ + travis-ci badge + + + coveralls.io badge + +

+ +cds_dojson is a simple Pythonic JSON to JSON converter. + +Installation +============ + +cds_dojson is on PyPI so all you need is: + +.. code-block:: console + + $ pip install cds_dojson + +Example +======= + +A simple example on how to convert MARCXML to JSON: + +.. code:: python + + from dojson.contrib.marc21.utils import create_record, split_blob + from dojson.contrib.marc21 import marc21 + [marc21.do(create_record(data)) for data in split_blob(open('/tmp/data.xml', 'r').read())] + + +API +=== + +.. automodule:: cds_dojson + :members: + + +Contrib +------- + +.. automodule:: dojson.marc21 + :members: + +.. include:: ../CHANGES.rst + +.. include:: ../CONTRIBUTING.rst + +.. include:: ../AUTHORS.rst + +License +======= + +.. include:: ../LICENSE diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..4d1fc7f9 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +# +# This file is part of DoJSON +# Copyright (C) 2014 CERN. +# +# DoJSON is free software; you can redistribute it and/or modify +# it under the terms of the Revised BSD License; see LICENSE file for +# more details. + + +[pytest] +pep8ignore = E501 +addopts = --pep8 --ignore=docs --cov=cds_dojson --cov-report=term-missing diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 00000000..5c78c358 --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# +# This file is part of DoJSON +# Copyright (C) 2015 CERN. +# +# DoJSON is free software; you can redistribute it and/or modify +# it under the terms of the Revised BSD License; see LICENSE file for +# more details. + +pep257 --match-dir='dojson' dojson && \ +sphinx-build -qnNW docs docs/_build/html && \ +python setup.py test diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..2a78e668 --- /dev/null +++ b/setup.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# +# This file is part of DoJSON +# Copyright (C) 2015 CERN. 
+# +# DoJSON is free software; you can redistribute it and/or +# modify it under the terms of the Revised BSD License; see LICENSE +# file for more details. + +"""DoJSON is a simple Pythonic JSON to JSON converter.""" + +import os +import re +import sys + +from setuptools import find_packages, setup +from setuptools.command.test import test as TestCommand + + +class PyTest(TestCommand): + + """PyTest test runner. + + See: http://pytest.org/latest/goodpractises.html?highlight=setuptools + """ + + user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")] + + def initialize_options(self): + """Initialise test options. + + The default ``pytest_args`` are read from the ``addopts`` entry of the + ``[pytest]`` section in ``pytest.ini``. + """ + TestCommand.initialize_options(self) + # Python 2 names the module ConfigParser, Python 3 configparser. + try: + from ConfigParser import ConfigParser + except ImportError: + from configparser import ConfigParser + config = ConfigParser() + config.read("pytest.ini") + # split() with no separator ignores repeated or trailing whitespace, + # so no empty argument is ever handed to py.test. + self.pytest_args = config.get("pytest", "addopts").split() + + def finalize_options(self): + """Finalise test options.""" + TestCommand.finalize_options(self) + self.test_args = [] + self.test_suite = True + + def run_tests(self): + """Run the tests with py.test and exit with its return code.""" + # import here, cause outside the eggs aren't loaded + import pytest + errno = pytest.main(self.pytest_args) + sys.exit(errno) + +# Get the version string. Cannot be done with import! 
+with open(os.path.join('cds_dojson', 'version.py'), 'rt') as f: + # Raw string keeps the \s escapes intact; the named group is the one + # read back by .group('version') below. + version = re.search( + r'__version__\s*=\s*"(?P<version>.*)"\n', + f.read() + ).group('version') + +tests_require = [ + 'pytest-cache>=1.0', + 'pytest-cov>=2.1.0', + 'pytest-pep8>=1.0.6', + 'pytest>=2.8.0', + 'coverage>=4.0.0', + 'mock', +] + +setup( + name='cds-dojson', + version=version, + url='http://github.com/CERNDocumentServer/cds-dojson/', + license='BSD', + author='Invenio collaboration', + author_email='info@invenio-software.org', + description=__doc__, + long_description=open('README.rst').read(), + packages=find_packages(), + zip_safe=False, + include_package_data=True, + platforms='any', + install_requires=[ + 'dojson>=0.1.1', + 'invenio-query-parser>=0.2', + 'invenio-utils>=0.2.0', + 'pyPEG2>=2.15.1', + ], + extras_require={ + 'docs': ['sphinx_rtd_theme'], + 'tests': tests_require, + }, + classifiers=[ + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Development Status :: 5 - Production/Stable', + ], + tests_require=tests_require, + cmdclass={'test': PyTest}, + entry_points={ + 'cds_dojson.marc21.default': [ + 'bd01x09x = cds_dojson.marc21.fields.default.bd01x09x', + 'bd2xx = cds_dojson.marc21.fields.default.bd2xx', + 'bd5xx = cds_dojson.marc21.fields.default.bd5xx', + 'bd69x = cds_dojson.marc21.fields.default.bd69x', + 'bd7xx = cds_dojson.marc21.fields.default.bd7xx', + 'bd8xx = cds_dojson.marc21.fields.default.bd8xx', + 'bd9xx = cds_dojson.marc21.fields.default.bd9xx', + ], + 'cds_dojson.marc21.album': [ + 'album = cds_dojson.marc21.fields.album' + ], + 
'cds_dojson.marc21.image': [ + 'image = cds_dojson.marc21.fields.image' + ], + 'dojson.cli.rule': [ + ], + 'dojson.cli.load': [ + ], + 'dojson.cli.dump': [ + ], + 'cds_dojson.marc21.models': [ + 'album = cds_dojson.marc21.models.album:model', + 'default = cds_dojson.marc21.models.default:model', + 'image = cds_dojson.marc21.models.image:model', + 'video = cds_dojson.marc21.models.video:model', + ], + } +) diff --git a/tests/test_cds_dojson_album.py b/tests/test_cds_dojson_album.py new file mode 100644 index 00000000..d074cc55 --- /dev/null +++ b/tests/test_cds_dojson_album.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
+ +"""Test cds dojson album records.""" + +from __future__ import absolute_import + + +CDS_ALBUM = """ + + 2054964 + SzGeCERN + 20150928110024.0 + + PUBLIC + + + 116 + + + 20050915 + 1520 + MMD01 + 20040702 + + + 000030391MMD + + + PHOTOARC + + + ALBUM + + + Assembly tool for BEBC expansion system + + + 1970 + + + Rubrique: Date Planche:9 70 De:479 A:502 + + + 11142014-37_191-8-70_300-10-70-6cmx6cm + + + SzGeCERN + Industry and Technology + + + IMAGE + 1782445 + + + IMAGE + 1782446 + + + IMAGE + 1782447 + + + IMAGE + Cover + 1782448 + + + IMAGE + 1782449 + + + IMAGE + 1782450 + + + IMAGE + 1782451 + + + IMAGE + 1782452 + + + IMAGE + 1782453 + + + IMAGE + 1782454 + + + IMAGE + 1782455 + + + IMAGE + 1782456 + + + IMAGE + 1782457 + + + IMAGE + 1782458 + + + IMAGE + 1782459 + + + IMAGE + 1782460 + + + IMAGE + 1782461 + + + IMAGE + 1782462 + + + IMAGE + 1782463 + + + IMAGE + 1782464 + + + IMAGE + 1782465 + + + IMAGE + 1782466 + + + IMAGE + 1782467 + + + IMAGE + 1782468 + + + Updated 774 values on run 1443157298 + + + Sep 1970 + + + 40 + + + Album with images scanned from original photo negatives + + + FILM + + + Neg NB 6 x 6 + + +""" + + +class TestCDSDoJSONAlbum(object): + + """Test CDS Albums.""" + + def test_image(self): + """Test album model from XML into JSON.""" + from dojson.contrib.marc21.utils import create_record + from cds_dojson.marc21.models.album import ( + model as marc21 + ) + + blob = create_record(CDS_ALBUM) + data = marc21.do(blob) + + # Check the second physical medium entry + assert data['physical_medium'][1][ + 'material_base_and_configuration'] == ['Neg NB 6 x 6'] + + # Check the fourth image reference and its 'Cover' relation + assert data['images'][3]['$ref'] == 'http://cds.cern.ch/record/1782448' + assert data['images'][3]['relation'] == 'Cover' + + # Check the imprint date + assert data['imprint'][0]['complete_date'] == 'Sep 1970' + + # Check that no fields are missing their model + assert marc21.missing(blob) == [] diff --git a/tests/test_cds_dojson_image.py 
b/tests/test_cds_dojson_image.py new file mode 100644 index 00000000..578274d9 --- /dev/null +++ b/tests/test_cds_dojson_image.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""Test cds dojson image records.""" + +from __future__ import absolute_import + +from cds_dojson.marc21 import query_matcher + + +CDS_IMAGE = """ + + 1782445 + 20150925091440.0 + + CERN-PHOTO-7009479 + + + CERN PhotoLab + + + No caption + + + 1970 + + + Geneva + CERN + 1970-9 + + + Photographic negative + 6cmx6cm + + + Image scanned from original photo negative on 14 Nov 2014 + Box 37_191-8-70_300-10-70 + + + 479-9-1970 + + + Archive Collection + Scanned by Contentra Technologies + 11142014-37_191-8-70_300-10-70-6cmx6cm + + + Updated 774 value on run 1443157342 + + + PHOTO + + + ALBUM + 2054964 + + + 1770303 + http://cds.cern.ch/record/1782445/files/70-9-479.jpg + Image 70-9-479 + + + 143713 + http://cds.cern.ch/record/1782445/files/70-9-479.jpg?subformat=icon-640 + icon-640 + + + 411079 + http://cds.cern.ch/record/1782445/files/70-9-479.jpg?subformat=icon-1440 + icon-1440 + + + 74003 + http://cds.cern.ch/record/1782445/files/70-9-479.jpg?subformat=icon-180 + icon-180 + + + n + 201446 + + + 86 + + + Public + notvisible + + + 
PHOTOARCIMAGES + + + IMAGE + + +""" + + +class TestCDSDoJSONImage(object): + + """Test CDS Images.""" + + def test_image(self): + """Test image model from XML into JSON.""" + from dojson.contrib.marc21.utils import create_record + from cds_dojson.marc21.models.image import ( + model as marc21 + ) + + # The matcher result must be an instance of the image model's class + match = query_matcher(create_record(CDS_IMAGE)) + + assert isinstance(match, marc21.__class__) + + blob = create_record(CDS_IMAGE) + data = marc21.do(blob) + + # Check the control number (doJSON) + assert data.get('control_number') == '1782445' + + # Check the parent album (CDSImage) + assert data['album_parent'][0]['album_id'] == '2054964' + + # Check the imprint (CDSMarc21) + assert data['imprint'][0]['place_of_publication'] == 'Geneva' + + # Check that no fields are missing their model + assert marc21.missing(blob) == [] diff --git a/tests/test_cds_dojson_video.py b/tests/test_cds_dojson_video.py new file mode 100644 index 00000000..9d88245d --- /dev/null +++ b/tests/test_cds_dojson_video.py @@ -0,0 +1,280 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Invenio +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
+ +"""Test cds dojson video records.""" + +from __future__ import absolute_import + + +CDS_VIDEO_PROJECT = """ + + 2053119 + 20150916121857.0 + + CERN-MOVIE-2015-038 + + + CERN Video Productions + Produced by + + + Deerhoof / Ex_noise_CERN + + + 2015 + + + publvideomovie + + + Ex/Noise/CERN: experimental/noise music performance inside CERN. It's 2015, and the Large Hadron Collider now operates at the highest energy in human history, 13 trillion electron volts. We have many ideas as to what we'll discover (dark matter, supersymmetry, extra Higgs bosons, quantum black holes), but we are really simply exploring the scientific unknown. The participating musicians explore the musical unknown. Ex/Noise/CERN juxtaposes physics and music by putting these musicians in CERN. + + + Deerhoof + CERN + + + SM18 + CERN + + + noise + CERN + + + music + CERN + + + Noemi Caraban + Director + + + CERN-MOVIE-2015-038-001 + + + noemi.caraban.gonzalez@cern.ch + + + 2015-09-08 + noemi.caraban.gonzalez@cern.ch + + + 85 + + + AVW.project.1082 + + + PUBLVIDEOMOVIE + + + VIDEOMEDIALAB + + +""" + +CDS_VIDEO_CLIP = """ + + 2053121 + 20150918135611.0 + + CERN-MOVIE-2015-038-001 + + + eng + + + CERN, SM18, + + + Ex / Noise / CERN / Deerhoof + + + 2015 + + + 2015-09-15 + + + 00:09:05.280 + 1920x1080 16/9, 25.00 + 25 + 1920x1080 + 16:9 + + + + + + Noemi Caraban + + + Yann Krajewsky + + + Piotr Traczyk + + + Indie rockers Deerhoof battled with the noise of CERN’s magnet test facilities on 30 August 2015. The band visited CERN at the invitation of ATLAS physicist James Beacham, whose pilot project Ex/Noise/CERN collides experimental music artists with experimental particle physics. 
Credits: -Producer- CERN Video Productions James Beacham François Briard -Director- Noemi Caraban -Camera- Yann Krajewski Piotr Traczyk Noemi Caraban -Crane operator- Antonio Henrique Jorge-Costa -Live recording at CERN- Mixing at Rec studio/Geneva By Serge Morattel -Infography- Daniel Dominguez Noemi Caraban -Deerhoof- John Dieterich Satomi Matsuzaki Ed Rodriguez Greg Saunier w/Deron Pulley SPECIAL THANKS TO: Michal Strychalski Marta Bajko Maryline Charrondiere Luca Bottura Christian Giloux Rodrigue Faes Mariane Catallon Georgina Hobgen Hailey Reissman Marine Bass + + + CERN + 2015 + + + satomi + CERN + + + guitar ed performance + CERN + + + bass and full band + CERN + + + cern-deerhoof-audio tracks-24 bits-48 khz + CERN + + + guitar john performance + CERN + + + noise + CERN + + + ex_noise + CERN + + + CERN + CERN + + + SM18 + CERN + + + music + CERN + + + performance + CERN + + + publvideomovie + + + AVW.project.1082 + CERN-MOVIE-2015-038 + + + MediaArchive + https://mediaarchive.cern.ch/MediaArchive/Video/Public/Movies/CERN/2015/CERN-MOVIE-2015-038/CERN-MOVIE-2015-038-001/CERN-MOVIE-2015-038-001-5872-kbps-1920x1080-audio-128-kbps-stereo.mp4 + mp45872 + 5872 kbps maxH 1080 25 fps audio 128 kbps 48 kHz stereo + + + MediaArchive + https://mediaarchive.cern.ch/MediaArchive/Video/Public/Movies/CERN/2015/CERN-MOVIE-2015-038/CERN-MOVIE-2015-038-001/CERN-MOVIE-2015-038-001-2672-kbps-1280x720-audio-128-kbps-stereo.mp4 + mp42672 + 2672 kbps maxH 720 25 fps audio 128 kbps 48 kHz stereo + + + noemi.caraban.gonzalez@cern.ch + + + 18 Sep 2015 + + + + 85 + + + AVW.clip.1273 + + + PUBLVIDEOMOVIE + + + VIDEOMEDIALAB + + +""" + + +class TestCDSDoJSONVideos(object): + + """Test CDS.""" + + def test_video_clip(self): + """Test video clip loading from XML.""" + from dojson.contrib.marc21.utils import create_record + from cds_dojson.marc21.models.video import ( + model as marc21 + ) + + blob = create_record(CDS_VIDEO_CLIP) + data = marc21.do(blob) + + # Check if credits are correct 
+ assert len(data.get('creation_production_credits_note')) == 3 + # Check if the host entry is correct + assert data[ + 'host_item_entry'][0]['report_number'] == ["CERN-MOVIE-2015-038"] + + # Check physical description + expected_physical_description = [ + { + "accompanying_material": "16:9", + "other_physical_details": "1920x1080 16/9, 25.00", + "dimensions": ["25"], + "extent": ["00:09:05.280"] + } + ] + assert data.get( + 'physical_description') == expected_physical_description + + # Check that no fields are missing their model + assert marc21.missing(blob) == [] + + def test_video_project(self): + """Test video project from XML.""" + from dojson.contrib.marc21.utils import create_record + from cds_dojson.marc21.models.default import ( + model as marc21 + ) + + blob = create_record(CDS_VIDEO_PROJECT) + data = marc21.do(blob) + + # Check if the video file is present + assert data['constituent_unit_entry'][0][ + 'report_number'] == ['CERN-MOVIE-2015-038-001'] + + # Check the control number + assert data.get('control_number') == '2053119' + + # Check that no fields are missing their model + assert marc21.missing(blob) == [] diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..0143027e --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Document Server. +# Copyright (C) 2015 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +from __future__ import absolute_import + + +class TestCDSDoJSONUtils(object): + + """Test DoJSON utils.""" + + def test_for_each_squash(self): + """Check if for_each_squash works correctly.""" + from cds_dojson.utils import for_each_squash + from dojson.utils import filter_values + + @for_each_squash + @filter_values + def field(self, key, value): + return { + 'a': value.get('1'), + 'b': value.get('2') + } + + # A single dict is converted as is + squashed = field(None, None, {'1': 'foo', '2': 'bar'}) + assert squashed == {'a': 'foo', 'b': 'bar'} + + # A list of dicts with distinct keys is merged into one dict + squashed = field(None, None, [{'1': 'foo'}, {'2': 'bar'}]) + assert squashed == {'a': 'foo', 'b': 'bar'} + + # Values produced for the same key are collected into a list + squashed = field(None, None, [{'1': 'foo', '2': 'bar2'}, {'2': 'bar'}]) + assert squashed == {'a': 'foo', 'b': ['bar2', 'bar']} diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..75e9ac98 --- /dev/null +++ b/tox.ini @@ -0,0 +1,15 @@ +# This file is part of DoJSON +# Copyright (C) 2014 CERN. +# +# DoJSON is free software; you can redistribute it and/or modify +# it under the terms of the Revised BSD License; see LICENSE file for +# more details. + +[tox] +envlist = py26, py27, py33, py34 + +[testenv] +deps = pytest + pytest-cov + pytest-pep8 +commands = {envpython} setup.py test