diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81c2f84..01e5104 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,17 +11,17 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9' ] + python-version: [ '3.12' ] name: Python ${{ matrix.python-version }} sample steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} architecture: x64 - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: ${{ env.pythonLocation }} key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }} diff --git a/iiify/resolver.py b/iiify/resolver.py index e37347f..5b77977 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -22,7 +22,6 @@ bookdata = 'https://%s/BookReader/BookReaderJSON.php' bookreader = "https://%s/BookReader/BookReaderImages.php" URI_PRIFIX = "https://iiif.archive.org/iiif" - MAX_SCRAPE_LIMIT = 10_000 MAX_API_LIMIT = 1_000 @@ -30,6 +29,8 @@ class MaxLimitException(Exception): pass valid_filetypes = ['jpg', 'jpeg', 'png', 'gif', 'tif', 'jp2', 'pdf', 'tiff'] +AUDIO_FORMATS = ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten'] +VIDEO_FORMATS = ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime'] class IsCollection(Exception): # Used for when we need to raise to the route handler from inside the manifest function @@ -506,6 +507,35 @@ def addThumbnails(manifest, identifier, files): if thumbnails: manifest.thumbnail = thumbnails +def sortDerivatives(metadata, includeVtt=False): + """ + Sort the files into originals and derivatives, splitting the derivatives into buckets based on the original + """ + originals = [] + derivatives = {} + vttfiles = {} + for f in metadata['files']: + if f['source'] == 'derivative': + if f['original'] in derivatives and not isinstance(f['original'], list): + derivatives[f['original']][f['format']] = f + else: + derivatives[f['original']] = {f['format']: f} + elif f['source'] == 'original': + originals.append(f) + + if includeVtt and f['format'] == 'Web Video Text Tracks': + # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt + sourceFilename = re.sub(r'\.[a-zA-H-]*\.vtt', '', f['name']) + if sourceFilename not in vttfiles: + vttfiles[sourceFilename] = [] + + vttfiles[sourceFilename].append(f) + + if includeVtt: + return (originals, derivatives, vttfiles) + else: + return (originals, derivatives) + def create_manifest3(identifier, domain=None, page=None): # Get item metadata metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json() @@ -611,7 +641,6 @@ def create_manifest3(identifier, domain=None, page=None): count += 1 elif mediatype == 'image': (multiFile, format) = checkMultiItem(metadata) - print (f"Checking multiFile {multiFile} {format}") if multiFile: # Create multi file manifest pageCount = 0 @@ -637,19 +666,11 @@ def create_manifest3(identifier, domain=None, page=None): singleImage(metadata, identifier, manifest, uri) elif mediatype == 'audio' or mediatype == 'etree': # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original - originals = [] - derivatives = {} - for f in metadata['files']: - if f['source'] == 'derivative' and not isinstance(f['original'], list): - if f['original'] in derivatives: - derivatives[f['original']][f['format']] = f - else: - derivatives[f['original']] = {f['format']: f} - elif f['source'] == 'original': - originals.append(f) - + (originals, derivatives) = sortDerivatives(metadata) + # create the canvases for each original - for file in [f for f in originals if f['format'] in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]: + + for file in [f for f in originals if f['format'] in AUDIO_FORMATS]: normalised_id = file['name'].rsplit(".", 1)[0] slugged_id = normalised_id.replace(" ", "-") c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas" @@ -663,7 +684,7 @@ def create_manifest3(identifier, domain=None, page=None): if file['name'] in derivatives: body = Choice(items=[]) # add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734 - for format in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']: + for format in AUDIO_FORMATS: if format in derivatives[file['name']]: r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}", type='Sound', @@ -689,26 +710,7 @@ def create_manifest3(identifier, domain=None, page=None): manifest.add_item(c) elif mediatype == "movies": - # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original - originals = [] - derivatives = {} - vttfiles = {} - for f in metadata['files']: - if f['source'] == 'derivative': - if f['original'] in derivatives: - derivatives[f['original']][f['format']] = f - else: - derivatives[f['original']] = {f['format']: f} - elif f['source'] == 'original': - originals.append(f) - - if f['format'] == 'Web Video Text Tracks': - # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt - sourceFilename = re.sub(r'\.[a-zA-H-]*\.vtt', '', f['name']) - if sourceFilename not in vttfiles: - vttfiles[sourceFilename] = [] - - vttfiles[sourceFilename].append(f) + (originals, derivatives, vttfiles) = sortDerivatives(metadata, includeVtt=True) if 'access-restricted-item' in metadata['metadata'] and metadata['metadata']['access-restricted-item']: # this is a news item so has to be treated differently @@ -723,7 +725,7 @@ def create_manifest3(identifier, domain=None, page=None): filedata = file # create the canvases for each original - for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime']]: + for file in [f for f in originals if f['format'] in VIDEO_FORMATS]: normalised_id = file['name'].rsplit(".", 1)[0] slugged_id = normalised_id.replace(" ", "-") c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas" @@ -768,7 +770,7 @@ def create_manifest3(identifier, domain=None, page=None): manifest.add_item(c) else: # create the canvases for each original - for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: + for file in [f for f in originals if f['format'] in VIDEO_FORMATS]: normalised_id = file['name'].rsplit(".", 1)[0] slugged_id = normalised_id.replace(" ", "-") c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas" @@ -808,7 +810,7 @@ def create_manifest3(identifier, domain=None, page=None): if file['name'] in derivatives: body = Choice(items=[]) # add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734 - for format in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']: + for format in VIDEO_FORMATS: if format in derivatives[file['name']]: r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}", type='Video', diff --git a/setup.py b/setup.py index 8686524..d457c2e 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ """ setup.py ~~~~~~~~ - iiify IIIF2 web server + iiify IIIF web server :copyright: (c) 2015 by mek. :license: see LICENSE for more details. @@ -37,7 +37,7 @@ def find_version(*file_paths): setup( name='iiify', version=find_version("iiify", "__init__.py"), - description='An implementation of the IIIF Image API 2.0 Specification ', + description='An implementation of the IIIF Image API 2 and 3 Specification ', long_description=read('README.md'), classifiers=[ "Development Status :: 3 - Alpha", @@ -45,8 +45,7 @@ def find_version(*file_paths): "Intended Audience :: Developers", "Operating System :: OS Independent", "Programming Language :: Python", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.12", "Topic :: Internet :: WWW/HTTP", ], author='mek', diff --git a/tests/test_annotations.py b/tests/test_annotations.py index c7bde59..1c607f3 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -1,3 +1,5 @@ +import os + import unittest from flask.testing import FlaskClient from iiify.app import app @@ -5,6 +7,7 @@ class TestAnnotations(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" self.test_app = FlaskClient(app) def test_v3_manifest_has_annotations(self): diff --git a/tests/test_basic.py b/tests/test_basic.py index 0c40327..0e8f271 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,5 +1,4 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest from flask.testing import FlaskClient @@ -9,6 +8,7 @@ class TestBasic(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" self.test_app = FlaskClient(app) diff --git a/tests/test_cantaloupe_resolver.py b/tests/test_cantaloupe_resolver.py index e24f6cf..35a2a6f 100644 --- a/tests/test_cantaloupe_resolver.py +++ b/tests/test_cantaloupe_resolver.py @@ -1,11 +1,13 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest from iiify.resolver import cantaloupe_resolver class TestCantaloupeResolver(unittest.TestCase): + def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" + def test_single_image(self): cid = cantaloupe_resolver("img-8664_202009") self.assertEqual(cid, "img-8664_202009%2fIMG_8664.jpg") diff --git a/tests/test_collections.py b/tests/test_collections.py index a0c1667..518bccb 100644 --- a/tests/test_collections.py +++ b/tests/test_collections.py @@ -1,5 +1,4 @@ import os -os.environ["FLASK_ENV"] = "testing" import unittest from flask.testing import FlaskClient @@ -7,6 +6,7 @@ class TestCollections(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_ENV"] = "testing" self.test_app = FlaskClient(app) def test_v3_collection(self): diff --git a/tests/test_images.py b/tests/test_images.py index dfd65ce..3a6f3c3 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -1,5 +1,4 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest from flask.testing import FlaskClient @@ -8,6 +7,7 @@ class TestImages(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_ENV"] = "testing" self.test_app = FlaskClient(app) def test_v3_resolving(self): diff --git a/tests/test_linking.py b/tests/test_linking.py index edb326e..c8b16d4 100644 --- a/tests/test_linking.py +++ b/tests/test_linking.py @@ -1,5 +1,4 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest from flask.testing import FlaskClient @@ -8,6 +7,7 @@ class TestLinking(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" self.test_app = FlaskClient(app) def convertListToHash(self, items): diff --git a/tests/test_manifests.py b/tests/test_manifests.py index e2ff62d..c2797ca 100644 --- a/tests/test_manifests.py +++ b/tests/test_manifests.py @@ -1,5 +1,4 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest from flask.testing import FlaskClient @@ -8,6 +7,7 @@ class TestManifests(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" self.test_app = FlaskClient(app) def test_no_version(self): diff --git a/tests/test_manifests_v2.py b/tests/test_manifests_v2.py index b0ba8a1..a799100 100644 --- a/tests/test_manifests_v2.py +++ b/tests/test_manifests_v2.py @@ -1,5 +1,4 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest from flask.testing import FlaskClient @@ -8,6 +7,7 @@ class TestManifests(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" self.test_app = FlaskClient(app) def test_v2_image_manifest(self): diff --git a/tests/test_resolver.py b/tests/test_resolver.py index b14f327..6aa3032 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -1,10 +1,12 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest from iiify.resolver import purify_domain, collection, manifest_page class TestResolver(unittest.TestCase): + def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" + def test_purify(self): domain = purify_domain("https://example.org/iiif/") self.assertEqual(domain, "https://example.org/iiif/") diff --git a/tests/test_video.py b/tests/test_video.py index 07d46ae..6e454d6 100644 --- a/tests/test_video.py +++ b/tests/test_video.py @@ -1,5 +1,4 @@ import os -os.environ["FLASK_CACHE_DISABLE"] = "true" import unittest import math @@ -9,6 +8,7 @@ class TestVideo(unittest.TestCase): def setUp(self) -> None: + os.environ["FLASK_CACHE_DISABLE"] = "true" self.test_app = FlaskClient(app) def test_v3_single_video_manifest(self):