Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding video formats #94

Merged
merged 7 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.9' ]
python-version: [ '3.12' ]
name: Python ${{ matrix.python-version }} sample
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
architecture: x64

- uses: actions/cache@v2
- uses: actions/cache@v4
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
Expand Down
78 changes: 40 additions & 38 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
# URL templates for the BookReader endpoints. The %s placeholder is filled in
# elsewhere (presumably with a server host name -- confirm against the callers).
bookdata = 'https://%s/BookReader/BookReaderJSON.php'
bookreader = "https://%s/BookReader/BookReaderImages.php"
# Base URI used when building IIIF ids for this service (e.g. canvas ids of the
# form "<URI_PRIFIX>/<identifier>/<slug>/canvas").
# NOTE(review): "PRIFIX" looks like a misspelling of "PREFIX"; the name is left
# unchanged because other code refers to it.
URI_PRIFIX = "https://iiif.archive.org/iiif"

# Upper bounds on result counts.
# NOTE(review): the code that enforces these is not visible here; presumably
# exceeding one of them raises MaxLimitException -- confirm.
MAX_SCRAPE_LIMIT = 10_000
MAX_API_LIMIT = 1_000

class MaxLimitException(Exception):
    """Signal that a requested limit is larger than the allowed maximum.

    NOTE(review): the raising sites are not visible from here; presumably this
    is tied to MAX_SCRAPE_LIMIT / MAX_API_LIMIT -- confirm against the callers.
    """

# File extensions treated as valid file types.
# NOTE(review): where this list is consulted is not visible here.
valid_filetypes = ['jpg', 'jpeg', 'png', 'gif', 'tif', 'jp2', 'pdf', 'tiff']
# File 'format' values treated as audio. Used both to pick which original files
# get a canvas and, in this order, to list the derivative choices for a canvas,
# so the ordering of the entries matters.
AUDIO_FORMATS = ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']
# File 'format' values treated as video; used the same way as AUDIO_FORMATS
# (filtering originals and ordering derivative choices).
# NOTE(review): 'Cinepack' may be a misspelling of the 'Cinepak' codec name --
# verify against the format strings actually present in item metadata.
VIDEO_FORMATS = ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime']

class IsCollection(Exception):
# Used for when we need to raise to the route handler from inside the manifest function
Expand Down Expand Up @@ -506,6 +507,35 @@ def addThumbnails(manifest, identifier, files):
if thumbnails:
manifest.thumbnail = thumbnails

# Matches the trailing ".<label>.vtt" of a caption file name, where <label> is a
# language tag or similar marker made of letters and hyphens, e.g. ".en.vtt",
# ".pt-BR.vtt" or ".autogenerated.vtt". Anchored so only the suffix is removed.
_VTT_SUFFIX_RE = re.compile(r'\.[a-zA-Z-]*\.vtt$')


def sortDerivatives(metadata, includeVtt=False):
    """
    Sort the files into originals and derivatives, splitting the derivatives
    into buckets based on the original.

    :param metadata: item metadata dict; its 'files' entry is a list of file
        dicts carrying 'name', 'source', 'format' and, for derivatives,
        'original'. A missing or empty 'files' entry yields empty results.
    :param includeVtt: when true, also collect 'Web Video Text Tracks' files.
    :return: ``(originals, derivatives)`` or, when ``includeVtt`` is true,
        ``(originals, derivatives, vttfiles)`` where

        * ``originals`` is the list of file dicts whose source is 'original',
        * ``derivatives`` maps an original's file name to a dict of
          ``{format: file dict}`` for the derivatives made from it,
        * ``vttfiles`` maps a caption file's name with its ".<label>.vtt"
          suffix removed to the list of matching caption file dicts.
    """
    originals = []
    derivatives = {}
    vttfiles = {}
    for f in metadata.get('files') or []:
        source = f.get('source')
        if source == 'derivative':
            original = f.get('original')
            # A derivative may name several originals as a list. A list is
            # unhashable and cannot serve as a bucket key, so such files (and
            # ones with no 'original' at all) are left out of the buckets.
            if isinstance(original, str):
                derivatives.setdefault(original, {})[f['format']] = f
        elif source == 'original':
            originals.append(f)

        if includeVtt and f.get('format') == 'Web Video Text Tracks':
            # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
            sourceFilename = _VTT_SUFFIX_RE.sub('', f['name'])
            vttfiles.setdefault(sourceFilename, []).append(f)

    if includeVtt:
        return (originals, derivatives, vttfiles)
    return (originals, derivatives)

def create_manifest3(identifier, domain=None, page=None):
# Get item metadata
metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
Expand Down Expand Up @@ -611,7 +641,6 @@ def create_manifest3(identifier, domain=None, page=None):
count += 1
elif mediatype == 'image':
(multiFile, format) = checkMultiItem(metadata)
print (f"Checking multiFile {multiFile} {format}")
if multiFile:
# Create multi file manifest
pageCount = 0
Expand All @@ -637,19 +666,11 @@ def create_manifest3(identifier, domain=None, page=None):
singleImage(metadata, identifier, manifest, uri)
elif mediatype == 'audio' or mediatype == 'etree':
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
for f in metadata['files']:
if f['source'] == 'derivative' and not isinstance(f['original'], list):
if f['original'] in derivatives:
derivatives[f['original']][f['format']] = f
else:
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

(originals, derivatives) = sortDerivatives(metadata)

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]:

for file in [f for f in originals if f['format'] in AUDIO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand All @@ -663,7 +684,7 @@ def create_manifest3(identifier, domain=None, page=None):
if file['name'] in derivatives:
body = Choice(items=[])
# add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734
for format in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']:
for format in AUDIO_FORMATS:
if format in derivatives[file['name']]:
r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}",
type='Sound',
Expand All @@ -689,26 +710,7 @@ def create_manifest3(identifier, domain=None, page=None):
manifest.add_item(c)

elif mediatype == "movies":
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
vttfiles = {}
for f in metadata['files']:
if f['source'] == 'derivative':
if f['original'] in derivatives:
derivatives[f['original']][f['format']] = f
else:
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

if f['format'] == 'Web Video Text Tracks':
# Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
sourceFilename = re.sub(r'\.[a-zA-H-]*\.vtt', '', f['name'])
if sourceFilename not in vttfiles:
vttfiles[sourceFilename] = []

vttfiles[sourceFilename].append(f)
(originals, derivatives, vttfiles) = sortDerivatives(metadata, includeVtt=True)

if 'access-restricted-item' in metadata['metadata'] and metadata['metadata']['access-restricted-item']:
# this is a news item so has to be treated differently
Expand All @@ -723,7 +725,7 @@ def create_manifest3(identifier, domain=None, page=None):
filedata = file

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime']]:
for file in [f for f in originals if f['format'] in VIDEO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand Down Expand Up @@ -768,7 +770,7 @@ def create_manifest3(identifier, domain=None, page=None):
manifest.add_item(c)
else:
# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
for file in [f for f in originals if f['format'] in VIDEO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand Down Expand Up @@ -808,7 +810,7 @@ def create_manifest3(identifier, domain=None, page=None):
if file['name'] in derivatives:
body = Choice(items=[])
# add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734
for format in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']:
for format in VIDEO_FORMATS:
if format in derivatives[file['name']]:
r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}",
type='Video',
Expand Down
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""
setup.py
~~~~~~~~
iiify IIIF2 web server
iiify IIIF web server

:copyright: (c) 2015 by mek.
:license: see LICENSE for more details.
Expand Down Expand Up @@ -37,16 +37,15 @@ def find_version(*file_paths):
setup(
name='iiify',
version=find_version("iiify", "__init__.py"),
description='An implementation of the IIIF Image API 2.0 Specification ',
description='An implementation of the IIIF Image API 2 and 3 Specification ',
long_description=read('README.md'),
classifiers=[
"Development Status :: 3 - Alpha",
"Environment :: Web Environment",
"Intended Audience :: Developers",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.12",
"Topic :: Internet :: WWW/HTTP",
],
author='mek',
Expand Down
3 changes: 3 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os

import unittest
from flask.testing import FlaskClient
from iiify.app import app

class TestAnnotations(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_v3_manifest_has_annotations(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -9,6 +8,7 @@
class TestBasic(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)


Expand Down
4 changes: 3 additions & 1 deletion tests/test_cantaloupe_resolver.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from iiify.resolver import cantaloupe_resolver

class TestCantaloupeResolver(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"

def test_single_image(self):
cid = cantaloupe_resolver("img-8664_202009")
self.assertEqual(cid, "img-8664_202009%2fIMG_8664.jpg")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
os.environ["FLASK_ENV"] = "testing"

import unittest
from flask.testing import FlaskClient
from iiify.app import app
class TestCollections(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_ENV"] = "testing"
self.test_app = FlaskClient(app)

def test_v3_collection(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_images.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestImages(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_ENV"] = "testing"
self.test_app = FlaskClient(app)

def test_v3_resolving(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_linking.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestLinking(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def convertListToHash(self, items):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_manifests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestManifests(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_no_version(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_manifests_v2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestManifests(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_v2_image_manifest(self):
Expand Down
4 changes: 3 additions & 1 deletion tests/test_resolver.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from iiify.resolver import purify_domain, collection, manifest_page

class TestResolver(unittest.TestCase):
def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"

def test_purify(self):
domain = purify_domain("https://example.org/iiif/")
self.assertEqual(domain, "https://example.org/iiif/")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_video.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
import math
Expand All @@ -9,6 +8,7 @@
class TestVideo(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_v3_single_video_manifest(self):
Expand Down
Loading