Skip to content

Commit

Permalink
Adding extra video formats and refactoring Audio format support
Browse files Browse the repository at this point in the history
  • Loading branch information
glenrobson committed Dec 5, 2024
1 parent b6c6ef4 commit 3acd3fb
Showing 1 changed file with 40 additions and 38 deletions.
78 changes: 40 additions & 38 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
# URL templates for the BookReader endpoints; the single %s placeholder is
# filled in by callers (presumably with a server host name -- confirm at the
# call sites).
bookdata = "https://%s/BookReader/BookReaderJSON.php"
bookreader = "https://%s/BookReader/BookReaderImages.php"

# Base URI used when minting IIIF ids, e.g. "<URI_PRIFIX>/<identifier>/<slug>/canvas".
# NOTE: the name is spelled "PRIFIX" (sic); it is kept because other code in
# this module references it under that name.
URI_PRIFIX = "https://iiif.archive.org/iiif"

# Upper bounds on result counts. NOTE(review): presumably for the scrape and
# regular search APIs respectively -- the usage is not visible here, confirm.
MAX_SCRAPE_LIMIT = 10_000
MAX_API_LIMIT = 1_000

class MaxLimitException(Exception):
    """Module-specific exception type; adds nothing to ``Exception``.

    NOTE(review): the raise sites are not visible here. Presumably this
    signals that a requested limit exceeds MAX_SCRAPE_LIMIT or
    MAX_API_LIMIT -- confirm against the callers.
    """

# File extensions accepted as image/document file types.
valid_filetypes = [
    'jpg',
    'jpeg',
    'png',
    'gif',
    'tif',
    'jp2',
    'pdf',
    'tiff',
]

# Archive.org file "format" values treated as audio. The manifest builder
# uses this list both to pick which originals get a canvas and as the order
# in which derivative choices are added, so the ordering is significant.
AUDIO_FORMATS = [
    'VBR MP3',
    '32Kbps MP3',
    '56Kbps MP3',
    '64Kbps MP3',
    '96Kbps MP3',
    '128Kbps MP3',
    'MPEG-4 Audio',
    'Flac',
    'AIFF',
    'Apple Lossless Audio',
    'Ogg Vorbis',
    'WAVE',
    '24bit Flac',
    'Shorten',
]

# Archive.org file "format" values treated as video. As with AUDIO_FORMATS,
# the list doubles as the preference order for derivative choices.
VIDEO_FORMATS = [
    'MPEG4',
    'h.264 HD',
    'h.264 MPEG4',
    '512Kb MPEG4',
    'HiRes MPEG4',
    'MPEG2',
    'h.264',
    'Matroska',
    'Ogg Video',
    'Ogg Theora',
    'WebM',
    'Windows Media',
    'Cinepack',
    'QuickTime',
]

class IsCollection(Exception):
# Used for when we need to raise to the route handler from inside the manifest function
Expand Down Expand Up @@ -506,6 +507,35 @@ def addThumbnails(manifest, identifier, files):
if thumbnails:
manifest.thumbnail = thumbnails

def sortDerivatives(metadata, includeVtt=False):
    """
    Sort the files into originals and derivatives, splitting the derivatives into buckets based on the original.

    Args:
        metadata: item metadata dict; only ``metadata['files']`` is read.
            Every file entry must provide 'source' (and 'format' whenever it
            is a derivative or includeVtt is set); derivative entries must
            also provide 'original', and 'name' is read for WebVTT files.
        includeVtt: when True, additionally group files whose format is
            'Web Video Text Tracks' by the file name they belong to.

    Returns:
        ``(originals, derivatives)`` or, when includeVtt is True,
        ``(originals, derivatives, vttfiles)`` where

        * originals is the list of file entries with source 'original',
        * derivatives maps an original's file name to ``{format: file entry}``,
        * vttfiles maps the VTT name with its ``.<tag>.vtt`` suffix removed to
          the list of matching VTT file entries.
    """
    originals = []
    derivatives = {}
    vttfiles = {}
    for f in metadata['files']:
        if f['source'] == 'derivative':
            original = f['original']
            # A derivative may list several originals. A list is unhashable,
            # so it can neither be looked up in nor used as a key of
            # `derivatives`; skip such entries instead of raising TypeError.
            if not isinstance(original, list):
                derivatives.setdefault(original, {})[f['format']] = f
        elif f['source'] == 'original':
            originals.append(f)

        # Checked for every file regardless of its source.
        if includeVtt and f['format'] == 'Web Video Text Tracks':
            # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
            # NOTE(review): the character class ends at 'H' ('A-H'), which
            # looks like a typo for 'A-Z'; left unchanged pending confirmation.
            sourceFilename = re.sub(r'\.[a-zA-H-]*\.vtt', '', f['name'])
            vttfiles.setdefault(sourceFilename, []).append(f)

    if includeVtt:
        return (originals, derivatives, vttfiles)
    return (originals, derivatives)

def create_manifest3(identifier, domain=None, page=None):
# Get item metadata
metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
Expand Down Expand Up @@ -611,7 +641,6 @@ def create_manifest3(identifier, domain=None, page=None):
count += 1
elif mediatype == 'image':
(multiFile, format) = checkMultiItem(metadata)
print (f"Checking multiFile {multiFile} {format}")
if multiFile:
# Create multi file manifest
pageCount = 0
Expand All @@ -637,19 +666,11 @@ def create_manifest3(identifier, domain=None, page=None):
singleImage(metadata, identifier, manifest, uri)
elif mediatype == 'audio' or mediatype == 'etree':
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
for f in metadata['files']:
if f['source'] == 'derivative' and not isinstance(f['original'], list):
if f['original'] in derivatives:
derivatives[f['original']][f['format']] = f
else:
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

(originals, derivatives) = sortDerivatives(metadata)

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]:

for file in [f for f in originals if f['format'] in AUDIO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand All @@ -663,7 +684,7 @@ def create_manifest3(identifier, domain=None, page=None):
if file['name'] in derivatives:
body = Choice(items=[])
# add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734
for format in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']:
for format in AUDIO_FORMATS:
if format in derivatives[file['name']]:
r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}",
type='Sound',
Expand All @@ -689,26 +710,7 @@ def create_manifest3(identifier, domain=None, page=None):
manifest.add_item(c)

elif mediatype == "movies":
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
vttfiles = {}
for f in metadata['files']:
if f['source'] == 'derivative':
if f['original'] in derivatives:
derivatives[f['original']][f['format']] = f
else:
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

if f['format'] == 'Web Video Text Tracks':
# Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
sourceFilename = re.sub(r'\.[a-zA-H-]*\.vtt', '', f['name'])
if sourceFilename not in vttfiles:
vttfiles[sourceFilename] = []

vttfiles[sourceFilename].append(f)
(originals, derivatives, vttfiles) = sortDerivatives(metadata, includeVtt=True)

if 'access-restricted-item' in metadata['metadata'] and metadata['metadata']['access-restricted-item']:
# this is a news item so has to be treated differently
Expand All @@ -723,7 +725,7 @@ def create_manifest3(identifier, domain=None, page=None):
filedata = file

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime']]:
for file in [f for f in originals if f['format'] in VIDEO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand Down Expand Up @@ -768,7 +770,7 @@ def create_manifest3(identifier, domain=None, page=None):
manifest.add_item(c)
else:
# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
for file in [f for f in originals if f['format'] in VIDEO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand Down Expand Up @@ -808,7 +810,7 @@ def create_manifest3(identifier, domain=None, page=None):
if file['name'] in derivatives:
body = Choice(items=[])
# add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734
for format in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']:
for format in VIDEO_FORMATS:
if format in derivatives[file['name']]:
r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}",
type='Video',
Expand Down

0 comments on commit 3acd3fb

Please sign in to comment.