From a8dba9eb2571862d1462681d932658200f4e55a6 Mon Sep 17 00:00:00 2001 From: Mek Date: Thu, 11 Apr 2024 14:07:53 -0400 Subject: [PATCH 1/5] moving labs API iiif catalog to prod service --- iiify/resolver.py | 79 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 7 deletions(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index 986c538..03a3253 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -3,13 +3,15 @@ import os import requests from iiif2 import iiif, web -from .configs import options, cors, approot, cache_root, media_root, apiurl +from .configs import options, cors, approot, cache_root, media_root from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef from urllib.parse import urlparse, parse_qs, quote import json import math import re +SCRAPE_API = 'https://archive.org/services/search/v1/scrape' +ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php?' IMG_CTX = 'http://iiif.io/api/image/2/context.json' PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json' ARCHIVE = 'http://archive.org' @@ -19,6 +21,9 @@ bookreader = "http://%s/BookReader/BookReaderImages.php" URI_PRIFIX = "https://iiif.archive.org/iiif" +class MaxLimitException(Exception): + pass + valid_filetypes = ['jpg', 'jpeg', 'png', 'gif', 'tif', 'jp2', 'pdf', 'tiff'] class IsCollection(Exception): @@ -29,14 +34,74 @@ def purify_domain(domain): domain = re.sub('^http:\/\/', "https://", domain) return domain if domain.endswith('/iiif/') else domain + 'iiif/' -def getids(q, limit=1000, cursor=''): - r = requests.get('%s/iiif' % apiurl, params={ - 'q': q, - 'limit': limit, - 'cursor': cursor - }, allow_redirects=True, timeout=None) +def getids(q, limit=1000, cursor='', page=1): + q = request.args.get('q', '') + query = "(mediatype:(texts) OR mediatype:(image))" + \ + ((" AND %s" % q) if q else "") + fields = request.args.get('fields', '') + sorts = request.args.get('sorts', '') + cursor = request.args.get('cursor', '') + version = 'v2' + + # 'all:1' also works + q = "NOT identifier:..*" + (" AND (%s)" % query if query else "") + if version == 'v2': + return scrape(query=q, fields=fields, sorts=sorts, count=limit, + cursor=cursor) + return search(q, page=page, limit=limit) + +def scrape(query, fields="", sorts="", count=100, cursor="", security=True): + """ + params: + query: the query (using the same query Lucene-like queries supported by Internet Archive Advanced Search. + fields: Metadata fields to return, comma delimited + sorts: Fields to sort on, comma delimited (if identifier is specified, it must be last) + count: Number of results to return (minimum of 100) + cursor: A cursor, if any (otherwise, search starts at the beginning) + """ + if not query: + raise ValueError("GET 'query' parameters required") + + if int(count) > 1000 and security: + raise MaxLimitException("Limit may not exceed 1000.") + + #sorts = sorts or 'date+asc,createdate' + fields = fields or 'identifier,title' + + params = { + 'q': query + } + if sorts: + params['sorts'] = sorts + if fields: + params['fields'] = fields + if count: + params['count'] = count + if cursor: + params['cursor'] = cursor + + r = requests.get(SCRAPE_API, params=params) return r.json() +def search(query, page=1, limit=100, security=True, sort=None, fields=None): + if not query: + raise ValueError("GET query parameters 'q' required") + + if int(limit) > 1000 and security: + raise MaxLimitException("Limit may not exceed 1000.") + + sort = sort or 'sort%5B%5D=date+asc&sort%5B%5D=createdate' + fields = fields or 'identifier,title' + return requests.get( + ADVANCED_SEARCH + sort, + params={'q': query, + 'rows': limit, + 'page': page, + 'fl[]': fields, + 'output': 'json', + }).json() + + def to_mimetype(format): formats = { "VBR MP3": "audio/mp3", From 7685d02a22a65fac10b30f549c45d5cbcb6e0867 Mon Sep 17 00:00:00 2001 From: "Michael E. Karpeles (Mek)" Date: Thu, 26 Sep 2024 10:00:35 -0700 Subject: [PATCH 2/5] fixing getids --- iiify/app.py | 7 ++++--- iiify/resolver.py | 14 +++----------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/iiify/app.py b/iiify/app.py index e78e60a..bdb4e54 100755 --- a/iiify/app.py +++ b/iiify/app.py @@ -56,10 +56,11 @@ def mainentry(): @app.route('/iiif/') def index(): """Lists all available book and image items on Archive.org""" - cursor = request.args.get('cursor', '') q = request.args.get('q', '') - return jsonify(getids(q, cursor=cursor)) - + fields = request.args.get('fields', '') + sorts = request.args.get('sorts', '') + cursor = request.args.get('cursor', '') + return jsonify(getids(q, cursor=cursor, fields=fields, sorts=sorts)) @app.route('/iiif/collection.json') diff --git a/iiify/resolver.py b/iiify/resolver.py index 6dcefcc..6f3503b 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -35,21 +35,13 @@ def purify_domain(domain): domain = re.sub('^http:\/\/', "https://", domain) return domain if domain.endswith('/iiif/') else domain + 'iiif/' -def getids(q, limit=1000, cursor='', page=1): - q = request.args.get('q', '') +def getids(q, limit=1000, cursor='', sorts='', fields=''): query = "(mediatype:(texts) OR mediatype:(image))" + \ ((" AND %s" % q) if q else "") - fields = request.args.get('fields', '') - sorts = request.args.get('sorts', '') - cursor = request.args.get('cursor', '') - version = 'v2' - # 'all:1' also works q = "NOT identifier:..*" + (" AND (%s)" % query if query else "") - if version == 'v2': - return scrape(query=q, fields=fields, sorts=sorts, count=limit, - cursor=cursor) - return search(q, page=page, limit=limit) + return scrape(query=q, fields=fields, sorts=sorts, count=limit, cursor=cursor) + def scrape(query, fields="", sorts="", count=100, cursor="", security=True): """ From a3c7e1ff0eff6b6ecd0e5e08600f94279dd4c50b Mon Sep 17 00:00:00 2001 From: Mek Date: Fri, 27 Sep 2024 15:41:44 -0400 Subject: [PATCH 3/5] use constants, tidy --- iiify/app.py | 2 +- iiify/resolver.py | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/iiify/app.py b/iiify/app.py index bdb4e54..54de092 100755 --- a/iiify/app.py +++ b/iiify/app.py @@ -57,9 +57,9 @@ def mainentry(): def index(): """Lists all available book and image items on Archive.org""" q = request.args.get('q', '') + cursor = request.args.get('cursor', '') fields = request.args.get('fields', '') sorts = request.args.get('sorts', '') - cursor = request.args.get('cursor', '') return jsonify(getids(q, cursor=cursor, fields=fields, sorts=sorts)) diff --git a/iiify/resolver.py b/iiify/resolver.py index 6f3503b..5ce77f8 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -12,7 +12,7 @@ import xml.etree.ElementTree as ET SCRAPE_API = 'https://archive.org/services/search/v1/scrape' -ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php?' +ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php' IMG_CTX = 'http://iiif.io/api/image/2/context.json' PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json' ARCHIVE = 'https://archive.org' @@ -22,6 +22,9 @@ bookreader = "http://%s/BookReader/BookReaderImages.php" URI_PRIFIX = "https://iiif.archive.org/iiif" +MAX_SCRAPE_LIMIT = 10_000 +MAX_API_LIMIT = 1_000 + class MaxLimitException(Exception): pass @@ -35,7 +38,7 @@ def purify_domain(domain): domain = re.sub('^http:\/\/', "https://", domain) return domain if domain.endswith('/iiif/') else domain + 'iiif/' -def getids(q, limit=1000, cursor='', sorts='', fields=''): +def getids(q, cursor='', sorts='', fields='', limit=MAX_API_LIMIT): query = "(mediatype:(texts) OR mediatype:(image))" + \ ((" AND %s" % q) if q else "") # 'all:1' also works @@ -55,10 +58,9 @@ def scrape(query, fields="", sorts="", count=100, cursor="", security=True): if not query: raise ValueError("GET 'query' parameters required") - if int(count) > 1000 and security: + if int(count) > MAX_API_LIMIT and security: raise MaxLimitException("Limit may not exceed 1000.") - #sorts = sorts or 'date+asc,createdate' fields = fields or 'identifier,title' params = { @@ -83,14 +85,13 @@ def search(query, page=1, limit=100, security=True, sort=None, fields=None): if int(limit) > 1000 and security: raise MaxLimitException("Limit may not exceed 1000.") - sort = sort or 'sort%5B%5D=date+asc&sort%5B%5D=createdate' - fields = fields or 'identifier,title' return requests.get( - ADVANCED_SEARCH + sort, + ADVANCED_SEARCH, params={'q': query, + 'sort[]': sort or ['date asc', 'createdate'], 'rows': limit, 'page': page, - 'fl[]': fields, + 'fl[]': fields or 'identifier,title', 'output': 'json', }).json() @@ -172,12 +173,12 @@ def create_collection3(identifier, domain, page=1, rows=1000): addMetadata(collection, identifier, metadata['metadata'], collection=True) - asURL = f'https://archive.org/advancedsearch.php?q=collection%3A{identifier}&fl[]=identifier&fl[]=mediatype&fl[]=title&fl[]=description&sort[]=&sort[]=&sort[]=&rows={rows}&page={page}&output=json&save=yes' + asURL = f'{ADVANCED_SEARCH}?q=collection%3A{identifier}&fl[]=identifier&fl[]=mediatype&fl[]=title&fl[]=description&sort[]=&sort[]=&sort[]=&rows={rows}&page={page}&output=json&save=yes' itemsSearch = requests.get(asURL).json() total = itemsSearch['response']['numFound'] # There is a max of 10,000 items that can be retrieved from the advanced search - if total > 10000: - total = 10000 + if total > MAX_SCRAPE_LIMIT: + total = MAX_SCRAPE_LIMIT if len(itemsSearch['response']['docs']) == 0: return None From 74d069a8854cfa2dbce16002c1f83d1baabe1357 Mon Sep 17 00:00:00 2001 From: Mek Date: Fri, 27 Sep 2024 15:45:01 -0400 Subject: [PATCH 4/5] fix getid call --- iiify/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iiify/app.py b/iiify/app.py index 54de092..16a72f1 100755 --- a/iiify/app.py +++ b/iiify/app.py @@ -65,10 +65,10 @@ def index(): @app.route('/iiif/collection.json') def catalog(): - cursor = request.args.get('cursor', '') q = request.args.get('q', '') + cursor = request.args.get('cursor', '') domain = purify_domain(request.args.get('domain', request.url_root)) - return ldjsonify(collection(domain, getids(q, limit, cursor)['ids'])) + return ldjsonify(collection(domain, getids(q, cursor=cursor)['ids'])) @app.route('/iiif/cache') From 9aa711cbc8be8a1620637111290e2891d7053da4 Mon Sep 17 00:00:00 2001 From: "Michael E. Karpeles (Mek)" Date: Sun, 29 Sep 2024 07:46:24 -0700 Subject: [PATCH 5/5] fixing collection.json --- iiify/app.py | 28 +++++++++------ iiify/resolver.py | 91 +++++++++++++++++++++-------------------------- 2 files changed, 59 insertions(+), 60 deletions(-) diff --git a/iiify/app.py b/iiify/app.py index 16a72f1..7064032 100755 --- a/iiify/app.py +++ b/iiify/app.py @@ -7,8 +7,9 @@ from flask_cors import CORS from flask_caching import Cache from iiif2 import iiif, web -from .resolver import ia_resolver, create_manifest, create_manifest3, getids, collection, \ - purify_domain, cantaloupe_resolver, create_collection3, IsCollection, create_annotations +from .resolver import ia_resolver, create_manifest, create_manifest3, scrape, \ + collection, purify_domain, cantaloupe_resolver, create_collection3, IsCollection, \ + create_annotations from .configs import options, cors, approot, cache_root, media_root, \ cache_expr, version, image_server, cache_timeouts from urllib.parse import quote @@ -60,15 +61,23 @@ def index(): cursor = request.args.get('cursor', '') fields = request.args.get('fields', '') sorts = request.args.get('sorts', '') - return jsonify(getids(q, cursor=cursor, fields=fields, sorts=sorts)) + r = scrape(q, cursor=cursor, fields=fields, sorts=sorts, restrict_to_iiif=True) + return jsonify(r) @app.route('/iiif/collection.json') def catalog(): q = request.args.get('q', '') cursor = request.args.get('cursor', '') + fields = request.args.get('fields', '') + sorts = request.args.get('sorts', '') domain = purify_domain(request.args.get('domain', request.url_root)) - return ldjsonify(collection(domain, getids(q, cursor=cursor)['ids'])) + identifiers = [ + i.get('identifier') for i in scrape( + q, cursor=cursor, fields=fields, sorts=sorts, restrict_to_iiif=True + ).get('items') + ] + return ldjsonify(collection(domain, identifiers)) @app.route('/iiif/cache') @@ -100,16 +109,16 @@ def helper(identifier): return render_template('helpers/image.html', identifier=identifier, cantaloupe_id=cantaloupe_id, esc_cantaloupe_id=esc_cantaloupe_id) except ValueError: abort(404) - + elif mediatype == "audio" or mediatype == "etree": return render_template('helpers/audio.html', identifier=identifier) elif mediatype == "movies": return render_template('helpers/movies.html', identifier=identifier) elif mediatype == "texts": return render_template('helpers/texts.html', identifier=identifier) - else: + else: return render_template('helpers/unknown.html', identifier=identifier) - + @app.route('/iiif/') def view(identifier): @@ -130,7 +139,7 @@ def view(identifier): @app.route('/iiif/3//collection.json') @cache.cached(timeout=cache_timeouts["med"], forced_update=cache_bust) -def collection3(identifier): +def collection3JSON(identifier): domain = purify_domain(request.args.get('domain', request.url_root)) try: @@ -165,7 +174,7 @@ def collection3page(identifier, page): @app.route('/iiif//collection.json') @cache.cached(timeout=cache_timeouts["long"], forced_update=cache_bust) -def collection(identifier): +def collectionJSON(identifier): return redirect(f'/iiif/3/{identifier}/collection.json', code=302) @@ -240,7 +249,6 @@ def add_header(response): def ldjsonify(data): j = jsonify(data) - # j.headers.set('Access-Control-Allow-Origin', '*') j.mimetype = "application/ld+json" return j diff --git a/iiify/resolver.py b/iiify/resolver.py index 5ce77f8..2ff8d6c 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -7,7 +7,7 @@ from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage,AnnotationPageRef, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef from urllib.parse import urlparse, parse_qs, quote import json -import math +import math import re import xml.etree.ElementTree as ET @@ -38,15 +38,8 @@ def purify_domain(domain): domain = re.sub('^http:\/\/', "https://", domain) return domain if domain.endswith('/iiif/') else domain + 'iiif/' -def getids(q, cursor='', sorts='', fields='', limit=MAX_API_LIMIT): - query = "(mediatype:(texts) OR mediatype:(image))" + \ - ((" AND %s" % q) if q else "") - # 'all:1' also works - q = "NOT identifier:..*" + (" AND (%s)" % query if query else "") - return scrape(query=q, fields=fields, sorts=sorts, count=limit, cursor=cursor) - -def scrape(query, fields="", sorts="", count=100, cursor="", security=True): +def scrape(query, fields="", sorts="", count=100, cursor="", restrict_to_iiif=False, security=True): """ params: query: the query (using the same query Lucene-like queries supported by Internet Archive Advanced Search. @@ -54,12 +47,15 @@ def scrape(query, fields="", sorts="", count=100, cursor="", security=True): sorts: Fields to sort on, comma delimited (if identifier is specified, it must be last) count: Number of results to return (minimum of 100) cursor: A cursor, if any (otherwise, search starts at the beginning) + restrict_to_iiif: restrict query to supported IIIF collections? + security: enforce API page limit """ - if not query: - raise ValueError("GET 'query' parameters required") + if restrict_to_iiif or not query: + _query = "(mediatype:(texts) OR mediatype:(image))" + query = f"{_query} AND {query}" if query else _query if int(count) > MAX_API_LIMIT and security: - raise MaxLimitException("Limit may not exceed 1000.") + raise MaxLimitException(f"Limit may not exceed {MAX_API_LIMIT}.") fields = fields or 'identifier,title' @@ -82,8 +78,8 @@ def search(query, page=1, limit=100, security=True, sort=None, fields=None): if not query: raise ValueError("GET query parameters 'q' required") - if int(limit) > 1000 and security: - raise MaxLimitException("Limit may not exceed 1000.") + if int(limit) > MAX_API_LIMIT and security: + raise MaxLimitException(f"Limit may not exceed {MAX_API_LIMIT}.") return requests.get( ADVANCED_SEARCH, @@ -95,7 +91,7 @@ def search(query, page=1, limit=100, security=True, sort=None, fields=None): 'output': 'json', }).json() -def checkMultiItem(metadata): +def checkMultiItem(metadata): # Maybe add call to book stack to see if that works first # Count the number of each original file @@ -106,12 +102,12 @@ def checkMultiItem(metadata): file_types[file['format']] = 0 file_types[file['format']] += 1 - #print (file_types) + #print (file_types) # If there is multiple files of the same type then return the first format # Will have to see if there are objects with multiple images and formats for format in file_types: - if file_types[format] > 1 and format.lower() in valid_filetypes: + if file_types[format] > 1 and format.lower() in valid_filetypes: return (True, format) return (False, None) @@ -140,7 +136,7 @@ def to_mimetype(format): "Cinepack": "video/x-msvideo", "AIFF": "audio/aiff", "Apple Lossless Audio": "audio/x-m4a", - "MPEG-4 Audio": "audio/mp4" + "MPEG-4 Audio": "audio/mp4" } return formats.get(format, "application/octet-stream") @@ -161,7 +157,7 @@ def collection(domain, identifiers, label='Custom Archive.org IIIF Collection'): }) return cs -def create_collection3(identifier, domain, page=1, rows=1000): +def create_collection3(identifier, domain, page=1, rows=MAX_API_LIMIT): # Get item metadata metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json() @@ -181,18 +177,18 @@ def create_collection3(identifier, domain, page=1, rows=1000): total = MAX_SCRAPE_LIMIT if len(itemsSearch['response']['docs']) == 0: - return None + return None pages = math.ceil(total / rows) for item in itemsSearch['response']['docs']: child = None if item['mediatype'] == 'collection': child = CollectionRef(id=f"{domain}{item['identifier']}/collection.json", type="Collection", label=item['title']) - else: + else: child = ManifestRef(id=f"{domain}{item['identifier']}/manifest.json", type="Manifest", label=item['title']) - + if "description" in item: - child.summary = {"none": [item['description']]} + child.summary = {"none": [item['description']]} collection.add_item(child) page += 1 @@ -201,7 +197,7 @@ def create_collection3(identifier, domain, page=1, rows=1000): collection.add_item(child) return json.loads(collection.jsonld()) - + def manifest_page(identifier, label='', page='', width='', height='', metadata=None, canvasId=""): if not canvasId: canvasId = f"{identifier}/canvas" @@ -310,7 +306,7 @@ def create_manifest(identifier, domain=None, page=None): 'itemPath': subPrefix, 'itemId': identifier }) - if r.status_code != 200: + if r.status_code != 200: # If the bookdata failed then treat as a single image fileName = "" for f in resp['files']: @@ -318,7 +314,7 @@ def create_manifest(identifier, domain=None, page=None): and f['source'].lower() == 'original' \ and 'thumb' not in f['name']: fileName = f['name'] - break + break if not fileName: # Original wasn't an image @@ -380,7 +376,7 @@ def singleImage(metadata, identifier, manifest, uri): and f['source'].lower() == 'original' \ and 'thumb' not in f['name']: fileName = f['name'] - break + break if not fileName: # Original wasn't an image @@ -390,12 +386,12 @@ def singleImage(metadata, identifier, manifest, uri): imgId = f"{identifier}/{fileName}".replace('/','%2f') imgURL = f"{IMG_SRV}/3/{imgId}" - + manifest.make_canvas_from_iiif(url=imgURL, id=f"{URI_PRIFIX}/{identifier}/canvas", label="1", anno_page_id=f"{uri}/annotationPage/1", - anno_id=f"{uri}/annotation/1") + anno_id=f"{uri}/annotation/1") def addMetadata(item, identifier, metadata, collection=False): item.homepage = [{"id": f"https://archive.org/details/{identifier}", @@ -430,7 +426,7 @@ def addMetadata(item, identifier, metadata, collection=False): excluded_fields = [ 'avg_rating', 'backup_location', 'btih', 'description', 'downloads', - 'imagecount', 'indexflag', 'item_size', 'licenseurl', 'curation', + 'imagecount', 'indexflag', 'item_size', 'licenseurl', 'curation', 'noindex', 'num_reviews', 'oai_updatedate', 'publicdate', 'publisher', 'reviewdate', 'scanningcentre', 'stripped_tags', 'uploader' ] @@ -536,7 +532,7 @@ def create_manifest3(identifier, domain=None, page=None): for fileMd in metadata['files']: if fileMd['name'].endswith('_scandata.xml'): subprefix = fileMd['name'].replace('_scandata.xml', '') - if fileMd['format'] == 'Djvu XML': + if fileMd['format'] == 'Djvu XML': djvuFile = fileMd['name'] bookReaderURL = f"https://{metadata.get('server')}/BookReader/BookReaderJSIA.php?id={identifier}&itemPath={metadata.get('dir')}&server={metadata.get('server')}&format=jsonp&subPrefix={subprefix}" @@ -577,7 +573,7 @@ def create_manifest3(identifier, domain=None, page=None): # id=f"https://iiif.archivelab.org/iiif/{identifier}${pageCount}/canvas", # label=f"{page['leafNum']}") pageCount += 1 - + # Setting logic for paging behavior and starting canvases # Start with paged (default) or individual behaviors @@ -609,7 +605,7 @@ def create_manifest3(identifier, domain=None, page=None): annotations.append( AnnotationPageRef(id=f"{domain}3/annotations/{identifier}/{quote(djvuFile, safe='()')}/{count}.json", type="AnnotationPage") - ) + ) canvas.annotations = annotations count += 1 elif mediatype == 'image': @@ -623,7 +619,7 @@ def create_manifest3(identifier, domain=None, page=None): imgId = f"{identifier}/{file['name']}".replace('/','%2f') imgURL = f"{IMG_SRV}/3/{imgId}" pageCount += 1 - + try: manifest.make_canvas_from_iiif(url=imgURL, id=f"{URI_PRIFIX}/{identifier}${pageCount}/canvas", @@ -709,10 +705,10 @@ def create_manifest3(identifier, domain=None, page=None): # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt sourceFilename = re.sub('\.[a-zA-H-]*\.vtt', '', f['name']) if sourceFilename not in vttfiles: - vttfiles[sourceFilename] = [] - - vttfiles[sourceFilename].append(f) - + vttfiles[sourceFilename] = [] + + vttfiles[sourceFilename].append(f) + # create the canvases for each original for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: normalised_id = file['name'].rsplit(".", 1)[0] @@ -726,9 +722,9 @@ def create_manifest3(identifier, domain=None, page=None): vttNo = 1 for vttFile in vttfiles[normalised_id]: - vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", - motivation="supplementing", - target=c.id, + vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", + motivation="supplementing", + target=c.id, anno_page_id=vttAPId, body={"id": f"{domain}resource/{identifier}/{vttFile['name']}", "type": "Text", @@ -760,9 +756,9 @@ def create_manifest3(identifier, domain=None, page=None): type='Video', format=to_mimetype(format), label={"none": [format]}, - duration=float(file['length']), + duration=float(file['length']), height=int(file['height']), - width=int(file['width']), + width=int(file['width']), ) body.items.append(r) elif file['format'] == format: @@ -931,7 +927,7 @@ def cantaloupe_resolver(identifier): # single image file - find the filename filename = None - for f in files: + for f in files: if valid_filetype(f['name']) \ and f['source'].lower() == 'original' \ and 'thumb' not in f['name']: @@ -954,7 +950,7 @@ def cantaloupe_resolver(identifier): filename = f['name'] fileIdentifier = filename[:-1 * len('_jp2.zip')] - # next look for any _jp2.zip that has a different name to the identifier + # next look for any _jp2.zip that has a different name to the identifier if not filename: for f in files: if f['name'].endswith('_jp2.zip'): @@ -975,13 +971,8 @@ def cantaloupe_resolver(identifier): fileIdentifier = filename[:-1 * len('_tif.zip')] extension = ".tif" - #filename = next(f for f in files if f['source'].lower() == 'derivative' \ - # and f['name'].endswith('_jp2.zip'))['name'] if filename: dirpath = filename[:-4] filepath = f"{fileIdentifier}_{leaf.zfill(4)}{extension}" return f"{identifier}%2f{filename}%2f{dirpath}%2f{filepath}" - # print (f'images not found for {identifier}') - # for f in files: - # print (f"source: {f['source'].lower()} name: {f['name']} and {f['source'].lower() == 'derivative'} {f['name'].endswith('_jp2.zip')}")