From a8dba9eb2571862d1462681d932658200f4e55a6 Mon Sep 17 00:00:00 2001
From: Mek <michael.karpeles@gmail.com>
Date: Thu, 11 Apr 2024 14:07:53 -0400
Subject: [PATCH 1/5] moving labs API iiif catalog to prod service

---
 iiify/resolver.py | 79 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 7 deletions(-)

diff --git a/iiify/resolver.py b/iiify/resolver.py
index 986c538..03a3253 100644
--- a/iiify/resolver.py
+++ b/iiify/resolver.py
@@ -3,13 +3,15 @@
 import os
 import requests
 from iiif2 import iiif, web
-from .configs import options, cors, approot, cache_root, media_root, apiurl
+from .configs import options, cors, approot, cache_root, media_root
 from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef
 from urllib.parse import urlparse, parse_qs, quote
 import json
 import math 
 import re
 
+SCRAPE_API = 'https://archive.org/services/search/v1/scrape'
+ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php?'
 IMG_CTX = 'http://iiif.io/api/image/2/context.json'
 PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
 ARCHIVE = 'http://archive.org'
@@ -19,6 +21,9 @@
 bookreader = "http://%s/BookReader/BookReaderImages.php"
 URI_PRIFIX = "https://iiif.archive.org/iiif"
 
+class MaxLimitException(Exception):
+    pass
+
 valid_filetypes = ['jpg', 'jpeg', 'png', 'gif', 'tif', 'jp2', 'pdf', 'tiff']
 
 class IsCollection(Exception):
@@ -29,14 +34,74 @@ def purify_domain(domain):
     domain = re.sub('^http:\/\/', "https://", domain)
     return domain if domain.endswith('/iiif/') else domain + 'iiif/'
 
-def getids(q, limit=1000, cursor=''):
-    r = requests.get('%s/iiif' % apiurl, params={
-        'q': q,
-        'limit': limit,
-        'cursor': cursor
-    }, allow_redirects=True, timeout=None)
+def getids(q, limit=1000, cursor='', page=1):
+        q = request.args.get('q', '')
+        query = "(mediatype:(texts) OR mediatype:(image))" + \
+                ((" AND %s" % q) if q else "")
+        fields = request.args.get('fields', '')
+        sorts = request.args.get('sorts', '')
+        cursor = request.args.get('cursor', '')
+        version = 'v2'
+
+        # 'all:1' also works
+        q = "NOT identifier:..*" + (" AND (%s)" % query if query else "")
+        if version == 'v2':
+            return scrape(query=q, fields=fields, sorts=sorts, count=limit,
+                        cursor=cursor)
+        return search(q, page=page, limit=limit)
+
+def scrape(query, fields="", sorts="", count=100, cursor="", security=True):
+    """
+    params:
+        query: the query (using the same query Lucene-like queries supported by Internet Archive Advanced Search.
+        fields: Metadata fields to return, comma delimited
+        sorts: Fields to sort on, comma delimited (if identifier is specified, it must be last)
+        count: Number of results to return (minimum of 100)
+        cursor: A cursor, if any (otherwise, search starts at the beginning)
+    """
+    if not query:
+        raise ValueError("GET 'query' parameters required")
+
+    if int(count) > 1000 and security:
+        raise MaxLimitException("Limit may not exceed 1000.")
+
+    #sorts = sorts or 'date+asc,createdate'
+    fields = fields or 'identifier,title'
+
+    params = {
+        'q': query
+    }
+    if sorts:
+        params['sorts'] = sorts
+    if fields:
+        params['fields'] = fields
+    if count:
+        params['count'] = count
+    if cursor:
+        params['cursor'] = cursor
+
+    r = requests.get(SCRAPE_API, params=params)
     return r.json()
 
+def search(query, page=1, limit=100, security=True, sort=None, fields=None):
+    if not query:
+        raise ValueError("GET query parameters 'q' required")
+
+    if int(limit) > 1000 and security:
+        raise MaxLimitException("Limit may not exceed 1000.")
+
+    sort = sort or 'sort%5B%5D=date+asc&sort%5B%5D=createdate'
+    fields = fields or 'identifier,title'
+    return requests.get(
+        ADVANCED_SEARCH + sort,
+        params={'q': query,
+                'rows': limit,
+                'page': page,
+                'fl[]': fields,
+                'output': 'json',
+            }).json()
+
+
 def to_mimetype(format):
     formats = {
         "VBR MP3": "audio/mp3",

From 7685d02a22a65fac10b30f549c45d5cbcb6e0867 Mon Sep 17 00:00:00 2001
From: "Michael E. Karpeles (Mek)" <michael.karpeles@gmail.com>
Date: Thu, 26 Sep 2024 10:00:35 -0700
Subject: [PATCH 2/5] fixing getids

---
 iiify/app.py      |  7 ++++---
 iiify/resolver.py | 14 +++-----------
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/iiify/app.py b/iiify/app.py
index e78e60a..bdb4e54 100755
--- a/iiify/app.py
+++ b/iiify/app.py
@@ -56,10 +56,11 @@ def mainentry():
 @app.route('/iiif/')
 def index():
     """Lists all available book and image items on Archive.org"""
-    cursor = request.args.get('cursor', '')
     q = request.args.get('q', '')
-    return jsonify(getids(q, cursor=cursor))
-
+    fields = request.args.get('fields', '')
+    sorts = request.args.get('sorts', '')
+    cursor = request.args.get('cursor', '')
+    return jsonify(getids(q, cursor=cursor, fields=fields, sorts=sorts))
 
 
 @app.route('/iiif/collection.json')
diff --git a/iiify/resolver.py b/iiify/resolver.py
index 6dcefcc..6f3503b 100644
--- a/iiify/resolver.py
+++ b/iiify/resolver.py
@@ -35,21 +35,13 @@ def purify_domain(domain):
     domain = re.sub('^http:\/\/', "https://", domain)
     return domain if domain.endswith('/iiif/') else domain + 'iiif/'
 
-def getids(q, limit=1000, cursor='', page=1):
-        q = request.args.get('q', '')
+def getids(q, limit=1000, cursor='', sorts='', fields=''):
         query = "(mediatype:(texts) OR mediatype:(image))" + \
                 ((" AND %s" % q) if q else "")
-        fields = request.args.get('fields', '')
-        sorts = request.args.get('sorts', '')
-        cursor = request.args.get('cursor', '')
-        version = 'v2'
-
         # 'all:1' also works
         q = "NOT identifier:..*" + (" AND (%s)" % query if query else "")
-        if version == 'v2':
-            return scrape(query=q, fields=fields, sorts=sorts, count=limit,
-                        cursor=cursor)
-        return search(q, page=page, limit=limit)
+        return scrape(query=q, fields=fields, sorts=sorts, count=limit, cursor=cursor)
+
 
 def scrape(query, fields="", sorts="", count=100, cursor="", security=True):
     """

From a3c7e1ff0eff6b6ecd0e5e08600f94279dd4c50b Mon Sep 17 00:00:00 2001
From: Mek <michael.karpeles@gmail.com>
Date: Fri, 27 Sep 2024 15:41:44 -0400
Subject: [PATCH 3/5] use constants, tidy

---
 iiify/app.py      |  2 +-
 iiify/resolver.py | 23 ++++++++++++-----------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/iiify/app.py b/iiify/app.py
index bdb4e54..54de092 100755
--- a/iiify/app.py
+++ b/iiify/app.py
@@ -57,9 +57,9 @@ def mainentry():
 def index():
     """Lists all available book and image items on Archive.org"""
     q = request.args.get('q', '')
+    cursor = request.args.get('cursor', '')
     fields = request.args.get('fields', '')
     sorts = request.args.get('sorts', '')
-    cursor = request.args.get('cursor', '')
     return jsonify(getids(q, cursor=cursor, fields=fields, sorts=sorts))
 
 
diff --git a/iiify/resolver.py b/iiify/resolver.py
index 6f3503b..5ce77f8 100644
--- a/iiify/resolver.py
+++ b/iiify/resolver.py
@@ -12,7 +12,7 @@
 import xml.etree.ElementTree as ET
 
 SCRAPE_API = 'https://archive.org/services/search/v1/scrape'
-ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php?'
+ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php'
 IMG_CTX = 'http://iiif.io/api/image/2/context.json'
 PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
 ARCHIVE = 'https://archive.org'
@@ -22,6 +22,9 @@
 bookreader = "http://%s/BookReader/BookReaderImages.php"
 URI_PRIFIX = "https://iiif.archive.org/iiif"
 
+MAX_SCRAPE_LIMIT = 10_000
+MAX_API_LIMIT = 1_000
+
 class MaxLimitException(Exception):
     pass
 
@@ -35,7 +38,7 @@ def purify_domain(domain):
     domain = re.sub('^http:\/\/', "https://", domain)
     return domain if domain.endswith('/iiif/') else domain + 'iiif/'
 
-def getids(q, limit=1000, cursor='', sorts='', fields=''):
+def getids(q, cursor='', sorts='', fields='', limit=MAX_API_LIMIT):
         query = "(mediatype:(texts) OR mediatype:(image))" + \
                 ((" AND %s" % q) if q else "")
         # 'all:1' also works
@@ -55,10 +58,9 @@ def scrape(query, fields="", sorts="", count=100, cursor="", security=True):
     if not query:
         raise ValueError("GET 'query' parameters required")
 
-    if int(count) > 1000 and security:
+    if int(count) > MAX_API_LIMIT and security:
         raise MaxLimitException("Limit may not exceed 1000.")
 
-    #sorts = sorts or 'date+asc,createdate'
     fields = fields or 'identifier,title'
 
     params = {
@@ -83,14 +85,13 @@ def search(query, page=1, limit=100, security=True, sort=None, fields=None):
     if int(limit) > 1000 and security:
         raise MaxLimitException("Limit may not exceed 1000.")
 
-    sort = sort or 'sort%5B%5D=date+asc&sort%5B%5D=createdate'
-    fields = fields or 'identifier,title'
     return requests.get(
-        ADVANCED_SEARCH + sort,
+        ADVANCED_SEARCH,
         params={'q': query,
+                'sort[]': sort or ['date asc', 'createdate'],
                 'rows': limit,
                 'page': page,
-                'fl[]': fields,
+                'fl[]': fields or 'identifier,title',
                 'output': 'json',
             }).json()
 
@@ -172,12 +173,12 @@ def create_collection3(identifier, domain, page=1, rows=1000):
 
     addMetadata(collection, identifier, metadata['metadata'], collection=True)
 
-    asURL = f'https://archive.org/advancedsearch.php?q=collection%3A{identifier}&fl[]=identifier&fl[]=mediatype&fl[]=title&fl[]=description&sort[]=&sort[]=&sort[]=&rows={rows}&page={page}&output=json&save=yes'
+    asURL = f'{ADVANCED_SEARCH}?q=collection%3A{identifier}&fl[]=identifier&fl[]=mediatype&fl[]=title&fl[]=description&sort[]=&sort[]=&sort[]=&rows={rows}&page={page}&output=json&save=yes'
     itemsSearch = requests.get(asURL).json()
     total = itemsSearch['response']['numFound']
     # There is a max of 10,000 items that can be retrieved from the advanced search
-    if total > 10000:
-        total = 10000
+    if total > MAX_SCRAPE_LIMIT:
+        total = MAX_SCRAPE_LIMIT
 
     if len(itemsSearch['response']['docs']) == 0:
         return None 

From 74d069a8854cfa2dbce16002c1f83d1baabe1357 Mon Sep 17 00:00:00 2001
From: Mek <michael.karpeles@gmail.com>
Date: Fri, 27 Sep 2024 15:45:01 -0400
Subject: [PATCH 4/5] fix getids call

---
 iiify/app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/iiify/app.py b/iiify/app.py
index 54de092..16a72f1 100755
--- a/iiify/app.py
+++ b/iiify/app.py
@@ -65,10 +65,10 @@ def index():
 
 @app.route('/iiif/collection.json')
 def catalog():
-    cursor = request.args.get('cursor', '')
     q = request.args.get('q', '')
+    cursor = request.args.get('cursor', '')
     domain = purify_domain(request.args.get('domain', request.url_root))
-    return ldjsonify(collection(domain, getids(q, limit, cursor)['ids']))
+    return ldjsonify(collection(domain, getids(q, cursor=cursor)['ids']))
 
 
 @app.route('/iiif/cache')

From 9aa711cbc8be8a1620637111290e2891d7053da4 Mon Sep 17 00:00:00 2001
From: "Michael E. Karpeles (Mek)" <michael.karpeles@gmail.com>
Date: Sun, 29 Sep 2024 07:46:24 -0700
Subject: [PATCH 5/5] fixing collection.json

---
 iiify/app.py      | 28 +++++++++------
 iiify/resolver.py | 91 +++++++++++++++++++++--------------------------
 2 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/iiify/app.py b/iiify/app.py
index 16a72f1..7064032 100755
--- a/iiify/app.py
+++ b/iiify/app.py
@@ -7,8 +7,9 @@
 from flask_cors import CORS
 from flask_caching import Cache
 from iiif2 import iiif, web
-from .resolver import ia_resolver, create_manifest, create_manifest3, getids, collection, \
-    purify_domain, cantaloupe_resolver, create_collection3, IsCollection, create_annotations
+from .resolver import ia_resolver, create_manifest, create_manifest3, scrape, \
+    collection, purify_domain, cantaloupe_resolver, create_collection3, IsCollection, \
+    create_annotations
 from .configs import options, cors, approot, cache_root, media_root, \
     cache_expr, version, image_server, cache_timeouts
 from urllib.parse import quote
@@ -60,15 +61,23 @@ def index():
     cursor = request.args.get('cursor', '')
     fields = request.args.get('fields', '')
     sorts = request.args.get('sorts', '')
-    return jsonify(getids(q, cursor=cursor, fields=fields, sorts=sorts))
+    r = scrape(q, cursor=cursor, fields=fields, sorts=sorts, restrict_to_iiif=True)
+    return jsonify(r)
 
 
 @app.route('/iiif/collection.json')
 def catalog():
     q = request.args.get('q', '')
     cursor = request.args.get('cursor', '')
+    fields = request.args.get('fields', '')
+    sorts = request.args.get('sorts', '')
     domain = purify_domain(request.args.get('domain', request.url_root))
-    return ldjsonify(collection(domain, getids(q, cursor=cursor)['ids']))
+    identifiers = [
+        i.get('identifier') for i in scrape(
+            q, cursor=cursor, fields=fields, sorts=sorts, restrict_to_iiif=True
+        ).get('items', [])
+    ]
+    return ldjsonify(collection(domain, identifiers))
 
 
 @app.route('/iiif/cache')
@@ -100,16 +109,16 @@ def helper(identifier):
             return render_template('helpers/image.html', identifier=identifier, cantaloupe_id=cantaloupe_id, esc_cantaloupe_id=esc_cantaloupe_id)
         except ValueError:
             abort(404)
-        
+
     elif mediatype == "audio" or mediatype == "etree":
         return render_template('helpers/audio.html', identifier=identifier)
     elif mediatype == "movies":
         return render_template('helpers/movies.html', identifier=identifier)
     elif mediatype == "texts":
         return render_template('helpers/texts.html', identifier=identifier)
-    else: 
+    else:
         return render_template('helpers/unknown.html', identifier=identifier)
-         
+
 
 @app.route('/iiif/<identifier>')
 def view(identifier):
@@ -130,7 +139,7 @@ def view(identifier):
 
 @app.route('/iiif/3/<identifier>/collection.json')
 @cache.cached(timeout=cache_timeouts["med"], forced_update=cache_bust)
-def collection3(identifier):
+def collection3JSON(identifier):
     domain = purify_domain(request.args.get('domain', request.url_root))
 
     try:
@@ -165,7 +174,7 @@ def collection3page(identifier, page):
 
 @app.route('/iiif/<identifier>/collection.json')
 @cache.cached(timeout=cache_timeouts["long"], forced_update=cache_bust)
-def collection(identifier):
+def collectionJSON(identifier):
     return redirect(f'/iiif/3/{identifier}/collection.json', code=302)
 
 
@@ -240,7 +249,6 @@ def add_header(response):
 
 def ldjsonify(data):
     j = jsonify(data)
-    # j.headers.set('Access-Control-Allow-Origin', '*')
     j.mimetype = "application/ld+json"
     return j
 
diff --git a/iiify/resolver.py b/iiify/resolver.py
index 5ce77f8..2ff8d6c 100644
--- a/iiify/resolver.py
+++ b/iiify/resolver.py
@@ -7,7 +7,7 @@
 from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage,AnnotationPageRef, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef
 from urllib.parse import urlparse, parse_qs, quote
 import json
-import math 
+import math
 import re
 import xml.etree.ElementTree as ET
 
@@ -38,15 +38,8 @@ def purify_domain(domain):
     domain = re.sub('^http:\/\/', "https://", domain)
     return domain if domain.endswith('/iiif/') else domain + 'iiif/'
 
-def getids(q, cursor='', sorts='', fields='', limit=MAX_API_LIMIT):
-        query = "(mediatype:(texts) OR mediatype:(image))" + \
-                ((" AND %s" % q) if q else "")
-        # 'all:1' also works
-        q = "NOT identifier:..*" + (" AND (%s)" % query if query else "")
-        return scrape(query=q, fields=fields, sorts=sorts, count=limit, cursor=cursor)
 
-
-def scrape(query, fields="", sorts="", count=100, cursor="", security=True):
+def scrape(query, fields="", sorts="", count=100, cursor="", restrict_to_iiif=False, security=True):
     """
     params:
         query: the query (using the same query Lucene-like queries supported by Internet Archive Advanced Search.
@@ -54,12 +47,15 @@ def scrape(query, fields="", sorts="", count=100, cursor="", security=True):
         sorts: Fields to sort on, comma delimited (if identifier is specified, it must be last)
         count: Number of results to return (minimum of 100)
         cursor: A cursor, if any (otherwise, search starts at the beginning)
+        restrict_to_iiif: limit results to mediatype texts/image (always applied when query is empty)
+        security: enforce the MAX_API_LIMIT cap on count
     """
-    if not query:
-        raise ValueError("GET 'query' parameters required")
+    if restrict_to_iiif or not query:
+        _query = "(mediatype:(texts) OR mediatype:(image))"
+        query = f"{_query} AND ({query})" if query else _query
 
     if int(count) > MAX_API_LIMIT and security:
-        raise MaxLimitException("Limit may not exceed 1000.")
+        raise MaxLimitException(f"Limit may not exceed {MAX_API_LIMIT}.")
 
     fields = fields or 'identifier,title'
 
@@ -82,8 +78,8 @@ def search(query, page=1, limit=100, security=True, sort=None, fields=None):
     if not query:
         raise ValueError("GET query parameters 'q' required")
 
-    if int(limit) > 1000 and security:
-        raise MaxLimitException("Limit may not exceed 1000.")
+    if int(limit) > MAX_API_LIMIT and security:
+        raise MaxLimitException(f"Limit may not exceed {MAX_API_LIMIT}.")
 
     return requests.get(
         ADVANCED_SEARCH,
@@ -95,7 +91,7 @@ def search(query, page=1, limit=100, security=True, sort=None, fields=None):
                 'output': 'json',
             }).json()
 
-def checkMultiItem(metadata):    
+def checkMultiItem(metadata):
     # Maybe add call to book stack to see if that works first
 
     # Count the number of each original file
@@ -106,12 +102,12 @@ def checkMultiItem(metadata):
                 file_types[file['format']] = 0
 
             file_types[file['format']] += 1
-    #print (file_types)        
+    #print (file_types)
 
     # If there is multiple files of the same type then return the first format
     # Will have to see if there are objects with multiple images and formats
     for format in file_types:
-        if file_types[format] > 1 and format.lower() in valid_filetypes:        
+        if file_types[format] > 1 and format.lower() in valid_filetypes:
             return (True, format)
 
     return (False, None)
@@ -140,7 +136,7 @@ def to_mimetype(format):
         "Cinepack": "video/x-msvideo",
         "AIFF": "audio/aiff",
         "Apple Lossless Audio": "audio/x-m4a",
-        "MPEG-4 Audio": "audio/mp4" 
+        "MPEG-4 Audio": "audio/mp4"
     }
     return formats.get(format, "application/octet-stream")
 
@@ -161,7 +157,7 @@ def collection(domain, identifiers, label='Custom Archive.org IIIF Collection'):
         })
     return cs
 
-def create_collection3(identifier, domain, page=1, rows=1000):
+def create_collection3(identifier, domain, page=1, rows=MAX_API_LIMIT):
     # Get item metadata
     metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
 
@@ -181,18 +177,18 @@ def create_collection3(identifier, domain, page=1, rows=1000):
         total = MAX_SCRAPE_LIMIT
 
     if len(itemsSearch['response']['docs']) == 0:
-        return None 
+        return None
 
     pages = math.ceil(total / rows)
     for item in itemsSearch['response']['docs']:
         child = None
         if item['mediatype'] == 'collection':
             child = CollectionRef(id=f"{domain}{item['identifier']}/collection.json", type="Collection", label=item['title'])
-        else: 
+        else:
             child = ManifestRef(id=f"{domain}{item['identifier']}/manifest.json", type="Manifest", label=item['title'])
-        
+
         if "description" in item:
-            child.summary = {"none": [item['description']]} 
+            child.summary = {"none": [item['description']]}
 
         collection.add_item(child)
     page += 1
@@ -201,7 +197,7 @@ def create_collection3(identifier, domain, page=1, rows=1000):
         collection.add_item(child)
 
     return json.loads(collection.jsonld())
-    
+
 def manifest_page(identifier, label='', page='', width='', height='', metadata=None, canvasId=""):
     if not canvasId:
         canvasId = f"{identifier}/canvas"
@@ -310,7 +306,7 @@ def create_manifest(identifier, domain=None, page=None):
             'itemPath': subPrefix,
             'itemId': identifier
         })
-        if r.status_code != 200: 
+        if r.status_code != 200:
             # If the bookdata failed then treat as a single image
             fileName = ""
             for f in resp['files']:
@@ -318,7 +314,7 @@ def create_manifest(identifier, domain=None, page=None):
                     and f['source'].lower() == 'original' \
                     and 'thumb' not in f['name']:
                     fileName = f['name']
-                    break    
+                    break
 
             if not fileName:
                 # Original wasn't an image
@@ -380,7 +376,7 @@ def singleImage(metadata, identifier, manifest, uri):
             and f['source'].lower() == 'original' \
             and 'thumb' not in f['name']:
             fileName = f['name']
-            break    
+            break
 
     if not fileName:
         # Original wasn't an image
@@ -390,12 +386,12 @@ def singleImage(metadata, identifier, manifest, uri):
 
     imgId = f"{identifier}/{fileName}".replace('/','%2f')
     imgURL = f"{IMG_SRV}/3/{imgId}"
-    
+
     manifest.make_canvas_from_iiif(url=imgURL,
                                     id=f"{URI_PRIFIX}/{identifier}/canvas",
                                     label="1",
                                     anno_page_id=f"{uri}/annotationPage/1",
-                                    anno_id=f"{uri}/annotation/1")    
+                                    anno_id=f"{uri}/annotation/1")
 
 def addMetadata(item, identifier, metadata, collection=False):
     item.homepage = [{"id": f"https://archive.org/details/{identifier}",
@@ -430,7 +426,7 @@ def addMetadata(item, identifier, metadata, collection=False):
 
     excluded_fields = [
         'avg_rating', 'backup_location', 'btih', 'description', 'downloads',
-        'imagecount', 'indexflag', 'item_size', 'licenseurl', 'curation', 
+        'imagecount', 'indexflag', 'item_size', 'licenseurl', 'curation',
         'noindex', 'num_reviews', 'oai_updatedate', 'publicdate', 'publisher',  'reviewdate',
         'scanningcentre', 'stripped_tags', 'uploader'
     ]
@@ -536,7 +532,7 @@ def create_manifest3(identifier, domain=None, page=None):
         for fileMd in metadata['files']:
             if fileMd['name'].endswith('_scandata.xml'):
                 subprefix = fileMd['name'].replace('_scandata.xml', '')
-            if fileMd['format'] == 'Djvu XML':    
+            if fileMd['format'] == 'Djvu XML':
                 djvuFile = fileMd['name']
 
         bookReaderURL = f"https://{metadata.get('server')}/BookReader/BookReaderJSIA.php?id={identifier}&itemPath={metadata.get('dir')}&server={metadata.get('server')}&format=jsonp&subPrefix={subprefix}"
@@ -577,7 +573,7 @@ def create_manifest3(identifier, domain=None, page=None):
                     #                            id=f"https://iiif.archivelab.org/iiif/{identifier}${pageCount}/canvas",
                     #                            label=f"{page['leafNum']}")
                     pageCount += 1
-    
+
 
             # Setting logic for paging behavior and starting canvases
             # Start with paged (default) or individual behaviors
@@ -609,7 +605,7 @@ def create_manifest3(identifier, domain=None, page=None):
 
                 annotations.append(
                     AnnotationPageRef(id=f"{domain}3/annotations/{identifier}/{quote(djvuFile, safe='()')}/{count}.json", type="AnnotationPage")
-                )         
+                )
                 canvas.annotations = annotations
                 count += 1
     elif mediatype == 'image':
@@ -623,7 +619,7 @@ def create_manifest3(identifier, domain=None, page=None):
                     imgId = f"{identifier}/{file['name']}".replace('/','%2f')
                     imgURL = f"{IMG_SRV}/3/{imgId}"
                     pageCount += 1
-                    
+
                     try:
                         manifest.make_canvas_from_iiif(url=imgURL,
                                                     id=f"{URI_PRIFIX}/{identifier}${pageCount}/canvas",
@@ -709,10 +705,10 @@ def create_manifest3(identifier, domain=None, page=None):
                 # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
                 sourceFilename = re.sub('\.[a-zA-H-]*\.vtt', '', f['name'])
                 if sourceFilename not in vttfiles:
-                    vttfiles[sourceFilename] = []    
-                    
-                vttfiles[sourceFilename].append(f)    
-            
+                    vttfiles[sourceFilename] = []
+
+                vttfiles[sourceFilename].append(f)
+
         # create the canvases for each original
         for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
             normalised_id = file['name'].rsplit(".", 1)[0]
@@ -726,9 +722,9 @@ def create_manifest3(identifier, domain=None, page=None):
 
                 vttNo = 1
                 for vttFile in vttfiles[normalised_id]:
-                    vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", 
-                                               motivation="supplementing", 
-                                               target=c.id, 
+                    vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}",
+                                               motivation="supplementing",
+                                               target=c.id,
                                                anno_page_id=vttAPId,
                                                body={"id": f"{domain}resource/{identifier}/{vttFile['name']}",
                                                      "type": "Text",
@@ -760,9 +756,9 @@ def create_manifest3(identifier, domain=None, page=None):
                                          type='Video',
                                          format=to_mimetype(format),
                                          label={"none": [format]},
-                                         duration=float(file['length']), 
+                                         duration=float(file['length']),
                                          height=int(file['height']),
-                                         width=int(file['width']),                      
+                                         width=int(file['width']),
                         )
                         body.items.append(r)
                     elif file['format'] == format:
@@ -931,7 +927,7 @@ def cantaloupe_resolver(identifier):
         # single image file - find the filename
 
         filename = None
-        for f in files: 
+        for f in files:
             if valid_filetype(f['name']) \
                  and f['source'].lower() == 'original' \
                  and 'thumb' not in f['name']:
@@ -954,7 +950,7 @@ def cantaloupe_resolver(identifier):
                 filename = f['name']
                 fileIdentifier = filename[:-1 * len('_jp2.zip')]
 
-        # next look for any _jp2.zip that has a different name to the identifier 
+        # next look for any _jp2.zip that has a different name to the identifier
         if not filename:
             for f in files:
                 if f['name'].endswith('_jp2.zip'):
@@ -975,13 +971,8 @@ def cantaloupe_resolver(identifier):
                     fileIdentifier = filename[:-1 * len('_tif.zip')]
                     extension = ".tif"
 
-        #filename = next(f for f in files if f['source'].lower() == 'derivative' \
-        #                and f['name'].endswith('_jp2.zip'))['name']
         if filename:
             dirpath = filename[:-4]
             filepath = f"{fileIdentifier}_{leaf.zfill(4)}{extension}"
             return f"{identifier}%2f{filename}%2f{dirpath}%2f{filepath}"
 
- #   print (f'images not found for {identifier}')
- #   for f in files:
- #       print (f"source: {f['source'].lower()} name: {f['name']} and {f['source'].lower() == 'derivative'} {f['name'].endswith('_jp2.zip')}")