diff --git a/content_harvester/by_record.py b/content_harvester/by_record.py index a0e4f5d7..6e628645 100644 --- a/content_harvester/by_record.py +++ b/content_harvester/by_record.py @@ -221,7 +221,15 @@ def check_component_cache( # Do a head request to get the current ETag and # Last-Modified values, used to create a cache key - head_resp = http_session.head(**asdict(request)) + if 'nuxeo' in request.url: + # The S3 presigned URLs from Nuxeo are good for GET requests only + # so do a GET request that mimics a head request + head_resp = http_session.get( + **asdict(request), + headers={"Range": "bytes=0-0"} + ) + else: + head_resp = http_session.head(**asdict(request), allow_redirects=True) if not ( head_resp.headers.get('ETag') or head_resp.headers.get('Last-Modified')