From b2bb999d07e86c7bb187f7a0c26bce2c448775b4 Mon Sep 17 00:00:00 2001 From: Matt Daily Date: Mon, 4 Nov 2024 09:29:56 -0800 Subject: [PATCH 1/7] Add extra logging to debug some missed reductions --- banzai/utils/realtime_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/banzai/utils/realtime_utils.py b/banzai/utils/realtime_utils.py index 639f5146..e26fe28c 100644 --- a/banzai/utils/realtime_utils.py +++ b/banzai/utils/realtime_utils.py @@ -48,6 +48,7 @@ def need_to_process_image(file_info, context): if 'frameid' in file_info: if 'version_set' not in file_info: + logger.info("Version set not available in file_info", extra_tags={"filename": file_info['filename']}) return True checksum = file_info['version_set'][0].get('md5') filename = file_info['filename'] @@ -57,7 +58,7 @@ def need_to_process_image(file_info, context): logger.info("Checking if file needs to be processed", extra_tags={"filename": filename}) if not (filename.endswith('.fits') or filename.endswith('.fits.fz')): - logger.debug("Filename does not have a .fits extension, stopping reduction", + logger.error("Filename does not have a .fits extension, stopping reduction", extra_tags={"filename": filename}) return False @@ -70,6 +71,7 @@ def need_to_process_image(file_info, context): # Check the md5. # Reset the number of tries if the file has changed on disk/in s3 if image.checksum != checksum: + logger.info('File has changed on disk. Resetting success flags and tries', extra_tags={'filename': filename}) need_to_process = True image.checksum = checksum image.tries = 0 @@ -78,6 +80,7 @@ def need_to_process_image(file_info, context): # Check if we need to try again elif image.tries < context.max_tries and not image.success: + logger.info('File has not been successfully processed yet. Trying again.', extra_tags={'filename': filename}) need_to_process = True dbs.commit_processed_image(image, context.db_address) From 581196e2163f5c9fa21b1a2c19450923a0176142 Mon Sep 17 00:00:00 2001 From: Matt Daily Date: Mon, 4 Nov 2024 10:27:23 -0800 Subject: [PATCH 2/7] Additional logs to help narrow down issue --- banzai/celery.py | 1 + banzai/main.py | 9 ++++++++- banzai/utils/realtime_utils.py | 7 ++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/banzai/celery.py b/banzai/celery.py index c5647764..28637130 100644 --- a/banzai/celery.py +++ b/banzai/celery.py @@ -180,6 +180,7 @@ def process_image(file_info: dict, runtime_context: dict): :param file_info: Body of queue message: dict :param runtime_context: Context object with runtime environment info """ + logger.info('Processing frame', extra_tags={'filename': file_info.get('filename')}) runtime_context = Context(runtime_context) try: if realtime_utils.need_to_process_image(file_info, runtime_context): diff --git a/banzai/main.py b/banzai/main.py index 34d1a827..bd1c1fd5 100755 --- a/banzai/main.py +++ b/banzai/main.py @@ -10,6 +10,7 @@ import argparse import os.path import logging +import traceback from kombu import Exchange, Connection, Queue from kombu.mixins import ConsumerMixin @@ -47,7 +48,13 @@ def get_consumers(self, Consumer, channel): return [consumer] def on_message(self, body, message): - instrument = LCOFrameFactory.get_instrument_from_header(body, self.runtime_context.db_address) + logger.info('Received message', extra_tags={'filename': body['filename']}) + try: + instrument = LCOFrameFactory.get_instrument_from_header(body, self.runtime_context.db_address) + except Exception: + logger.error(f'Could not get instrument from header. {traceback.format_exc()}', extra_tags={'filename': body['filename']}) + message.ack() + return if instrument is None or instrument.nx is None: queue_name = self.runtime_context.CELERY_TASK_QUEUE_NAME elif instrument.nx * instrument.ny > self.runtime_context.LARGE_WORKER_THRESHOLD: diff --git a/banzai/utils/realtime_utils.py b/banzai/utils/realtime_utils.py index e26fe28c..2f6ca001 100644 --- a/banzai/utils/realtime_utils.py +++ b/banzai/utils/realtime_utils.py @@ -1,4 +1,5 @@ import os + from banzai import dbs from banzai.utils import file_utils, import_utils, image_utils from banzai.data import HeaderOnly @@ -91,7 +92,11 @@ def need_to_process_image(file_info, context): factory = import_utils.import_attribute(context.FRAME_FACTORY)() test_image = factory.observation_frame_class(hdu_list=[HeaderOnly(file_info, name='')], file_path=file_info['filename']) - test_image.instrument = factory.get_instrument_from_header(file_info, db_address=context.db_address) + try: + test_image.instrument = factory.get_instrument_from_header(file_info, db_address=context.db_address) + except Exception: + logger.error(f'Issue getting instrument from header. {logs.format_exception()}', extra_tags={'filename': filename}) + need_to_process = False if image_utils.get_reduction_level(test_image.meta) != '00': logger.error('Image has nonzero reduction level. Aborting.', extra_tags={'filename': filename}) need_to_process = False From fb7964dd58b6bbbfec4e6aa909fae02b405eebe2 Mon Sep 17 00:00:00 2001 From: Matt Daily Date: Mon, 4 Nov 2024 10:47:45 -0800 Subject: [PATCH 3/7] Make sure we run build docker on all branches, also fix up image tags --- .github/workflows/build-docker.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index b9285031..a100be32 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -5,7 +5,7 @@ concurrency: ${{ github.workflow }}-${{ github.ref }} on: push: branches: - - "*" + - "**" tags: - "*" pull_request: @@ -54,5 +54,8 @@ jobs: with: context: . push: true - tags: ${{ steps.meta.outputs.tags }} + tags: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest + ${{ github.event_name == 'push' && github.ref_type == 'tag' && format('{0}:{1}', env.REGISTRY, env.IMAGE_NAME) }}:${{ github.ref_name }} labels: ${{ steps.meta.outputs.labels }} From 86121d02c3c2b0e5a1ffb7f0a38c7adaf59c5cda Mon Sep 17 00:00:00 2001 From: Matt Daily Date: Mon, 4 Nov 2024 10:53:27 -0800 Subject: [PATCH 4/7] Make sure we lowercase the repository name! --- .github/workflows/build-docker.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index a100be32..ef04ddcc 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -46,7 +46,7 @@ jobs: id: meta uses: docker/metadata-action@v4 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + images: ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]') - name: Build and also push Dockerimage id: build-and-push @@ -55,7 +55,7 @@ jobs: context: . push: true tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest - ${{ github.event_name == 'push' && github.ref_type == 'tag' && format('{0}:{1}', env.REGISTRY, env.IMAGE_NAME) }}:${{ github.ref_name }} + ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]'):${{ github.sha }} + ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]'):latest + ${{ github.event_name == 'push' && github.ref_type == 'tag' && format('{0}/{1}', env.REGISTRY, env.IMAGE_NAME | toLowerCase) }}:${{ github.ref_name }} labels: ${{ steps.meta.outputs.labels }} From d104b9efc52a5030fa9a854b0be9273f88f83408 Mon Sep 17 00:00:00 2001 From: Matt Daily Date: Mon, 4 Nov 2024 15:40:14 -0800 Subject: [PATCH 5/7] Fix to lowercase casting for image registry --- .github/workflows/build-docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index ef04ddcc..4da0661b 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -57,5 +57,5 @@ jobs: tags: | ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]'):${{ github.sha }} ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]'):latest - ${{ github.event_name == 'push' && github.ref_type == 'tag' && format('{0}/{1}', env.REGISTRY, env.IMAGE_NAME | toLowerCase) }}:${{ github.ref_name }} + ${{ github.event_name == 'push' && github.ref_type == 'tag' && format('{0}/{1}', env.REGISTRY, env.IMAGE_NAME) | toLowerCase }}:${{ github.ref_name }} labels: ${{ steps.meta.outputs.labels }} From 7c18d53c4940206a594bc1426d276f01111c24c2 Mon Sep 17 00:00:00 2001 From: Matt Daily Date: Mon, 4 Nov 2024 15:54:52 -0800 Subject: [PATCH 6/7] Hopefully final fix for image tagging? --- .github/workflows/build-docker.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index 4da0661b..688ed136 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -46,7 +46,12 @@ jobs: id: meta uses: docker/metadata-action@v4 with: - images: ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]') + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=sha + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} - name: Build and also push Dockerimage id: build-and-push @@ -54,8 +59,5 @@ jobs: with: context: . push: true - tags: | - ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]'):${{ github.sha }} - ${{ env.REGISTRY }}/$(echo ${{ env.IMAGE_NAME }} | tr '[:upper:]' '[:lower:]'):latest - ${{ github.event_name == 'push' && github.ref_type == 'tag' && format('{0}/{1}', env.REGISTRY, env.IMAGE_NAME) | toLowerCase }}:${{ github.ref_name }} + tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From c8ec352337c84774bffa779f724d400a409f0945 Mon Sep 17 00:00:00 2001 From: Curtis McCully Date: Tue, 5 Nov 2024 10:55:05 -0500 Subject: [PATCH 7/7] Added to the changelog --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 5e7f5e27..d3be4c8c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +1.19.1 (2024-11-05) +------------------- +- Added extra logging and try excepts to catch frames that bypass silently + 1.19.0 (2024-10-16) ------------------- - Added the ability to read fits files that are pre downloaded and are already in memory