diff --git a/datalab/datalab_session/data_operations/median.py b/datalab/datalab_session/data_operations/median.py
index 60f40b1..49ef942 100644
--- a/datalab/datalab_session/data_operations/median.py
+++ b/datalab/datalab_session/data_operations/median.py
@@ -1,5 +1,7 @@
 from io import BytesIO
 import logging
+import os
+import tempfile
 
 import numpy as np
 from astropy.io import fits
@@ -49,48 +51,60 @@ def operate(self):
 
         log.info(f'Executing median operation on {file_count} files')
 
-        # fetch fits for all input data
-        image_data_list = []
-
-        for index, file_info in enumerate(input_files):
-            basename = file_info.get('basename', 'No basename found')
-
-            archive_record = get_archive_from_basename(basename)
-            try:
-                fits_url = archive_record[0].get('url', 'No URL found')
-            except IndexError:
-                continue
-
-            with fits.open(fits_url, use_fsspec=True) as hdu_list:
-                data = hdu_list['SCI'].data
-                image_data_list.append(data)
-            self.set_percent_completion((index) / file_count)
-
-        # Crop fits image data to be the same shape then stack
-        min_shape = min(arr.shape for arr in image_data_list)
-        cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in image_data_list]
-        stacked_data = np.stack(cropped_data_list, axis=2)
-
-        # Calculate a Median along the z axis
-        median = np.median(stacked_data, axis=2)
-
-        # Create a new Fits File
-        cache_key = self.generate_cache_key()
-        hdr = fits.Header([('KEY', cache_key)])
-        primary_hdu = fits.PrimaryHDU(header=hdr)
-        image_hdu = fits.ImageHDU(median)
-        hdu_list = fits.HDUList([primary_hdu, image_hdu])
-
-        fits_buffer = BytesIO()
-        hdu_list.writeto(fits_buffer)
-        fits_buffer.seek(0)
-
-        # Write the HDU List to the output FITS file in bitbucket
-        response = store_fits_output(cache_key, fits_buffer)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Track (path, dtype, shape) for every staged image so each
+            # memmap can be reconstructed exactly as it was written — not
+            # with the last file's shape or a hard-coded float32 dtype.
+            memmap_info = []
+
+            for index, file_info in enumerate(input_files):
+                basename = file_info.get('basename', 'No basename found')
+                archive_record = get_archive_from_basename(basename)
+
+                try:
+                    fits_url = archive_record[0].get('url', 'No URL found')
+                except IndexError:
+                    continue
+
+                with fits.open(fits_url, use_fsspec=True) as hdu_list:
+                    data = hdu_list['SCI'].data
+                    memmap_path = os.path.join(temp_dir, f'memmap_{index}.dat')
+                    memmap_array = np.memmap(memmap_path, dtype=data.dtype, mode='w+', shape=data.shape)
+                    memmap_array[:] = data[:]
+                    memmap_info.append((memmap_path, data.dtype, data.shape))
+
+                self.set_percent_completion((index + 1) / file_count)
+
+            if not memmap_info:
+                raise ValueError('median operation requires at least one valid input file')
+
+            image_data_list = [
+                np.memmap(path, dtype=dtype, mode='r', shape=shape)
+                for path, dtype, shape in memmap_info
+            ]
+
+            # Crop fits image data to be the same shape then stack
+            min_shape = min(arr.shape for arr in image_data_list)
+            cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in image_data_list]
+            stacked_data = np.stack(cropped_data_list, axis=2)
+
+            # Calculate a Median along the z axis
+            median = np.median(stacked_data, axis=2)
+
+            cache_key = self.generate_cache_key()
+            header = fits.Header([('KEY', cache_key)])
+            primary_hdu = fits.PrimaryHDU(header=header)
+            image_hdu = fits.ImageHDU(median)
+            hdu_list = fits.HDUList([primary_hdu, image_hdu])
+
+            fits_buffer = BytesIO()
+            hdu_list.writeto(fits_buffer)
+            fits_buffer.seek(0)
+
+            # Write the HDU List to the output FITS file in bitbucket
+            response = store_fits_output(cache_key, fits_buffer)
 
         # TODO: No output yet, need to build a thumbnail service
-        output = {
-            'output_files': []
-        }
+        output = {'output_files': []}
 
         self.set_percent_completion(file_count / file_count)
         self.set_output(output)
diff --git a/datalab/datalab_session/util.py b/datalab/datalab_session/util.py
index 8516eef..d4ca246 100644
--- a/datalab/datalab_session/util.py
+++ b/datalab/datalab_session/util.py
@@ -24,7 +24,7 @@ def store_fits_output(item_key, fits_buffer):
 
 def get_archive_from_basename(basename: str) -> dict:
     """
-    Querys and returns an archive file from the Archive
+    Queries and returns an archive file from the Archive
 
-    Keyword Arguements:
+    Keyword Arguments:
     basename -- name to query