Skip to content

Commit

Permalink
using tempfile and memmap to work on arrays in memory, first stores ea…
Browse files Browse the repository at this point in the history
…ch array individually and then combines them
  • Loading branch information
LTDakin committed Mar 21, 2024
1 parent ab49450 commit 4846322
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 42 deletions.
90 changes: 49 additions & 41 deletions datalab/datalab_session/data_operations/median.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from io import BytesIO
import logging
import os
import tempfile

import numpy as np
from astropy.io import fits
Expand Down Expand Up @@ -49,48 +51,54 @@ def operate(self):

log.info(f'Executing median operation on {file_count} files')

# fetch fits for all input data
image_data_list = []

for index, file_info in enumerate(input_files):
basename = file_info.get('basename', 'No basename found')

archive_record = get_archive_from_basename(basename)
try:
fits_url = archive_record[0].get('url', 'No URL found')
except IndexError:
continue

with fits.open(fits_url, use_fsspec=True) as hdu_list:
data = hdu_list['SCI'].data
image_data_list.append(data)
self.set_percent_completion((index) / file_count)

# Crop fits image data to be the same shape then stack
min_shape = min(arr.shape for arr in image_data_list)
cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in image_data_list]
stacked_data = np.stack(cropped_data_list, axis=2)

# Calculate a Median along the z axis
median = np.median(stacked_data, axis=2)

# Create a new Fits File
cache_key = self.generate_cache_key()
hdr = fits.Header([('KEY', cache_key)])
primary_hdu = fits.PrimaryHDU(header=hdr)
image_hdu = fits.ImageHDU(median)
hdu_list = fits.HDUList([primary_hdu, image_hdu])

fits_buffer = BytesIO()
hdu_list.writeto(fits_buffer)
fits_buffer.seek(0)

# Write the HDU List to the output FITS file in bitbucket
response = store_fits_output(cache_key, fits_buffer)
with tempfile.TemporaryDirectory() as temp_dir:
memmap_paths = []

for index, file_info in enumerate(input_files):
basename = file_info.get('basename', 'No basename found')
archive_record = get_archive_from_basename(basename)

try:
fits_url = archive_record[0].get('url', 'No URL found')
except IndexError:
continue

with fits.open(fits_url, use_fsspec=True) as hdu_list:
data = hdu_list['SCI'].data
memmap_path = os.path.join(temp_dir, f'memmap_{index}.dat')
memmap_array = np.memmap(memmap_path, dtype=data.dtype, mode='w+', shape=data.shape)
memmap_array[:] = data[:]
memmap_paths.append(memmap_path)

self.set_percent_completion(index / file_count)

image_data_list = [
np.memmap(path, dtype=np.float32, mode='r', shape=memmap_array.shape)
for path in memmap_paths
]

# Crop fits image data to be the same shape then stack
min_shape = min(arr.shape for arr in image_data_list)
cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in image_data_list]
stacked_data = np.stack(cropped_data_list, axis=2)

# Calculate a Median along the z axis
median = np.median(stacked_data, axis=2)

cache_key = self.generate_cache_key()
header = fits.Header([('KEY', cache_key)])
primary_hdu = fits.PrimaryHDU(header=header)
image_hdu = fits.ImageHDU(median)
hdu_list = fits.HDUList([primary_hdu, image_hdu])

fits_buffer = BytesIO()
hdu_list.writeto(fits_buffer)
fits_buffer.seek(0)

# Write the HDU List to the output FITS file in bitbucket
response = store_fits_output(cache_key, fits_buffer)

# TODO: No output yet, need to build a thumbnail service
output = {
'output_files': []
}
output = {'output_files': []}
self.set_percent_completion(file_count / file_count)
self.set_output(output)
2 changes: 1 addition & 1 deletion datalab/datalab_session/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def store_fits_output(item_key, fits_buffer):

def get_archive_from_basename(basename: str) -> dict:
"""
Querys and returns an archive file from the Archive
Queries and returns an archive file from the Archive
Keyword Arguments:
basename -- name to query
Expand Down

0 comments on commit 4846322

Please sign in to comment.