Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Properly Remove TempFiles #54

Merged
merged 2 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions datalab/datalab_session/analysis/get_tif.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get_tif(input: dict):
else:
# If tif file doesn't exist, generate a new tif file
fits_path = get_fits(basename)
tif_path = create_tif(basename, fits_path)
tif_url = add_file_to_bucket(file_key, tif_path)
with create_tif(basename, fits_path) as tif_path:
tif_url = add_file_to_bucket(file_key, tif_path)

return {"tif_url": tif_url}
6 changes: 5 additions & 1 deletion datalab/datalab_session/analysis/line_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from astropy.wcs import WcsError
from astropy import coordinates

from datalab.datalab_session.exceptions import ClientAlertException
from datalab.datalab_session.utils.file_utils import scale_points, get_hdu
from datalab.datalab_session.utils.s3_utils import get_fits

Expand All @@ -22,7 +23,10 @@ def line_profile(input: dict):
"""
fits_path = get_fits(input['basename'], input['source'])

sci_hdu = get_hdu(fits_path, 'SCI')
try:
sci_hdu = get_hdu(fits_path, 'SCI')
except TypeError as e:
raise ClientAlertException(f'Error: {e}')

x_points, y_points = scale_points(input["height"], input["width"], sci_hdu.data.shape[0], sci_hdu.data.shape[1], x_points=[input["x1"], input["x2"]], y_points=[input["y1"], input["y2"]])

Expand Down
19 changes: 12 additions & 7 deletions datalab/datalab_session/data_operations/fits_output_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import tempfile
import os
import numpy as np
from astropy.io import fits

from datalab import settings
from datalab.datalab_session.utils.file_utils import create_jpgs
from datalab.datalab_session.utils.s3_utils import save_fits_and_thumbnails

Expand Down Expand Up @@ -52,11 +54,14 @@ def create_and_save_data_products(self, index: int=None, large_jpg_path: str=Non
small_jpg (str): Optionally add a path to a small jpg to save, will not create a new jpg.
"""
hdu_list = fits.HDUList([self.primary_hdu, self.image_hdu])
fits_output_path = tempfile.NamedTemporaryFile(suffix=f'{self.datalab_id}.fits').name
hdu_list.writeto(fits_output_path, overwrite=True)

# allow for operations to pregenerate the jpgs, ex. RGB stacking
if not large_jpg_path or not small_jpg_path:
large_jpg_path, small_jpg_path = create_jpgs(self.datalab_id, fits_output_path)

with tempfile.NamedTemporaryFile(suffix=f'{self.datalab_id}.fits', dir=settings.TEMP_FITS_DIR) as fits_output_file:
fits_output_path = fits_output_file.name
hdu_list.writeto(fits_output_path, overwrite=True)

return save_fits_and_thumbnails(self.datalab_id, fits_output_path, large_jpg_path, small_jpg_path, index)
# allow for operations to pregenerate the jpgs, ex. RGB stacking
if not large_jpg_path or not small_jpg_path:
with create_jpgs(self.datalab_id, fits_output_path) as (large_jpg_path, small_jpg_path):
return save_fits_and_thumbnails(self.datalab_id, fits_output_path, large_jpg_path, small_jpg_path, index)
else:
return save_fits_and_thumbnails(self.datalab_id, fits_output_path, large_jpg_path, small_jpg_path, index)
25 changes: 12 additions & 13 deletions datalab/datalab_session/data_operations/rgb_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,19 +133,18 @@ def operate(self):

aligned_images = self._align_images(fits_files)

large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, aligned_images, color=True, zmin=zmin_list, zmax=zmax_list)

stacked_ndarray = self._create_3d_array(input_handlers)

rgb_comment = f'Datalab RGB Stack on files {", ".join(input["basename"] for input in rgb_inputs)}'
output = FITSOutputHandler(
self.cache_key,
stacked_ndarray,
rgb_comment
).create_and_save_data_products(
large_jpg_path=large_jpg_path,
small_jpg_path=small_jpg_path
)
with create_jpgs(self.cache_key, aligned_images, color=True, zmin=zmin_list, zmax=zmax_list) as (large_jpg_path, small_jpg_path):
stacked_ndarray = self._create_3d_array(input_handlers)

rgb_comment = f'Datalab RGB Stack on files {", ".join(input["basename"] for input in rgb_inputs)}'
output = FITSOutputHandler(
self.cache_key,
stacked_ndarray,
rgb_comment
).create_and_save_data_products(
large_jpg_path=large_jpg_path,
small_jpg_path=small_jpg_path
)

log.info(f'RGB Stack output: {output}')
self.set_output(output)
12 changes: 6 additions & 6 deletions datalab/datalab_session/tests/test_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get
# return the test fits paths in order of the input_files instead of aws fetch
mock_get_fits.side_effect = [self.test_fits_1_path, self.test_fits_2_path]
# save temp output to a known path so we can test it
mock_named_tempfile.return_value.name = self.temp_median_path
mock_named_tempfile.return_value.__enter__.return_value.name = self.temp_median_path
# avoids overwriting our output
mock_create_jpgs.return_value = ('test_path', 'test_path')
mock_create_jpgs.return_value.__enter__.return_value = ('test_path', 'test_path')
# don't save to s3
mock_save_fits_and_thumbnails.return_value = self.temp_median_path

Expand Down Expand Up @@ -230,9 +230,9 @@ def test_operate(self, mock_get_fits, mock_named_tempfile, mock_create_jpgs, moc
# return the test fits paths in order of the input_files instead of aws fetch
mock_get_fits.side_effect = [self.test_red_path, self.test_green_path, self.test_blue_path]
# save temp output to a known path so we can test
mock_named_tempfile.return_value.name = self.temp_rgb_path
mock_named_tempfile.return_value.__enter__.return_value.name = self.temp_rgb_path
# avoids overwriting our output
mock_create_jpgs.return_value = ('test_path', 'test_path')
mock_create_jpgs.return_value.__enter__.return_value = ('test_path', 'test_path')
# don't save to s3
mock_save_fits_and_thumbnails.return_value = self.temp_rgb_path

Expand Down Expand Up @@ -300,9 +300,9 @@ def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get
mock_get_fits.side_effect = [self.test_fits_1_path, self.test_fits_2_path,
self.temp_fits_1_negative_path, self.temp_fits_2_negative_path]
# save temp output to a known path so we can test it
mock_named_tempfile.return_value.name = self.temp_stacked_path
mock_named_tempfile.return_value.__enter__.return_value.name = self.temp_stacked_path
# avoids overwriting our output
mock_create_jpgs.return_value = ('test_path', 'test_path')
mock_create_jpgs.return_value.__enter__.return_value = ('test_path', 'test_path')
# don't save to s3
mock_save_fits_and_thumbnails.return_value = self.temp_stacked_path

Expand Down
46 changes: 21 additions & 25 deletions datalab/datalab_session/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,33 @@ def test_get_fits_dimensions(self):

def test_create_fits(self):
test_2d_ndarray = np.zeros((10, 10))
path = create_fits('create_fits_test', test_2d_ndarray)
with create_fits('create_fits_test', test_2d_ndarray) as path:
# test the file was written out to a path
self.assertIsInstance(path, str)
self.assertIsFile(path)

# test the file has the right data
hdu = fits.open(path)
self.assertEqual(hdu[0].header['KEY'], 'create_fits_test')
self.assertEqual(hdu[1].data.tolist(), test_2d_ndarray.tolist())

# test the file was written out to a path
self.assertIsInstance(path, str)
self.assertIsFile(path)

# test the file has the right data
hdu = fits.open(path)
self.assertEqual(hdu[0].header['KEY'], 'create_fits_test')
self.assertEqual(hdu[1].data.tolist(), test_2d_ndarray.tolist())

def test_create_tif(self):
fits_path = self.test_fits_path
tif_path = create_tif('create_tif_test', fits_path)

# test the file was written out to a path
self.assertIsInstance(tif_path, str)
self.assertIsFile(tif_path)
with create_tif('create_tif_test', fits_path) as tif_path:
# test the file was written out to a path
self.assertIsInstance(tif_path, str)
self.assertIsFile(tif_path)
self.assertFilesEqual(tif_path, self.test_tif_path)

self.assertFilesEqual(tif_path, self.test_tif_path)

def test_create_jpgs(self):
fits_path = self.test_fits_path
jpg_paths = create_jpgs('create_jpgs_test', fits_path)

# test the files were written out to a path
self.assertEqual(len(jpg_paths), 2)
self.assertIsFile(jpg_paths[0])
self.assertIsFile(jpg_paths[1])
self.assertFilesEqual(jpg_paths[0], self.test_large_jpg_path)
self.assertFilesEqual(jpg_paths[1], self.test_small_jpg_path)
with create_jpgs('create_jpgs_test', fits_path) as jpg_paths:
# test the files were written out to a path
self.assertEqual(len(jpg_paths), 2)
self.assertIsFile(jpg_paths[0])
self.assertIsFile(jpg_paths[1])
self.assertFilesEqual(jpg_paths[0], self.test_large_jpg_path)
self.assertFilesEqual(jpg_paths[1], self.test_small_jpg_path)

def test_stack_arrays(self):
test_array_1 = np.zeros((10, 20))
Expand Down
47 changes: 26 additions & 21 deletions datalab/datalab_session/utils/file_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import tempfile
import logging
import os
from contextlib import contextmanager

from astropy.io import fits
import numpy as np
from fits2image.conversions import fits_to_jpg, fits_to_tif

from datalab import settings
from datalab.datalab_session.exceptions import ClientAlertException
from datalab.datalab_session.utils.s3_utils import save_fits_and_thumbnails

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand All @@ -29,6 +31,7 @@ def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
hdu_shape = hdu[extension].shape
return hdu_shape

@contextmanager
def create_fits(key: str, image_arr: np.ndarray, comment=None) -> str:
"""
Creates a fits file with the given key and image array
Expand All @@ -41,22 +44,30 @@ def create_fits(key: str, image_arr: np.ndarray, comment=None) -> str:
image_hdu = fits.ImageHDU(data=image_arr, name='SCI')

hdu_list = fits.HDUList([primary_hdu, image_hdu])
fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits').name
fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits', dir=settings.TEMP_FITS_DIR).name
hdu_list.writeto(fits_path, overwrite=True)

return fits_path
try:
yield fits_path
finally:
os.remove(fits_path)

@contextmanager
def create_tif(key: str, fits_path: np.ndarray) -> str:
"""
Creates a full sized TIFF file from a FITs
Returns the path to the TIFF file
"""
height, width = get_fits_dimensions(fits_path)
tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif', dir=settings.TEMP_FITS_DIR).name
fits_to_tif(fits_path, tif_path, width=width, height=height)

return tif_path
try:
yield tif_path
finally:
os.remove(tif_path)

@contextmanager
def create_jpgs(cache_key, fits_paths: str, color=False, zmin=None, zmax=None) -> list:
"""
Create jpgs from fits files and save them to S3
Expand All @@ -68,8 +79,8 @@ def create_jpgs(cache_key, fits_paths: str, color=False, zmin=None, zmax=None) -
fits_paths = [fits_paths]

# create the jpgs from the fits files
large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name
large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg', dir=settings.TEMP_FITS_DIR).name
thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg', dir=settings.TEMP_FITS_DIR).name

max_height = 0
max_width = 0
Expand All @@ -81,7 +92,14 @@ def create_jpgs(cache_key, fits_paths: str, color=False, zmin=None, zmax=None) -
fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color, zmin=zmin, zmax=zmax)
fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color, zmin=zmin, zmax=zmax)

return large_jpg_path, thumbnail_jpg_path
try:
yield large_jpg_path, thumbnail_jpg_path
finally:
try:
os.remove(large_jpg_path)
os.remove(thumbnail_jpg_path)
except FileNotFoundError:
pass

def crop_arrays(array_list: list):
"""
Expand Down Expand Up @@ -116,16 +134,3 @@ def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_poi
x_points = width_2 - x_points

return x_points, y_points

def create_output(cache_key, np_array, large_jpg=None, small_jpg=None, index=None, comment=None):
"""
A more automated way of creating output for a dev
Dev can specify just a cache_key and np array and the function will create the fits and jpgs
or the dev can pass the fits_file or jpgs and the function will save them
"""
fits_file = create_fits(cache_key, np_array, comment)

if not large_jpg or not small_jpg:
large_jpg, small_jpg = create_jpgs(cache_key, fits_file)

return save_fits_and_thumbnails(cache_key, fits_file, large_jpg, small_jpg, index)