Skip to content

Commit

Permalink
Merge pull request #54 from LCOGT/fix/remove-temp-files
Browse files (browse the repository at this point in the history)
Properly Remove TempFiles
  • Loading branch information
LTDakin authored Feb 3, 2025
2 parents 911fd47 + 1f2af63 commit ffbb461
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 75 deletions.
4 changes: 2 additions & 2 deletions datalab/datalab_session/analysis/get_tif.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get_tif(input: dict):
else:
# If tif file doesn't exist, generate a new tif file
fits_path = get_fits(basename)
tif_path = create_tif(basename, fits_path)
tif_url = add_file_to_bucket(file_key, tif_path)
with create_tif(basename, fits_path) as tif_path:
tif_url = add_file_to_bucket(file_key, tif_path)

return {"tif_url": tif_url}
6 changes: 5 additions & 1 deletion datalab/datalab_session/analysis/line_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from astropy.wcs import WcsError
from astropy import coordinates

from datalab.datalab_session.exceptions import ClientAlertException
from datalab.datalab_session.utils.file_utils import scale_points, get_hdu
from datalab.datalab_session.utils.s3_utils import get_fits

Expand All @@ -22,7 +23,10 @@ def line_profile(input: dict):
"""
fits_path = get_fits(input['basename'], input['source'])

sci_hdu = get_hdu(fits_path, 'SCI')
try:
sci_hdu = get_hdu(fits_path, 'SCI')
except TypeError as e:
raise ClientAlertException(f'Error: {e}')

x_points, y_points = scale_points(input["height"], input["width"], sci_hdu.data.shape[0], sci_hdu.data.shape[1], x_points=[input["x1"], input["x2"]], y_points=[input["y1"], input["y2"]])

Expand Down
19 changes: 12 additions & 7 deletions datalab/datalab_session/data_operations/fits_output_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import tempfile
import os
import numpy as np
from astropy.io import fits

from datalab import settings
from datalab.datalab_session.utils.file_utils import create_jpgs
from datalab.datalab_session.utils.s3_utils import save_fits_and_thumbnails

Expand Down Expand Up @@ -52,11 +54,14 @@ def create_and_save_data_products(self, index: int=None, large_jpg_path: str=Non
small_jpg (str): Optionally add a path to a small jpg to save, will not create a new jpg.
"""
hdu_list = fits.HDUList([self.primary_hdu, self.image_hdu])
fits_output_path = tempfile.NamedTemporaryFile(suffix=f'{self.datalab_id}.fits').name
hdu_list.writeto(fits_output_path, overwrite=True)

# allow for operations to pregenerate the jpgs, ex. RGB stacking
if not large_jpg_path or not small_jpg_path:
large_jpg_path, small_jpg_path = create_jpgs(self.datalab_id, fits_output_path)

with tempfile.NamedTemporaryFile(suffix=f'{self.datalab_id}.fits', dir=settings.TEMP_FITS_DIR) as fits_output_file:
fits_output_path = fits_output_file.name
hdu_list.writeto(fits_output_path, overwrite=True)

return save_fits_and_thumbnails(self.datalab_id, fits_output_path, large_jpg_path, small_jpg_path, index)
# allow for operations to pregenerate the jpgs, ex. RGB stacking
if not large_jpg_path or not small_jpg_path:
with create_jpgs(self.datalab_id, fits_output_path) as (large_jpg_path, small_jpg_path):
return save_fits_and_thumbnails(self.datalab_id, fits_output_path, large_jpg_path, small_jpg_path, index)
else:
return save_fits_and_thumbnails(self.datalab_id, fits_output_path, large_jpg_path, small_jpg_path, index)
25 changes: 12 additions & 13 deletions datalab/datalab_session/data_operations/rgb_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,19 +133,18 @@ def operate(self):

aligned_images = self._align_images(fits_files)

large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, aligned_images, color=True, zmin=zmin_list, zmax=zmax_list)

stacked_ndarray = self._create_3d_array(input_handlers)

rgb_comment = f'Datalab RGB Stack on files {", ".join(input["basename"] for input in rgb_inputs)}'
output = FITSOutputHandler(
self.cache_key,
stacked_ndarray,
rgb_comment
).create_and_save_data_products(
large_jpg_path=large_jpg_path,
small_jpg_path=small_jpg_path
)
with create_jpgs(self.cache_key, aligned_images, color=True, zmin=zmin_list, zmax=zmax_list) as (large_jpg_path, small_jpg_path):
stacked_ndarray = self._create_3d_array(input_handlers)

rgb_comment = f'Datalab RGB Stack on files {", ".join(input["basename"] for input in rgb_inputs)}'
output = FITSOutputHandler(
self.cache_key,
stacked_ndarray,
rgb_comment
).create_and_save_data_products(
large_jpg_path=large_jpg_path,
small_jpg_path=small_jpg_path
)

log.info(f'RGB Stack output: {output}')
self.set_output(output)
12 changes: 6 additions & 6 deletions datalab/datalab_session/tests/test_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get
# return the test fits paths in order of the input_files instead of aws fetch
mock_get_fits.side_effect = [self.test_fits_1_path, self.test_fits_2_path]
# save temp output to a known path so we can test it
mock_named_tempfile.return_value.name = self.temp_median_path
mock_named_tempfile.return_value.__enter__.return_value.name = self.temp_median_path
# avoids overwriting our output
mock_create_jpgs.return_value = ('test_path', 'test_path')
mock_create_jpgs.return_value.__enter__.return_value = ('test_path', 'test_path')
# don't save to s3
mock_save_fits_and_thumbnails.return_value = self.temp_median_path

Expand Down Expand Up @@ -230,9 +230,9 @@ def test_operate(self, mock_get_fits, mock_named_tempfile, mock_create_jpgs, moc
# return the test fits paths in order of the input_files instead of aws fetch
mock_get_fits.side_effect = [self.test_red_path, self.test_green_path, self.test_blue_path]
# save temp output to a known path so we can test
mock_named_tempfile.return_value.name = self.temp_rgb_path
mock_named_tempfile.return_value.__enter__.return_value.name = self.temp_rgb_path
# avoids overwriting our output
mock_create_jpgs.return_value = ('test_path', 'test_path')
mock_create_jpgs.return_value.__enter__.return_value = ('test_path', 'test_path')
# don't save to s3
mock_save_fits_and_thumbnails.return_value = self.temp_rgb_path

Expand Down Expand Up @@ -300,9 +300,9 @@ def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get
mock_get_fits.side_effect = [self.test_fits_1_path, self.test_fits_2_path,
self.temp_fits_1_negative_path, self.temp_fits_2_negative_path]
# save temp output to a known path so we can test it
mock_named_tempfile.return_value.name = self.temp_stacked_path
mock_named_tempfile.return_value.__enter__.return_value.name = self.temp_stacked_path
# avoids overwriting our output
mock_create_jpgs.return_value = ('test_path', 'test_path')
mock_create_jpgs.return_value.__enter__.return_value = ('test_path', 'test_path')
# don't save to s3
mock_save_fits_and_thumbnails.return_value = self.temp_stacked_path

Expand Down
46 changes: 21 additions & 25 deletions datalab/datalab_session/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,33 @@ def test_get_fits_dimensions(self):

def test_create_fits(self):
test_2d_ndarray = np.zeros((10, 10))
path = create_fits('create_fits_test', test_2d_ndarray)
with create_fits('create_fits_test', test_2d_ndarray) as path:
# test the file was written out to a path
self.assertIsInstance(path, str)
self.assertIsFile(path)

# test the file has the right data
hdu = fits.open(path)
self.assertEqual(hdu[0].header['KEY'], 'create_fits_test')
self.assertEqual(hdu[1].data.tolist(), test_2d_ndarray.tolist())

# test the file was written out to a path
self.assertIsInstance(path, str)
self.assertIsFile(path)

# test the file has the right data
hdu = fits.open(path)
self.assertEqual(hdu[0].header['KEY'], 'create_fits_test')
self.assertEqual(hdu[1].data.tolist(), test_2d_ndarray.tolist())

def test_create_tif(self):
fits_path = self.test_fits_path
tif_path = create_tif('create_tif_test', fits_path)

# test the file was written out to a path
self.assertIsInstance(tif_path, str)
self.assertIsFile(tif_path)
with create_tif('create_tif_test', fits_path) as tif_path:
# test the file was written out to a path
self.assertIsInstance(tif_path, str)
self.assertIsFile(tif_path)
self.assertFilesEqual(tif_path, self.test_tif_path)

self.assertFilesEqual(tif_path, self.test_tif_path)

def test_create_jpgs(self):
fits_path = self.test_fits_path
jpg_paths = create_jpgs('create_jpgs_test', fits_path)

# test the files were written out to a path
self.assertEqual(len(jpg_paths), 2)
self.assertIsFile(jpg_paths[0])
self.assertIsFile(jpg_paths[1])
self.assertFilesEqual(jpg_paths[0], self.test_large_jpg_path)
self.assertFilesEqual(jpg_paths[1], self.test_small_jpg_path)
with create_jpgs('create_jpgs_test', fits_path) as jpg_paths:
# test the files were written out to a path
self.assertEqual(len(jpg_paths), 2)
self.assertIsFile(jpg_paths[0])
self.assertIsFile(jpg_paths[1])
self.assertFilesEqual(jpg_paths[0], self.test_large_jpg_path)
self.assertFilesEqual(jpg_paths[1], self.test_small_jpg_path)

def test_stack_arrays(self):
test_array_1 = np.zeros((10, 20))
Expand Down
47 changes: 26 additions & 21 deletions datalab/datalab_session/utils/file_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import tempfile
import logging
import os
from contextlib import contextmanager

from astropy.io import fits
import numpy as np
from fits2image.conversions import fits_to_jpg, fits_to_tif

from datalab import settings
from datalab.datalab_session.exceptions import ClientAlertException
from datalab.datalab_session.utils.s3_utils import save_fits_and_thumbnails

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand All @@ -29,6 +31,7 @@ def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
hdu_shape = hdu[extension].shape
return hdu_shape

@contextmanager
def create_fits(key: str, image_arr: np.ndarray, comment=None) -> str:
"""
Creates a fits file with the given key and image array
Expand All @@ -41,22 +44,30 @@ def create_fits(key: str, image_arr: np.ndarray, comment=None) -> str:
image_hdu = fits.ImageHDU(data=image_arr, name='SCI')

hdu_list = fits.HDUList([primary_hdu, image_hdu])
fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits').name
fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits', dir=settings.TEMP_FITS_DIR).name
hdu_list.writeto(fits_path, overwrite=True)

return fits_path
try:
yield fits_path
finally:
os.remove(fits_path)

@contextmanager
def create_tif(key: str, fits_path: np.ndarray) -> str:
"""
Creates a full sized TIFF file from a FITs
Returns the path to the TIFF file
"""
height, width = get_fits_dimensions(fits_path)
tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif', dir=settings.TEMP_FITS_DIR).name
fits_to_tif(fits_path, tif_path, width=width, height=height)

return tif_path
try:
yield tif_path
finally:
os.remove(tif_path)

@contextmanager
def create_jpgs(cache_key, fits_paths: str, color=False, zmin=None, zmax=None) -> list:
"""
Create jpgs from fits files and save them to S3
Expand All @@ -68,8 +79,8 @@ def create_jpgs(cache_key, fits_paths: str, color=False, zmin=None, zmax=None) -
fits_paths = [fits_paths]

# create the jpgs from the fits files
large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name
large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg', dir=settings.TEMP_FITS_DIR).name
thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg', dir=settings.TEMP_FITS_DIR).name

max_height = 0
max_width = 0
Expand All @@ -81,7 +92,14 @@ def create_jpgs(cache_key, fits_paths: str, color=False, zmin=None, zmax=None) -
fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color, zmin=zmin, zmax=zmax)
fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color, zmin=zmin, zmax=zmax)

return large_jpg_path, thumbnail_jpg_path
try:
yield large_jpg_path, thumbnail_jpg_path
finally:
try:
os.remove(large_jpg_path)
os.remove(thumbnail_jpg_path)
except FileNotFoundError:
pass

def crop_arrays(array_list: list):
"""
Expand Down Expand Up @@ -116,16 +134,3 @@ def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_poi
x_points = width_2 - x_points

return x_points, y_points

def create_output(cache_key, np_array, large_jpg=None, small_jpg=None, index=None, comment=None):
"""
A more automated way of creating output for a dev
Dev can specify just a cache_key and np array and the function will create the fits and jpgs
or the dev can pass the fits_file or jpgs and the function will save them
"""
fits_file = create_fits(cache_key, np_array, comment)

if not large_jpg or not small_jpg:
large_jpg, small_jpg = create_jpgs(cache_key, fits_file)

return save_fits_and_thumbnails(cache_key, fits_file, large_jpg, small_jpg, index)

0 comments on commit ffbb461

Please sign in to comment.