From 9a5b5eb560ce9d6fdaecdf0b1a5eae69838d09b2 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 19 Sep 2024 15:42:12 +0100 Subject: [PATCH 01/19] Remove unnecessary `enumerate()` --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 9a30f76c0..997656ba1 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -63,7 +63,7 @@ def _calc_memory_bytes_paganin_filter_tomopy( # estimate padding size here based on non_slice dimensions pad_tup = [] - for index, element in enumerate(non_slice_dims_shape): + for element in non_slice_dims_shape: diff = _shift_bit_length(element + 1) - element if element % 2 == 0: pad_width = diff // 2 From b223f55fef55188ff3f73c3e04ce4475a4de02f9 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 19 Sep 2024 15:44:04 +0100 Subject: [PATCH 02/19] Rename `element` var for improved clarity --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 997656ba1..6231b178a 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -63,9 +63,9 @@ def _calc_memory_bytes_paganin_filter_tomopy( # estimate padding size here based on non_slice dimensions pad_tup = [] - for element in non_slice_dims_shape: - diff = _shift_bit_length(element + 1) - element - if element % 2 == 0: + 
for dim_len in non_slice_dims_shape: + diff = _shift_bit_length(dim_len + 1) - dim_len + if dim_len % 2 == 0: pad_width = diff // 2 pad_width = (pad_width, pad_width) else: From 9bac3dd3ee7b6753c4e50a7b7d680cef988e08d1 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 19 Sep 2024 15:47:53 +0100 Subject: [PATCH 03/19] Move unpadded input size calculation to top of estimator --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 6231b178a..90f06114e 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -61,6 +61,9 @@ def _calc_memory_bytes_paganin_filter_tomopy( ) -> Tuple[int, int]: from httomolibgpu.prep.phase import _shift_bit_length + # Input (unpadded) + unpadded_in_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize + # estimate padding size here based on non_slice dimensions pad_tup = [] for dim_len in non_slice_dims_shape: @@ -75,8 +78,6 @@ def _calc_memory_bytes_paganin_filter_tomopy( pad_width = (left_pad, right_pad) pad_tup.append(pad_width) - input_size = np.prod(non_slice_dims_shape) * dtype.itemsize - in_slice_size = ( (non_slice_dims_shape[0] + pad_tup[0][0] + pad_tup[0][1]) * (non_slice_dims_shape[1] + pad_tup[1][0] + pad_tup[1][1]) @@ -96,7 +97,7 @@ def _calc_memory_bytes_paganin_filter_tomopy( res_slice = grid_size tot_memory_bytes = int( - input_size + unpadded_in_slice_size + in_slice_size + out_slice_size + 2 * complex_slice From 6c48494c21be8d7c96da058d74e3b46e57a9f54e Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 19 Sep 2024 15:52:26 +0100 Subject: [PATCH 04/19] Rename var to explicitly mention it accounts for padding --- 
.../external/httomolibgpu/supporting_funcs/prep/phase.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 90f06114e..725ec71b6 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -78,7 +78,8 @@ def _calc_memory_bytes_paganin_filter_tomopy( pad_width = (left_pad, right_pad) pad_tup.append(pad_width) - in_slice_size = ( + # Padded input + padded_in_slice_size = ( (non_slice_dims_shape[0] + pad_tup[0][0] + pad_tup[0][1]) * (non_slice_dims_shape[1] + pad_tup[1][0] + pad_tup[1][1]) * dtype.itemsize @@ -90,7 +91,7 @@ def _calc_memory_bytes_paganin_filter_tomopy( ) # FFT needs complex inputs, so copy to complex happens first - complex_slice = in_slice_size / dtype.itemsize * np.complex64().nbytes + complex_slice = padded_in_slice_size / dtype.itemsize * np.complex64().nbytes fftplan_slice = complex_slice grid_size = np.prod(non_slice_dims_shape) * np.float32().nbytes filter_size = grid_size @@ -98,7 +99,7 @@ def _calc_memory_bytes_paganin_filter_tomopy( tot_memory_bytes = int( unpadded_in_slice_size - + in_slice_size + + padded_in_slice_size + out_slice_size + 2 * complex_slice + 0.5 * fftplan_slice From a358662f8d66e16f11c25f7f319d5576cc14028f Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 19 Sep 2024 15:56:34 +0100 Subject: [PATCH 05/19] Move var accounting for cast to `complex64` to match ordering in method code --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 
725ec71b6..cd010ae1d 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -84,14 +84,16 @@ def _calc_memory_bytes_paganin_filter_tomopy( * (non_slice_dims_shape[1] + pad_tup[1][0] + pad_tup[1][1]) * dtype.itemsize ) + + # Padded input cast to `complex64` + complex_slice = padded_in_slice_size / dtype.itemsize * np.complex64().nbytes + out_slice_size = ( (non_slice_dims_shape[0] + pad_tup[0][0] + pad_tup[0][1]) * (non_slice_dims_shape[1] + pad_tup[1][0] + pad_tup[1][1]) * dtype.itemsize ) - # FFT needs complex inputs, so copy to complex happens first - complex_slice = padded_in_slice_size / dtype.itemsize * np.complex64().nbytes fftplan_slice = complex_slice grid_size = np.prod(non_slice_dims_shape) * np.float32().nbytes filter_size = grid_size From e370bec70659b64353400c54efdbcfe97ee42d4c Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 19 Sep 2024 16:26:20 +0100 Subject: [PATCH 06/19] Estimate 2D FFT plan using cufft plan estimator --- .../httomolibgpu/supporting_funcs/prep/phase.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index cd010ae1d..3595fda87 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -24,6 +24,8 @@ from typing import Tuple import numpy as np +from httomo.cufft import CufftType, cufft_estimate_2d + __all__ = [ "_calc_memory_bytes_paganin_filter_savu", "_calc_memory_bytes_paganin_filter_tomopy", @@ -88,13 +90,21 @@ def _calc_memory_bytes_paganin_filter_tomopy( # Padded input cast to `complex64` complex_slice = padded_in_slice_size / dtype.itemsize * 
np.complex64().nbytes + # Plan size for 2D FFT + ny = non_slice_dims_shape[0] + pad_tup[0][0] + pad_tup[0][1] + nx = non_slice_dims_shape[1] + pad_tup[1][0] + pad_tup[1][1] + fftplan_slice_size = cufft_estimate_2d( + nx=nx, + ny=ny, + fft_type=CufftType.CUFFT_C2C, + ) + out_slice_size = ( (non_slice_dims_shape[0] + pad_tup[0][0] + pad_tup[0][1]) * (non_slice_dims_shape[1] + pad_tup[1][0] + pad_tup[1][1]) * dtype.itemsize ) - fftplan_slice = complex_slice grid_size = np.prod(non_slice_dims_shape) * np.float32().nbytes filter_size = grid_size res_slice = grid_size @@ -102,9 +112,9 @@ def _calc_memory_bytes_paganin_filter_tomopy( tot_memory_bytes = int( unpadded_in_slice_size + padded_in_slice_size + + fftplan_slice_size + out_slice_size + 2 * complex_slice - + 0.5 * fftplan_slice + 2 * res_slice ) subtract_bytes = int(filter_size + grid_size) From 2d98c07a578aeb3b28fdbb3ea12bd6b364a185a1 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Fri, 20 Sep 2024 12:37:45 +0100 Subject: [PATCH 07/19] Remove unnecessary out slice size in estimation The fact that the 2D FFT used in `paganin_filter_tomopy()` is C2C, in conjunction with the use of `overwrite_x=True`, means that the result is able to be written into the input array. Thus, no new array is created to hold the result of the 2D FFT. 
--- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 3595fda87..0bf590753 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -99,12 +99,6 @@ def _calc_memory_bytes_paganin_filter_tomopy( fft_type=CufftType.CUFFT_C2C, ) - out_slice_size = ( - (non_slice_dims_shape[0] + pad_tup[0][0] + pad_tup[0][1]) - * (non_slice_dims_shape[1] + pad_tup[1][0] + pad_tup[1][1]) - * dtype.itemsize - ) - grid_size = np.prod(non_slice_dims_shape) * np.float32().nbytes filter_size = grid_size res_slice = grid_size @@ -113,7 +107,6 @@ def _calc_memory_bytes_paganin_filter_tomopy( unpadded_in_slice_size + padded_in_slice_size + fftplan_slice_size - + out_slice_size + 2 * complex_slice + 2 * res_slice ) From 88e9d1474be531c430ee7316cad6d0498ba0368b Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Fri, 20 Sep 2024 13:04:45 +0100 Subject: [PATCH 08/19] Remove unnecessary complex slice doubling in estimation Also move variable in final sum to match the order in which the allocations appear in the method. 
--- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 0bf590753..76463681e 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -106,8 +106,8 @@ def _calc_memory_bytes_paganin_filter_tomopy( tot_memory_bytes = int( unpadded_in_slice_size + padded_in_slice_size + + complex_slice + fftplan_slice_size - + 2 * complex_slice + 2 * res_slice ) subtract_bytes = int(filter_size + grid_size) From d509cc43576132b0437cbb3bdbf17a3bdc855bb5 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Fri, 20 Sep 2024 15:36:46 +0100 Subject: [PATCH 09/19] Account for deallocated padded float32 array in memory estimation The padded `float32` array is cast to `complex64`, and the variable originally assigned to the `float32` version is reassigned to the `complex64` version. The reassigned variable was the only thing referring to the `float32` array, so its ref count drops to 0 and the array is deallocated. 
The padded `float32` array gets deallocated early enough in the method that it doesn't contribute to the *peak* memory usage of the paganin filter method, which was verified by checking: - the output of `LineProfileHook` when running the method - the allocations and deallocations reported by the custom `MaxMemoryHook` in the tests during the method's execution --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 76463681e..2e3c36344 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -107,6 +107,9 @@ def _calc_memory_bytes_paganin_filter_tomopy( unpadded_in_slice_size + padded_in_slice_size + complex_slice + # The padded float32 array is deallocated when a copy is made when casting to complex64 + # and the variable `padded_tomo` is reassigned to the complex64 version + - padded_in_slice_size + fftplan_slice_size + 2 * res_slice ) From fdf58a463b608e765e0e6491d4ead2e5a17cd694 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Fri, 20 Sep 2024 15:42:58 +0100 Subject: [PATCH 10/19] Include padding in reciprocal grid size calculation The method's code defines the reciprocal grid based on the padded projections `complex64` array shape. However, the memory estimator was incorrectly calculating the number of bytes the reciprocal grid takes using the unpadded `float32` array shape. 
--- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 2e3c36344..d9985c135 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -99,7 +99,8 @@ def _calc_memory_bytes_paganin_filter_tomopy( fft_type=CufftType.CUFFT_C2C, ) - grid_size = np.prod(non_slice_dims_shape) * np.float32().nbytes + # Size of "reciprocal grid" generated, based on padded projections shape + grid_size = np.prod((ny, nx)) * np.float32().nbytes filter_size = grid_size res_slice = grid_size From 135c0c2b6832543505ec873b9100fe712636fb24 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Fri, 20 Sep 2024 16:01:40 +0100 Subject: [PATCH 11/19] Modify value + rename var accounting for cropped float32 IFFT result --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index d9985c135..ad89bf83c 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -102,7 +102,9 @@ def _calc_memory_bytes_paganin_filter_tomopy( # Size of "reciprocal grid" generated, based on padded projections shape grid_size = np.prod((ny, nx)) * np.float32().nbytes filter_size = grid_size - res_slice = grid_size + + # Size of cropped/unpadded + cast to float32 result of 2D IFFT + cropped_float32_res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes 
tot_memory_bytes = int( unpadded_in_slice_size @@ -112,7 +114,7 @@ def _calc_memory_bytes_paganin_filter_tomopy( # and the variable `padded_tomo` is reassigned to the complex64 version - padded_in_slice_size + fftplan_slice_size - + 2 * res_slice + + 2 * cropped_float32_res_slice ) subtract_bytes = int(filter_size + grid_size) From f23a0623157772a4999d10d95729e024a16a120a Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Fri, 20 Sep 2024 16:06:58 +0100 Subject: [PATCH 12/19] Add extra var for clarity on where another allocation occurs --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index ad89bf83c..9cd5763f6 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -106,6 +106,9 @@ def _calc_memory_bytes_paganin_filter_tomopy( # Size of cropped/unpadded + cast to float32 result of 2D IFFT cropped_float32_res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes + # Size of negative log of cropped float32 result of 2D IFFT + negative_log_slice = cropped_float32_res_slice + tot_memory_bytes = int( unpadded_in_slice_size + padded_in_slice_size @@ -114,7 +117,8 @@ def _calc_memory_bytes_paganin_filter_tomopy( # and the variable `padded_tomo` is reassigned to the complex64 version - padded_in_slice_size + fftplan_slice_size - + 2 * cropped_float32_res_slice + + cropped_float32_res_slice + + negative_log_slice ) subtract_bytes = int(filter_size + grid_size) From f145f511034c956e6cd0c56f6e3e9ee19ef42d49 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 23 Sep 2024 11:59:22 +0100 Subject: [PATCH 13/19] Rename var to explicitly mention the absence of padding --- 
.../httomolibgpu/supporting_funcs/prep/phase.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 9cd5763f6..9306e696c 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -39,7 +39,9 @@ def _calc_memory_bytes_paganin_filter_savu( ) -> Tuple[int, int]: pad_x = kwargs["pad_x"] pad_y = kwargs["pad_y"] - input_size = np.prod(non_slice_dims_shape) * dtype.itemsize + + # Input (unpadded) + unpadded_in_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize in_slice_size = ( (non_slice_dims_shape[0] + 2 * pad_y) * (non_slice_dims_shape[1] + 2 * pad_x) @@ -51,7 +53,11 @@ def _calc_memory_bytes_paganin_filter_savu( filter_size = complex_slice res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes tot_memory_bytes = ( - input_size + in_slice_size + complex_slice + fftplan_slice + res_slice + unpadded_in_slice_size + + in_slice_size + + complex_slice + + fftplan_slice + + res_slice ) return (tot_memory_bytes, filter_size) From f1660e3d6a1e787c04d303c7afa591f1621e52e1 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 23 Sep 2024 12:01:55 +0100 Subject: [PATCH 14/19] Rename var to explicitly mention the presence of padding --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 9306e696c..5dc20d126 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ 
b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -42,19 +42,21 @@ def _calc_memory_bytes_paganin_filter_savu( # Input (unpadded) unpadded_in_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize - in_slice_size = ( + + # Padded input + padded_in_slice_size = ( (non_slice_dims_shape[0] + 2 * pad_y) * (non_slice_dims_shape[1] + 2 * pad_x) * dtype.itemsize ) # FFT needs complex inputs, so copy to complex happens first - complex_slice = in_slice_size / dtype.itemsize * np.complex64().nbytes + complex_slice = padded_in_slice_size / dtype.itemsize * np.complex64().nbytes fftplan_slice = complex_slice filter_size = complex_slice res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes tot_memory_bytes = ( unpadded_in_slice_size - + in_slice_size + + padded_in_slice_size + complex_slice + fftplan_slice + res_slice From 8e4aefc4590032dd5c7ea0874b74a5960a50b617 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 23 Sep 2024 12:14:34 +0100 Subject: [PATCH 15/19] Separate estimation of cast to `complex64` from FFT plan estimation --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 5dc20d126..c45164540 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -49,8 +49,10 @@ def _calc_memory_bytes_paganin_filter_savu( * (non_slice_dims_shape[1] + 2 * pad_x) * dtype.itemsize ) - # FFT needs complex inputs, so copy to complex happens first + + # Padded input cast to `complex64` complex_slice = padded_in_slice_size / dtype.itemsize * np.complex64().nbytes + fftplan_slice = complex_slice filter_size = complex_slice res_slice = 
np.prod(non_slice_dims_shape) * np.float32().nbytes From df3d9b42b65da8c7ac55e6f5e9657d2d057d1153 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 23 Sep 2024 12:20:44 +0100 Subject: [PATCH 16/19] Estimate 2D FFT plan using cufft plan estimator --- .../supporting_funcs/prep/phase.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index c45164540..08b7c2545 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -44,23 +44,31 @@ def _calc_memory_bytes_paganin_filter_savu( unpadded_in_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize # Padded input + padded_non_slice_dims_shape = ( + non_slice_dims_shape[0] + 2 * pad_y, + non_slice_dims_shape[1] + 2 * pad_x, + ) padded_in_slice_size = ( - (non_slice_dims_shape[0] + 2 * pad_y) - * (non_slice_dims_shape[1] + 2 * pad_x) - * dtype.itemsize + padded_non_slice_dims_shape[0] * padded_non_slice_dims_shape[1] * dtype.itemsize ) # Padded input cast to `complex64` complex_slice = padded_in_slice_size / dtype.itemsize * np.complex64().nbytes - fftplan_slice = complex_slice + # Plan size for 2D FFT + fftplan_slice_size = cufft_estimate_2d( + nx=padded_non_slice_dims_shape[1], + ny=padded_non_slice_dims_shape[0], + fft_type=CufftType.CUFFT_C2C, + ) + filter_size = complex_slice res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes tot_memory_bytes = ( unpadded_in_slice_size + padded_in_slice_size + complex_slice - + fftplan_slice + + fftplan_slice_size + res_slice ) return (tot_memory_bytes, filter_size) From a090d1ecc83596433b6e4d1d853cd1bdab4ff1e6 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 23 Sep 2024 12:24:14 +0100 Subject: [PATCH 17/19] 
Rename var accounting for cropped float32 IFFT result --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 08b7c2545..90d89a324 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -63,13 +63,16 @@ def _calc_memory_bytes_paganin_filter_savu( ) filter_size = complex_slice - res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes + + # Size of cropped/unpadded + cast to float32 result of 2D IFFT + cropped_float32_res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes + tot_memory_bytes = ( unpadded_in_slice_size + padded_in_slice_size + complex_slice + fftplan_slice_size - + res_slice + + cropped_float32_res_slice ) return (tot_memory_bytes, filter_size) From 6c7d0760ed03712b7d5e9d2de71705f3362b1962 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 23 Sep 2024 12:26:50 +0100 Subject: [PATCH 18/19] Add code comment for filter size estimated value --- .../external/httomolibgpu/supporting_funcs/prep/phase.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 90d89a324..418e919c5 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -62,6 +62,8 @@ def _calc_memory_bytes_paganin_filter_savu( fft_type=CufftType.CUFFT_C2C, ) + # Shape of 2D filter is the same as the padded `complex64` slice shape, so the size will be + # the same filter_size 
= complex_slice # Size of cropped/unpadded + cast to float32 result of 2D IFFT From b32a2c87e29eab98317846cffa6488379eea7766 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 23 Sep 2024 16:28:05 +0100 Subject: [PATCH 19/19] Account for negligible FFT plan size affecting peak memory usage --- .../supporting_funcs/prep/phase.py | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py index 418e919c5..aa837f55b 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/phase.py @@ -69,13 +69,25 @@ def _calc_memory_bytes_paganin_filter_savu( # Size of cropped/unpadded + cast to float32 result of 2D IFFT cropped_float32_res_slice = np.prod(non_slice_dims_shape) * np.float32().nbytes - tot_memory_bytes = ( - unpadded_in_slice_size - + padded_in_slice_size - + complex_slice - + fftplan_slice_size - + cropped_float32_res_slice - ) + # If the FFT plan size is negligible for some reason, this changes where the peak GPU + # memory usage occurs. Hence, the if/else branching below for calculating the total bytes. 
+ NEGLIGIBLE_FFT_PLAN_SIZE = 16 + if fftplan_slice_size < NEGLIGIBLE_FFT_PLAN_SIZE: + tot_memory_bytes = int( + unpadded_in_slice_size + padded_in_slice_size + complex_slice + ) + else: + tot_memory_bytes = int( + unpadded_in_slice_size + + padded_in_slice_size + + complex_slice + # The padded float32 array is deallocated when a copy is made when casting to complex64 + # and the variable `padded_tomo` is reassigned to the complex64 version + - padded_in_slice_size + + fftplan_slice_size + + cropped_float32_res_slice + ) + return (tot_memory_bytes, filter_size) @@ -132,17 +144,26 @@ def _calc_memory_bytes_paganin_filter_tomopy( # Size of negative log of cropped float32 result of 2D IFFT negative_log_slice = cropped_float32_res_slice - tot_memory_bytes = int( - unpadded_in_slice_size - + padded_in_slice_size - + complex_slice - # The padded float32 array is deallocated when a copy is made when casting to complex64 - # and the variable `padded_tomo` is reassigned to the complex64 version - - padded_in_slice_size - + fftplan_slice_size - + cropped_float32_res_slice - + negative_log_slice - ) + # If the FFT plan size is negligible for some reason, this changes where the peak GPU + # memory usage occurs. Hence, the if/else branching below for calculating the total bytes. + NEGLIGIBLE_FFT_PLAN_SIZE = 16 + if fftplan_slice_size < NEGLIGIBLE_FFT_PLAN_SIZE: + tot_memory_bytes = int( + unpadded_in_slice_size + padded_in_slice_size + complex_slice + ) + else: + tot_memory_bytes = int( + unpadded_in_slice_size + + padded_in_slice_size + + complex_slice + # The padded float32 array is deallocated when a copy is made when casting to complex64 + # and the variable `padded_tomo` is reassigned to the complex64 version + - padded_in_slice_size + + fftplan_slice_size + + cropped_float32_res_slice + + negative_log_slice + ) + subtract_bytes = int(filter_size + grid_size) return (tot_memory_bytes, subtract_bytes)