From 932e1e99139926ee676c1452e67ec4dd43c7fdcf Mon Sep 17 00:00:00 2001 From: Dimitri Vlachos Date: Wed, 5 Feb 2025 15:58:38 +0000 Subject: [PATCH] Refactor h5read to provide shape information and update tests --- include/dx2/h5/h5read_processed.hpp | 99 ++++++++++++++++++++++------- tests/test_read_h5_array.cxx | 57 ++++++++++++----- 2 files changed, 116 insertions(+), 40 deletions(-) diff --git a/include/dx2/h5/h5read_processed.hpp b/include/dx2/h5/h5read_processed.hpp index 628fa7e..34deb8e 100644 --- a/include/dx2/h5/h5read_processed.hpp +++ b/include/dx2/h5/h5read_processed.hpp @@ -10,79 +10,132 @@ #include #include +template struct H5Data { + std::vector data; // Flat data array + std::vector shape; // Dimensions of the data +}; + +template +std::vector read_array_from_h5_file(const std::string &filename, + const std::string &dataset_name) { + H5Data h5_data; + read_array_from_h5_file(filename, dataset_name, h5_data); + return h5_data.data; +} + /** * @brief Reads a dataset from an HDF5 file into an std::vector. * * @tparam T The type of data to read (e.g., int, double). * @param filename The path to the HDF5 file. * @param dataset_name The name of the dataset to read. + * @param h5_data The struct to store the data and shape. * @return A std::vector containing the data from the dataset. * @throws std::runtime_error If the file, dataset, or datatype cannot be opened * or read. */ template -std::vector read_array_from_h5_file(const std::string &filename, - const std::string &dataset_name) { - // Start measuring time +void read_array_from_h5_file(const std::string &filename, + const std::string &dataset_name, + H5Data &h5_data) { auto start_time = std::chrono::high_resolution_clock::now(); - // Open the HDF5 file hid_t file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); if (file < 0) { throw std::runtime_error("Error: Unable to open file: " + filename); } try { - // Open the dataset hid_t dataset = H5Dopen(file, dataset_name.c_str(), H5P_DEFAULT); if (dataset < 0) { + H5Fclose(file); throw std::runtime_error("Error: Unable to open dataset: " + dataset_name); } try { - // Get the datatype and check size - hid_t datatype = H5Dget_type(dataset); - size_t datatype_size = H5Tget_size(datatype); - if (datatype_size != sizeof(T)) { + hid_t dataspace = H5Dget_space(dataset); + if (dataspace < 0) { + H5Dclose(dataset); + H5Fclose(file); throw std::runtime_error( - "Error: Dataset type size does not match expected type size."); + "Error: Unable to get dataspace for dataset: " + dataset_name); } - // Get the dataspace and the number of elements - hid_t dataspace = H5Dget_space(dataset); + int ndims = H5Sget_simple_extent_ndims(dataspace); + if (ndims < 0) { + H5Sclose(dataspace); + H5Dclose(dataset); + H5Fclose(file); + throw std::runtime_error("Error: Unable to get dataset rank."); + } + + std::vector dims(ndims); + H5Sget_simple_extent_dims(dataspace, dims.data(), nullptr); + + h5_data.shape.assign(dims.begin(), dims.end()); size_t num_elements = H5Sget_simple_extent_npoints(dataspace); + h5_data.data.resize(num_elements); + + // Determine HDF5 type mapping + hid_t native_type; + if constexpr (std::is_same_v) { + native_type = H5T_NATIVE_INT; + } else if constexpr (std::is_same_v) { + native_type = H5T_NATIVE_FLOAT; + } else if constexpr (std::is_same_v) { + native_type = H5T_NATIVE_DOUBLE; + } else if constexpr (std::is_same_v) { + native_type = + H5T_NATIVE_ULONG; // or H5T_NATIVE_UINT64, depending on system + } else { + H5Sclose(dataspace); + H5Dclose(dataset); + H5Fclose(file); + throw std::runtime_error("Unsupported data type for HDF5 reading."); + } - // Allocate a vector to hold the data - std::vector data_out(num_elements); + // Validate dataset type before reading + hid_t dataset_type = H5Dget_type(dataset); + if (H5Tequal(dataset_type, native_type) == 0) { + H5Tclose(dataset_type); + H5Sclose(dataspace); + H5Dclose(dataset); + H5Fclose(file); + throw std::runtime_error( + "Error: Dataset type does not match requested type."); + } + H5Tclose(dataset_type); - // Read the data into the vector - herr_t status = H5Dread(dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, - data_out.data()); + // Read dataset + herr_t status = H5Dread(dataset, native_type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, h5_data.data.data()); if (status < 0) { + H5Sclose(dataspace); + H5Dclose(dataset); + H5Fclose(file); throw std::runtime_error("Error: Unable to read dataset: " + dataset_name); } - // Close the dataset and return the data + // Cleanup + H5Sclose(dataspace); H5Dclose(dataset); H5Fclose(file); - // Log timing auto end_time = std::chrono::high_resolution_clock::now(); double elapsed_time = std::chrono::duration(end_time - start_time).count(); std::cout << "READ TIME for " << dataset_name << " : " << elapsed_time << "s" << std::endl; - return data_out; - } catch (...) { - H5Dclose(dataset); // Ensure dataset is closed in case of failure + H5Dclose(dataset); + H5Fclose(file); throw; } } catch (...) { - H5Fclose(file); // Ensure file is closed in case of failure + H5Fclose(file); throw; } } diff --git a/tests/test_read_h5_array.cxx b/tests/test_read_h5_array.cxx index cedecb3..f4ad775 100644 --- a/tests/test_read_h5_array.cxx +++ b/tests/test_read_h5_array.cxx @@ -30,7 +30,6 @@ class HDF5ReadTest : public ::testing::Test { // Check if the dataset exists hid_t dataset = H5Dopen(file, dataset_path.c_str(), H5P_DEFAULT); if (dataset >= 0) { - // Dataset already exists, close and return H5Dclose(dataset); H5Fclose(file); return; @@ -58,28 +57,35 @@ class HDF5ReadTest : public ::testing::Test { // --------------- read_array_from_h5_file TESTS --------------- #pragma region read_array_from_h5_file tests +// Test reading double array and validating shape TEST_F(HDF5ReadTest, ReadDoubleArrayFromH5) { std::string array_name = "/dials/processing/group_0/xyzobs.px.value"; - // Read array from the test HDF5 file - std::vector xyzobs_px = - read_array_from_h5_file(test_file_path, array_name); + H5Data h5_data; + read_array_from_h5_file(test_file_path, array_name, h5_data); + + EXPECT_FALSE(h5_data.data.empty()); + EXPECT_EQ(h5_data.shape.size(), 2); + EXPECT_EQ(h5_data.shape[1], 3); // Check a specific value double expected_value = 528.86470588235295; - EXPECT_EQ(xyzobs_px[10], expected_value); + EXPECT_EQ(h5_data.data[10], expected_value); } +// Test reading size_t array and validating shape TEST_F(HDF5ReadTest, ReadSizeTArrayFromH5) { std::string flags_name = "/dials/processing/group_0/flags"; - // Read array from the test HDF5 file - std::vector flags_array = - read_array_from_h5_file(test_file_path, flags_name); + H5Data h5_data; + read_array_from_h5_file(test_file_path, flags_name, h5_data); + + EXPECT_FALSE(h5_data.data.empty()); + EXPECT_EQ(h5_data.shape.size(), 1); // Check a specific value std::size_t expected_flag_value = 32; - EXPECT_EQ(flags_array[5], expected_flag_value); + EXPECT_EQ(h5_data.data[5], expected_flag_value); } // Test reading from a non-existent file @@ -87,7 +93,8 @@ TEST_F(HDF5ReadTest, ReadFromNonExistentFile) { std::string invalid_file = "invalid_file.h5"; std::string dataset_name = "/some/dataset"; - EXPECT_THROW(read_array_from_h5_file(invalid_file, dataset_name), + H5Data h5_data; + EXPECT_THROW(read_array_from_h5_file(invalid_file, dataset_name, h5_data), std::runtime_error); } @@ -95,25 +102,41 @@ TEST_F(HDF5ReadTest, ReadFromNonExistentFile) { TEST_F(HDF5ReadTest, ReadNonExistentDataset) { std::string invalid_dataset = "/this/does/not/exist"; - EXPECT_THROW(read_array_from_h5_file(test_file_path, invalid_dataset), - std::runtime_error); + H5Data h5_data; + EXPECT_THROW( + read_array_from_h5_file(test_file_path, invalid_dataset, h5_data), + std::runtime_error); } // Test reading an empty dataset TEST_F(HDF5ReadTest, ReadEmptyDataset) { std::string empty_dataset = "/dials/processing/empty_dataset"; - std::vector result = - read_array_from_h5_file(test_file_path, empty_dataset); - EXPECT_TRUE(result.empty()) << "Expected an empty vector for empty dataset."; + H5Data h5_data; + read_array_from_h5_file(test_file_path, empty_dataset, h5_data); + + EXPECT_TRUE(h5_data.data.empty()) + << "Expected an empty vector for empty dataset."; +} + +// Test reading a multi-dimensional dataset +TEST_F(HDF5ReadTest, ReadMultiDimensionalArrayFromH5) { + std::string dataset_name = "/dials/processing/group_0/xyzobs.px.value"; + + H5Data h5_data; + read_array_from_h5_file(test_file_path, dataset_name, h5_data); + + EXPECT_EQ(h5_data.shape.size(), 2); + EXPECT_EQ(h5_data.shape[1], 3); // Ensure the last dimension is 3 } // Test data type mismatch TEST_F(HDF5ReadTest, ReadWithIncorrectType) { std::string dataset = "/dials/processing/group_0/xyzobs.px.value"; - // Try to read a float dataset as int (should fail) - EXPECT_THROW(read_array_from_h5_file(test_file_path, dataset), + // Try to read a double dataset as int (should fail) + H5Data h5_data; + EXPECT_THROW(read_array_from_h5_file(test_file_path, dataset, h5_data), std::runtime_error); }