Skip to content

Commit

Permalink
Refactor h5read to provide shape information and update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dimitrivlachos committed Feb 6, 2025
1 parent 6700eca commit 9428b32
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 40 deletions.
99 changes: 76 additions & 23 deletions include/dx2/h5/h5read_processed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,79 +10,132 @@
#include <string>
#include <utility>
#include <vector>

/// Contents of one dataset: the elements as a flat array plus the extent of
/// each dimension.
template <typename T>
struct H5Data {
  /// Flat data array.
  std::vector<T> data{};
  /// Dimensions of the data.
  std::vector<size_t> shape{};
};

/**
 * @brief Reads a dataset from an HDF5 file and returns only its flat data.
 *
 * Convenience overload that forwards to the H5Data-filling overload and
 * discards the shape.
 *
 * @tparam T The type of data to read (e.g., int, double).
 * @param filename The path to the HDF5 file.
 * @param dataset_name The name of the dataset to read.
 * @return A std::vector containing the data from the dataset.
 * @throws std::runtime_error Propagated from the H5Data-filling overload.
 */
template <typename T>
std::vector<T> read_array_from_h5_file(const std::string &filename,
                                       const std::string &dataset_name) {
  H5Data<T> h5_data;
  read_array_from_h5_file(filename, dataset_name, h5_data);
  // h5_data.data is a member access, not the name of a local variable, so a
  // plain return would copy the whole buffer; move it out explicitly.
  return std::move(h5_data.data);
}

/**
* @brief Reads a dataset from an HDF5 file into an std::vector.
*
* @tparam T The type of data to read (e.g., int, double).
* @param filename The path to the HDF5 file.
* @param dataset_name The name of the dataset to read.
* @param h5_data The struct to store the data and shape.
* @return A std::vector containing the data from the dataset.
* @throws std::runtime_error If the file, dataset, or datatype cannot be opened
* or read.
*/
template <typename T>
std::vector<T> read_array_from_h5_file(const std::string &filename,
const std::string &dataset_name) {
// Start measuring time
void read_array_from_h5_file(const std::string &filename,
const std::string &dataset_name,
H5Data<T> &h5_data) {
auto start_time = std::chrono::high_resolution_clock::now();

// Open the HDF5 file
hid_t file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
if (file < 0) {
throw std::runtime_error("Error: Unable to open file: " + filename);
}

try {
// Open the dataset
hid_t dataset = H5Dopen(file, dataset_name.c_str(), H5P_DEFAULT);
if (dataset < 0) {
H5Fclose(file);
throw std::runtime_error("Error: Unable to open dataset: " +
dataset_name);
}

try {
// Get the datatype and check size
hid_t datatype = H5Dget_type(dataset);
size_t datatype_size = H5Tget_size(datatype);
if (datatype_size != sizeof(T)) {
hid_t dataspace = H5Dget_space(dataset);
if (dataspace < 0) {
H5Dclose(dataset);
H5Fclose(file);
throw std::runtime_error(
"Error: Dataset type size does not match expected type size.");
"Error: Unable to get dataspace for dataset: " + dataset_name);
}

// Get the dataspace and the number of elements
hid_t dataspace = H5Dget_space(dataset);
int ndims = H5Sget_simple_extent_ndims(dataspace);
if (ndims < 0) {
H5Sclose(dataspace);
H5Dclose(dataset);
H5Fclose(file);
throw std::runtime_error("Error: Unable to get dataset rank.");
}

std::vector<hsize_t> dims(ndims);
H5Sget_simple_extent_dims(dataspace, dims.data(), nullptr);

h5_data.shape.assign(dims.begin(), dims.end());
size_t num_elements = H5Sget_simple_extent_npoints(dataspace);
h5_data.data.resize(num_elements);

// Determine HDF5 type mapping
hid_t native_type;
if constexpr (std::is_same_v<T, int>) {
native_type = H5T_NATIVE_INT;
} else if constexpr (std::is_same_v<T, float>) {
native_type = H5T_NATIVE_FLOAT;
} else if constexpr (std::is_same_v<T, double>) {
native_type = H5T_NATIVE_DOUBLE;
} else if constexpr (std::is_same_v<T, size_t>) {
native_type =
H5T_NATIVE_ULONG; // or H5T_NATIVE_UINT64, depending on system
} else {
H5Sclose(dataspace);
H5Dclose(dataset);
H5Fclose(file);
throw std::runtime_error("Unsupported data type for HDF5 reading.");
}

// Allocate a vector to hold the data
std::vector<T> data_out(num_elements);
// Validate dataset type before reading
hid_t dataset_type = H5Dget_type(dataset);
if (H5Tequal(dataset_type, native_type) == 0) {
H5Tclose(dataset_type);
H5Sclose(dataspace);
H5Dclose(dataset);
H5Fclose(file);
throw std::runtime_error(
"Error: Dataset type does not match requested type.");
}
H5Tclose(dataset_type);

// Read the data into the vector
herr_t status = H5Dread(dataset, datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT,
data_out.data());
// Read dataset
herr_t status = H5Dread(dataset, native_type, H5S_ALL, H5S_ALL,
H5P_DEFAULT, h5_data.data.data());
if (status < 0) {
H5Sclose(dataspace);
H5Dclose(dataset);
H5Fclose(file);
throw std::runtime_error("Error: Unable to read dataset: " +
dataset_name);
}

// Close the dataset and return the data
// Cleanup
H5Sclose(dataspace);
H5Dclose(dataset);
H5Fclose(file);

// Log timing
auto end_time = std::chrono::high_resolution_clock::now();
double elapsed_time =
std::chrono::duration<double>(end_time - start_time).count();
std::cout << "READ TIME for " << dataset_name << " : " << elapsed_time
<< "s" << std::endl;

return data_out;

} catch (...) {
H5Dclose(dataset); // Ensure dataset is closed in case of failure
H5Dclose(dataset);
H5Fclose(file);
throw;
}
} catch (...) {
H5Fclose(file); // Ensure file is closed in case of failure
H5Fclose(file);
throw;
}
}
Expand Down
57 changes: 40 additions & 17 deletions tests/test_read_h5_array.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ class HDF5ReadTest : public ::testing::Test {
// Check if the dataset exists
hid_t dataset = H5Dopen(file, dataset_path.c_str(), H5P_DEFAULT);
if (dataset >= 0) {
// Dataset already exists, close and return
H5Dclose(dataset);
H5Fclose(file);
return;
Expand Down Expand Up @@ -58,62 +57,86 @@ class HDF5ReadTest : public ::testing::Test {
// --------------- read_array_from_h5_file TESTS ---------------
#pragma region read_array_from_h5_file tests

// Test reading double array and validating shape
TEST_F(HDF5ReadTest, ReadDoubleArrayFromH5) {
  const std::string array_name = "/dials/processing/group_0/xyzobs.px.value";

  // Shape-aware overload: fills both the flat data and the dimensions.
  H5Data<double> h5_data;
  read_array_from_h5_file(test_file_path, array_name, h5_data);

  // Fatal assertions guard the indexed accesses below, so a short or
  // mis-shaped read fails the test instead of indexing out of bounds.
  ASSERT_EQ(h5_data.shape.size(), 2u);
  EXPECT_EQ(h5_data.shape[1], 3u);
  ASSERT_GT(h5_data.data.size(), 10u);

  // Check a specific value
  const double expected_value = 528.86470588235295;
  EXPECT_EQ(h5_data.data[10], expected_value);

  // The vector-returning overload must yield the same flat data.
  const std::vector<double> xyzobs_px =
      read_array_from_h5_file<double>(test_file_path, array_name);
  EXPECT_EQ(xyzobs_px, h5_data.data);
}

// Test reading size_t array and validating shape
TEST_F(HDF5ReadTest, ReadSizeTArrayFromH5) {
  const std::string flags_name = "/dials/processing/group_0/flags";

  // Shape-aware overload: fills both the flat data and the dimensions.
  H5Data<std::size_t> h5_data;
  read_array_from_h5_file(test_file_path, flags_name, h5_data);

  EXPECT_EQ(h5_data.shape.size(), 1u);
  // Fatal assertion guards the indexed access below against a short read.
  ASSERT_GT(h5_data.data.size(), 5u);

  // Check a specific value
  const std::size_t expected_flag_value = 32;
  EXPECT_EQ(h5_data.data[5], expected_flag_value);

  // The vector-returning overload must yield the same flat data.
  const std::vector<std::size_t> flags_array =
      read_array_from_h5_file<std::size_t>(test_file_path, flags_name);
  EXPECT_EQ(flags_array, h5_data.data);
}

// Test reading from a non-existent file
TEST_F(HDF5ReadTest, ReadFromNonExistentFile) {
  const std::string invalid_file = "invalid_file.h5";
  const std::string dataset_name = "/some/dataset";

  // Vector-returning overload must report the missing file.
  EXPECT_THROW(read_array_from_h5_file<double>(invalid_file, dataset_name),
               std::runtime_error);

  // Shape-aware overload must report it the same way.
  H5Data<double> h5_data;
  EXPECT_THROW(read_array_from_h5_file(invalid_file, dataset_name, h5_data),
               std::runtime_error);
}

// Test reading a non-existent dataset
TEST_F(HDF5ReadTest, ReadNonExistentDataset) {
  // A dataset path that is not present in the test file.
  std::string missing_path = "/this/does/not/exist";

  // Vector-returning overload.
  EXPECT_THROW(read_array_from_h5_file<double>(test_file_path, missing_path),
               std::runtime_error);

  // Shape-aware overload.
  H5Data<double> sink;
  EXPECT_THROW(read_array_from_h5_file(test_file_path, missing_path, sink),
               std::runtime_error);
}

// Test reading an empty dataset
TEST_F(HDF5ReadTest, ReadEmptyDataset) {
  std::string dataset_path = "/dials/processing/empty_dataset";

  // Vector-returning overload: nothing should come back.
  std::vector<double> flat_values =
      read_array_from_h5_file<double>(test_file_path, dataset_path);
  EXPECT_TRUE(flat_values.empty())
      << "Expected an empty vector for empty dataset.";

  // Shape-aware overload: the data member should be empty as well.
  H5Data<double> loaded;
  read_array_from_h5_file(test_file_path, dataset_path, loaded);
  EXPECT_TRUE(loaded.data.empty())
      << "Expected an empty vector for empty dataset.";
}

// Test reading a multi-dimensional dataset
TEST_F(HDF5ReadTest, ReadMultiDimensionalArrayFromH5) {
  const std::string dataset_name = "/dials/processing/group_0/xyzobs.px.value";

  H5Data<double> h5_data;
  read_array_from_h5_file(test_file_path, dataset_name, h5_data);

  // Fatal assertion: shape[1] below must not be read unless the rank is 2.
  ASSERT_EQ(h5_data.shape.size(), 2u);
  EXPECT_EQ(h5_data.shape[1], 3u); // Ensure the last dimension is 3
}

// Test data type mismatch
TEST_F(HDF5ReadTest, ReadWithIncorrectType) {
  const std::string dataset = "/dials/processing/group_0/xyzobs.px.value";

  // Try to read a double dataset as int (should fail) through the
  // vector-returning overload.
  EXPECT_THROW(read_array_from_h5_file<int>(test_file_path, dataset),
               std::runtime_error);

  // The shape-aware overload must reject the mismatch as well.
  H5Data<int> h5_data;
  EXPECT_THROW(read_array_from_h5_file(test_file_path, dataset, h5_data),
               std::runtime_error);
}

Expand Down

0 comments on commit 9428b32

Please sign in to comment.