diff --git a/mdio/acceptance_test.cc b/mdio/acceptance_test.cc index db379da..c464291 100644 --- a/mdio/acceptance_test.cc +++ b/mdio/acceptance_test.cc @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include @@ -24,6 +26,58 @@ namespace { +constexpr char PYTHON_EXECUTABLE[] = "python3"; // Don't specify absolute path +constexpr char PROJECT_BASE_PATH_ENV[] = "PROJECT_BASE_PATH"; +constexpr char DEFAULT_BASE_PATH[] = "../.."; +constexpr char ZARR_SCRIPT_RELATIVE_PATH[] = + "/mdio/regression_tests/zarr_compatibility.py"; +constexpr char XARRAY_SCRIPT_RELATIVE_PATH[] = + "/mdio/regression_tests/xarray_compatibility_test.py"; +constexpr char FILE_PATH_BASE[] = "./zarrs/acceptance/"; +constexpr int ERROR_CODE = EXIT_FAILURE; +constexpr int SUCCESS_CODE = EXIT_SUCCESS; + +/** + * @brief Executes the Python regression tests + * + * @param scriptPath Which script to run, should be either + * ZARR_SCRIPT_RELATIVE_PATH or XARRAY_SCRIPT_RELATIVE_PATH + * @param arg The argument to pass to the script + * @return int The status code of the script execution + */ +int executePythonScript(const std::string& scriptPath, + const std::vector& args) { + // Ensure scriptPath and args are sanitized + if (scriptPath.empty() || args.empty()) { + std::cerr << "Error: scriptPath or args are empty." << std::endl; + return ERROR_CODE; + } + + // Ensure the scriptPath is an absolute path or default base path + if (scriptPath[0] != '/' && scriptPath.find(DEFAULT_BASE_PATH) != 0) { + std::cerr + << "Error: PROJECT_BASE_PATH must be an absolute path or not be set." + << std::endl; + return ERROR_CODE; + } + + // Prepare arguments for execvp + std::vector execArgs = {PYTHON_EXECUTABLE, scriptPath.c_str()}; + for (const auto& arg : args) { + execArgs.push_back(arg.c_str()); + } + execArgs.push_back(nullptr); + + // Execute the Python script + if (execvp(execArgs[0], const_cast(execArgs.data())) == -1) { + perror("execvp failed"); + return ERROR_CODE; + } + + return SUCCESS_CODE; // This line will never be reached if execvp is + // successful +} + namespace VariableTesting { using float16_t = mdio::dtypes::float_16_t; @@ -567,41 +621,63 @@ TEST(Variable, sliceByDimIdx) { } TEST(Variable, zarrCompatibility) { - const char* basePath = std::getenv("PROJECT_BASE_PATH"); + const char* basePath = std::getenv(PROJECT_BASE_PATH_ENV); if (!basePath) { std::cout << "PROJECT_BASE_PATH environment variable not set. Expecting to " "be in the 'build/mdio' directory." << std::endl; - basePath = "../.."; + basePath = DEFAULT_BASE_PATH; } - std::string srcPath = - std::string(basePath) + "/mdio/regression_tests/zarr_compatibility.py"; - std::string filePathBase = "./zarrs/acceptance/"; - std::string command = "python3 " + srcPath + " " + filePathBase; - - std::string i2 = command + "i2"; - std::string i4 = command + "i4"; - std::string i8 = command + "i8"; - std::string f2 = command + "f2"; - std::string f4 = command + "f4"; - std::string f8 = command + "f8"; - std::string voided = command + "voided"; - - EXPECT_TRUE(std::system(i2.c_str()) == 0) - << "Failed to read i2\n\tMore detailed error expected above..."; - EXPECT_TRUE(std::system(i4.c_str()) == 0) - << "Failed to read i4\n\tMore detailed error expected above..."; - EXPECT_TRUE(std::system(i8.c_str()) == 0) - << "Failed to read i8\n\tMore detailed error expected above..."; - EXPECT_TRUE(std::system(f2.c_str()) == 0) - << "Failed to read f2\n\tMore detailed error expected above..."; - EXPECT_TRUE(std::system(f4.c_str()) == 0) - << "Failed to read f4\n\tMore detailed error expected above..."; - EXPECT_TRUE(std::system(f8.c_str()) == 0) - << "Failed to read f8\n\tMore detailed error expected above..."; - EXPECT_TRUE(std::system(voided.c_str()) == 0) - << "Failed to read voided\n\tMore detailed error expected above..."; + // Resolve the absolute path for the script + std::string srcPath = std::string(basePath) + ZARR_SCRIPT_RELATIVE_PATH; + + // Ensure that srcPath is valid and points to an existing file + if (access(srcPath.c_str(), F_OK) == -1) { + std::cerr << "Error: Python script not found at " << srcPath << std::endl; + FAIL() << "Script not found: " << srcPath; + } + + std::vector args = {"i2", "i4", "i8", "f2", + "f4", "f8", "voided"}; + std::vector pids; + + for (const auto& arg : args) { + pid_t pid = fork(); + if (pid == 0) { + // Child process + int result = executePythonScript(srcPath, {FILE_PATH_BASE + arg}); + if (result == 0xfd00) { // 0xfd from Python is 0xfd00 in C++ + GTEST_SKIP() << "Zarr compatibility skipped due to import error for " + "required library"; + exit(SUCCESS_CODE); + } + exit(result); + } else if (pid > 0) { + // Parent process + pids.push_back(pid); + } else { + // Fork failed + perror("fork failed"); + FAIL() << "fork failed"; + } + } + + // Wait for all child processes + for (pid_t pid : pids) { + int status; + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid failed"); + FAIL() << "waitpid failed"; + } + if (WIFEXITED(status) && WEXITSTATUS(status) == 0xfd00) { + GTEST_SKIP() << "Zarr compatibility skipped due to import error for " + "required library"; + } + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Failed to read one of the arguments\n\tMore detailed error " + "expected above..."; + } } TEST(Variable, dimensionUnits) { @@ -1650,32 +1726,62 @@ TEST(Dataset, isel) { } TEST(Dataset, xarrayCompatible) { - const char* basePath = std::getenv("PROJECT_BASE_PATH"); + const char* basePath = std::getenv(PROJECT_BASE_PATH_ENV); if (!basePath) { std::cout << "PROJECT_BASE_PATH environment variable not set. Expecting to " "be in the 'build/mdio' directory." << std::endl; - basePath = "../.."; + basePath = DEFAULT_BASE_PATH; + } + + // Resolve the absolute path for the script + std::string srcPath = std::string(basePath) + XARRAY_SCRIPT_RELATIVE_PATH; + + // Ensure that srcPath is valid and points to an existing file + if (access(srcPath.c_str(), F_OK) == -1) { + std::cerr << "Error: Python script not found at " << srcPath << std::endl; + FAIL() << "Script not found: " << srcPath; } - std::string srcPath = std::string(basePath) + - "/mdio/regression_tests/xarray_compatibility_test.py"; - std::string datasetPath = "./zarrs/acceptance"; - std::string command = "python3 " + srcPath + " " + datasetPath + " False"; - int status = system(command.c_str()); - if (status == 0xfd00) { // 0xfd from Python is 0xfd00 in C++ - GTEST_SKIP() - << "Xarray compatibility skipped due to import error for xarray"; + std::vector metadataOptions = {"False", "True"}; + std::vector pids; + + for (const auto& option : metadataOptions) { + pid_t pid = fork(); + if (pid == 0) { + // Child process + int result = executePythonScript(srcPath, {FILE_PATH_BASE, option}); + if (result == 0xfd00) { // 0xfd from Python is 0xfd00 in C++ + GTEST_SKIP() + << "Xarray compatibility skipped due to import error for xarray"; + exit(SUCCESS_CODE); + } + exit(result); + } else if (pid > 0) { + // Parent process + pids.push_back(pid); + } else { + // Fork failed + perror("fork failed"); + FAIL() << "fork failed"; + } + } + + // Wait for all child processes + for (pid_t pid : pids) { + int status; + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid failed"); + FAIL() << "waitpid failed"; + } + if (WIFEXITED(status) && WEXITSTATUS(status) == 0xfd00) { + GTEST_SKIP() + << "Xarray compatibility skipped due to import error for xarray"; + } + ASSERT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "xarray compatibility test failed with one of the metadata " + "options\n\tThere was some expected output above..."; } - ASSERT_TRUE(status == 0) - << "xarray compatibility test failed without consolidated " - "metadata\n\tThere was some expected output above..."; - - command = "python3 " + srcPath + " " + datasetPath + " True"; - status = system(command.c_str()); - ASSERT_TRUE(status == 0) - << "xarray compatibility test failed with consolidated metadata\n\tThere " - "was some expected output above..."; } TEST(Dataset, listVars) { diff --git a/mdio/dataset_factory.h b/mdio/dataset_factory.h index cdaa8a3..96b5b8a 100644 --- a/mdio/dataset_factory.h +++ b/mdio/dataset_factory.h @@ -22,6 +22,7 @@ #include #include "mdio/dataset_validator.h" +#include "mdio/impl.h" // #include "tensorstore/tensorstore.h" #include "absl/strings/escaping.h" @@ -183,7 +184,7 @@ absl::Status transform_compressor(nlohmann::json& input /*NOLINT*/, */ void transform_shape( nlohmann::json& input /*NOLINT*/, nlohmann::json& variable /*NOLINT*/, - std::unordered_map& dimensionMap /*NOLINT*/) { + std::unordered_map& dimensionMap /*NOLINT*/) { if (input["dimensions"][0].is_object()) { nlohmann::json shape = nlohmann::json::array(); for (auto& dimension : input["dimensions"]) { @@ -262,7 +263,7 @@ absl::Status transform_metadata(const std::string& path, */ tensorstore::Result from_json_to_spec( nlohmann::json& json /*NOLINT*/, - std::unordered_map& dimensionMap /*NOLINT*/, + std::unordered_map& dimensionMap /*NOLINT*/, const std::string& path) { nlohmann::json variableStub = R"( { @@ -403,12 +404,18 @@ tensorstore::Result from_json_to_spec( * @return A map of dimension names to sizes or error if the dimensions are not * consistently sized */ -tensorstore::Result> get_dimensions( +tensorstore::Result> get_dimensions( nlohmann::json& spec /*NOLINT*/) { - std::unordered_map dimensions; + std::unordered_map dimensions; for (auto& variable : spec["variables"]) { if (variable["dimensions"][0].is_object()) { for (auto& dimension : variable["dimensions"]) { + if (dimension["size"].get() > mdio::constants::kMaxSize) { + return absl::InvalidArgumentError( + "Dimension " + dimension["name"].dump() + + " exceeds maximum size of " + + std::to_string(mdio::constants::kMaxSize)); + } if (dimensions.count(dimension["name"]) == 0) { dimensions[dimension["name"]] = dimension["size"]; } else { @@ -447,7 +454,7 @@ Construct(nlohmann::json& spec /*NOLINT*/, const std::string& path) { return dimensions.status(); } - std::unordered_map dimensionMap = dimensions.value(); + std::unordered_map dimensionMap = dimensions.value(); std::vector datasetSpec; for (auto& variable : spec["variables"]) { diff --git a/mdio/dataset_factory_test.cc b/mdio/dataset_factory_test.cc index 7d6f847..1baf703 100644 --- a/mdio/dataset_factory_test.cc +++ b/mdio/dataset_factory_test.cc @@ -607,6 +607,31 @@ TEST(Variable, simple) { } } +TEST(Variable, maxSizeExceeded) { + nlohmann::json j = nlohmann::json::parse(manifest); + // Set all Variables to exceed the maximum size + j["variables"][0]["dimensions"][0]["size"] = 0x7fffffffffffffff; + j["variables"][0]["dimensions"][1]["size"] = 0x7fffffffffffffff; + j["variables"][1]["dimensions"][0]["size"] = 0x7fffffffffffffff; + j["variables"][2]["dimensions"][0]["size"] = 0x7fffffffffffffff; + + auto res = Construct(j, "zarrs/simple_dataset"); + ASSERT_FALSE(res.status().ok()) + << "Construction succeeded despite exceeding maximum size"; +} + +TEST(Variable, maxSizeReached) { + nlohmann::json j = nlohmann::json::parse(manifest); + // Set all Variables to reach the maximum size + j["variables"][0]["dimensions"][0]["size"] = mdio::constants::kMaxSize; + j["variables"][0]["dimensions"][1]["size"] = mdio::constants::kMaxSize; + j["variables"][1]["dimensions"][0]["size"] = mdio::constants::kMaxSize; + j["variables"][2]["dimensions"][0]["size"] = mdio::constants::kMaxSize; + + auto res = Construct(j, "zarrs/simple_dataset"); + ASSERT_TRUE(res.status().ok()) << res.status(); +} + TEST(Xarray, open) { nlohmann::json j = nlohmann::json::parse(manifest); auto res = Construct(j, "zarrs/simple_dataset"); diff --git a/mdio/impl.h b/mdio/impl.h index 4d48f71..c019186 100644 --- a/mdio/impl.h +++ b/mdio/impl.h @@ -114,6 +114,9 @@ constexpr auto kCreateClean = /// Create a new file or error if it already exists. constexpr auto kCreate = tensorstore::OpenMode::create; +// Tensorstore appears to be imposing a max size of 0x3fffffffffffffff +constexpr uint64_t kMaxSize = 4611686018427387903; + // Supported dtypes constexpr auto kBool = tensorstore::dtype_v; constexpr auto kInt8 = tensorstore::dtype_v;