diff --git a/mdio/dataset_factory.h b/mdio/dataset_factory.h
index cdaa8a3..4f30b9a 100644
--- a/mdio/dataset_factory.h
+++ b/mdio/dataset_factory.h
@@ -183,7 +183,7 @@ absl::Status transform_compressor(nlohmann::json& input /*NOLINT*/,
  */
 void transform_shape(
     nlohmann::json& input /*NOLINT*/, nlohmann::json& variable /*NOLINT*/,
-    std::unordered_map<std::string, int>& dimensionMap /*NOLINT*/) {
+    std::unordered_map<std::string, uint64_t>& dimensionMap /*NOLINT*/) {
   if (input["dimensions"][0].is_object()) {
     nlohmann::json shape = nlohmann::json::array();
     for (auto& dimension : input["dimensions"]) {
@@ -262,7 +262,7 @@ absl::Status transform_metadata(const std::string& path,
  */
 tensorstore::Result<nlohmann::json> from_json_to_spec(
     nlohmann::json& json /*NOLINT*/,
-    std::unordered_map<std::string, int>& dimensionMap /*NOLINT*/,
+    std::unordered_map<std::string, uint64_t>& dimensionMap /*NOLINT*/,
     const std::string& path) {
   nlohmann::json variableStub = R"(
     {
@@ -403,12 +403,18 @@ tensorstore::Result<nlohmann::json> from_json_to_spec(
  * @return A map of dimension names to sizes or error if the dimensions are not
  * consistently sized
  */
-tensorstore::Result<std::unordered_map<std::string, int>> get_dimensions(
+tensorstore::Result<std::unordered_map<std::string, uint64_t>> get_dimensions(
     nlohmann::json& spec /*NOLINT*/) {
-  std::unordered_map<std::string, int> dimensions;
+  std::unordered_map<std::string, uint64_t> dimensions;
   for (auto& variable : spec["variables"]) {
     if (variable["dimensions"][0].is_object()) {
       for (auto& dimension : variable["dimensions"]) {
+        if (dimension["size"].get<uint64_t>() > mdio::constants::kMaxSize) {
+          return absl::InvalidArgumentError(
+              "Dimension " + dimension["name"].dump() +
+              " exceeds maximum size of " +
+              std::to_string(mdio::constants::kMaxSize));
+        }
         if (dimensions.count(dimension["name"]) == 0) {
           dimensions[dimension["name"]] = dimension["size"];
         } else {
@@ -447,7 +453,7 @@ Construct(nlohmann::json& spec /*NOLINT*/, const std::string& path) {
     return dimensions.status();
   }
 
-  std::unordered_map<std::string, int> dimensionMap = dimensions.value();
+  std::unordered_map<std::string, uint64_t> dimensionMap = dimensions.value();
 
   std::vector<nlohmann::json> datasetSpec;
   for (auto& variable : spec["variables"]) {
diff --git a/mdio/dataset_factory_test.cc b/mdio/dataset_factory_test.cc
index 7d6f847..1baf703 100644
--- a/mdio/dataset_factory_test.cc
+++ b/mdio/dataset_factory_test.cc
@@ -607,6 +607,31 @@ TEST(Variable, simple) {
   }
 }
 
+TEST(Variable, maxSizeExceeded) {
+  nlohmann::json j = nlohmann::json::parse(manifest);
+  // Set all Variables to exceed the maximum size
+  j["variables"][0]["dimensions"][0]["size"] = 0x7fffffffffffffff;
+  j["variables"][0]["dimensions"][1]["size"] = 0x7fffffffffffffff;
+  j["variables"][1]["dimensions"][0]["size"] = 0x7fffffffffffffff;
+  j["variables"][2]["dimensions"][0]["size"] = 0x7fffffffffffffff;
+
+  auto res = Construct(j, "zarrs/simple_dataset");
+  ASSERT_FALSE(res.status().ok())
+      << "Construction succeeded despite exceeding maximum size";
+}
+
+TEST(Variable, maxSizeReached) {
+  nlohmann::json j = nlohmann::json::parse(manifest);
+  // Set all Variables to reach the maximum size
+  j["variables"][0]["dimensions"][0]["size"] = mdio::constants::kMaxSize;
+  j["variables"][0]["dimensions"][1]["size"] = mdio::constants::kMaxSize;
+  j["variables"][1]["dimensions"][0]["size"] = mdio::constants::kMaxSize;
+  j["variables"][2]["dimensions"][0]["size"] = mdio::constants::kMaxSize;
+
+  auto res = Construct(j, "zarrs/simple_dataset");
+  ASSERT_TRUE(res.status().ok()) << res.status();
+}
+
 TEST(Xarray, open) {
   nlohmann::json j = nlohmann::json::parse(manifest);
   auto res = Construct(j, "zarrs/simple_dataset");
diff --git a/mdio/impl.h b/mdio/impl.h
index 4d48f71..c019186 100644
--- a/mdio/impl.h
+++ b/mdio/impl.h
@@ -114,6 +114,9 @@ constexpr auto kCreateClean =
 /// Create a new file or error if it already exists.
 constexpr auto kCreate = tensorstore::OpenMode::create;
 
+// Tensorstore appears to be imposing a max size of 0x3fffffffffffffff
+constexpr uint64_t kMaxSize = 4611686018427387903;
+
 // Supported dtypes
 constexpr auto kBool = tensorstore::dtype_v<bool>;
 constexpr auto kInt8 = tensorstore::dtype_v<std::int8_t>;
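For reference, 0x3fffffffffffffff is 2^62 − 1 = 4611686018427387903, so the hex value in the comment and the decimal literal agree, and the new `maxSizeReached` test exercises the boundary: the check uses strict `>`, so a dimension of exactly `kMaxSize` still constructs. Below is a minimal standalone sketch of the same rejection logic in isolation, assuming only nlohmann::json and absl; `validate_dimension` is a hypothetical helper for illustration, not part of the mdio API.

```cpp
#include <cstdint>
#include <iostream>
#include <string>

#include <nlohmann/json.hpp>
#include "absl/status/status.h"

// Mirrors the constant added in mdio/impl.h: 0x3fffffffffffffff == 2^62 - 1.
constexpr uint64_t kMaxSize = 4611686018427387903;

// Hypothetical helper showing the same rejection logic the patch adds
// inside get_dimensions().
absl::Status validate_dimension(const nlohmann::json& dimension) {
  if (dimension["size"].get<uint64_t>() > kMaxSize) {
    return absl::InvalidArgumentError(
        "Dimension " + dimension["name"].dump() + " exceeds maximum size of " +
        std::to_string(kMaxSize));
  }
  return absl::OkStatus();
}

int main() {
  // Exactly kMaxSize passes, since the comparison is strictly greater-than...
  nlohmann::json atLimit = {{"name", "inline"}, {"size", kMaxSize}};
  std::cout << validate_dimension(atLimit) << "\n";  // OK

  // ...while int64 max, as used in the maxSizeExceeded test, is rejected.
  nlohmann::json overLimit = {{"name", "inline"},
                              {"size", 0x7fffffffffffffffULL}};
  std::cout << validate_dimension(overLimit) << "\n";  // INVALID_ARGUMENT
  return 0;
}
```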