Skip to content

Commit

Permalink
Fix dimension size being handled as 32-bit int
Browse files Browse the repository at this point in the history
  • Loading branch information
BrianMichell committed Oct 23, 2024
1 parent aff0cfb commit 23fc159
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 5 deletions.
16 changes: 11 additions & 5 deletions mdio/dataset_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ absl::Status transform_compressor(nlohmann::json& input /*NOLINT*/,
*/
void transform_shape(
nlohmann::json& input /*NOLINT*/, nlohmann::json& variable /*NOLINT*/,
std::unordered_map<std::string, int>& dimensionMap /*NOLINT*/) {
std::unordered_map<std::string, uint64_t>& dimensionMap /*NOLINT*/) {
if (input["dimensions"][0].is_object()) {
nlohmann::json shape = nlohmann::json::array();
for (auto& dimension : input["dimensions"]) {
Expand Down Expand Up @@ -262,7 +262,7 @@ absl::Status transform_metadata(const std::string& path,
*/
tensorstore::Result<nlohmann::json> from_json_to_spec(
nlohmann::json& json /*NOLINT*/,
std::unordered_map<std::string, int>& dimensionMap /*NOLINT*/,
std::unordered_map<std::string, uint64_t>& dimensionMap /*NOLINT*/,
const std::string& path) {
nlohmann::json variableStub = R"(
{
Expand Down Expand Up @@ -403,12 +403,18 @@ tensorstore::Result<nlohmann::json> from_json_to_spec(
* @return A map of dimension names to sizes or error if the dimensions are not
* consistently sized
*/
tensorstore::Result<std::unordered_map<std::string, int>> get_dimensions(
tensorstore::Result<std::unordered_map<std::string, uint64_t>> get_dimensions(
nlohmann::json& spec /*NOLINT*/) {
std::unordered_map<std::string, int> dimensions;
std::unordered_map<std::string, uint64_t> dimensions;
for (auto& variable : spec["variables"]) {
if (variable["dimensions"][0].is_object()) {
for (auto& dimension : variable["dimensions"]) {
if (dimension["size"].get<uint64_t>() > mdio::constants::kMaxSize) {
return absl::InvalidArgumentError(
"Dimension " + dimension["name"].dump() +
" exceeds maximum size of " +
std::to_string(mdio::constants::kMaxSize));
}
if (dimensions.count(dimension["name"]) == 0) {
dimensions[dimension["name"]] = dimension["size"];
} else {
Expand Down Expand Up @@ -447,7 +453,7 @@ Construct(nlohmann::json& spec /*NOLINT*/, const std::string& path) {
return dimensions.status();
}

std::unordered_map<std::string, int> dimensionMap = dimensions.value();
std::unordered_map<std::string, uint64_t> dimensionMap = dimensions.value();

std::vector<nlohmann::json> datasetSpec;
for (auto& variable : spec["variables"]) {
Expand Down
25 changes: 25 additions & 0 deletions mdio/dataset_factory_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,31 @@ TEST(Variable, simple) {
}
}

TEST(Variable, maxSizeExceeded) {
nlohmann::json j = nlohmann::json::parse(manifest);
// Set all Variables to exceed the maximum size
j["variables"][0]["dimensions"][0]["size"] = 0x7fffffffffffffff;
j["variables"][0]["dimensions"][1]["size"] = 0x7fffffffffffffff;
j["variables"][1]["dimensions"][0]["size"] = 0x7fffffffffffffff;
j["variables"][2]["dimensions"][0]["size"] = 0x7fffffffffffffff;

auto res = Construct(j, "zarrs/simple_dataset");
ASSERT_FALSE(res.status().ok())
<< "Construction succeeded despite exceeding maximum size";
}

TEST(Variable, maxSizeReached) {
nlohmann::json j = nlohmann::json::parse(manifest);
// Set all Variables to reach the maximum size
j["variables"][0]["dimensions"][0]["size"] = mdio::constants::kMaxSize;
j["variables"][0]["dimensions"][1]["size"] = mdio::constants::kMaxSize;
j["variables"][1]["dimensions"][0]["size"] = mdio::constants::kMaxSize;
j["variables"][2]["dimensions"][0]["size"] = mdio::constants::kMaxSize;

auto res = Construct(j, "zarrs/simple_dataset");
ASSERT_TRUE(res.status().ok()) << res.status();
}

TEST(Xarray, open) {
nlohmann::json j = nlohmann::json::parse(manifest);
auto res = Construct(j, "zarrs/simple_dataset");
Expand Down
3 changes: 3 additions & 0 deletions mdio/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ constexpr auto kCreateClean =
/// Create a new file or error if it already exists.
constexpr auto kCreate = tensorstore::OpenMode::create;

// Tensorstore appears to be imposing a max size of 0x3fffffffffffffff
constexpr uint64_t kMaxSize = 4611686018427387903;

// Supported dtypes
constexpr auto kBool = tensorstore::dtype_v<mdio::dtypes::bool_t>;
constexpr auto kInt8 = tensorstore::dtype_v<mdio::dtypes::int8_t>;
Expand Down

0 comments on commit 23fc159

Please sign in to comment.