Commit

Merge branch 'main' into 120_slice_converter

BrianMichell authored Sep 19, 2024
2 parents dae2796 + 2c74346 commit 7ada18c
Showing 5 changed files with 121 additions and 54 deletions.
32 changes: 17 additions & 15 deletions mdio/acceptance_test.cc
@@ -461,20 +461,22 @@ TEST(Variable, optionalAttrs) {
   ASSERT_TRUE(f8.status().ok()) << f8.status();
   ASSERT_TRUE(voided.status().ok()) << voided.status();

-  EXPECT_EQ(i2.value().getMetadata()["attributes"]["foo"], "bar")
-      << i2.value().getMetadata();
-  EXPECT_EQ(i4.value().getMetadata()["attributes"]["foo"], "bar")
-      << i4.value().getMetadata();
-  EXPECT_EQ(i8.value().getMetadata()["attributes"]["foo"], "bar")
-      << i8.value().getMetadata();
-  EXPECT_EQ(f2.value().getMetadata()["attributes"]["foo"], "bar")
-      << f2.value().getMetadata();
-  EXPECT_EQ(f4.value().getMetadata()["attributes"]["foo"], "bar")
-      << f4.value().getMetadata();
-  EXPECT_EQ(f8.value().getMetadata()["attributes"]["foo"], "bar")
-      << f8.value().getMetadata();
-  EXPECT_EQ(voided.value().getMetadata()["attributes"]["foo"], "bar")
-      << voided.value().getMetadata();
+  std::cout << i2.value().GetAttributes().dump(4) << std::endl;
+
+  EXPECT_EQ(i2.value().GetAttributes()["attributes"]["foo"], "bar")
+      << i2.value().GetAttributes();
+  EXPECT_EQ(i4.value().GetAttributes()["attributes"]["foo"], "bar")
+      << i4.value().GetAttributes();
+  EXPECT_EQ(i8.value().GetAttributes()["attributes"]["foo"], "bar")
+      << i8.value().GetAttributes();
+  EXPECT_EQ(f2.value().GetAttributes()["attributes"]["foo"], "bar")
+      << f2.value().GetAttributes();
+  EXPECT_EQ(f4.value().GetAttributes()["attributes"]["foo"], "bar")
+      << f4.value().GetAttributes();
+  EXPECT_EQ(f8.value().GetAttributes()["attributes"]["foo"], "bar")
+      << f8.value().GetAttributes();
+  EXPECT_EQ(voided.value().GetAttributes()["attributes"]["foo"], "bar")
+      << voided.value().GetAttributes();
 }

 TEST(Variable, namedDimensions) {
@@ -964,7 +966,7 @@ TEST(VariableData, longName) {

 TEST(VariableData, optionalAttrs) {
   auto variableData = getVariable().Read().value();
-  EXPECT_EQ(variableData.metadata["attributes"]["foo"], "bar")
+  EXPECT_EQ(variableData.metadata["metadata"]["attributes"]["foo"], "bar")
       << variableData.metadata.dump(4);
 }

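The updated assertions above read a nested key through chained nlohmann::json
operator[] and pretty-print with dump(4). A minimal self-contained sketch of
that lookup pattern, using plain JSON rather than mdio's Variable type (the
values are illustrative):

#include <cassert>
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
  // Shape mirroring what the assertions expect from GetAttributes().
  nlohmann::json attrs = {{"attributes", {{"foo", "bar"}}}};
  assert(attrs["attributes"]["foo"] == "bar");  // chained operator[] lookup
  std::cout << attrs.dump(4) << std::endl;      // 4-space indented pretty print
  return 0;
}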
22 changes: 14 additions & 8 deletions mdio/dataset.h
@@ -165,17 +165,21 @@ Future<void> write_zmetadata(
   MDIO_ASSIGN_OR_RETURN(zmetadata["metadata"][zarray_key], get_zarray(json))

   nlohmann::json fixedJson = json["attributes"];
-  fixedJson["_ARRAY_DIMENSIONS"] = json["attributes"]["dimension_names"];
+  fixedJson["_ARRAY_DIMENSIONS"] = fixedJson["dimension_names"];
   fixedJson.erase("dimension_names");
   // We do not want to be serializing the variable_name. It should be
   // self-describing
-  fixedJson.erase("variable_name");
+  if (fixedJson.contains("variable_name")) {
+    fixedJson.erase("variable_name");
+  }
   if (fixedJson.contains("long_name") &&
       fixedJson["long_name"].get<std::string>() == "") {
     fixedJson.erase("long_name");
   }
   if (fixedJson.contains("metadata")) {
-    fixedJson["metadata"].erase("chunkGrid");
+    if (fixedJson["metadata"].contains("chunkGrid")) {
+      fixedJson["metadata"].erase("chunkGrid");
+    }
     for (auto& item : fixedJson["metadata"].items()) {
       fixedJson[item.key()] = std::move(item.value());
     }
@@ -216,23 +220,23 @@ Future<void> write_zmetadata(
       tensorstore::InlineExecutor{},
       [zattrs = std::move(zattrs)](const tensorstore::KvStore& kvstore) {
         return tensorstore::kvstore::Write(kvstore, "/.zattrs",
-                                           absl::Cord(zattrs.dump()));
+                                           absl::Cord(zattrs.dump(4)));
       },
       kvs_future);

   auto zmetadata_future = tensorstore::MapFutureValue(
       tensorstore::InlineExecutor{},
       [zmetadata = std::move(zmetadata)](const tensorstore::KvStore& kvstore) {
         return tensorstore::kvstore::Write(kvstore, "/.zmetadata",
-                                           absl::Cord(zmetadata.dump()));
+                                           absl::Cord(zmetadata.dump(4)));
       },
       kvs_future);

   auto zgroup_future = tensorstore::MapFutureValue(
       tensorstore::InlineExecutor{},
       [zgroup = std::move(zgroup)](const tensorstore::KvStore& kvstore) {
         return tensorstore::kvstore::Write(kvstore, "/.zgroup",
-                                           absl::Cord(zgroup.dump()));
+                                           absl::Cord(zgroup.dump(4)));
       },
       kvs_future);

@@ -405,7 +409,7 @@ class Dataset {

   friend std::ostream& operator<<(std::ostream& os, const Dataset& dataset) {
     // Output metadata
-    os << "Metadata: " << dataset.metadata.dump() << "\n";
+    os << "Metadata: " << dataset.metadata.dump(4) << "\n";

     // Output variables
     const auto keys = dataset.variables.get_iterable_accessor();
@@ -1329,7 +1333,9 @@ class Dataset {
         meta.erase("long_name");
       }
       if (meta.contains("metadata")) {
-        meta["metadata"].erase("chunkGrid");  // We never serialize this
+        if (meta["metadata"].contains("chunkGrid")) {
+          meta["metadata"].erase("chunkGrid");  // We never serialize this
+        }
         if (!meta.contains("attributes")) {
           meta["attributes"] = nlohmann::json::object();
         }
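A recurring pattern in this file is wrapping erase() in a contains() guard. A
minimal sketch of the failure mode the guards avoid, in plain nlohmann::json
with illustrative key names: erase(key) throws type_error.307 when called on a
non-object value, and non-const operator[] silently inserts null for a missing
key, so the unguarded chain can throw whenever the optional "metadata" block is
absent.

#include <iostream>
#include <nlohmann/json.hpp>

int main() {
  nlohmann::json fixedJson = {{"attributes", {{"foo", "bar"}}}};  // no "metadata"

  try {
    // operator[] inserts null for the missing key; erase() on null then throws.
    fixedJson["metadata"].erase("chunkGrid");
  } catch (const nlohmann::json::type_error& e) {
    std::cout << "unguarded erase: " << e.what() << std::endl;
  }

  // Guarded form, as in write_zmetadata() above: a no-op when the path is absent.
  if (fixedJson.contains("metadata") &&
      fixedJson["metadata"].contains("chunkGrid")) {
    fixedJson["metadata"].erase("chunkGrid");
  }
  std::cout << fixedJson.dump(4) << std::endl;
  return 0;
}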
16 changes: 9 additions & 7 deletions mdio/dataset_test.cc
@@ -932,20 +932,22 @@ TEST(Dataset, commitMetadata) {
   ASSERT_TRUE(newImageRes.ok()) << newImageRes.status();

   nlohmann::json metadata = newImageRes.value().getMetadata();
-  ASSERT_TRUE(metadata.contains("statsV1"))
+  ASSERT_TRUE(metadata.contains("metadata")) << metadata;
+  ASSERT_TRUE(metadata["metadata"].contains("statsV1"))
       << "Did not find statsV1 in metadata";
-  ASSERT_TRUE(metadata["statsV1"].contains("histogram"))
+  ASSERT_TRUE(metadata["metadata"]["statsV1"].contains("histogram"))
       << "Did not find histogram in statsV1";
-  ASSERT_TRUE(metadata["statsV1"]["histogram"].contains("binCenters"))
+  ASSERT_TRUE(
+      metadata["metadata"]["statsV1"]["histogram"].contains("binCenters"))
       << "Did not find binCenters in histogram";
-  EXPECT_TRUE(metadata["statsV1"]["histogram"]["binCenters"] ==
+  EXPECT_TRUE(metadata["metadata"]["statsV1"]["histogram"]["binCenters"] ==
               std::vector<float>({2, 4, 6}))
       << "Expected binCenters to be [2, 4, 6] but got "
-      << metadata["statsV1"]["histogram"]["binCenters"];
-  EXPECT_TRUE(metadata["statsV1"]["histogram"]["counts"] ==
+      << metadata["metadata"]["statsV1"]["histogram"]["binCenters"];
+  EXPECT_TRUE(metadata["metadata"]["statsV1"]["histogram"]["counts"] ==
               std::vector<float>({10, 15, 20}))
       << "Expected counts to be [10, 15, 20] but got "
-      << metadata["statsV1"]["histogram"]["counts"];
+      << metadata["metadata"]["statsV1"]["histogram"]["counts"];
 }

 TEST(Dataset, commitSlicedMetadata) {
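The assertions above guard each level with contains() before indexing. For
reference, nlohmann::json can express the same lookup as a single JSON-pointer
query with a fallback; this sketch mirrors the test's metadata shape with
made-up values and is not how the test itself is written:

#include <iostream>
#include <vector>
#include <nlohmann/json.hpp>

using nlohmann::json;

int main() {
  json metadata = {
      {"metadata",
       {{"statsV1",
         {{"histogram",
           {{"binCenters", {2, 4, 6}}, {"counts", {10, 15, 20}}}}}}}}};

  // One pointer lookup with a default instead of three nested contains().
  json centers = metadata.value(
      json::json_pointer("/metadata/statsV1/histogram/binCenters"),
      json::array());
  std::cout << centers.dump() << std::endl;  // [2,4,6]

  // Comparing against a std::vector works via implicit conversion to json,
  // which is what the EXPECT_TRUE checks above rely on.
  std::cout << (centers == std::vector<float>({2, 4, 6})) << std::endl;  // 1
  return 0;
}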
21 changes: 21 additions & 0 deletions mdio/utils/trim_test.cc
@@ -234,4 +234,25 @@ TEST(TrimDataset, oneSliceData) {
   }
 }

+TEST(TrimDataset, metadataConsistency) {
+  ASSERT_TRUE(SETUP(kTestPath).status().ok());
+  nlohmann::json imageData;
+  {
+    auto dsFut = mdio::Dataset::Open(kTestPath, mdio::constants::kOpen);
+    ASSERT_TRUE(dsFut.status().ok()) << dsFut.status();
+    auto ds = dsFut.value();
+    auto imageVarRes = ds.variables.at("image");
+    ASSERT_TRUE(imageVarRes.status().ok()) << imageVarRes.status();
+    imageData = imageVarRes.value().getMetadata();
+  }
+  mdio::RangeDescriptor<mdio::Index> slice = {"inline", 0, 128, 1};
+  auto res = mdio::utils::TrimDataset(kTestPath, true, slice);
+  ASSERT_TRUE(res.status().ok()) << res.status();
+  auto dsRes = mdio::Dataset::Open(kTestPath, mdio::constants::kOpen);
+  ASSERT_TRUE(dsRes.status().ok()) << dsRes.status();
+  auto imageVarRes = dsRes.value().variables.at("image");
+  ASSERT_TRUE(imageVarRes.status().ok()) << imageVarRes.status();
+  EXPECT_EQ(imageVarRes.value().getMetadata(), imageData);
+}
+
 }  // namespace
84 changes: 60 additions & 24 deletions mdio/variable.h
@@ -311,7 +311,13 @@ Result<Variable<T, R, M>> from_json(

   nlohmann::json scrubbed_spec = attributes;
   scrubbed_spec.erase("variable_name");
-  auto attrsRes = UserAttributes::FromVariableJson(spec);
+  auto specWithoutChunkgrid = spec;
+  if (specWithoutChunkgrid.contains("metadata")) {
+    if (specWithoutChunkgrid["metadata"].contains("chunkGrid")) {
+      specWithoutChunkgrid["metadata"].erase("chunkGrid");
+    }
+  }
+  auto attrsRes = UserAttributes::FromVariableJson(specWithoutChunkgrid);
   if (!attrsRes.ok()) {
     return attrsRes.status();
   }
@@ -331,8 +337,15 @@ Result<Variable<T, R, M>> from_json(
   // These live in our `UserAttributes` object and are technically mutable. The
   // best kind of mutable!
   if (scrubbed_spec.contains("metadata")) {
-    scrubbed_spec["metadata"].erase("attributes");
-    scrubbed_spec["metadata"].erase("statsV1");
+    if (scrubbed_spec["metadata"].contains("attributes")) {
+      scrubbed_spec["metadata"].erase("attributes");
+    }
+    if (scrubbed_spec["metadata"].contains("statsV1")) {
+      scrubbed_spec["metadata"].erase("statsV1");
+    }
+    if (scrubbed_spec["metadata"].contains("unitsV1")) {
+      scrubbed_spec["metadata"].erase("unitsV1");
+    }
   }

   std::shared_ptr<std::shared_ptr<UserAttributes>> attrs =
@@ -444,7 +457,7 @@ Future<Variable<T, R, M>> CreateVariable(const nlohmann::json& json_spec,
   // It's important to use the store's kvstore or else we get a race condition
   // on "mkdir".
   return tensorstore::kvstore::Write(store.kvstore(), outpath,
-                                     absl::Cord(output_json.dump()));
+                                     absl::Cord(output_json.dump(4)));
 };

 // this is intended to handle the struct array where we "reopen" the store
@@ -587,19 +600,18 @@ Future<Variable<T, R, M>> OpenVariable(const nlohmann::json& json_store,

   // Create a new JSON object with "attributes" as the parent key
   ::nlohmann::json new_metadata;
-  new_metadata["attributes"] = updated_metadata;
-  new_metadata["attributes"]["variable_name"] = variable_name;
+  new_metadata = updated_metadata;
+  new_metadata["variable_name"] = variable_name;

-  if (new_metadata["attributes"].contains("_ARRAY_DIMENSIONS")) {
+  if (new_metadata.contains("_ARRAY_DIMENSIONS")) {
     // Move "_ARRAY_DIMENSIONS" to "dimension_names"
-    new_metadata["attributes"]["dimension_names"] =
-        new_metadata["attributes"]["_ARRAY_DIMENSIONS"];
-    new_metadata["attributes"].erase("_ARRAY_DIMENSIONS");
+    new_metadata["dimension_names"] = new_metadata["_ARRAY_DIMENSIONS"];
+    new_metadata.erase("_ARRAY_DIMENSIONS");
   }

-  if (new_metadata["attributes"].contains("dimension_names")) {
-    auto dimension_names = new_metadata["attributes"]["dimension_names"]
-                               .get<std::vector<std::string>>();
+  if (new_metadata.contains("dimension_names")) {
+    auto dimension_names =
+        new_metadata["dimension_names"].get<std::vector<std::string>>();
     for (DimensionIndex i = 0; i < dimension_names.size(); ++i) {
       MDIO_ASSIGN_OR_RETURN(
           labeled_store,
@@ -610,14 +622,30 @@ Future<Variable<T, R, M>> OpenVariable(const nlohmann::json& json_store,
             absl::StrCat("Field not found in JSON: ", "metadata"));
   }

+  if (new_metadata.contains("attributes")) {
+    new_metadata["metadata"]["attributes"] = new_metadata["attributes"];
+    new_metadata.erase("attributes");
+  }
+  if (new_metadata.contains("statsV1")) {
+    new_metadata["metadata"]["statsV1"] = new_metadata["statsV1"];
+    new_metadata.erase("statsV1");
+  }
+  if (new_metadata.contains("unitsV1")) {
+    new_metadata["metadata"]["unitsV1"] = new_metadata["unitsV1"];
+    new_metadata.erase("unitsV1");
+  }
+
   if (!suppliedAttributes.is_null()) {
     // The supplied attributes contain some things that we do not serialize.
     // We need to remove them. This could cause confusion. If the user
     // specifies a different chunkGrid, it will not be used and should
     // actually fail here.
     nlohmann::json correctedSuppliedAttrs = suppliedAttributes;
     if (correctedSuppliedAttrs["attributes"].contains("metadata")) {
-      correctedSuppliedAttrs["attributes"]["metadata"].erase("chunkGrid");
+      if (correctedSuppliedAttrs["attributes"]["metadata"].contains(
+              "chunkGrid")) {
+        correctedSuppliedAttrs["attributes"]["metadata"].erase("chunkGrid");
+      }
       for (auto& item :
            correctedSuppliedAttrs["attributes"]["metadata"].items()) {
         correctedSuppliedAttrs["attributes"][item.key()] =
@@ -627,9 +655,10 @@ Future<Variable<T, R, M>> OpenVariable(const nlohmann::json& json_store,
    }
    // BFS to make sure supplied attributes match stored attributes
    nlohmann::json searchableMetadata = new_metadata;
-   searchableMetadata["attributes"].erase(
-       "variable_name");  // Since we don't actually want to have to specify
-                          // the variable name
+   if (searchableMetadata["attributes"].contains("variable_name")) {
+     // Since we don't actually want to have to specify the variable name
+     searchableMetadata["attributes"].erase("variable_name");
+   }
    if (searchableMetadata["attributes"].contains("metadata")) {
      for (auto& item :
           searchableMetadata["attributes"]["metadata"].items()) {
@@ -655,7 +684,7 @@ Future<Variable<T, R, M>> OpenVariable(const nlohmann::json& json_store,
         } else if (value != currentAttributes[key]) {
           return absl::InvalidArgumentError(absl::StrCat(
               "Conflicting values for field: ", key, ". ", "Expected: ",
-              value.dump(), ", but got: ", currentAttributes[key].dump()));
+              value.dump(4), ", but got: ", currentAttributes[key].dump(4)));
         }
       }
     }
Expand Down Expand Up @@ -1238,21 +1267,25 @@ class Variable {
output_json.erase("dimension_names");
if (output_json.contains("long_name")) {
output_json["attributes"]["long_name"] = output_json["long_name"];
output_json.erase("long_name");
}
if (output_json.contains("coordinates")) {
output_json["attributes"]["coordinates"] = output_json["coordinates"];
output_json.erase("coordinates");
}
std::string outpath = "/.zattrs";
if (isCloudStore) {
outpath = ".zattrs";
}

if (output_json["attributes"].contains("metadata")) {
auto metadata = output_json["attributes"]["metadata"];
output_json["attributes"].erase("metadata");
output_json["attributes"].merge_patch(metadata);
if (output_json.contains("metadata")) {
output_json["attributes"]["metadata"] = output_json["metadata"];
output_json.erase("metadata");
}

return tensorstore::kvstore::Write(
store.kvstore(), outpath,
absl::Cord(output_json["attributes"].dump()));
absl::Cord(output_json["attributes"].dump(4)));
};

bool isCloudStore = false;
@@ -1339,7 +1372,10 @@ class Variable {
     auto attrs = GetAttributes();
     // Check for not being a `nlohmann::json::object()`
     if (attrs.is_object() && !attrs.empty()) {
-      ret["attributes"]["metadata"] = attrs;
+      if (!ret.contains("metadata")) {
+        ret["metadata"] = nlohmann::json::object();
+      }
+      ret["metadata"].merge_patch(attrs);
     }
     return ret;
   }
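The getMetadata() change at the end folds user attributes into ret["metadata"]
with merge_patch(), which implements RFC 7386 JSON Merge Patch: objects merge
key-by-key and a null value deletes a key. A short sketch with placeholder
attribute values (the key names follow the schema used above):

#include <iostream>
#include <nlohmann/json.hpp>

using nlohmann::json;

int main() {
  json ret = {{"metadata", {{"chunkGrid", {{"name", "regular"}}}}}};
  json attrs = {{"statsV1", {{"count", 42}}}, {"unitsV1", {{"length", "m"}}}};

  // RFC 7386 merge: statsV1 and unitsV1 land beside the existing chunkGrid.
  ret["metadata"].merge_patch(attrs);

  // A null in the patch removes a key -- one way to drop chunkGrid, which the
  // serialization paths above never write out.
  ret["metadata"].merge_patch({{"chunkGrid", nullptr}});

  std::cout << ret.dump(4) << std::endl;
  return 0;
}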
