Skip to content

Commit

Permalink
Merge pull request #132 from TGSAI/131_Trim_struct_metadata
Browse files Browse the repository at this point in the history
Fix structured data metadata only trim
  • Loading branch information
markspec authored Oct 22, 2024
2 parents 5f270f0 + 10f2467 commit 0771808
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 23 deletions.
37 changes: 14 additions & 23 deletions mdio/utils/trim.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,34 +67,18 @@ Future<void> TrimDataset(std::string dataset_path,
MDIO_ASSIGN_OR_RETURN(auto var, ds.variables.at(varIdentifier))
var.set_metadata_publish_flag(true);

if (var.dimensions().labels().back() == "") {
auto spec = var.spec();
if (!spec.status().ok()) {
// Something went wrong with Tensorstore retrieving the spec
return spec.status();
}
auto specJsonResult = spec.value().ToJson(IncludeDefaults{});
if (!specJsonResult.status().ok()) {
return specJsonResult.status();
}
// This will fall over if the first dtype is itself structured data
nlohmann::json specJson =
specJsonResult.value()["metadata"]["dtype"][0][0];
std::string field = specJson.get<std::string>();
// If the variable is structured data we will pick the first dimension
// arbitrarily
auto selection = ds.SelectField(varIdentifier, field);
if (!selection.status().ok()) {
return selection.status();
}
MDIO_ASSIGN_OR_RETURN(var, ds.variables.at(varIdentifier))
}
bool wasStruct = var.dimensions().labels().back() == "";

auto varStore = var.get_store();
std::vector<tensorstore::Index> implicitDims;
std::vector<tensorstore::Index> newShape;

for (size_t i = 0; i < var.dimensions().shape().size(); i++) {
auto dims = var.dimensions().shape().size();
if (wasStruct) {
--dims;
}

for (size_t i = 0; i < dims; i++) {
implicitDims.push_back(tensorstore::kImplicit);
if (shapeDescriptors.count(var.dimensions().labels()[i]) > 0) {
newShape.push_back(shapeDescriptors[var.dimensions().labels()[i]]);
Expand All @@ -103,9 +87,16 @@ Future<void> TrimDataset(std::string dataset_path,
}
}

if (wasStruct) {
implicitDims.push_back(tensorstore::kImplicit);
newShape.push_back(tensorstore::kImplicit);
}

tensorstore::ResizeOptions resizeOptions;
if (delete_sliced_out_chunks) {
resizeOptions.mode = tensorstore::ResizeMode::resize_tied_bounds;
} else if (wasStruct) {
resizeOptions.mode = tensorstore::ResizeMode::resize_metadata_only;
} else {
resizeOptions.mode = tensorstore::ResizeMode::resize_metadata_only;
}
Expand Down
49 changes: 49 additions & 0 deletions mdio/utils/trim_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,55 @@ TEST(TrimDataset, oneSliceData) {
}
}

// Checks that trimming the "inline" dimension to [0, 128) with the second
// TrimDataset argument set to false leaves the values previously written to
// the retained range readable after the dataset is reopened.
// NOTE(review): the `false` argument is presumably the
// delete_sliced_out_chunks flag, i.e. the no-delete trim path -- confirm
// against the TrimDataset signature.
TEST(TrimDataset, oneSliceDataNoDelete) {
// Set up the dataset
ASSERT_TRUE(SETUP(kTestPath).status().ok());
auto dsRes = mdio::Dataset::Open(kTestPath, mdio::constants::kOpen);
ASSERT_TRUE(dsRes.status().ok()) << dsRes.status();
auto ds = dsRes.value();

// Write some data to the inline variable
auto inlineVarRes = ds.variables.get<mdio::dtypes::uint32_t>("inline");
ASSERT_TRUE(inlineVarRes.status().ok()) << inlineVarRes.status();
auto inlineVar = inlineVarRes.value();

// Read the current contents so there is a buffer to modify and write back.
auto inlineVarFuture = inlineVar.Read();
ASSERT_TRUE(inlineVarFuture.status().ok()) << inlineVarFuture.status();
auto inlineVarData = inlineVarFuture.value();

auto inlineDataAccessor = inlineVarData.get_data_accessor();

// Fill indices 0..255 with the recognizable pattern i + 256.
// Assumes the SETUP fixture creates "inline" with at least 256 elements --
// TODO confirm.
for (int i = 0; i < 256; ++i) {
inlineDataAccessor({i}) = i + 256;
}

// Write the modified buffer back before trimming.
auto writeFuture = inlineVar.Write(inlineVarData);
ASSERT_TRUE(writeFuture.status().ok()) << writeFuture.status();

// Trim outside of a chunk boundary
// The descriptor fields are presumably {label, start, stop, step} -- verify
// against mdio::RangeDescriptor.
mdio::RangeDescriptor<mdio::Index> slice = {"inline", 0, 128, 1};
auto res = mdio::utils::TrimDataset(kTestPath, false, slice);
ASSERT_TRUE(res.status().ok()) << res.status();

// Reopen the dataset from the same path so the check below reads through a
// new Dataset object rather than the one opened before the trim.
auto newDsRes = mdio::Dataset::Open(kTestPath, mdio::constants::kOpen);
ASSERT_TRUE(newDsRes.status().ok()) << newDsRes.status();
auto newDs = newDsRes.value();

std::string name = "inline";
auto varRes = newDs.get_variable(name);
ASSERT_TRUE(varRes.status().ok()) << varRes.status();
auto var = varRes.value();
auto varFuture = var.Read();
ASSERT_TRUE(varFuture.status().ok()) << varFuture.status();
auto varData = varFuture.value();

// get_variable() is called without a dtype template argument here, so the
// raw data pointer is reinterpreted as uint32_t to match the type used when
// writing above.
auto varDataAccessor = reinterpret_cast<mdio::dtypes::uint32_t*>(
varData.get_data_accessor().data());
// Every element in the retained range [0, 128) must still hold the value
// written before the trim.
for (int i = 0; i < 128; ++i) {
EXPECT_EQ(varDataAccessor[i], i + 256) << "i: " << i;
}
}

TEST(TrimDataset, metadataConsistency) {
ASSERT_TRUE(SETUP(kTestPath).status().ok());
nlohmann::json imageData;
Expand Down

0 comments on commit 0771808

Please sign in to comment.