From 10f2467ad796eee11122b3088d91bed2f2a8912e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 17 Oct 2024 18:59:41 +0000 Subject: [PATCH] Fix structured data metadata only trim --- mdio/utils/trim.h | 37 ++++++++++++------------------- mdio/utils/trim_test.cc | 49 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 23 deletions(-) diff --git a/mdio/utils/trim.h b/mdio/utils/trim.h index fcb1099..307a4c2 100644 --- a/mdio/utils/trim.h +++ b/mdio/utils/trim.h @@ -67,34 +67,18 @@ Future TrimDataset(std::string dataset_path, MDIO_ASSIGN_OR_RETURN(auto var, ds.variables.at(varIdentifier)) var.set_metadata_publish_flag(true); - if (var.dimensions().labels().back() == "") { - auto spec = var.spec(); - if (!spec.status().ok()) { - // Something went wrong with Tensorstore retrieving the spec - return spec.status(); - } - auto specJsonResult = spec.value().ToJson(IncludeDefaults{}); - if (!specJsonResult.status().ok()) { - return specJsonResult.status(); - } - // This will fall over if the first dtype is itself structured data - nlohmann::json specJson = - specJsonResult.value()["metadata"]["dtype"][0][0]; - std::string field = specJson.get(); - // If the variable is structured data we will pick the first dimension - // arbitrarially - auto selection = ds.SelectField(varIdentifier, field); - if (!selection.status().ok()) { - return selection.status(); - } - MDIO_ASSIGN_OR_RETURN(var, ds.variables.at(varIdentifier)) - } + bool wasStruct = var.dimensions().labels().back() == ""; auto varStore = var.get_store(); std::vector implicitDims; std::vector newShape; - for (size_t i = 0; i < var.dimensions().shape().size(); i++) { + auto dims = var.dimensions().shape().size(); + if (wasStruct) { + --dims; + } + + for (size_t i = 0; i < dims; i++) { implicitDims.push_back(tensorstore::kImplicit); if (shapeDescriptors.count(var.dimensions().labels()[i]) > 0) { newShape.push_back(shapeDescriptors[var.dimensions().labels()[i]]); @@ -103,9 +87,16 @@ Future TrimDataset(std::string dataset_path, } } + if (wasStruct) { + implicitDims.push_back(tensorstore::kImplicit); + newShape.push_back(tensorstore::kImplicit); + } + tensorstore::ResizeOptions resizeOptions; if (delete_sliced_out_chunks) { resizeOptions.mode = tensorstore::ResizeMode::resize_tied_bounds; + } else if (wasStruct) { + resizeOptions.mode = tensorstore::ResizeMode::resize_metadata_only; } else { resizeOptions.mode = tensorstore::ResizeMode::resize_metadata_only; } diff --git a/mdio/utils/trim_test.cc b/mdio/utils/trim_test.cc index 76e0940..dd5f6f9 100644 --- a/mdio/utils/trim_test.cc +++ b/mdio/utils/trim_test.cc @@ -234,6 +234,55 @@ TEST(TrimDataset, oneSliceData) { } } +TEST(TrimDataset, oneSliceDataNoDelete) { + // Set up the dataset + ASSERT_TRUE(SETUP(kTestPath).status().ok()); + auto dsRes = mdio::Dataset::Open(kTestPath, mdio::constants::kOpen); + ASSERT_TRUE(dsRes.status().ok()) << dsRes.status(); + auto ds = dsRes.value(); + + // Write some data to the inline variable + auto inlineVarRes = ds.variables.get("inline"); + ASSERT_TRUE(inlineVarRes.status().ok()) << inlineVarRes.status(); + auto inlineVar = inlineVarRes.value(); + + auto inlineVarFuture = inlineVar.Read(); + ASSERT_TRUE(inlineVarFuture.status().ok()) << inlineVarFuture.status(); + auto inlineVarData = inlineVarFuture.value(); + + auto inlineDataAccessor = inlineVarData.get_data_accessor(); + + for (int i = 0; i < 256; ++i) { + inlineDataAccessor({i}) = i + 256; + } + + auto writeFuture = inlineVar.Write(inlineVarData); + ASSERT_TRUE(writeFuture.status().ok()) << writeFuture.status(); + + // Trim outside of a chunk boundry + mdio::RangeDescriptor slice = {"inline", 0, 128, 1}; + auto res = mdio::utils::TrimDataset(kTestPath, false, slice); + ASSERT_TRUE(res.status().ok()) << res.status(); + + auto newDsRes = mdio::Dataset::Open(kTestPath, mdio::constants::kOpen); + ASSERT_TRUE(newDsRes.status().ok()) << newDsRes.status(); + auto newDs = newDsRes.value(); + + std::string name = "inline"; + auto varRes = newDs.get_variable(name); + ASSERT_TRUE(varRes.status().ok()) << varRes.status(); + auto var = varRes.value(); + auto varFuture = var.Read(); + ASSERT_TRUE(varFuture.status().ok()) << varFuture.status(); + auto varData = varFuture.value(); + + auto varDataAccessor = reinterpret_cast( + varData.get_data_accessor().data()); + for (int i = 0; i < 128; ++i) { + EXPECT_EQ(varDataAccessor[i], i + 256) << "i: " << i; + } +} + TEST(TrimDataset, metadataConsistency) { ASSERT_TRUE(SETUP(kTestPath).status().ok()); nlohmann::json imageData;