From ca1f1482ae13950c7349a915417e1c651890dca5 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Mon, 5 Aug 2024 19:40:33 +0000
Subject: [PATCH 1/4] Re-enabled address change tracking

---
 mdio/variable.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mdio/variable.h b/mdio/variable.h
index 364bb19..2577bc6 100644
--- a/mdio/variable.h
+++ b/mdio/variable.h
@@ -1132,9 +1132,7 @@ class Variable {
     if (attributes.get() != nullptr && attributes->get() != nullptr) {
       std::uintptr_t newAddress =
           reinterpret_cast<std::uintptr_t>(&(**attributes));
-      // TODO(BrianMichell): Leaving this as active is causing segfaults.
-      //   The features requiring it are low priority.
-      // attributesAddress = newAddress;
+      attributesAddress = newAddress;
     }
     // It is fine that this will only change in the "collection" instance of the
     // Variable, because that is the only one that will be operated on by the

From 762f4e05901444444be3b4d87ea81f555a706b6a Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Mon, 5 Aug 2024 19:41:45 +0000
Subject: [PATCH 2/4] Added true modified Variable tracking back

---
 mdio/dataset.h  |  9 ++++++++-
 mdio/utils.h    |  1 +
 mdio/variable.h | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/mdio/dataset.h b/mdio/dataset.h
index 394e0c1..05a0924 100644
--- a/mdio/dataset.h
+++ b/mdio/dataset.h
@@ -834,7 +834,14 @@ class Dataset {
     // Build out list of modified variables
     std::vector<std::string> modifiedVariables;
     for (const auto& key : keys) {
-      modifiedVariables.push_back(key);
+      auto var = variables.at(key).value();
+      if (var.was_updated() || var.should_publish()) {
+        modifiedVariables.push_back(key);
+      }
+      if (var.should_publish()) {
+        // Reset the flag. This should only get set by the trim util.
+        var.set_metadata_publish_flag(false);
+      }
     }
 
     // If nothing changed, we don't want to perform any writes
diff --git a/mdio/utils.h b/mdio/utils.h
index 3984a8e..f3b5b5e 100644
--- a/mdio/utils.h
+++ b/mdio/utils.h
@@ -62,6 +62,7 @@ Future<void> TrimDataset(std::string dataset_path,
 
   for (auto& varIdentifier : ds.variables.get_iterable_accessor()) {
     MDIO_ASSIGN_OR_RETURN(auto var, ds.variables.at(varIdentifier))
+    var.set_metadata_publish_flag(true);
 
     if (var.dimensions().labels().back() == "") {
       auto spec = var.spec();
diff --git a/mdio/variable.h b/mdio/variable.h
index 2577bc6..d0fa4ae 100644
--- a/mdio/variable.h
+++ b/mdio/variable.h
@@ -1098,6 +1098,22 @@ class Variable {
     return attributesAddress != currentAddress;
   }
 
+  /**
+   * @brief Sets the flag whether the metadata should get republished.
+   * Intended for internal use with the trimming utility.
+   *
+   * @param shouldPublish True if the metadata should get republished.
+   */
+  void set_metadata_publish_flag(const bool shouldPublish) {
+    if (!toPublish) {
+      // This should never be the case, but better safe than sorry
+      toPublish = std::make_shared<std::shared_ptr<bool>>(
+          std::make_shared<bool>(shouldPublish));
+    } else {
+      *toPublish = std::make_shared<bool>(shouldPublish);
+    }
+  }
+
   // ===========================Member data getters===========================
   const std::string& get_variable_name() const { return variableName; }
 
@@ -1118,6 +1134,20 @@ class Variable {
     return attributesAddress;
   }
 
+  /**
+   * @brief Gets whether the metadata should get republished.
+   *
+   * @return True if the metadata should get republished.
+   */
+  bool should_publish() const {
+    if (toPublish && *toPublish) {
+      // Deref the shared_ptr so we're not increasing refcount
+      return **toPublish;
+    }
+    // If the flag was a nullptr, err on the side of caution and republish
+    return true;
+  }
+
  private:
   /**
    * This method should NEVER be called by the user.
@@ -1148,6 +1178,9 @@ class Variable {
   tensorstore::TensorStore<T, R, M> store;
   // The address of the attributes. This MUST NEVER be touched by the user.
   std::uintptr_t attributesAddress;
+  // The metadata will need to be updated if the trim util was used on it.
+  std::shared_ptr<std::shared_ptr<bool>> toPublish =
+      std::make_shared<std::shared_ptr<bool>>(std::make_shared<bool>(false));
 };
 
 // Tensorstore Array's don't have an IndexDomain and so they can't be slice with

From b8f956ae91e4490b40b80ba1ecab2bf0660b1558 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Mon, 5 Aug 2024 19:43:09 +0000
Subject: [PATCH 3/4] Switched to shared_ptr and captured variable in lambda to
 fix heap-use-after-free behavior

---
 mdio/dataset.h | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/mdio/dataset.h b/mdio/dataset.h
index 05a0924..5e79f56 100644
--- a/mdio/dataset.h
+++ b/mdio/dataset.h
@@ -18,6 +18,7 @@
 
 #include <fstream>
 #include <map>
+#include <memory>
 #include <string>
 #include <tuple>
 #include <unordered_map>
@@ -854,15 +855,14 @@ class Dataset {
 
     // We need to update the entire .zmetadata file
     std::vector<nlohmann::json> json_vars;
-    std::vector<Variable<>>
-        vars;  // Keeps the Variables in memory. Fix for premature reference
-               // decrement in LLVM compiler.
-    for (auto key : keys) {
-      auto var = variables.at(key).value();
+    std::vector<std::shared_ptr<Variable<>>>
+        vars;  // Keeps the Variables in memory using shared_ptr
+    for (const auto& key : keys) {
+      auto var = std::make_shared<Variable<>>(variables.at(key).value());
       vars.push_back(var);
       // Get the JSON, drop transform, and add attributes
       nlohmann::json json =
-          var.get_store().spec().value().ToJson(IncludeDefaults{}).value();
+          var->get_store().spec().value().ToJson(IncludeDefaults{}).value();
       json.erase("transform");
       json.erase("dtype");
       json["metadata"].erase("filters");
@@ -873,7 +873,7 @@ class Dataset {
       std::string path = json["kvstore"]["path"].get<std::string>();
       path.pop_back();
       json["kvstore"]["path"] = path;
-      nlohmann::json meta = var.getMetadata();
+      nlohmann::json meta = var->getMetadata();
       if (meta.contains("coordinates")) {
         meta["attributes"]["coordinates"] = meta["coordinates"];
         meta.erase("coordinates");
@@ -909,18 +909,16 @@ class Dataset {
         promises;
     std::vector<tensorstore::AnyFuture> futures;
 
-    vars.clear();  // Clear the vector so we can add only the modified Variables
-    for (auto key : modifiedVariables) {
+    for (const auto& key : modifiedVariables) {
       auto pair = tensorstore::PromiseFuturePair<
           tensorstore::TimestampedStorageGeneration>::Make();
-      auto var = variables.at(key).value();
-      vars.push_back(var);
-      auto updateFuture = var.PublishMetadata();
+      auto var = std::make_shared<Variable<>>(variables.at(key).value());
+      auto updateFuture = var->PublishMetadata();
       updateFuture.ExecuteWhenReady(
-          [promise = std::move(pair.promise)](
-              tensorstore::ReadyFuture<
-                  tensorstore::TimestampedStorageGeneration>
-                  readyFut) { promise.SetResult(readyFut.result()); });
+          [promise = std::move(pair.promise),
+           var](tensorstore::ReadyFuture<
+                tensorstore::TimestampedStorageGeneration>
+                    readyFut) { promise.SetResult(readyFut.result()); });
       variableFutures.push_back(std::move(updateFuture));
       futures.push_back(std::move(pair.future));
     }

From 4372c6ff4297105aa474be741f4a983c87bed496 Mon Sep 17 00:00:00 2001
From: BrianMichell <brianm314@comcast.net>
Date: Mon, 5 Aug 2024 19:43:26 +0000
Subject: [PATCH 4/4] Expand test coverage

---
 mdio/variable_test.cc | 45 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/mdio/variable_test.cc b/mdio/variable_test.cc
index c95d018..a880c51 100644
--- a/mdio/variable_test.cc
+++ b/mdio/variable_test.cc
@@ -481,6 +481,51 @@ TEST(Variable, mismatchAttrs) {
   std::filesystem::remove_all("test.zarr");
 }
 
+TEST(Variable, updateMetadata) {
+  nlohmann::json json = GetJsonSpecStruct();
+  auto variableRes =
+      mdio::Variable<>::Open(json, mdio::constants::kCreateClean);
+  ASSERT_TRUE(variableRes.status().ok()) << variableRes.status();
+  auto variable = variableRes.value();
+
+  // Update the metadata
+  nlohmann::json attrs = {
+      {"count", 100},
+      {"min", -1000.0},
+      {"max", 1000.0},
+      {"sum", 0.0},
+      {"sumSquares", 0.0},
+      {"histogram", {{"binCenters", {1.0, 2.0, 3.0}}, {"counts", {1, 2, 3}}}}};
+  auto attrsUpdateRes = variable.UpdateAttributes<float>(attrs);
+  ASSERT_TRUE(attrsUpdateRes.status().ok()) << attrsUpdateRes.status();
+
+  auto publishFuture = variable.PublishMetadata();
+  EXPECT_TRUE(publishFuture.status().ok()) << publishFuture.status();
+}
+
+TEST(Variable, publishNoStats) {
+  nlohmann::json json = GetJsonSpecStruct();
+  auto variableRes =
+      mdio::Variable<>::Open(json, mdio::constants::kCreateClean);
+  ASSERT_TRUE(variableRes.status().ok()) << variableRes.status();
+  auto variable = variableRes.value();
+
+  auto publishFuture = variable.PublishMetadata();
+  EXPECT_TRUE(publishFuture.status().ok()) << publishFuture.status();
+}
+
+TEST(Variable, publishNoAttrs) {
+  nlohmann::json json = GetJsonSpecStruct();
+  json["attributes"].erase("long_name");
+  auto variableRes =
+      mdio::Variable<>::Open(json, mdio::constants::kCreateClean);
+  ASSERT_TRUE(variableRes.status().ok()) << variableRes.status();
+  auto variable = variableRes.value();
+
+  auto publishFuture = variable.PublishMetadata();
+  EXPECT_TRUE(publishFuture.status().ok()) << publishFuture.status();
+}
+
 TEST(Variable, slice) {
   auto variable =
       mdio::Variable<>::Open(json_good, mdio::constants::kCreateClean).value();