More iobuf cross-shard checks, better doc #23263

Closed
128 changes: 110 additions & 18 deletions src/v/bytes/iobuf.h
@@ -40,15 +40,56 @@
*
* General sharing-mutation caveat:
*
* iobuf has more complicated mutation and cross-shard sharing rules, compared
* to most other types such as int or std::string. The underlying cause in both
* cases is that two different iobuf objects may share one or more underlying
* buffers, and hence operations on one iobuf may be visible to the other.
*
* Operations such as share(), copy() and appending an iobuf or other compatible
* buffer type to an iobuf may be zero-copy, in the sense that some or all of
* the payload bytes may be shared between multiple iobufs (or between an iobuf
* and a compatible buffer type like ss::temporary_buffer<>). The sharing occurs
* at the fragment level.
*
* Be careful when any zero-copy operations are used, as iobuf
* does not perform copy-on-write; therefore byte-level changes will be visible
* to all iobufs that share the backing fragments.
* We say that two or more iobuf objects which share fragments have "internal
* sharing" and between such iobufs the following restrictions apply:
*
* BYTE MUTATION CAVEAT
*
* You should not write into the bytes held by an iobuf if it is internally
* shared with another buffer, since the updates will potentially be seen
* by both iobufs.
*
* On the other hand, two iobufs that have internal sharing will behave
* independently with respect to "structural updates", which are all mutations
* except for writing into the buffer itself. For example, if one iobuf is
* created as a copy of another via the share() method, they will have full
* internal sharing, but appending to one buffer will not be seen by the other.
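*
* For example (an illustrative sketch, not code from this file):
*
*   iobuf a;
*   a.append("hello", 5);
*   iobuf b = a.share(0, a.size_bytes()); // full internal sharing with 'a'
*   b.append(" world", 6);                // structural update: 'a' unchanged
*   // Writing into the payload bytes of 'b' (e.g. through a pointer obtained
*   // from one of its fragments) would also be visible through 'a'.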
*
* CROSS-SHARD SHARING CAVEAT
*
* Two iobufs which have internal sharing should not be accessed concurrently on
* different shards. Note that this is a much stronger condition than the usual
* thread-safety requirements for C++ objects since this applies to different
* objects with (potentially hidden) internal sharing, while the usual rules
* apply only to sharing of the _same_ object.
*
* More formally and slightly stricter than the above: every iobuf has an
* "origin" shard which cannot be changed and it must only be accessed on that
* shard: access from another shard is an error which may or may not be
* detected. An iobuf's origin shard is set at construction, as documented
* in the method doc (for example, the default constructor sets the origin
* shard to the current one, while the move constructor inherits the origin
* shard from the source and so on).
*
* The only safe way to get the contents of an iobuf from one shard to another
* is to pass the iobuf to the other shard and then call copy() on it, which is
Member

This doesn't seem like a safe interface to me?

It means the submit_to caller always has to wait until the submit_to future resolves to do anything with its iobuf and hope the submit_to side is not keeping the original iobuf around somehow? It seems as unsafe to me as the more idiomatic way of just doing copy/share first and then moving the result to the other shard. It further pessimizes the cases where no copy is needed and the iobuf is simply EOL on the source shard.
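
(A sketch of the copy/share-then-move pattern referred to here, for concreteness; 'buf' and 'target' are placeholder names, and as discussed further down the moved-to iobuf would still carry the source origin shard under the current oncore rules:)

    // copy on the source shard first, then move the result across
    iobuf for_target = buf.copy();
    co_await ss::smp::submit_to(target, [b = std::move(for_target)]() mutable {
        // use 'b' on the target shard
    });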

I think the oncore stuff is just broken and doesn't really work as is. I tried to extend it in the past as well.

This really needs language support, maybe there is some thread_local hackery that might work?

Member Author
@travisdowns travisdowns Sep 11, 2024

Good feedback, thanks.

It means the submit_to caller always has to wait until the submit_to future resolves to do anything with its iobuf and hope the submit_to side is not keeping the original iobuf around somehow?

Right, though waiting until the future resolves is almost always the existing use case for submit_to and related calls?

That said, I wrote this as a codification of the existing oncore restrictions of the class, and the existing behavior of the share/copy/move ctor methods: the thing that creates the new iobuf (here, copy()) needs to happen on the target shard since it grabs its origin shard ID implicitly from the current shard.

As far as I can tell, iobuf does not currently allow any reasonable way to move an iobuf from one shard to another: the origin ID is preserved on move, so you can never get a moved-to iobuf on another shard with the right origin ID. It may only happen to work in some cases because the verify check wasn't uniformly added everywhere, so if you avoid the methods where it does exist you at least won't get an assert (but you may crash).

I don't think it's a slam dunk though that copy()-on-source is definitely better, at least if we have any concept of origin shard: consider the case where you do an invoke_on (all shards); you really want to create N copies (at least with the current restriction), one for each shard. In that case copy() on the target just works: it makes the right number of copies of the iobuf with the right shard ID.
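
(A sketch of that invoke_on-all-shards case, with illustrative names; ss::smp::invoke_on_all is the seastar call assumed here:)

    // 'buf' lives on the calling shard; each shard makes its own copy
    co_await ss::smp::invoke_on_all([&buf] {
        iobuf local = buf.copy(); // origin shard == the shard running this lambda
        // ... use 'local' on this shard ...
    });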

It further pessimizes the cases where no copy is needed and the iobuf is simply EOL on the source shard.

Yes, but as above this is "already the case" and I don't think there's any way to move safely currently.

I think the oncore stuff is just broken and doesn't really work as is.

Well, the restriction is "too strict", but we already have it, so I'm trying to lean on that for safety at the cost of reinforcing the existing pessimization. So it is at least partly workable in the sense that it has been like this since iobuf's introduction.

That said, totally open to better approaches here too, either for the doc or the implementation. Just relying on 100% adherence to complex and mostly undocumented lifetime rules isn't going to cut it, I think: especially because one of the main failure modes occurs due to a non-atomic increment race, which is going to be very hard to pick up with testing and likely to slip through to production.

I tried to extend it in the past as well (#14024).

Ah, I wish I had seen that. Yeah, it's in the same vein. There's also more discussion in https://redpandadata.atlassian.net/browse/CORE-7061 if you hadn't seen it.

This really needs language support, maybe there is some thread_local hackery that might work?

Sure, we should be open to anything here. Maybe, like @ballard26 said, it's easier just to make the refcounting atomic, which would make the semantics more sane and hence easier to document :). That does look like a potentially invasive change performance-wise though, since I think it means temporary_buffer needs to use atomic refcounting everywhere in seastar (i.e., not opt-in, e.g., via a template parameter), since we need buffers created and consumed in seastar to be like that from the start: plus the work of combing through all the deleter implementations to see if they are truly compatible with cross-shard use beyond the refcounting, since they can do arbitrary things at deletion time.

Member
@StephanDollberg StephanDollberg Sep 11, 2024

As far as I can tell, iobuf does not currently allow any reasonable way to move an iobuf from one shard to another: the origin ID is preserved on move, so you can never get a moved-to iobuf on another shard with the right origin ID. It may only happen to work in some cases because the verify check wasn't uniformly added everywhere, so if you avoid the methods where it does exist you at least won't get an assert (but you may crash).

Yes, but as above this is "already the case" and I don't think there's any way to move safely currently.

Right, yes; I guess I should have clarified that I was thinking more about the release-build use case or with the oncore check removed, as I think it doesn't make much sense in its current form as described otherwise. I do very much suspect it wasn't added to all methods because it would break some common current usage patterns that are fine from a thread-safety perspective.

I don't think it's a slam dunk though that copy()-on-source is definitely better, at least if we have any concept of origin shard: consider the case where you do an invoke_on (all shards); you really want to create N copies (at least with the current restriction), one for each shard. In that case copy() on the target just works: it makes the right number of copies of the iobuf with the right shard ID.

In that example, is a copy even needed at all? If it's safe to copy/read from N other threads in parallel then it would also be safe to just read directly without copying for the duration of the invoke_on_all call? It would require const == threadsafe, not sure whether that is given at least today.

said it's easier just to make the refcounting atomic ... potentially invasive change performance-wise though

I do think that is the best way forward. Back when I was looking into this because of the same bug/misuse in a different part of the code this was my conclusion as well. Note I think this could be a performance gain as it would mean that we can drop the extra copies on the produce and fetch path which are likely a lot more expensive.

For the checks, I am wondering whether we could add a method with a signature along the lines of:

iobuf&& move_to_shard(shard_id target_shard_id)

That invalidates the current object and creates a new one with the source shard id adapted. Then we could possibly add the checks in more places like share() (outside of the constructors, which are probably still needed implicitly as seastar might move the lambda under the hood). Haven't really thought this through.
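
(A hypothetical usage sketch of that proposed interface; move_to_shard does not exist in the current code and 'buf'/'target' are placeholders:)

    // on the source shard: consume 'buf', producing an iobuf bound to 'target'
    iobuf moved = std::move(buf).move_to_shard(target);
    co_await ss::smp::submit_to(target, [b = std::move(moved)]() mutable {
        // 'b' would now be usable on 'target' as its origin shard
    });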

Member
@StephanDollberg StephanDollberg Sep 11, 2024

But no, move_to_shard is never really safe because, at least in its current form, the iobuf doesn't know whether it uniquely owns the temporary_buffer (in #13447 (comment) the issue was definitely that we could have already gotten a shared buffer out of ss::input_stream::read_up_to).

So I guess this really already pessimizes this case:

It further pessimizes the cases where no copy is needed and the iobuf is simply EOL on the source shard.

as it can never be correct, unfortunately. And in that sense a share with the oncore check failing anywhere is a bug. As are the failures in this PR? Well, maybe not, if you can guarantee that the temporary_buf is not shared, possibly because you constructed it yourself somewhere.

So possibly copy_to_shard(shard_id target_shard_id) might be a good interface, but in practice just doing the copy in the submit_to callback is easier.

Member Author
@travisdowns travisdowns Sep 11, 2024

Right yes, I guess I should have clarified that I was thinking more about the release build usecase or with the oncore check removed as I think it doesn't make much sense in its current form as described otherwise.

Well, I feel like we shouldn't be doing anything in the release build that would assert in the debug build, right? Both "in principle" but also in practice it should be unlikely, since we run (some subset of) our tests in debug and those don't assert. So I would say that the methods that have the oncore check are off-limits for cross-origin calls regardless of the build mode.

That aside, I basically agree with you that the oncore check seems like it's not really doing the right thing or at least pessimizes things unnecessarily. So I'll consider this on hold for now.

Edit: To add, this was responding to your comment two up; I hadn't seen the one immediately above, so I'll have to consider that further.

Member Author

as it can never be correct unfortunately.

Right. Though some correct cases could be detected conservatively, it would require more tracking. E.g., every operation either potentially causes shared buffers to be added to the iobuf or it doesn't: the constructors that just take a char*/size which is copied into the iobuf don't result in sharing, something like the move ctor should inherit the sharing state, clear() removes all sharing, etc. So you track this boolean "not shared vs. maybe shared" state and only assert if it's in the maybe-shared state, or apply some other kind of logic. E.g., moving across shards is safe for not-shared stuff.
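
(A rough sketch of that tracking idea, purely illustrative; no such member exists today:)

    class iobuf {
        // ...
        bool _maybe_shared{false}; // set by share(), zero-copy appends of shared
                                   // buffers, etc.; reset by clear()
    public:
        bool maybe_shared() const { return _maybe_shared; }
        // cross-shard moves could then be allowed only when maybe_shared()
        // is false
    };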

As are the failures in this PR?

I did look at two cases and they both looked safe in the sense above: on one shard an iobuf is created "unshared" (e.g., as the result of parsing some json), then it is moved to another shard (produce flow in this case).

* specifically excepted from the above prohibition on access from another
* shard. This will return a deep copy of the buffer with its origin shard set
* to the shard the copy was performed on.
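*
* An illustrative sketch of that pattern (target_shard and buf are placeholder
* names; ss::smp::submit_to is the usual seastar cross-shard call assumed here):
*
*   // on the origin shard of 'buf':
*   co_await ss::smp::submit_to(target_shard, [&buf] {
*       iobuf local = buf.copy(); // deep copy; origin shard is target_shard
*       // ... use 'local' freely on target_shard ...
*   });
*   // back on the origin shard, 'buf' can now be mutated or destroyed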
*/
class iobuf {
// Not a lightweight object.
@@ -88,17 +129,35 @@ class iobuf {
// noexcept
}
~iobuf() noexcept;

/**
* @brief Construct a new iobuf object by moving the source iobuf into it
*
* This leaves the source iobuf empty. Note that the origin shard for the
* newly constructed iobuf is the same as the source, so there is no viable
* way to move an iobuf from one shard to another: the target of the move
* will always have the same origin shard as the source, no matter where the
* moves happen, and so will not be accessible on the target shard (see the
* sharing-mutation caveat in the class comment for details on this
* restriction).
*
* Instead, to "move" an iobuf across shards you must copy() it on the
* target shard and then clear or destroy the source buffer on the source
* shard.
*/
iobuf(iobuf&& x) noexcept
: _frags(std::move(x._frags))
, _size(x._size)
#ifndef NDEBUG
, _verify_shard(x._verify_shard)
#endif
{
x.mutating_method_called();
x._frags = container{};
x._size = 0;
}
iobuf& operator=(iobuf&& x) noexcept {
mutating_method_called();
if (this != &x) {
this->~iobuf();
new (this) iobuf(std::move(x));
@@ -132,8 +191,11 @@ class iobuf {
* copy will be the same as this iobuf, but callers should not rely on the
* precise details.
*
* Since this call performs zero-copy operations, the sharing-mutation
* caveat in the class comment applies.
* Like almost all methods, this method must only be called on the origin
* shard of this iobuf. The returned iobuf will have the same origin, and so
* this method cannot be used to safely share iobufs across shards (see the
* sharing-mutation caveat in the class comment for details). Use copy() to
* move iobuf content from one shard to another.
*/
iobuf share(size_t pos, size_t len);

@@ -143,8 +205,17 @@
* mutations to the payload bytes of this iobuf do not affect the returned
* value or vice-versa.
*
* Copying an iobuf is optimized for cases where the size of the resulting
* iobuf will not be increased (e.g. via iobuf::append).
* The returned iobuf is linearized, and is optimized for cases
* where the size of the resulting iobuf will not be increased (e.g. via
* iobuf::append). That is, the last fragment is sized relatively tightly to
* the size of the data, rather than having a lot of padding as it might if the
* same sequence of bytes were appended to an empty iobuf.
*
* Unlike most methods which create a new iobuf based on an existing one,
* this method sets the origin shard of the iobuf to the current shard, so
* it is safe to send an iobuf to another shard, then call copy on it and
* then access the copy on the other shard. See the sharing-mutation caveat in
* the class comment for further details.
*/
iobuf copy() const;

@@ -257,13 +328,20 @@ class iobuf {
void create_new_fragment(size_t);
size_t last_allocation_size() const;

/**
* Should be called before every mutating method in order to perform any
* consistency checks associated with mutating methods.
*/
void mutating_method_called() const;

container _frags;
size_t _size{0};
expression_in_debug_mode(oncore _verify_shard);
friend std::ostream& operator<<(std::ostream&, const iobuf&);
};

inline void iobuf::clear() {
mutating_method_called();
_frags.clear_and_dispose(&details::dispose_io_fragment);
_size = 0;
}
@@ -296,7 +374,18 @@ inline size_t iobuf::last_allocation_size() const {
return _frags.empty() ? details::io_allocation_size::default_chunk_size
: _frags.back().capacity();
}

inline void iobuf::mutating_method_called() const {
// It is a bug to access an iobuf on any shard other than its "origin shard"
// (which may be different than the shard it was constructed on), so check
// that we aren't doing this in debug mode. This check should also apply to
// const methods, but currently we mostly only check this on mutating
// methods.
oncore_debug_verify(_verify_shard);
}

inline void iobuf::append(std::unique_ptr<fragment> f) {
mutating_method_called();
if (!_frags.empty()) {
_frags.back().trim();
}
@@ -305,20 +394,21 @@ inline void iobuf::append(std::unique_ptr<fragment> f) {
_frags.push_back(*f.release());
}
inline void iobuf::prepend(std::unique_ptr<fragment> f) {
mutating_method_called();
_size += f->size();
_frags.push_front(*f.release());
}

inline void iobuf::create_new_fragment(size_t sz) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
auto chunk_max = std::max(sz, last_allocation_size());
auto asz = details::io_allocation_size::next_allocation_size(chunk_max);
append(std::make_unique<fragment>(asz));
}
/// only ensures that a segment of at least reservation is available
/// as an empty details::io_fragment
inline void iobuf::reserve_memory(size_t reservation) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
if (auto b = available_bytes(); b < reservation) {
if (b > 0) {
_frags.back().trim();
@@ -329,13 +419,14 @@

[[gnu::always_inline]] void inline iobuf::prepend(
ss::temporary_buffer<char> b) {
mutating_method_called();
if (unlikely(!b.size())) {
return;
}
prepend(std::make_unique<fragment>(std::move(b)));
}
[[gnu::always_inline]] void inline iobuf::prepend(iobuf b) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
while (!b._frags.empty()) {
b._frags.pop_back_and_dispose([this](fragment* f) {
prepend(f->share());
@@ -346,12 +437,13 @@ inline void iobuf::reserve_memory(size_t reservation) {
/// append src + len into storage
[[gnu::always_inline]] void inline iobuf::append(
const uint8_t* src, size_t len) {
mutating_method_called();
// NOLINTNEXTLINE
append(reinterpret_cast<const char*>(src), len);
}

[[gnu::always_inline]] void inline iobuf::append(const char* ptr, size_t size) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
if (unlikely(size == 0)) {
return;
}
@@ -374,10 +466,10 @@ inline void iobuf::reserve_memory(size_t reservation) {

/// appends the contents of buffer; might pack values into existing space
[[gnu::always_inline]] inline void iobuf::append(ss::temporary_buffer<char> b) {
mutating_method_called();
if (unlikely(!b.size())) {
return;
}
oncore_debug_verify(_verify_shard);
const size_t last_asz = last_allocation_size();
// The following is a heuristic to decide between copying and zero-copy
// append of the source buffer. The rule we apply is if the buffer we are
@@ -405,7 +497,7 @@ inline void iobuf::reserve_memory(size_t reservation) {
}
/// appends the contents of buffer; might pack values into existing space
inline void iobuf::append(iobuf o) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
while (!o._frags.empty()) {
o._frags.pop_front_and_dispose([this](fragment* f) {
append(f->share());
@@ -415,7 +507,7 @@ inline void iobuf::append(iobuf o) {
}

inline void iobuf::append_fragments(iobuf o) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
while (!o._frags.empty()) {
o._frags.pop_front_and_dispose([this](fragment* f) {
append(std::make_unique<fragment>(f->share()));
@@ -425,17 +517,17 @@ inline void iobuf::append_fragments(iobuf o) {
}
/// used for iostreams
inline void iobuf::pop_front() {
oncore_debug_verify(_verify_shard);
mutating_method_called();
_size -= _frags.front().size();
_frags.pop_front_and_dispose(&details::dispose_io_fragment);
}
inline void iobuf::pop_back() {
oncore_debug_verify(_verify_shard);
mutating_method_called();
_size -= _frags.back().size();
_frags.pop_back_and_dispose(&details::dispose_io_fragment);
}
inline void iobuf::trim_front(size_t n) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
while (!_frags.empty()) {
auto& f = _frags.front();
if (f.size() > n) {
@@ -448,7 +540,7 @@
}
}
inline void iobuf::trim_back(size_t n) {
oncore_debug_verify(_verify_shard);
mutating_method_called();
while (!_frags.empty()) {
auto& f = _frags.back();
if (f.size() > n) {
7 changes: 2 additions & 5 deletions tests/rptest/services/cluster.py
@@ -118,12 +118,9 @@ def wrapped(self: HasRedpanda, *args: Any, **kwargs: Any):
f"Test failed, doing failure checks on {redpanda.who_am_i()}..."
)

# Disabled to avoid addr2line hangs
# (https://github.com/redpanda-data/redpanda/issues/5004)
# self.redpanda.decode_backtraces()

if isinstance(redpanda, RedpandaServiceBase):
if isinstance(redpanda, RedpandaService):
redpanda.cloud_storage_diagnostics()
redpanda.decode_backtraces()
if isinstance(redpanda,
RedpandaService | RedpandaServiceCloud):
redpanda.raise_on_crash(log_allow_list=log_allow_list)
6 changes: 5 additions & 1 deletion tests/rptest/tests/redpanda_test.py
@@ -36,7 +36,11 @@ def __init__(self, test_context: TestContext):
self.scale = Scale(test_context)

def setUp(self):
self.__redpanda.start()
try:
self.__redpanda.start()
except:
self.__redpanda.decode_backtraces()
raise
self._create_initial_topics()

@abstractmethod