From cdb19a334a8811025e5a705e01b6fac8c703b06b Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 3 Feb 2025 07:25:31 -0600 Subject: [PATCH 1/4] docs: init commit --- .../adr-024-high-throughput-recovery.md | 42 +++++++ .../assets/adr024/connecting_to_consensus.md | 104 ++++++++++++++++++ .../assets/adr024/handlers_and_state.md | 3 + docs/architecture/assets/adr024/messages.md | 73 ++++++++++++ 4 files changed, 222 insertions(+) create mode 100644 docs/architecture/adr-024-high-throughput-recovery.md create mode 100644 docs/architecture/assets/adr024/connecting_to_consensus.md create mode 100644 docs/architecture/assets/adr024/handlers_and_state.md create mode 100644 docs/architecture/assets/adr024/messages.md diff --git a/docs/architecture/adr-024-high-throughput-recovery.md b/docs/architecture/adr-024-high-throughput-recovery.md new file mode 100644 index 0000000000..d3089f565f --- /dev/null +++ b/docs/architecture/adr-024-high-throughput-recovery.md @@ -0,0 +1,42 @@ +# ADR 024: High Throughput Recovery + +## Changelog + +- 2025/01/29: Initial draft (@evan-forbes) + +## Status + +Proposed + +## Context + +The Celestia protocol will likely separate block propagation into two phases. "Preparation", for distributing data before the block is created, and "recovery" for distributing data after the block has been created. In order to utilize the data distributed before the block is created, the recovery phase must also be pull based. 
Therefore, the constraints for recovery are: + +- 100% of the Block data MUST be delivered to >2/3 of the voting power before the ProposalTimeout is reached +- MUST use pull based gossip + +## Decision + +TBD + +## Detailed Design + +- [Messages](./assets/adr024/messages.md) +- [Handlers and State](./assets/adr024/handlers_and_state.md) +- [Connecting to Consensus](./assets/adr024/connecting_to_consensus.md) + +## Alternative Approaches + +### PBBT w/o erasure encoding + +### No broadcast tree + +## Consequences + +### Positive + +### Negative + +### Neutral + +## References diff --git a/docs/architecture/assets/adr024/connecting_to_consensus.md b/docs/architecture/assets/adr024/connecting_to_consensus.md new file mode 100644 index 0000000000..a101df8f48 --- /dev/null +++ b/docs/architecture/assets/adr024/connecting_to_consensus.md @@ -0,0 +1,104 @@ +# Backwards Compatible Block Propagation + +This document is an extension of ADR024. + +## Intro + +Changes to gossiping protocols need to be backwards compatible with the existing +mechanism to allow for seamless upgrades. This means that the gossiping +mechanisms need to be hot-swappable. This can be challenging due to the consensus +reactor and state having their own propagation mechanism, and that they were not +designed to be easily modifiable. + +### Compatability + +Minimally invasive modularity can be added by not touching the consensus state, +and utilizing the same entry points that exist now. That is, the consenus +reactors internal message channel to the consensus state. While far from optimal +from an engineering or performance perspective, by simply adding (yet another) +syncing routine, we can sync the data from the block propagation reactor to the +consensus. + +```go +// sync data periodically checks to make sure that all block parts in the data +// routine are pushed through to the state.
+func (cs *State) syncData() { + for { + select { + case <-cs.Quit(): + return + case <-time.After(time.Millisecond * SyncDataInterval): + if cs.dr == nil { + continue + } + + cs.mtx.RLock() + h, r := cs.Height, cs.Round + pparts := cs.ProposalBlockParts + pprop := cs.Proposal + completeProp := cs.isProposalComplete() + cs.mtx.RUnlock() + + if completeProp { + continue + } + + prop, parts, _, has := cs.dr.GetProposal(h, r) + + if !has { + continue + } + + if prop != nil && pprop == nil { + cs.peerMsgQueue <- msgInfo{&ProposalMessage{prop}, ""} + } + + if pparts != nil && pparts.IsComplete() { + continue + } + + for i := 0; i < int(parts.Total()); i++ { + if pparts != nil { + if p := pparts.GetPart(i); p != nil { + continue + } + } + + part := parts.GetPart(i) + if part == nil { + continue + } + cs.peerMsgQueue <- msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""} + } + } + } +} +``` + +This allows for the old routine, alongside the rest of the consensus state +logic, to function as it used to for peers that have yet to migrate to newer +versions. If the peer does not indicate that they are using the new block prop +reactor during the handshake, then the old gossiping routines are spun up like +normal upon adding the peer to the consensus reactor. However, if the peer has +indicated that they are using the new consensus reactor, then the old routines +are simply not spun up. Something along the lines of the below code should +suffice. + +```go +func legacyPropagation(peer p2p.Peer) (bool, error) { + legacyblockProp := true + ni, ok := peer.NodeInfo().(p2p.DefaultNodeInfo) + if !ok { + return false, errors.New("wrong NodeInfo type. 
Expected DefaultNodeInfo") + } + + for _, ch := range ni.Channels { + if ch == types.BlockPropagationChannel { + legacyblockProp = false + break + } + } + + return legacyblockProp, nil +} +``` diff --git a/docs/architecture/assets/adr024/handlers_and_state.md b/docs/architecture/assets/adr024/handlers_and_state.md new file mode 100644 index 0000000000..8002870255 --- /dev/null +++ b/docs/architecture/assets/adr024/handlers_and_state.md @@ -0,0 +1,3 @@ +# Logic and State + +The pbbt reactor logic at a high level is described in the spec. \ No newline at end of file diff --git a/docs/architecture/assets/adr024/messages.md b/docs/architecture/assets/adr024/messages.md new file mode 100644 index 0000000000..015d081c63 --- /dev/null +++ b/docs/architecture/assets/adr024/messages.md @@ -0,0 +1,73 @@ +# PBBT Messages and Validation Logic + +At a high level, all flavors of PBBT have four message types. + +## Commitment + +```protobuf= +message BlobMetaData { + bytes hash = 1; + uint32 size = 2; +} + +// CompactBlock commits to the transaction included in a proposal. +message CompactBlock { + int64 height = 1; + int32 round = 2; + bytes pbbt_root = 3; + repeated BlobMetaData blobs = 4; + bytes signature = 5; +} +``` + +The compact block is signed over by the proposer, and verified by converting to signbytes, and verifying using the proposer's public key to verify the included signature. This siganture is separate from the proposal signature as it is purely related to block propagation, and not meant to be part of the proposal. This allows for block propagation to be backwards compatible with older implementations. + +The `BlobMetaData` contains the hash of the PFB for the blob transaction that it commits to, alongside the `size`. `size` is the length of the blobTx in bytes. + +The `pbbt_root` is generated by taking the merkle root over of each of the blob transactions in `BlobMetaData` and `Have` messasges. 
+ +Verification: +- The signature MUST be valid and from the expected proposer for that height and round + +## Have + +```protobuf= +message HavePart { + bytes hash = 1; + int64 height = 2; + int32 round = 3; + tendermint.crypto.Proof proof = 4 [(gogoproto.nullable) = false]; +} + +``` + +### Forge Resistant + +In order to help verify that a `Have` message originated from a proposer, a nonce can be added to the proposer. This nonce is added to the data that is hashed. It is then included in the `HavePart`. (todo evan: list this as an option in the call) + +Verification: +- The merkle proof must be verified using the `pbbt_root` in the `CompactBlock` for that height and round. + +### Want + +```protobuf +message WantParts { + tendermint.libs.bits.BitArray parts = 1 [(gogoproto.nullable) = false]; + int64 height = 2; + int32 round = 3; +} +``` + +## Data + +```protobuf +message Part { + int64 height = 1; + int32 round = 2; + uint32 index = 3; + bytes data = 4; +} +``` + +Verification +- The hash of the bytes in the data field MUST match that of the `Have` message. \ No newline at end of file From 13aa2629a892ef81b09b628ed1b57f3d9f6cc7e0 Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Wed, 5 Feb 2025 13:03:01 -0600 Subject: [PATCH 2/4] chore: bump --- .../assets/adr024/connecting_to_consensus.md | 19 ++++++++-- docs/architecture/assets/adr024/messages.md | 36 ++++++++++++------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/docs/architecture/assets/adr024/connecting_to_consensus.md b/docs/architecture/assets/adr024/connecting_to_consensus.md index a101df8f48..85188031cd 100644 --- a/docs/architecture/assets/adr024/connecting_to_consensus.md +++ b/docs/architecture/assets/adr024/connecting_to_consensus.md @@ -10,12 +10,12 @@ mechanisms need to be hotswapple. This can be challenging due to the consensus reactor and state having their own propagation mechanism, and that they were not designed to be easily modifiable. 
-### Compatability +## Compatibility with the Consensus Reactor Minimally invasive modularity can be added by not touching the consensus state, and utilizing the same entry points that exist now. That is, the consenus reactors internal message channel to the consensus state. While far from optimal -from an engineering or performance perspective, by simply adding (yet another) +from an engineering or even performance perspective, by simply adding (yet another) syncing routine, we can sync the data from the block propagation reactor to the consensus. @@ -102,3 +102,18 @@ func legacyPropagation(peer p2p.Peer) (bool, error) { return legacyblockProp, nil } ``` + +## Compatibility with Parity Data + +Adding parity data is highly advantageous for broadcast trees and pull based +gossip. However, the added parity data also requires being committed to by the +proposer. At the moment, the proposer commits over the block data via the +`PartSetHeader`. In order to be backwards compatible, we can't break this. +Simultaneously, we don't want to add excessive overhead via requiring +commitments computed twice. In order to solve this dilemma, we can simply reuse +the first commitment, add a second parity commitment computed identically to the +original `PartSetHeader` hash. + +Setting the `PartSetHeader` hash to the zero value and not using it is an +option. Since this is a consensus breaking change, changing the commitment in +the `CompactBlock` can be done at the same time. diff --git a/docs/architecture/assets/adr024/messages.md b/docs/architecture/assets/adr024/messages.md index 015d081c63..c897bedbdf 100644 --- a/docs/architecture/assets/adr024/messages.md +++ b/docs/architecture/assets/adr024/messages.md @@ -1,33 +1,47 @@ # PBBT Messages and Validation Logic -At a high level, all flavors of PBBT have four message types. +At a high level, all flavors of PBBT have four message types. `Commitment`, +`Have`, `Want`, and `Data`.
## Commitment ```protobuf= -message BlobMetaData { - bytes hash = 1; - uint32 size = 2; +message TxMetaData { + repeated bytes hash = 1; + uint32 start = 2; + uint32 end = 3; } // CompactBlock commits to the transaction included in a proposal. message CompactBlock { int64 height = 1; int32 round = 2; - bytes pbbt_root = 3; + bytes bp_hash = 3; repeated BlobMetaData blobs = 4; bytes signature = 5; } ``` -The compact block is signed over by the proposer, and verified by converting to signbytes, and verifying using the proposer's public key to verify the included signature. This siganture is separate from the proposal signature as it is purely related to block propagation, and not meant to be part of the proposal. This allows for block propagation to be backwards compatible with older implementations. +The compact block is signed over by the proposer, and verified by converting to +signbytes, and using the proposer's public key to verify the included signature. -The `BlobMetaData` contains the hash of the PFB for the blob transaction that it commits to, alongside the `size`. `size` is the length of the blobTx in bytes. +> Note: This siganture is separate from the proposal signature as it is purely +> related to block propagation, and not meant to be part of the proposal. This +> allows for block propagation to be backwards compatible with older +> implementations. -The `pbbt_root` is generated by taking the merkle root over of each of the blob transactions in `BlobMetaData` and `Have` messasges. +The `TxMetaData` contains the hash of the PFB for the blob transaction that it +commits to, alongside the `start` and `end`. `start` is the inclusive index of +the starting byte in the protobuf encoded block. `end` depicts the last byte +occupied by the blob transaction. + +The `pbbt_root` is generated by taking the merkle root over of each of the blob +transactions in `BlobMetaData` and `Have` messasges. 
Verification: -- The signature MUST be valid and from the expected proposer for that height and round + +- The signature MUST be valid and from the expected proposer for that height and + round ## Have @@ -41,10 +55,6 @@ message HavePart { ``` -### Forge Resistant - -In order to help verify that a `Have` message originated from a proposer, a nonce can be added to the proposer. This nonce is added to the data that is hashed. It is then included in the `HavePart`. (todo evan: list this as an option in the call) - Verification: - The merkle proof must be verified using the `pbbt_root` in the `CompactBlock` for that height and round. From 215446c464e235c192f83f1566eec8430ee23f43 Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Wed, 5 Feb 2025 21:09:40 -0600 Subject: [PATCH 3/4] docs: add blurb on subparts --- docs/architecture/assets/adr024/messages.md | 61 ++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/docs/architecture/assets/adr024/messages.md b/docs/architecture/assets/adr024/messages.md index c897bedbdf..a54b915535 100644 --- a/docs/architecture/assets/adr024/messages.md +++ b/docs/architecture/assets/adr024/messages.md @@ -80,4 +80,63 @@ message Part { ``` Verification -- The hash of the bytes in the data field MUST match that of the `Have` message. \ No newline at end of file +- The hash of the bytes in the data field MUST match that of the `Have` message. + +### Parity Data + +Parity data is required for all practical broadcast trees. This becomes +problematic mainly due to the requirement that transactions downloaded before +the block is created need to be used during recovery. Using erasure encoding +means that the data must be chunked in an even size. All transactions in that +chunk must have been downloaded in order to use it alongside parity data to +reconstruct the block. 
Most scenarios would likely be fine, however it would be +possible for a node to have downloaded a large portion of the block, but have no +complete parts, rendering all of the parity data useless. The way to fix this +while remaining backwards compatible is to still commit over and propagate +parts, but to erasure encode smaller chunks of those parts, aka `SubParts`. + +```go +const ( + SubPartsPerPart uint32 = 32 + SubPartSize = BlockPartSizeBytes / SubPartsPerPart +) + +type Part struct { + Index uint32 `json:"index"` + Bytes cmtbytes.HexBytes `json:"bytes"` + Proof merkle.Proof `json:"proof"` +} + +// SubPart is a portion of a part and block that is used for generating parity +// data. +type SubPart struct { + Index uint32 `json:"index"` + Bytes cmtbytes.HexBytes `json:"bytes"` +} + +// SubPart breaks a block part into smaller equal sized subparts. +func (p *Part) SubParts() []SubPart { + sps := make([]SubPart, SubPartsPerPart) + for i := uint32(0); i < SubPartsPerPart; i++ { + sps[i] = SubPart{ + Index: uint32(i), + Bytes: p.Bytes[i*SubPartSize : (i+1)*SubPartSize], + } + } + return sps +} + +func PartFromSubParts(index uint32, sps []SubPart) *Part { + if len(sps) != int(SubPartsPerPart) { + panic(fmt.Sprintf("invalid number of subparts: %d", len(sps))) + } + b := make([]byte, 0, BlockPartSizeBytes) + for _, sp := range sps { + b = append(b, sp.Bytes...) 
+ } + return &Part{ + Index: index, + Bytes: b, + } +} +``` \ No newline at end of file From 3b46eedd6a4e2c87aa5a14a3a3a14ce3693d4e1b Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 10 Feb 2025 07:56:41 -0600 Subject: [PATCH 4/4] chore: sync with implementation --- docs/architecture/assets/adr024/messages.md | 70 +++++++++++---------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/docs/architecture/assets/adr024/messages.md b/docs/architecture/assets/adr024/messages.md index a54b915535..6c2d20e4d6 100644 --- a/docs/architecture/assets/adr024/messages.md +++ b/docs/architecture/assets/adr024/messages.md @@ -5,20 +5,20 @@ At a high level, all flavors of PBBT have four message types. `Commitment`, ## Commitment -```protobuf= +```proto message TxMetaData { - repeated bytes hash = 1; + bytes hash = 1; uint32 start = 2; uint32 end = 3; } // CompactBlock commits to the transaction included in a proposal. message CompactBlock { - int64 height = 1; - int32 round = 2; - bytes bp_hash = 3; - repeated BlobMetaData blobs = 4; - bytes signature = 5; + int64 height = 1; + int32 round = 2; + bytes bp_hash = 3; + repeated TxMetaData blobs = 4; + bytes signature = 5; } ``` @@ -40,23 +40,26 @@ transactions in `BlobMetaData` and `Have` messasges. Verification: -- The signature MUST be valid and from the expected proposer for that height and - round +- The signature MUST be valid using the sign bytes of the compact block and the public key of the expected proposer for that height and + round. ## Have ```protobuf= -message HavePart { +message HaveParts { bytes hash = 1; int64 height = 2; int32 round = 3; tendermint.crypto.Proof proof = 4 [(gogoproto.nullable) = false]; } - ``` Verification: -- The merkle proof must be verified using the `pbbt_root` in the `CompactBlock` for that height and round. + +- The merkle proof MUST be verified using the roots included in the + `CompactBlock` for that height and round. 
If the data is parity data, then it + MUST use the `parity_root`, if the data is original block data, then it MUST + use the `PartSetHeaderRoot`. ### Want @@ -71,7 +74,7 @@ message WantParts { ## Data ```protobuf -message Part { +message RecoveryPart { int64 height = 1; int32 round = 2; uint32 index = 3; @@ -80,6 +83,7 @@ message Part { ``` Verification + - The hash of the bytes in the data field MUST match that of the `Have` message. ### Parity Data @@ -116,27 +120,27 @@ type SubPart struct { // SubPart breaks a block part into smaller equal sized subparts. func (p *Part) SubParts() []SubPart { - sps := make([]SubPart, SubPartsPerPart) - for i := uint32(0); i < SubPartsPerPart; i++ { - sps[i] = SubPart{ - Index: uint32(i), - Bytes: p.Bytes[i*SubPartSize : (i+1)*SubPartSize], - } - } - return sps + sps := make([]SubPart, SubPartsPerPart) + for i := uint32(0); i < SubPartsPerPart; i++ { + sps[i] = SubPart{ + Index: uint32(i), + Bytes: p.Bytes[i*SubPartSize : (i+1)*SubPartSize], + } + } + return sps } func PartFromSubParts(index uint32, sps []SubPart) *Part { - if len(sps) != int(SubPartsPerPart) { - panic(fmt.Sprintf("invalid number of subparts: %d", len(sps))) - } - b := make([]byte, 0, BlockPartSizeBytes) - for _, sp := range sps { - b = append(b, sp.Bytes...) - } - return &Part{ - Index: index, - Bytes: b, - } + if len(sps) != int(SubPartsPerPart) { + panic(fmt.Sprintf("invalid number of subparts: %d", len(sps))) + } + b := make([]byte, 0, BlockPartSizeBytes) + for _, sp := range sps { + b = append(b, sp.Bytes...) + } + return &Part{ + Index: index, + Bytes: b, + } } -``` \ No newline at end of file +```