Skip to content

Commit

Permalink
Merge pull request #5531 from oasisprotocol/ptrus/feature/proofs-expl…
Browse files Browse the repository at this point in the history
…icit-internal

mkvs/proofs: Remove implicit internal leaf nodes from proofs
  • Loading branch information
ptrus authored Feb 15, 2024
2 parents ffb6395 + a085a27 commit 9925351
Show file tree
Hide file tree
Showing 15 changed files with 955 additions and 254 deletions.
10 changes: 10 additions & 0 deletions .changelog/5531.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Support for Proofs Without Implicit Internal Leaf Nodes

Previously, internal MKVS nodes in proofs included full leaf nodes implicitly.
With this update, leaf nodes are explicitly added as regular child nodes
within the proof structure. This modification optimizes proof sizes by
avoiding inclusion of potentially large values associated with leaf nodes that
are not directly relevant to the proof's target node.

This change maintains backward compatibility. Existing proofs are unmarshaled
as version 0, while version 1 proofs adopt the new scheme.
7 changes: 6 additions & 1 deletion go/storage/mkvs/checkpoint/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ func createChunk(
nextOffset node.Key,
err error,
) {
it := tree.NewIterator(ctx, mkvs.WithProof(root.Hash))
it := tree.NewIterator(
ctx,
// V1 checkpoints use V0 proofs.
mkvs.WithProofBuilder(syncer.NewProofBuilderV0(root.Hash, root.Hash)),
)
defer it.Close()

// We build the chunk until the proof becomes too large or we have reached the end.
Expand Down Expand Up @@ -82,6 +86,7 @@ func restoreChunk(ctx context.Context, ndb db.NodeDB, chunk *ChunkMetadata, r io
// Reconstruct the proof.
var decodeErr error
var p syncer.Proof
p.V = checkpointProofsVersion
for {
if ctx.Err() != nil {
return ctx.Err()
Expand Down
4 changes: 4 additions & 0 deletions go/storage/mkvs/checkpoint/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ const (
chunksDir = "chunks"
checkpointMetadataFile = "meta"
checkpointVersion = 1

// Version 1 of checkpoint chunks uses version 0 proofs. Consider bumping
// this to the latest proof version when introducing new checkpoint versions.
checkpointProofsVersion = 0
)

type fileCreator struct {
Expand Down
20 changes: 12 additions & 8 deletions go/storage/mkvs/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"errors"

"github.com/oasisprotocol/oasis-core/go/common/crypto/hash"
"github.com/oasisprotocol/oasis-core/go/storage/mkvs/node"
"github.com/oasisprotocol/oasis-core/go/storage/mkvs/syncer"
)
Expand All @@ -25,12 +24,16 @@ func (t *tree) SyncIterate(ctx context.Context, request *syncer.IterateRequest)
if !t.cache.pendingRoot.IsClean() {
return nil, syncer.ErrDirtyRoot
}
pb, err := syncer.NewProofBuilderForVersion(request.Tree.Root.Hash, request.Tree.Root.Hash, request.ProofVersion)
if err != nil {
return nil, err
}

// Create an iterator which generates proofs. Always anchor the proof at the
// root as an iterator may encompass many subtrees. Make sure to propagate
// prefetching to any upstream remote syncers.
it := t.NewIterator(ctx,
WithProof(request.Tree.Root.Hash),
WithProofBuilder(pb),
IteratorPrefetch(request.Prefetch),
)
defer it.Close()
Expand Down Expand Up @@ -64,8 +67,9 @@ func (t *tree) newFetcherSyncIterate(key node.Key, prefetch uint16) readSyncFetc
Root: t.cache.syncRoot,
Position: ptr.Hash,
},
Key: key,
Prefetch: prefetch,
Key: key,
Prefetch: prefetch,
ProofVersion: syncProofsVersion,
})
if err != nil {
return nil, err
Expand Down Expand Up @@ -146,11 +150,11 @@ func IteratorPrefetch(prefetch uint16) IteratorOption {
}
}

// WithProof configures the iterator for generating proofs of all
// visited nodes.
func WithProof(root hash.Hash) IteratorOption {
// WithProofBuilder configures the iterator for generating proofs of all
// visited nodes with the given proof builder.
func WithProofBuilder(pb *syncer.ProofBuilder) IteratorOption {
	return func(it Iterator) {
		// Attach the builder to the concrete tree iterator implementation.
		ti := it.(*treeIterator)
		ti.proofBuilder = pb
	}
}

Expand Down
57 changes: 33 additions & 24 deletions go/storage/mkvs/lookup.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import (
"github.com/oasisprotocol/oasis-core/go/storage/mkvs/syncer"
)

// syncProofsVersion is the proof version requested in outgoing read-sync
// requests (see newFetcherSyncGet and newFetcherSyncIterate).
// Use version 0 proofs in sync requests for now.
const syncProofsVersion uint16 = 0

// Implements Tree.
func (t *tree) Get(ctx context.Context, key []byte) ([]byte, error) {
t.cache.Lock()
Expand Down Expand Up @@ -48,12 +51,15 @@ func (t *tree) SyncGet(ctx context.Context, request *syncer.GetRequest) (*syncer
// Remember where the path from root to target node ends (will end).
t.cache.markPosition()

pb := syncer.NewProofBuilder(request.Tree.Root.Hash, request.Tree.Position)
pb, err := syncer.NewProofBuilderForVersion(request.Tree.Root.Hash, request.Tree.Position, request.ProofVersion)
if err != nil {
return nil, err
}
opts := doGetOptions{
proofBuilder: pb,
includeSiblings: request.IncludeSiblings,
}
if _, err := t.doGet(ctx, t.cache.pendingRoot, 0, request.Key, opts, false); err != nil {
if _, err = t.doGet(ctx, t.cache.pendingRoot, 0, request.Key, opts, false); err != nil {
return nil, err
}
proof, err := pb.Build(ctx)
Expand All @@ -75,6 +81,7 @@ func (t *tree) newFetcherSyncGet(key node.Key, includeSiblings bool) readSyncFet
},
Key: key,
IncludeSiblings: includeSiblings,
ProofVersion: syncProofsVersion,
})
if err != nil {
return nil, err
Expand Down Expand Up @@ -127,7 +134,6 @@ func (t *tree) doGet(

// Does lookup key end here? Look into LeafNode.
if key.BitLength() == bitLength {
// Include siblings before disabling the proof builder for the leaf node.
if opts.includeSiblings {
// Also fetch the left and right siblings.
_, err = t.doGet(ctx, n.Left, bitLength, key, opts, true)
Expand All @@ -140,9 +146,12 @@ func (t *tree) doGet(
}
}

// Omit the proof builder as the leaf node is always included with
// the internal node itself.
opts.proofBuilder = nil
if pb := opts.proofBuilder; pb != nil && pb.Version() == 0 {
// Omit the proof builder as the leaf node is always included with
// the internal node itself in V0 proofs.
opts.proofBuilder = nil
}

return t.doGet(ctx, n.LeafNode, bitLength, key, opts, false)
}

Expand All @@ -152,36 +161,36 @@ func (t *tree) doGet(
}

// Continue recursively based on a bit value.
var value []byte
if key.GetBit(bitLength) {
value, err = t.doGet(ctx, n.Right, bitLength, key, opts, false)
fn := func(visit, other, leaf *node.Pointer) ([]byte, error) {
value, err := t.doGet(ctx, visit, bitLength, key, opts, false)
if err != nil {
return nil, err
}

if opts.includeSiblings {
// Also fetch the left sibling.
_, err = t.doGet(ctx, n.Left, bitLength, key, opts, true)
if pb := opts.proofBuilder; pb != nil && pb.Version() > 0 {
// In V0 proofs, the leaf node is already included in the internal node.
// Also fetch the leaf.
_, err = t.doGet(ctx, leaf, bitLength, key, opts, true)
if err != nil {
return nil, err
}
}

// Also fetch the other sibling.
_, err = t.doGet(ctx, other, bitLength, key, opts, true)
if err != nil {
return nil, err
}
}
return value, nil
}

value, err = t.doGet(ctx, n.Left, bitLength, key, opts, false)
if err != nil {
return nil, err
}

if opts.includeSiblings {
// Also fetch the right sibling.
_, err = t.doGet(ctx, n.Right, bitLength, key, opts, true)
if err != nil {
return nil, err
}
switch key.GetBit(bitLength) {
case true:
return fn(n.Right, n.Left, n.LeafNode)
default:
return fn(n.Left, n.Right, n.LeafNode)
}
return value, nil
case *node.LeafNode:
// Reached a leaf node, check if key matches.
if n.Key.Equal(key) {
Expand Down
110 changes: 64 additions & 46 deletions go/storage/mkvs/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,13 @@ type Node interface {
// IsClean returns true if the node is non-dirty.
IsClean() bool

// CompactMarshalBinary encodes a node into binary form without any hash
// pointers (e.g., for proofs).
CompactMarshalBinary() ([]byte, error)
// CompactMarshalBinaryV0 is a backwards compatibility compact marshalling for
// version 0 proofs.
CompactMarshalBinaryV0() ([]byte, error)

// CompactMarshalBinaryV1 encodes a node into binary form without any hash
// pointers, for version 1 proofs.
CompactMarshalBinaryV1() ([]byte, error)

// GetHash returns the node's cached hash.
GetHash() hash.Hash
Expand Down Expand Up @@ -346,10 +350,6 @@ func (n *InternalNode) GetHash() hash.Hash {
}

// Extract makes a copy of the node containing only hash references.
//
// For LeafNode, it makes a deep copy so that the parent internal node always
// ships it since we cannot address the LeafNode uniquely with NodeID (both the
// internal node and LeafNode have the same path and bit depth).
func (n *InternalNode) Extract() Node {
if !n.Clean {
panic("mkvs: extract called on dirty node")
Expand All @@ -359,35 +359,29 @@ func (n *InternalNode) Extract() Node {
Hash: n.Hash,
Label: n.Label,
LabelBitLength: n.LabelBitLength,
// LeafNode is always contained in internal node.
LeafNode: n.LeafNode.ExtractWithNode(),
Left: n.Left.Extract(),
Right: n.Right.Extract(),
LeafNode: n.LeafNode.Extract(),
Left: n.Left.Extract(),
Right: n.Right.Extract(),
}
}

// ExtractUnchecked makes a copy of the node containing only hash references without
// checking the dirty flag.
//
// For LeafNode, it makes a deep copy so that the parent internal node always
// ships it since we cannot address the LeafNode uniquely with NodeID (both the
// internal node and LeafNode have the same path and bit depth).
func (n *InternalNode) ExtractUnchecked() Node {
return &InternalNode{
Clean: true,
Hash: n.Hash,
Label: n.Label,
LabelBitLength: n.LabelBitLength,
// LeafNode is always contained in internal node.
LeafNode: n.LeafNode.ExtractWithNodeUnchecked(),
Left: n.Left.ExtractUnchecked(),
Right: n.Right.ExtractUnchecked(),
LeafNode: n.LeafNode.ExtractUnchecked(),
Left: n.Left.ExtractUnchecked(),
Right: n.Right.ExtractUnchecked(),
}
}

// CompactMarshalBinary encodes an internal node into binary form without
// any hash pointers (e.g., for proofs).
func (n *InternalNode) CompactMarshalBinary() (data []byte, err error) {
// CompactMarshalBinaryV0 is a backwards compatibility compact marshalling for
// version 0 proofs.
func (n *InternalNode) CompactMarshalBinaryV0() (data []byte, err error) {
// Internal node's LeafNode is always marshaled along the internal node.
var leafNodeBinary []byte
if n.LeafNode == nil {
Expand All @@ -399,30 +393,51 @@ func (n *InternalNode) CompactMarshalBinary() (data []byte, err error) {
}
}

data = make([]byte, 1+DepthSize+len(n.Label)+len(leafNodeBinary))
pos := 0
data[pos] = PrefixInternalNode
pos++
copy(data[pos:pos+DepthSize], n.LabelBitLength.MarshalBinary()[:])
pos += DepthSize
copy(data[pos:pos+len(n.Label)], n.Label)
pos += len(n.Label)
copy(data[pos:pos+len(leafNodeBinary)], leafNodeBinary[:])
data = make([]byte, 0, 1+DepthSize+len(n.Label)+len(leafNodeBinary))
data = append(data, PrefixInternalNode)
data = append(data, n.LabelBitLength.MarshalBinary()...)
data = append(data, n.Label...)
data = append(data, leafNodeBinary...)

return
}

// CompactMarshalBinaryV1 encodes an internal node into binary form without
// any hash pointers and also doesn't include the leaf node (e.g., for proofs).
func (n *InternalNode) CompactMarshalBinaryV1() (data []byte, err error) {
	// Layout: prefix byte, label bit length, label bytes, then a nil-node
	// marker in place of the leaf node (V1 proofs carry leaf nodes as
	// separate proof entries instead of embedding them here).
	depthBytes := n.LabelBitLength.MarshalBinary()
	buf := make([]byte, 0, 1+len(depthBytes)+len(n.Label)+1)
	buf = append(buf, PrefixInternalNode)
	buf = append(buf, depthBytes...)
	buf = append(buf, n.Label...)
	buf = append(buf, PrefixNilNode)
	return buf, nil
}

// MarshalBinary encodes an internal node into binary form.
func (n *InternalNode) MarshalBinary() (data []byte, err error) {
data, err = n.CompactMarshalBinary()
if err != nil {
return
// Internal node's LeafNode is always marshaled along the internal node.
var leafNodeBinary []byte
if n.LeafNode == nil {
leafNodeBinary = make([]byte, 1)
leafNodeBinary[0] = PrefixNilNode
} else {
if leafNodeBinary, err = n.LeafNode.Node.MarshalBinary(); err != nil {
return nil, fmt.Errorf("mkvs: failed to marshal leaf node: %w", err)
}
}

leftHash := n.Left.GetHash()
rightHash := n.Right.GetHash()

data = make([]byte, 0, 1+DepthSize+len(n.Label)+len(leafNodeBinary)+2*hash.Size)
data = append(data, PrefixInternalNode)
data = append(data, n.LabelBitLength.MarshalBinary()...)
data = append(data, n.Label...)
data = append(data, leafNodeBinary...)
data = append(data, leftHash[:]...)
data = append(data, rightHash[:]...)

return
}

Expand Down Expand Up @@ -584,28 +599,31 @@ func (n *LeafNode) ExtractUnchecked() Node {
}
}

// CompactMarshalBinary encodes a leaf node into binary form.
func (n *LeafNode) CompactMarshalBinary() (data []byte, err error) {
// CompactMarshalBinaryV0 is a backwards compatibility compact marshalling for
// version 0 proofs.
//
// The compact leaf node encoding is identical in V0 and V1 proofs, so this
// simply delegates to CompactMarshalBinaryV1.
func (n *LeafNode) CompactMarshalBinaryV0() ([]byte, error) {
	return n.CompactMarshalBinaryV1() // Leaf node format is the same between versions.
}

// CompactMarshalBinaryV1 encodes a leaf node into binary form.
func (n *LeafNode) CompactMarshalBinaryV1() (data []byte, err error) {
keyData, err := n.Key.MarshalBinary()
if err != nil {
return nil, err
}

data = make([]byte, 1+len(keyData)+ValueLengthSize+len(n.Value))
pos := 0
data[pos] = PrefixLeafNode
pos++
copy(data[pos:pos+len(keyData)], keyData)
pos += len(keyData)
binary.LittleEndian.PutUint32(data[pos:pos+ValueLengthSize], uint32(len(n.Value)))
pos += ValueLengthSize
copy(data[pos:], n.Value)
data = make([]byte, 0, 1+len(keyData)+ValueLengthSize+len(n.Value))
data = append(data, PrefixLeafNode)
data = append(data, keyData...)
data = binary.LittleEndian.AppendUint32(data, uint32(len(n.Value)))
data = append(data, n.Value...)

return
}

// MarshalBinary encodes a leaf node into binary form.
func (n *LeafNode) MarshalBinary() ([]byte, error) {
return n.CompactMarshalBinary()
return n.CompactMarshalBinaryV0() // Leaf node format is the same for compact and non-compact.
}

// UnmarshalBinary decodes a binary marshaled leaf node.
Expand Down
Loading

0 comments on commit 9925351

Please sign in to comment.