From 44a51553e6c1ef28aa25e516f135396851a102c9 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Thu, 12 Dec 2024 14:51:14 +0000 Subject: [PATCH] Python ledger optimisations: Add a `--quiet` flag, and reuse calculated values in MerkleTree (#6702) Co-authored-by: Amaury Chamayou --- CHANGELOG.md | 8 ++++ python/pyproject.toml | 2 +- python/src/ccf/merkletree.py | 77 +++++++++++++++++++++-------------- python/src/ccf/read_ledger.py | 57 ++++++++++++++++++-------- tests/governance_history.py | 7 +++- 5 files changed, 102 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb43a8832e6e..2e95893ae98d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [6.0.0-dev9] + +[6.0.0-dev9]: https://github.com/microsoft/CCF/releases/tag/6.0.0-dev9 + +### Changed + +- The `read_ledger.py` tool now has a `--quiet` option which avoids printing anything per-transaction, as well as other performance improvements, which should make it more useful in verifying the integrity of large ledgers. + ## [6.0.0-dev8] [6.0.0-dev8]: https://github.com/microsoft/CCF/releases/tag/6.0.0-dev8 diff --git a/python/pyproject.toml b/python/pyproject.toml index c9220c85de37..ad1ed8b74c50 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ccf" -version = "6.0.0-dev8" +version = "6.0.0-dev9" authors = [ { name="CCF Team", email="CCF-Sec@microsoft.com" }, ] diff --git a/python/src/ccf/merkletree.py b/python/src/ccf/merkletree.py index e9ffd446ae3b..91e11ecc7727 100644 --- a/python/src/ccf/merkletree.py +++ b/python/src/ccf/merkletree.py @@ -2,6 +2,7 @@ # Licensed under the Apache 2.0 License. from hashlib import sha256 +import math class MerkleTree(object): @@ -10,18 +11,22 @@ class MerkleTree(object): """ def __init__(self): - self.levels = None self.reset_tree() def reset_tree(self): - self.leaves = list() - self.levels = None + self._levels = [[]] + self._root = None def add_leaf(self, values: bytes, do_hash=True): digest = values if do_hash: digest = sha256(values).digest() - self.leaves.append(digest) + self._levels[0].append(digest) + self._root = None # Need to recalculate + + @property + def leaves(self): + return self._levels[0] def get_leaf(self, index: int) -> bytes: return self.leaves[index] @@ -30,46 +35,56 @@ def get_leaf_count(self) -> int: return len(self.leaves) def get_merkle_root(self) -> bytes: - # Always make tree before getting root - self._make_tree() - assert ( - self.levels is not None - ), "Unexpected error while getting root. MerkleTree has no levels." + if self._root is None: + # Make tree before getting root if root not already calculated + self._make_tree() + assert ( + self._levels is not None + ), "Unexpected error while getting root. MerkleTree has no levels." + self._root = self._levels[-1][0] - return self.levels[0][0] + return self._root - def _calculate_next_level(self): - solo_leaf = None - # number of leaves on the level - number_of_leaves_on_current_level = len(self.levels[0]) + def _recalculate_level(self, level): + assert len(self._levels) > level - 1 + prev_level = self._levels[level - 1] + number_of_leaves_on_prev_level = len(prev_level) assert ( - number_of_leaves_on_current_level > 1 + number_of_leaves_on_prev_level > 1 ), "Merkle Tree should have more than one leaf at every level" + solo_leaf = None + if ( - number_of_leaves_on_current_level % 2 == 1 + number_of_leaves_on_prev_level % 2 == 1 ): # if odd number of leaves on the level # Get the solo leaf (last leaf in-case the leaves are odd numbered) - solo_leaf = self.levels[0][-1] - number_of_leaves_on_current_level -= 1 + solo_leaf = prev_level[-1] + number_of_leaves_on_prev_level -= 1 + + if not len(self._levels) > level: + self._levels.append([]) + + # Reuse existing level as much as possible + current_level = self._levels[level] + + # Since we may have copied a solo-leaf to the rightmost node last time, pop and re-calculate it + if len(current_level): + current_level.pop(-1) + + done = len(current_level) - new_level = [] for left_node, right_node in zip( - self.levels[0][0:number_of_leaves_on_current_level:2], - self.levels[0][1:number_of_leaves_on_current_level:2], + prev_level[done * 2 : number_of_leaves_on_prev_level : 2], + prev_level[done * 2 + 1 : number_of_leaves_on_prev_level : 2], ): - new_level.append(sha256(left_node + right_node).digest()) + current_level.append(sha256(left_node + right_node).digest()) if solo_leaf is not None: - new_level.append(solo_leaf) - self.levels = [ - new_level, - ] + self.levels # prepend new level + current_level.append(solo_leaf) def _make_tree(self): if self.get_leaf_count() > 0: - self.levels = [ - self.leaves, - ] - while len(self.levels[0]) > 1: - self._calculate_next_level() + num_levels = 1 + math.ceil(math.log(self.get_leaf_count(), 2)) + for level in range(1, num_levels): + self._recalculate_level(level) diff --git a/python/src/ccf/read_ledger.py b/python/src/ccf/read_ledger.py index 21e9f2431dc6..8b7531760c6c 100644 --- a/python/src/ccf/read_ledger.py +++ b/python/src/ccf/read_ledger.py @@ -6,10 +6,17 @@ import json import re import argparse +from enum import Enum, auto from loguru import logger as LOG +class PrintMode(Enum): + Quiet = auto() + Digests = auto() + Contents = auto() + + def indent(n): return " " * n @@ -129,15 +136,16 @@ def dump_entry(entry, table_filter, tables_format_rules): def run( paths, + print_mode: PrintMode, is_snapshot=False, - tables=None, - uncommitted=False, + tables_regex=None, insecure_skip_verification=False, + uncommitted=False, tables_format_rules=None, - digests_only=None, ): + table_filter = re.compile(tables_regex) if tables_regex is not None else None + # Extend and compile rules - table_filter = re.compile(tables) if tables is not None else None tables_format_rules = tables_format_rules or [] tables_format_rules.extend(default_tables_format_rules) tables_format_rules = [ @@ -170,11 +178,13 @@ def run( f"chunk {chunk.filename()} ({'' if chunk.is_committed() else 'un'}committed)" ) for transaction in chunk: - if digests_only: + if print_mode == PrintMode.Quiet: + pass + elif print_mode == PrintMode.Digests: print( f"{transaction.gcm_header.view}.{transaction.gcm_header.seqno} {transaction.get_write_set_digest().hex()}" ) - else: + elif print_mode == PrintMode.Contents: dump_entry(transaction, table_filter, tables_format_rules) except Exception as e: LOG.exception(f"Error parsing ledger: {e}") @@ -216,37 +226,52 @@ def main(): action="store_true", ) parser.add_argument( + "--uncommitted", help="Also parse uncommitted ledger files", action="store_true" + ) + + display_options = parser.add_mutually_exclusive_group() + display_options.add_argument( + "-q", + "--quiet", + help="Don't print transaction digests or contents", + action="store_true", + ) + display_options.add_argument( "-d", "--digests-only", help="Only print transaction digests", action="store_true", ) - parser.add_argument( + display_options.add_argument( "-t", "--tables", help="Regex filter for tables to display", type=str, default=None, ) - parser.add_argument( - "--uncommitted", help="Also parse uncommitted ledger files", action="store_true" - ) + parser.add_argument( "--insecure-skip-verification", help="INSECURE: skip ledger Merkle tree integrity verification", action="store_true", default=False, ) + args = parser.parse_args() + print_mode = PrintMode.Contents + if args.quiet: + print_mode = PrintMode.Quiet + elif args.digests_only: + print_mode = PrintMode.Digests + if not run( args.paths, - args.snapshot, - args.tables, - args.uncommitted, - args.insecure_skip_verification, - None, - args.digests_only, + print_mode, + is_snapshot=args.snapshot, + tables_regex=args.tables, + insecure_skip_verification=args.insecure_skip_verification, + uncommitted=args.uncommitted, ): sys.exit(1) diff --git a/tests/governance_history.py b/tests/governance_history.py index 22f2fe17a22d..b0e519a571b2 100644 --- a/tests/governance_history.py +++ b/tests/governance_history.py @@ -222,11 +222,16 @@ def fmt_str(data: bytes) -> str: primary, backups = network.find_nodes() for node in (primary, *backups): ledger_dirs = node.remote.ledger_paths() - assert ccf.read_ledger.run(paths=ledger_dirs, tables_format_rules=format_rule) + assert ccf.read_ledger.run( + paths=ledger_dirs, + print_mode=ccf.read_ledger.PrintMode.Contents, + tables_format_rules=format_rule, + ) snapshot_dir = network.get_committed_snapshots(primary) assert ccf.read_ledger.run( paths=[os.path.join(snapshot_dir, os.listdir(snapshot_dir)[-1])], + print_mode=ccf.read_ledger.PrintMode.Contents, is_snapshot=True, tables_format_rules=format_rule, )