From 10fc96608cab4b33a83069b72331a99bd40aabf7 Mon Sep 17 00:00:00 2001 From: Robin Salen <30937548+Nashtare@users.noreply.github.com> Date: Wed, 17 Jan 2024 07:56:35 -0500 Subject: [PATCH] Remove some more CPU cycles (#1472) * Speed-up some mload/mstore calls * Misc * Improve logs loop * Speed-up bloom * Speed-up access_lists loops * Fix * Speed up selfdestruct loop * Speed-up touched_addresses loop * Speed-up receipt loop * Skip rep loop * Fix * Misc * Review --- evm/src/cpu/kernel/asm/bloom_filter.asm | 32 ++++---- evm/src/cpu/kernel/asm/core/access_lists.asm | 74 +++++++++++-------- .../cpu/kernel/asm/core/create_addresses.asm | 18 ++--- .../cpu/kernel/asm/core/create_receipt.asm | 30 +++----- .../cpu/kernel/asm/core/jumpdest_analysis.asm | 45 ++++++----- evm/src/cpu/kernel/asm/core/log.asm | 19 +++-- .../cpu/kernel/asm/core/selfdestruct_list.asm | 27 ++++--- evm/src/cpu/kernel/asm/core/terminate.asm | 6 +- .../cpu/kernel/asm/core/touched_addresses.asm | 34 ++++++--- evm/src/cpu/kernel/asm/journal/journal.asm | 7 +- evm/src/cpu/kernel/asm/memory/core.asm | 21 +++++- .../kernel/asm/mpt/delete/delete_branch.asm | 15 ++-- evm/src/cpu/kernel/asm/mpt/hex_prefix.asm | 1 - .../asm/mpt/insert/insert_extension.asm | 18 ++++- .../cpu/kernel/asm/mpt/insert/insert_leaf.asm | 19 ++++- evm/src/cpu/kernel/asm/rlp/encode.asm | 3 +- evm/src/cpu/kernel/asm/util/basic_macros.asm | 8 ++ evm/src/cpu/kernel/asm/util/keccak.asm | 5 +- 18 files changed, 239 insertions(+), 143 deletions(-) diff --git a/evm/src/cpu/kernel/asm/bloom_filter.asm b/evm/src/cpu/kernel/asm/bloom_filter.asm index 8fa84a80f5..35a4ebd763 100644 --- a/evm/src/cpu/kernel/asm/bloom_filter.asm +++ b/evm/src/cpu/kernel/asm/bloom_filter.asm @@ -55,20 +55,21 @@ logs_bloom_loop: // Add address to bloom filter. 
%increment // stack: addr_ptr, i, logs_len, retdest + PUSH @SEGMENT_LOGS_DATA %build_kernel_address DUP1 - %mload_kernel(@SEGMENT_LOGS_DATA) - // stack: addr, addr_ptr, i, logs_len, retdest + MLOAD_GENERAL + // stack: addr, full_addr_ptr, i, logs_len, retdest PUSH 0 - // stack: is_topic, addr, addr_ptr, i, logs_len, retdest + // stack: is_topic, addr, full_addr_ptr, i, logs_len, retdest %add_to_bloom - // stack: addr_ptr, i, logs_len, retdest + // stack: full_addr_ptr, i, logs_len, retdest %increment - // stack: num_topics_ptr, i, logs_len, retdest + // stack: full_num_topics_ptr, i, logs_len, retdest DUP1 - %mload_kernel(@SEGMENT_LOGS_DATA) - // stack: num_topics, num_topics_ptr, i, logs_len, retdest + MLOAD_GENERAL + // stack: num_topics, full_num_topics_ptr, i, logs_len, retdest SWAP1 %increment - // stack: topics_ptr, num_topics, i, logs_len, retdest + // stack: full_topics_ptr, num_topics, i, logs_len, retdest PUSH 0 logs_bloom_topic_loop: @@ -78,7 +79,7 @@ logs_bloom_topic_loop: %jumpi(logs_bloom_topic_end) DUP2 DUP2 ADD // stack: curr_topic_ptr, j, topics_ptr, num_topics, i, logs_len, retdest - %mload_kernel(@SEGMENT_LOGS_DATA) + MLOAD_GENERAL // stack: topic, j, topics_ptr, num_topics, i, logs_len, retdest PUSH 1 // stack: is_topic, topic, j, topics_ptr, num_topics, i, logs_len, retdest @@ -142,19 +143,20 @@ logs_bloom_end: // Also updates the block bloom filter. %macro bloom_write_bit // stack: byte_index, byte_bit_index + PUSH @SEGMENT_TXN_BLOOM + %build_kernel_address PUSH 1 DUP3 - // stack: byte_bit_index, 1, byte_index, byte_bit_index + // stack: byte_bit_index, 1, byte_addr, byte_bit_index PUSH 7 SUB SHL // Updates the current txn bloom filter. 
SWAP2 POP DUP1 - %mload_kernel(@SEGMENT_TXN_BLOOM) - // stack: old_bloom_byte, byte_index, one_shifted_by_index + MLOAD_GENERAL + // stack: old_bloom_byte, byte_addr, one_shifted_by_index DUP3 OR - // stack: new_bloom_byte, byte_index, one_shifted_by_index - SWAP1 - %mstore_kernel(@SEGMENT_TXN_BLOOM) + // stack: new_bloom_byte, byte_addr, one_shifted_by_index + MSTORE_GENERAL // stack: one_shifted_by_index POP // stack: empty diff --git a/evm/src/cpu/kernel/asm/core/access_lists.asm b/evm/src/cpu/kernel/asm/core/access_lists.asm index 5019b6840e..30afe27c41 100644 --- a/evm/src/cpu/kernel/asm/core/access_lists.asm +++ b/evm/src/cpu/kernel/asm/core/access_lists.asm @@ -25,12 +25,15 @@ global insert_accessed_addresses: // stack: addr, retdest %mload_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN) // stack: len, addr, retdest - PUSH 0 + PUSH @SEGMENT_ACCESSED_ADDRESSES ADD + PUSH @SEGMENT_ACCESSED_ADDRESSES insert_accessed_addresses_loop: + // `i` and `len` are both scaled by SEGMENT_ACCESSED_ADDRESSES %stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest) EQ %jumpi(insert_address) // stack: i, len, addr, retdest - DUP1 %mload_kernel(@SEGMENT_ACCESSED_ADDRESSES) + DUP1 + MLOAD_GENERAL // stack: loaded_addr, i, len, addr, retdest DUP4 // stack: addr, loaded_addr, i, len, addr, retdest @@ -42,9 +45,10 @@ insert_accessed_addresses_loop: insert_address: %stack (i, len, addr, retdest) -> (i, addr, len, retdest) DUP2 %journal_add_account_loaded // Add a journal entry for the loaded account. - %mstore_kernel(@SEGMENT_ACCESSED_ADDRESSES) // Store new address at the end of the array. + %swap_mstore // Store new address at the end of the array. // stack: len, retdest %increment + %sub_const(@SEGMENT_ACCESSED_ADDRESSES) // unscale `len` %mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN) // Store new length. PUSH 1 // Return 1 to indicate that the address was inserted. 
SWAP1 JUMP @@ -59,12 +63,14 @@ global remove_accessed_addresses: // stack: addr, retdest %mload_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN) // stack: len, addr, retdest - PUSH 0 + PUSH @SEGMENT_ACCESSED_ADDRESSES ADD + PUSH @SEGMENT_ACCESSED_ADDRESSES remove_accessed_addresses_loop: + // `i` and `len` are both scaled by SEGMENT_ACCESSED_ADDRESSES %stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest) EQ %jumpi(panic) // stack: i, len, addr, retdest - DUP1 %mload_kernel(@SEGMENT_ACCESSED_ADDRESSES) + DUP1 MLOAD_GENERAL // stack: loaded_addr, i, len, addr, retdest DUP4 // stack: addr, loaded_addr, i, len, addr, retdest @@ -74,12 +80,15 @@ remove_accessed_addresses_loop: %jump(remove_accessed_addresses_loop) remove_accessed_addresses_found: %stack (i, len, addr, retdest) -> (len, 1, i, retdest) - SUB DUP1 %mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN) // Decrement the access list length. + SUB // len -= 1 + PUSH @SEGMENT_ACCESSED_ADDRESSES + DUP2 SUB // unscale `len` + %mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN) // Decrement the access list length. // stack: len-1, i, retdest - %mload_kernel(@SEGMENT_ACCESSED_ADDRESSES) // Load the last address in the access list. + MLOAD_GENERAL // Load the last address in the access list. // stack: last_addr, i, retdest - SWAP1 - %mstore_kernel(@SEGMENT_ACCESSED_ADDRESSES) // Store the last address at the position of the removed address. + MSTORE_GENERAL + // Store the last address at the position of the removed address. 
JUMP @@ -97,14 +106,16 @@ global insert_accessed_storage_keys: // stack: addr, key, value, retdest %mload_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN) // stack: len, addr, key, value, retdest - PUSH 0 + PUSH @SEGMENT_ACCESSED_STORAGE_KEYS ADD + PUSH @SEGMENT_ACCESSED_STORAGE_KEYS insert_accessed_storage_keys_loop: + // `i` and `len` are both scaled by SEGMENT_ACCESSED_STORAGE_KEYS %stack (i, len, addr, key, value, retdest) -> (i, len, i, len, addr, key, value, retdest) EQ %jumpi(insert_storage_key) // stack: i, len, addr, key, value, retdest - DUP1 %increment %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP1 %increment MLOAD_GENERAL // stack: loaded_key, i, len, addr, key, value, retdest - DUP2 %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP2 MLOAD_GENERAL // stack: loaded_addr, loaded_key, i, len, addr, key, value, retdest DUP5 EQ // stack: loaded_addr==addr, loaded_key, i, len, addr, key, value, retdest @@ -120,20 +131,18 @@ insert_storage_key: // stack: i, len, addr, key, value, retdest DUP4 DUP4 %journal_add_storage_loaded // Add a journal entry for the loaded storage key. // stack: i, len, addr, key, value, retdest - DUP1 - PUSH @SEGMENT_ACCESSED_STORAGE_KEYS - %build_kernel_address - %stack(dst, i, len, addr, key, value) -> (addr, dst, dst, key, dst, value, i, value) + %stack(dst, len, addr, key, value) -> (addr, dst, dst, key, dst, value, dst, @SEGMENT_ACCESSED_STORAGE_KEYS, value) MSTORE_GENERAL // Store new address at the end of the array. 
- // stack: dst, key, dst, value, i, value, retdest + // stack: dst, key, dst, value, dst, segment, value, retdest %increment SWAP1 MSTORE_GENERAL // Store new key after that - // stack: dst, value, i, value, retdest + // stack: dst, value, dst, segment, value, retdest %add_const(2) SWAP1 MSTORE_GENERAL // Store new value after that - // stack: i, value, retdest + // stack: dst, segment, value, retdest %add_const(3) + SUB // unscale dst %mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN) // Store new length. %stack (value, retdest) -> (retdest, 1, value) // Return 1 to indicate that the storage key was inserted. JUMP @@ -141,7 +150,7 @@ insert_storage_key: insert_accessed_storage_keys_found: // stack: i, len, addr, key, value, retdest %add_const(2) - %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + MLOAD_GENERAL %stack (original_value, len, addr, key, value, retdest) -> (retdest, 0, original_value) // Return 0 to indicate that the storage key was already present. JUMP @@ -151,14 +160,16 @@ global remove_accessed_storage_keys: // stack: addr, key, retdest %mload_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN) // stack: len, addr, key, retdest - PUSH 0 + PUSH @SEGMENT_ACCESSED_STORAGE_KEYS ADD + PUSH @SEGMENT_ACCESSED_STORAGE_KEYS remove_accessed_storage_keys_loop: + // `i` and `len` are both scaled by SEGMENT_ACCESSED_STORAGE_KEYS %stack (i, len, addr, key, retdest) -> (i, len, i, len, addr, key, retdest) EQ %jumpi(panic) // stack: i, len, addr, key, retdest - DUP1 %increment %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP1 %increment MLOAD_GENERAL // stack: loaded_key, i, len, addr, key, retdest - DUP2 %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP2 MLOAD_GENERAL // stack: loaded_addr, loaded_key, i, len, addr, key, retdest DUP5 EQ // stack: loaded_addr==addr, loaded_key, i, len, addr, key, retdest @@ -172,18 +183,21 @@ remove_accessed_storage_keys_loop: remove_accessed_storage_keys_found: %stack (i, len, addr, key, retdest) -> 
(len, 3, i, retdest) - SUB DUP1 %mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN) // Decrease the access list length. + SUB + PUSH @SEGMENT_ACCESSED_STORAGE_KEYS + DUP2 SUB // unscale + %mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN) // Decrease the access list length. // stack: len-3, i, retdest - DUP1 %add_const(2) %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP1 %add_const(2) MLOAD_GENERAL // stack: last_value, len-3, i, retdest - DUP2 %add_const(1) %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP2 %add_const(1) MLOAD_GENERAL // stack: last_key, last_value, len-3, i, retdest - DUP3 %mload_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP3 MLOAD_GENERAL // stack: last_addr, last_key, last_value, len-3, i, retdest - DUP5 %mstore_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) // Move the last tuple to the position of the removed tuple. + DUP5 %swap_mstore // Move the last tuple to the position of the removed tuple. // stack: last_key, last_value, len-3, i, retdest - DUP4 %add_const(1) %mstore_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP4 %add_const(1) %swap_mstore // stack: last_value, len-3, i, retdest - DUP3 %add_const(2) %mstore_kernel(@SEGMENT_ACCESSED_STORAGE_KEYS) + DUP3 %add_const(2) %swap_mstore // stack: len-3, i, retdest %pop2 JUMP diff --git a/evm/src/cpu/kernel/asm/core/create_addresses.asm b/evm/src/cpu/kernel/asm/core/create_addresses.asm index 77b6f6044e..8c2de08bd2 100644 --- a/evm/src/cpu/kernel/asm/core/create_addresses.asm +++ b/evm/src/cpu/kernel/asm/core/create_addresses.asm @@ -37,17 +37,17 @@ global get_create_address: // Post stack: address global get_create2_address: // stack: sender, code_hash, salt, retdest - PUSH 0xff PUSH 0 %mstore_kernel_general - %stack (sender, code_hash, salt, retdest) -> (@SEGMENT_KERNEL_GENERAL, 1, sender, salt, code_hash, retdest) - ADD + PUSH @SEGMENT_KERNEL_GENERAL + DUP1 + PUSH 0xff + MSTORE_GENERAL + // stack: addr, sender, code_hash, salt, retdest + %increment + %stack (addr, 
sender, code_hash, salt, retdest) -> (addr, sender, salt, code_hash, retdest) MSTORE_32BYTES_20 - POP - %stack (salt, code_hash, retdest) -> (@SEGMENT_KERNEL_GENERAL, 21, salt, code_hash, retdest) - ADD + // stack: addr, salt, code_hash, retdest MSTORE_32BYTES_32 - POP - %stack (code_hash, retdest) -> (@SEGMENT_KERNEL_GENERAL, 53, code_hash, retdest) - ADD + // stack: addr, code_hash, retdest MSTORE_32BYTES_32 POP %stack (retdest) -> (@SEGMENT_KERNEL_GENERAL, 85, retdest) // offset == context == 0 diff --git a/evm/src/cpu/kernel/asm/core/create_receipt.asm b/evm/src/cpu/kernel/asm/core/create_receipt.asm index 9f7cb9f89d..60e9264739 100644 --- a/evm/src/cpu/kernel/asm/core/create_receipt.asm +++ b/evm/src/cpu/kernel/asm/core/create_receipt.asm @@ -116,22 +116,23 @@ process_receipt_logs_loop: %mload_kernel(@SEGMENT_LOGS) // stack: log_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest // Write payload_len. + PUSH @SEGMENT_LOGS_DATA %build_kernel_address DUP1 - %mload_kernel(@SEGMENT_LOGS_DATA) + MLOAD_GENERAL %append_to_trie_data // stack: log_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest // Write address. %increment // stack: addr_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest DUP1 - %mload_kernel(@SEGMENT_LOGS_DATA) + MLOAD_GENERAL %append_to_trie_data // stack: addr_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest //Write num_topics. 
%increment // stack: num_topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest DUP1 - %mload_kernel(@SEGMENT_LOGS_DATA) + MLOAD_GENERAL // stack: num_topics, num_topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest DUP1 %append_to_trie_data @@ -151,7 +152,7 @@ process_receipt_topics_loop: DUP3 DUP2 ADD // stack: cur_topic_ptr, j, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest - %mload_kernel(@SEGMENT_LOGS_DATA) + MLOAD_GENERAL %append_to_trie_data // stack: j, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest %increment @@ -164,7 +165,7 @@ process_receipt_topics_end: // stack: data_len_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest // Write data_len DUP1 - %mload_kernel(@SEGMENT_LOGS_DATA) + MLOAD_GENERAL // stack: data_len, data_len_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest DUP1 %append_to_trie_data @@ -184,7 +185,7 @@ process_receipt_data_loop: DUP3 DUP2 ADD // stack: cur_data_ptr, j, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest - %mload_kernel(@SEGMENT_LOGS_DATA) + MLOAD_GENERAL %append_to_trie_data // stack: j, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest %increment @@ -205,19 +206,10 @@ process_receipt_after_write: DUP5 %mpt_insert_receipt_trie // stack: new_cum_gas, txn_nb, num_nibbles, retdest - // Now, we set the Bloom filter back to 0. We proceed by chunks of 32 bytes. 
- PUSH @SEGMENT_TXN_BLOOM // ctx == offset == 0 - %rep 8 - // stack: addr, new_cum_gas, txn_nb, num_nibbles, retdest - PUSH 0 // we will fill the memory segment with zeroes - DUP2 - // stack: addr, 0, addr, new_cum_gas, txn_nb, num_nibbles, retdest - MSTORE_32BYTES_32 - // stack: new_addr, addr, new_cum_gas, txn_nb, num_nibbles, retdest - SWAP1 POP - %endrep - POP - // stack: new_cum_gas, txn_nb, num_nibbles, retdest + + // We don't need to reset the bloom filter segment as we only process a single transaction. + // TODO: Revert in case we add back support for multi-txn proofs. + %stack (new_cum_gas, txn_nb, num_nibbles, retdest) -> (retdest, new_cum_gas) JUMP diff --git a/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm b/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm index efcd66420f..6ed4df814f 100644 --- a/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm +++ b/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm @@ -3,52 +3,57 @@ // Pre stack: init_pos, ctx, final_pos, retdest // Post stack: (empty) global verify_path_and_write_jumpdest_table: + SWAP2 + DUP2 + ADD // final_addr + // stack: final_addr, ctx, i, retdest + SWAP2 + ADD // init_addr loop: - // stack: i, ctx, final_pos, retdest - DUP3 DUP2 EQ // i == final_pos + // stack: i, final_pos, retdest + DUP2 DUP2 EQ // i == final_pos %jumpi(proof_ok) - DUP3 DUP2 GT // i > final_pos + DUP2 DUP2 GT // i > final_pos %jumpi(proof_not_ok) - // stack: i, ctx, final_pos, retdest - %stack (i, ctx) -> (ctx, i, i, ctx) - ADD // combine context and offset to make an address (SEGMENT_CODE == 0) - MLOAD_GENERAL - // stack: opcode, i, ctx, final_pos, retdest + // stack: i, final_pos, retdest + DUP1 + MLOAD_GENERAL // SEGMENT_CODE == 0 + // stack: opcode, i, final_pos, retdest DUP1 // Slightly more efficient than `%eq_const(0x5b) ISZERO` PUSH 0x5b SUB - // stack: opcode != JUMPDEST, opcode, i, ctx, final_pos, retdest + // stack: opcode != JUMPDEST, opcode, i, final_pos, retdest %jumpi(continue) - // stack: JUMPDEST, i, 
ctx, code_len, retdest - %stack (JUMPDEST, i, ctx) -> (ctx, @SEGMENT_JUMPDEST_BITS, i, JUMPDEST, i, ctx) - %build_address + // stack: JUMPDEST, i, final_pos, retdest + %stack (JUMPDEST, i) -> (@SEGMENT_JUMPDEST_BITS, i, JUMPDEST, i) + ADD // address to write jumpdest bit, i already contains the context PUSH 1 - // stack: 1, addr, JUMPDEST, i, ctx + // stack: 1, addr, JUMPDEST, i MSTORE_GENERAL continue: - // stack: opcode, i, ctx, final_pos, retdest + // stack: opcode, i, final_pos, retdest %add_const(code_bytes_to_skip) %mload_kernel_code - // stack: bytes_to_skip, i, ctx, final_pos, retdest + // stack: bytes_to_skip, i, final_pos, retdest ADD - // stack: i, ctx, final_pos, retdest + // stack: i, final_pos, retdest %jump(loop) proof_ok: - // stack: i, ctx, final_pos, retdest + // stack: i, final_pos, retdest // We already know final_pos is a jumpdest - %stack (i, ctx, final_pos) -> (ctx, @SEGMENT_JUMPDEST_BITS, final_pos) - %build_address + %stack (i, final_pos) -> (@SEGMENT_JUMPDEST_BITS, final_pos) + ADD // final_pos already contains the context PUSH 1 MSTORE_GENERAL JUMP proof_not_ok: - %pop3 + %pop2 JUMP // Determines how many bytes away is the next opcode, based on the opcode we read.
diff --git a/evm/src/cpu/kernel/asm/core/log.asm b/evm/src/cpu/kernel/asm/core/log.asm index 0689d49211..f23d5e174c 100644 --- a/evm/src/cpu/kernel/asm/core/log.asm +++ b/evm/src/cpu/kernel/asm/core/log.asm @@ -206,22 +206,27 @@ log_after_topics: // stack: next_log_ptr, data_ptr, data_offset, retdest SWAP1 // stack: data_ptr, next_log_ptr, data_offset, retdest + SWAP2 + PUSH @SEGMENT_MAIN_MEMORY GET_CONTEXT %build_address + SWAP2 + // stack: data_ptr, next_log_ptr, data_addr, retdest + store_log_data_loop: - // stack: cur_data_ptr, next_log_ptr, cur_data_offset, retdest + // stack: cur_data_ptr, next_log_ptr, cur_data_addr, retdest DUP2 DUP2 EQ - // stack: cur_data_ptr == next_log_ptr, cur_data_ptr, next_log_ptr, cur_data_offset, retdest + // stack: cur_data_ptr == next_log_ptr, cur_data_ptr, next_log_ptr, cur_data_addr, retdest %jumpi(store_log_data_loop_end) - // stack: cur_data_ptr, next_log_ptr, cur_data_offset, retdest + // stack: cur_data_ptr, next_log_ptr, cur_data_addr, retdest DUP3 - %mload_current(@SEGMENT_MAIN_MEMORY) - // stack: cur_data, cur_data_ptr, next_log_ptr, cur_data_offset, retdest + MLOAD_GENERAL + // stack: cur_data, cur_data_ptr, next_log_ptr, cur_data_addr, retdest // Store current data byte. 
DUP2 %mstore_kernel(@SEGMENT_LOGS_DATA) - // stack: cur_data_ptr, next_log_ptr, cur_data_offset, retdest + // stack: cur_data_ptr, next_log_ptr, cur_data_addr, retdest SWAP2 %increment SWAP2 - // stack: cur_data_ptr, next_log_ptr, next_data_offset, retdest + // stack: cur_data_ptr, next_log_ptr, next_data_addr, retdest %increment %jump(store_log_data_loop) diff --git a/evm/src/cpu/kernel/asm/core/selfdestruct_list.asm b/evm/src/cpu/kernel/asm/core/selfdestruct_list.asm index 5084541aa2..258f794054 100644 --- a/evm/src/cpu/kernel/asm/core/selfdestruct_list.asm +++ b/evm/src/cpu/kernel/asm/core/selfdestruct_list.asm @@ -5,8 +5,9 @@ %macro insert_selfdestruct_list // stack: addr %mload_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) - %stack (len, addr) -> (len, addr, len) - %mstore_kernel(@SEGMENT_SELFDESTRUCT_LIST) // Store new address at the end of the array. + DUP1 PUSH @SEGMENT_SELFDESTRUCT_LIST %build_kernel_address + %stack (write_addr, len, addr) -> (addr, write_addr, len) + MSTORE_GENERAL // Store new address at the end of the array. // stack: len %increment %mstore_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) // Store new length. 
@@ -18,12 +19,14 @@ global remove_selfdestruct_list: // stack: addr, retdest %mload_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) // stack: len, addr, retdest - PUSH 0 + PUSH @SEGMENT_SELFDESTRUCT_LIST ADD + PUSH @SEGMENT_SELFDESTRUCT_LIST remove_selfdestruct_list_loop: + // `i` and `len` are both scaled by SEGMENT_SELFDESTRUCT_LIST %stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest) EQ %jumpi(panic) // stack: i, len, addr, retdest - DUP1 %mload_kernel(@SEGMENT_SELFDESTRUCT_LIST) + DUP1 MLOAD_GENERAL // stack: loaded_addr, i, len, addr, retdest DUP4 // stack: addr, loaded_addr, i, len, addr, retdest @@ -33,24 +36,28 @@ remove_selfdestruct_list_loop: %jump(remove_selfdestruct_list_loop) remove_selfdestruct_list_found: %stack (i, len, addr, retdest) -> (len, 1, i, retdest) - SUB DUP1 %mstore_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) // Decrement the list length. + SUB + PUSH @SEGMENT_SELFDESTRUCT_LIST + DUP2 SUB // unscale + %mstore_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) // Decrement the list length. // stack: len-1, i, retdest - %mload_kernel(@SEGMENT_SELFDESTRUCT_LIST) // Load the last address in the list. + MLOAD_GENERAL // Load the last address in the list. // stack: last_addr, i, retdest - SWAP1 - %mstore_kernel(@SEGMENT_SELFDESTRUCT_LIST) // Store the last address at the position of the removed address. + MSTORE_GENERAL // Store the last address at the position of the removed address. 
JUMP global delete_all_selfdestructed_addresses: // stack: retdest %mload_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) // stack: len, retdest - PUSH 0 + PUSH @SEGMENT_SELFDESTRUCT_LIST ADD + PUSH @SEGMENT_SELFDESTRUCT_LIST delete_all_selfdestructed_addresses_loop: + // `i` and `len` are both scaled by SEGMENT_SELFDESTRUCT_LIST // stack: i, len, retdest DUP2 DUP2 EQ %jumpi(delete_all_selfdestructed_addresses_done) // stack: i, len, retdest - DUP1 %mload_kernel(@SEGMENT_SELFDESTRUCT_LIST) + DUP1 MLOAD_GENERAL // stack: loaded_addr, i, len, retdest DUP1 %is_non_existent ISZERO %jumpi(bingo) // stack: loaded_addr, i, len, retdest diff --git a/evm/src/cpu/kernel/asm/core/terminate.asm b/evm/src/cpu/kernel/asm/core/terminate.asm index 9b52591933..8572f34f28 100644 --- a/evm/src/cpu/kernel/asm/core/terminate.asm +++ b/evm/src/cpu/kernel/asm/core/terminate.asm @@ -203,9 +203,11 @@ global terminate_common: // Similarly, we write the parent PC to SEGMENT_KERNEL_GENERAL[2] so that // we can later read it after switching to the parent context. - %mload_context_metadata(@CTX_METADATA_PARENT_PC) PUSH 2 - %mstore_kernel(@SEGMENT_KERNEL_GENERAL) + PUSH @SEGMENT_KERNEL_GENERAL + %build_kernel_address + %mload_context_metadata(@CTX_METADATA_PARENT_PC) + MSTORE_GENERAL // stack: (empty) // Go back to the parent context. 
diff --git a/evm/src/cpu/kernel/asm/core/touched_addresses.asm b/evm/src/cpu/kernel/asm/core/touched_addresses.asm index f2c0394a66..044b103ff4 100644 --- a/evm/src/cpu/kernel/asm/core/touched_addresses.asm +++ b/evm/src/cpu/kernel/asm/core/touched_addresses.asm @@ -15,12 +15,14 @@ global insert_touched_addresses: // stack: addr, retdest %mload_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // stack: len, addr, retdest - PUSH 0 + PUSH @SEGMENT_TOUCHED_ADDRESSES ADD + PUSH @SEGMENT_TOUCHED_ADDRESSES insert_touched_addresses_loop: + // `i` and `len` are both scaled by SEGMENT_TOUCHED_ADDRESSES %stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest) EQ %jumpi(insert_address) // stack: i, len, addr, retdest - DUP1 %mload_kernel(@SEGMENT_TOUCHED_ADDRESSES) + DUP1 MLOAD_GENERAL // stack: loaded_addr, i, len, addr, retdest DUP4 // stack: addr, loaded_addr, i, len, addr, retdest @@ -30,10 +32,11 @@ insert_touched_addresses_loop: %jump(insert_touched_addresses_loop) insert_address: - %stack (i, len, addr, retdest) -> (i, addr, len, retdest) + %stack (i, len, addr, retdest) -> (i, addr, len, @SEGMENT_TOUCHED_ADDRESSES, retdest) DUP2 %journal_add_account_touched // Add a journal entry for the touched account. - %mstore_kernel(@SEGMENT_TOUCHED_ADDRESSES) // Store new address at the end of the array. - // stack: len, retdest + %swap_mstore // Store new address at the end of the array. + // stack: len, segment, retdest + SUB // unscale %increment %mstore_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // Store new length. 
JUMP @@ -49,12 +52,14 @@ global remove_touched_addresses: // stack: addr, retdest %mload_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // stack: len, addr, retdest - PUSH 0 + PUSH @SEGMENT_TOUCHED_ADDRESSES ADD + PUSH @SEGMENT_TOUCHED_ADDRESSES remove_touched_addresses_loop: + // `i` and `len` are both scaled by SEGMENT_TOUCHED_ADDRESSES %stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest) EQ %jumpi(panic) // stack: i, len, addr, retdest - DUP1 %mload_kernel(@SEGMENT_TOUCHED_ADDRESSES) + DUP1 MLOAD_GENERAL // stack: loaded_addr, i, len, addr, retdest DUP4 // stack: addr, loaded_addr, i, len, addr, retdest @@ -64,12 +69,14 @@ remove_touched_addresses_loop: %jump(remove_touched_addresses_loop) remove_touched_addresses_found: %stack (i, len, addr, retdest) -> (len, 1, i, retdest) - SUB DUP1 %mstore_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // Decrement the list length. + SUB + PUSH @SEGMENT_TOUCHED_ADDRESSES DUP2 + SUB // unscale + %mstore_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // Decrement the list length. // stack: len-1, i, retdest - %mload_kernel(@SEGMENT_TOUCHED_ADDRESSES) // Load the last address in the list. + MLOAD_GENERAL // Load the last address in the list. // stack: last_addr, i, retdest - SWAP1 - %mstore_kernel(@SEGMENT_TOUCHED_ADDRESSES) // Store the last address at the position of the removed address. + MSTORE_GENERAL // Store the last address at the position of the removed address.
JUMP @@ -77,12 +85,14 @@ global delete_all_touched_addresses: // stack: retdest %mload_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // stack: len, retdest - PUSH 0 + PUSH @SEGMENT_TOUCHED_ADDRESSES ADD + PUSH @SEGMENT_TOUCHED_ADDRESSES delete_all_touched_addresses_loop: + // `i` and `len` are both scaled by SEGMENT_TOUCHED_ADDRESSES // stack: i, len, retdest DUP2 DUP2 EQ %jumpi(delete_all_touched_addresses_done) // stack: i, len, retdest - DUP1 %mload_kernel(@SEGMENT_TOUCHED_ADDRESSES) + DUP1 MLOAD_GENERAL // stack: loaded_addr, i, len, retdest DUP1 %is_empty %jumpi(bingo) // stack: loaded_addr, i, len, retdest diff --git a/evm/src/cpu/kernel/asm/journal/journal.asm b/evm/src/cpu/kernel/asm/journal/journal.asm index 2715bc6b98..9ba4350878 100644 --- a/evm/src/cpu/kernel/asm/journal/journal.asm +++ b/evm/src/cpu/kernel/asm/journal/journal.asm @@ -182,9 +182,12 @@ // stack: (empty) %current_checkpoint // stack: current_checkpoint + DUP1 + PUSH @SEGMENT_JOURNAL_CHECKPOINTS + %build_kernel_address %journal_size - // stack: journal_size, current_checkpoint - DUP2 %mstore_kernel(@SEGMENT_JOURNAL_CHECKPOINTS) + // stack: journal_size, addr, current_checkpoint + MSTORE_GENERAL // stack: current_checkpoint %mload_context_metadata(@CTX_METADATA_CHECKPOINTS_LEN) // stack: i, current_checkpoint diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index 74b49f2780..da8a05fb18 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -225,6 +225,15 @@ // stack: value %endmacro +// Load a single value from the given segment of kernel (context 0) memory. +%macro mload_kernel_no_offset(segment) + // stack: empty + PUSH $segment + // stack: addr + MLOAD_GENERAL + // stack: value +%endmacro + // Store a single value from the given segment of kernel (context 0) memory. 
%macro mstore_kernel(segment) // stack: offset, value @@ -237,6 +246,16 @@ // stack: (empty) %endmacro +// Store a single value at offset 0 of the given segment of kernel (context 0) memory. +%macro mstore_kernel_no_offset(segment) + // stack: value + PUSH $segment + // stack: addr, value + SWAP1 + MSTORE_GENERAL + // stack: (empty) +%endmacro + // Store a single value from the given segment of kernel (context 0) memory. %macro mstore_kernel(segment, offset) // stack: value @@ -393,7 +412,7 @@ %macro mstore_kernel_code // stack: offset, value // ctx == SEGMENT_CODE == 0 - MLOAD_GENERAL + %swap_mstore // offset is on top; MSTORE_GENERAL consumes (value, addr) // stack: (empty) %endmacro diff --git a/evm/src/cpu/kernel/asm/mpt/delete/delete_branch.asm b/evm/src/cpu/kernel/asm/mpt/delete/delete_branch.asm index 775e4e11ed..64187ac83a 100644 --- a/evm/src/cpu/kernel/asm/mpt/delete/delete_branch.asm +++ b/evm/src/cpu/kernel/asm/mpt/delete/delete_branch.asm @@ -43,7 +43,10 @@ update_branch: // If it's one, transform the branch node into an leaf/extension node and return it.
maybe_normalize_branch: // stack: updated_child_ptr, first_nibble, node_payload_ptr, retdest - PUSH 0 %mstore_kernel_general(0) PUSH 0 %mstore_kernel_general(1) + PUSH 0 + PUSH @SEGMENT_KERNEL_GENERAL + MSTORE_32BYTES_2 + POP // stack: updated_child_ptr, first_nibble, node_payload_ptr, retdest PUSH 0 // Loop from i=0..16 excluding `first_nibble` and store the number of non-empty children in @@ -61,16 +64,18 @@ loop_eq_first_nibble: %increment %jump(loop) loop_non_empty: // stack: i, updated_child_ptr, first_nibble, node_payload_ptr, retdest - %mload_kernel_general(0) %increment %mstore_kernel_general(0) - DUP1 %mstore_kernel_general(1) + %mload_kernel_no_offset(@SEGMENT_KERNEL_GENERAL) %increment %mstore_kernel_no_offset(@SEGMENT_KERNEL_GENERAL) + PUSH 1 PUSH @SEGMENT_KERNEL_GENERAL %build_kernel_address + DUP2 + MSTORE_GENERAL %increment %jump(loop) loop_end: // stack: i, updated_child_ptr, first_nibble, node_payload_ptr, retdest POP // stack: updated_child_ptr, first_nibble, node_payload_ptr, retdest // If there's more than one non-empty child, simply update the branch node. - %mload_kernel_general(0) %gt_const(1) %jumpi(update_branch) - %mload_kernel_general(0) ISZERO %jumpi(panic) // This should never happen. + %mload_kernel_no_offset(@SEGMENT_KERNEL_GENERAL) %gt_const(1) %jumpi(update_branch) + %mload_kernel_no_offset(@SEGMENT_KERNEL_GENERAL) ISZERO %jumpi(panic) // This should never happen. // Otherwise, transform the branch node into a leaf/extension node. 
// stack: updated_child_ptr, first_nibble, node_payload_ptr, retdest %mload_kernel_general(1) diff --git a/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm b/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm index 532966a0ce..0ca2458f0c 100644 --- a/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm +++ b/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm @@ -111,7 +111,6 @@ rlp_header_large: DUP2 // rlp_addr PUSH 0xb8 // value = 0xb7 + len_of_len = 0xb8 MSTORE_GENERAL - // stack: rlp_addr, value, hp_len, i, rlp_addr, num_nibbles, packed_nibbles, terminated, retdest // stack: hp_len, rlp_addr, num_nibbles, packed_nibbles, terminated, retdest DUP2 %increment diff --git a/evm/src/cpu/kernel/asm/mpt/insert/insert_extension.asm b/evm/src/cpu/kernel/asm/mpt/insert/insert_extension.asm index 3ead805b1d..21a4b7558b 100644 --- a/evm/src/cpu/kernel/asm/mpt/insert/insert_extension.asm +++ b/evm/src/cpu/kernel/asm/mpt/insert/insert_extension.asm @@ -74,9 +74,21 @@ node_key_continues: // Pseudocode: new_node = [MPT_TYPE_BRANCH] + [0] * 17 %get_trie_data_size // pointer to the branch node we're about to create PUSH @MPT_NODE_BRANCH %append_to_trie_data - %rep 17 - PUSH 0 %append_to_trie_data - %endrep + + PUSH 0 + // Increment trie data size by 17 + %get_trie_data_size + // stack: trie_data_size, 0 + DUP1 + %add_const(17) + %set_trie_data_size + + // stack: trie_data_size, 0 + + // Write 17 consecutive 0s at once + PUSH @SEGMENT_TRIE_DATA %build_kernel_address + MSTORE_32BYTES_17 + POP process_node_child: // stack: new_node_ptr, common_len, common_key, node_len, node_key, insert_len, insert_key, node_child_ptr, insert_value_ptr, retdest diff --git a/evm/src/cpu/kernel/asm/mpt/insert/insert_leaf.asm b/evm/src/cpu/kernel/asm/mpt/insert/insert_leaf.asm index 72a014ceec..806fc0ddbd 100644 --- a/evm/src/cpu/kernel/asm/mpt/insert/insert_leaf.asm +++ b/evm/src/cpu/kernel/asm/mpt/insert/insert_leaf.asm @@ -69,9 +69,22 @@ global mpt_insert_leaf: // For now, we allocate the branch node, initially with no children or 
value. %get_trie_data_size // pointer to the branch node we're about to create PUSH @MPT_NODE_BRANCH %append_to_trie_data - %rep 17 - PUSH 0 %append_to_trie_data - %endrep + + PUSH 0 + // Increment trie data size by 17 + %get_trie_data_size + // stack: trie_data_size, 0 + DUP1 + %add_const(17) + %set_trie_data_size + + // stack: trie_data_size, 0 + + // Write 17 consecutive 0s at once + PUSH @SEGMENT_TRIE_DATA %build_kernel_address + MSTORE_32BYTES_17 + POP + // stack: branch_ptr, common_len, common_key, node_len, node_key, insert_len, insert_key, node_value_ptr, insert_value_ptr, retdest // Now, we branch based on whether each key continues beyond the common diff --git a/evm/src/cpu/kernel/asm/rlp/encode.asm b/evm/src/cpu/kernel/asm/rlp/encode.asm index 721932df50..9f6813ab18 100644 --- a/evm/src/cpu/kernel/asm/rlp/encode.asm +++ b/evm/src/cpu/kernel/asm/rlp/encode.asm @@ -195,7 +195,8 @@ prepend_rlp_list_prefix_big: PUSH 1 DUP6 SUB // start_rlp_addr - 1 SUB // stack: prefix_start_rlp_addr, len_of_len, payload_len, end_rlp_addr, start_rlp_addr, retdest - DUP2 %add_const(0xf7) DUP2 %swap_mstore // rlp[prefix_start_rlp_addr] = 0xf7 + len_of_len + DUP1 + DUP3 %add_const(0xf7) MSTORE_GENERAL // rlp[prefix_start_rlp_addr] = 0xf7 + len_of_len // stack: prefix_start_rlp_addr, len_of_len, payload_len, end_rlp_addr, start_rlp_addr, retdest DUP1 %increment // start_len_rlp_addr = prefix_start_rlp_addr + 1 %stack (start_len_rlp_addr, prefix_start_rlp_addr, len_of_len, payload_len, end_rlp_addr, start_rlp_addr, retdest) diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 76def0c7cc..8753e1cafb 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -446,6 +446,14 @@ // stack: addr (ctx == 0) %endmacro +%macro build_address_with_ctx(seg, off) + // stack: ctx + PUSH $seg + PUSH $off + %build_address + // stack: addr +%endmacro + %macro 
build_address_with_ctx_no_offset(seg) // stack: ctx PUSH $seg diff --git a/evm/src/cpu/kernel/asm/util/keccak.asm b/evm/src/cpu/kernel/asm/util/keccak.asm index 80c6d841fe..dceb7b195b 100644 --- a/evm/src/cpu/kernel/asm/util/keccak.asm +++ b/evm/src/cpu/kernel/asm/util/keccak.asm @@ -38,11 +38,10 @@ sys_keccak256_empty: %macro keccak256_word(num_bytes) // Since KECCAK_GENERAL takes its input from memory, we will first write // input_word's bytes to @SEGMENT_KERNEL_GENERAL[0..$num_bytes]. - %stack (word) -> (@SEGMENT_KERNEL_GENERAL, word, $num_bytes, %%after_mstore) + %stack (word) -> (@SEGMENT_KERNEL_GENERAL, word, $num_bytes, %%after_mstore, $num_bytes, $num_bytes) %jump(mstore_unpacking) %%after_mstore: - // stack: addr - %stack(addr) -> (addr, $num_bytes, $num_bytes) + // stack: addr, $num_bytes, $num_bytes SUB KECCAK_GENERAL %endmacro