From 43ae7c5c6eb3d8fcddd091fcc576cc28778defcd Mon Sep 17 00:00:00 2001 From: Edgar Date: Thu, 14 May 2026 13:47:59 +0200 Subject: [PATCH] perf(l1): BAL optimistic merkleization on validation path Decouple merkleization from EVM execution when the validation path receives a BAL: synthesize per-field deltas from the input BlockAccessList pre-execution and run merkle stages B/C/D in parallel with execution + warming. validate_block_access_list_hash remains the post-execution correctness gate. Closes #6584. --- CHANGELOG.md | 4 + crates/blockchain/blockchain.rs | 283 +++++++++++--------- crates/common/types/block_access_list.rs | 319 ++++++++++++++++++++++- crates/common/types/mod.rs | 1 + crates/vm/backends/levm/mod.rs | 31 ++- crates/vm/backends/mod.rs | 2 +- 6 files changed, 501 insertions(+), 139 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69ee0e6d0ac..ea56e5ebc5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2026-05-14 + +- BAL optimistic merkleization: synthesize state deltas from the input Block Access List pre-execution and merkleize in parallel with the EVM on the `engine_newPayload` validation path. Includes a parallel state-trie pre-warm and per-account hashed-key-sorted storage inserts to keep the trie node arena hot for Stage B/C [#6655](https://github.com/lambdaclass/ethrex/pull/6655) + ### 2026-04-27 - Reduce peak disk usage during snap sync by moving SST files into the temp DB instead of copying [#6532](https://github.com/lambdaclass/ethrex/pull/6532) diff --git a/crates/blockchain/blockchain.rs b/crates/blockchain/blockchain.rs index ddb72857a5b..a0e68410b19 100644 --- a/crates/blockchain/blockchain.rs +++ b/crates/blockchain/blockchain.rs @@ -64,8 +64,9 @@ use ethrex_common::types::block_access_list::BlockAccessList; use ethrex_common::types::block_execution_witness::ExecutionWitness; use ethrex_common::types::fee_config::FeeConfig; use ethrex_common::types::{ - AccountInfo, AccountState, AccountUpdate, Block, BlockHash, BlockHeader, BlockNumber, - ChainConfig, Code, Receipt, Transaction, WrappedEIP4844Transaction, validate_block_body, + AccountInfo, AccountState, AccountUpdate, BalSynthesisItem, Block, BlockHash, BlockHeader, + BlockNumber, ChainConfig, Code, Receipt, Transaction, WrappedEIP4844Transaction, + synthesize_bal_updates, validate_block_body, }; use ethrex_common::types::{ELASTICITY_MULTIPLIER, P2PTransaction}; use ethrex_common::types::{Fork, MempoolTransaction}; @@ -92,6 +93,7 @@ use ethrex_vm::backends::levm::db::DatabaseLogger; use ethrex_vm::{BlockExecutionResult, DynVmDatabase, Evm, EvmError}; use mempool::Mempool; use payload::PayloadOrTask; +use rayon::prelude::*; use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, HashSet}; @@ -136,7 +138,7 @@ type BlockExecutionPipelineResult = ( Option>, Option, // produced BAL (Some on Amsterdam+ blocks) usize, // max queue length - [Instant; 6], // timing instants + [Instant; 7], // timing instants Duration, // warmer duration ); @@ -316,7 +318,9 @@ struct PreMerkelizedAccountState { /// Work item for BAL state trie shard workers. struct BalStateWorkItem { hashed_address: H256, - info: Option, + nonce: Option, + balance: Option, + code_hash: Option, removed: bool, /// Pre-computed storage root from Stage B, or None to keep existing. storage_root: Option, @@ -467,6 +471,24 @@ impl Blockchain { let cancelled = AtomicBool::new(false); + // Synthesize BAL updates pre-scope so the merkleizer thread can start + // trie work immediately, in parallel with execution. + let optimistic_updates: Option> = + bal.map(synthesize_bal_updates); + let optimistic_witness: Option> = if self.options.precompute_witnesses { + optimistic_updates.as_ref().map(|m| { + m.iter() + .map(|(addr, item)| AccountUpdate { + address: *addr, + added_storage: item.added_storage.clone(), + ..Default::default() + }) + .collect() + }) + } else { + None + }; + let (execution_result, merkleization_result, warmer_duration) = std::thread::scope(|s| -> Result<_, ChainError> { let vm_type = vm.vm_type; @@ -502,7 +524,16 @@ impl Blockchain { ChainError::Custom(format!("Failed to spawn warmer thread: {e}")) })?; let max_queue_length_ref = &mut max_queue_length; - let (tx, rx) = channel(); + // Channel only exists on the streaming (non-BAL) path. On the BAL path the + // EVM merkleizes nothing on its own (the synthesized map drives merkleization), + // so no Sender / drain thread are needed. + let (tx, rx_for_merkle) = if bal.is_some() { + (None, None) + } else { + let (tx, rx) = channel(); + (Some(tx), Some(rx)) + }; + let execution_handle = std::thread::Builder::new() .name("block_executor_execution".to_string()) .spawn_scoped(s, move || -> Result<_, ChainError> { @@ -548,28 +579,40 @@ impl Blockchain { ChainError::Custom(format!("Failed to spawn execution thread: {e}")) })?; let parent_header_ref = &parent_header; // Avoid moving to thread + // Merkleizer returns (list, streaming witness or None on BAL path, merkle_start, merkle_end). + type MerkleResult = Result< + ( + AccountUpdatesList, + Option>, + Instant, + Instant, + ), + StoreError, + >; let merkleize_handle = std::thread::Builder::new() .name("block_executor_merkleizer".to_string()) - .spawn_scoped(s, move || -> Result<_, StoreError> { - let (account_updates_list, accumulated_updates) = if bal.is_some() { - self.handle_merkleization_bal( - rx, - parent_header_ref, - queue_length_ref, - max_queue_length_ref, - )? - } else { - self.handle_merkleization( - rx, - parent_header_ref, - queue_length_ref, - max_queue_length_ref, - )? - }; + .spawn_scoped(s, move || -> MerkleResult { + let merkle_start_instant = Instant::now(); + let (account_updates_list, streaming_witness) = + if let Some(prepared) = optimistic_updates { + let list = self.handle_merkleization_bal_from_updates( + prepared, + parent_header_ref, + )?; + (list, None) + } else { + self.handle_merkleization( + rx_for_merkle.expect("rx is Some on non-BAL path"), + parent_header_ref, + queue_length_ref, + max_queue_length_ref, + )? + }; let merkle_end_instant = Instant::now(); Ok(( account_updates_list, - accumulated_updates, + streaming_witness, + merkle_start_instant, merkle_end_instant, )) }) @@ -591,9 +634,13 @@ impl Blockchain { .unwrap_or(Duration::ZERO); Ok((execution_result, merkleization_result, warmer_duration)) })?; - let (account_updates_list, accumulated_updates, merkle_end_instant) = merkleization_result?; + let (account_updates_list, streaming_witness, merkle_start_instant, merkle_end_instant) = + merkleization_result?; let (execution_result, produced_bal, exec_end_instant) = execution_result?; + // Synthesized witness wins when BAL is present; streaming witness wins otherwise. + let accumulated_updates = optimistic_witness.or(streaming_witness); + let exec_merkle_end_instant = Instant::now(); Ok(( @@ -606,6 +653,7 @@ impl Blockchain { start_instant, block_validated_instant, exec_merkle_start, + merkle_start_instant, exec_end_instant, merkle_end_instant, exec_merkle_end_instant, @@ -837,85 +885,68 @@ impl Blockchain { result } - /// BAL-specific merkleization handler. - /// - /// When the Block Access List is available (Amsterdam+), all dirty accounts - /// and storage slots are known upfront. This enables computing storage roots - /// in parallel across accounts before feeding final results into state trie - /// shards. + /// Validation path synthesizes `BalSynthesisItem`s from the input BAL pre-execution and + /// merkleizes optimistically in parallel with EVM execution. Correctness is gated by + /// `validate_block_access_list_hash` post-execution; on mismatch the optimistic merkle + /// result is discarded by the `?` error propagation on the execution thread's join result. #[instrument( level = "trace", name = "Trie update (BAL)", skip_all, fields(namespace = "block_execution") )] - fn handle_merkleization_bal( + fn handle_merkleization_bal_from_updates( &self, - rx: Receiver>, + prepared: FxHashMap, parent_header: &BlockHeader, - queue_length: &AtomicUsize, - max_queue_length: &mut usize, - ) -> Result<(AccountUpdatesList, Option>), StoreError> { + ) -> Result { const NUM_WORKERS: usize = 16; let parent_state_root = parent_header.state_root; - // === Stage A: Drain + accumulate all AccountUpdates === - // BAL guarantees completeness, so we block until execution finishes. - let mut all_updates: FxHashMap = FxHashMap::default(); - for updates in rx { - let current_length = queue_length.fetch_sub(1, Ordering::Acquire); - *max_queue_length = current_length.max(*max_queue_length); - for update in updates { - match all_updates.entry(update.address) { - Entry::Vacant(e) => { - e.insert(update); - } - Entry::Occupied(mut e) => { - e.get_mut().merge(update); - } - } - } - } - - // Extract witness accumulator before consuming updates - let accumulated_updates = if self.options.precompute_witnesses { - Some(all_updates.values().cloned().collect::>()) - } else { - None - }; - - // Extract code updates and build work items with pre-hashed addresses + // Build code updates and work items with pre-hashed addresses from the + // pre-synthesized map. No Stage A drain needed: the synthesis happened + // pre-scope at the call site. let mut code_updates: Vec<(H256, Code)> = Vec::new(); - let mut accounts: Vec<(H256, AccountUpdate)> = Vec::with_capacity(all_updates.len()); - for (addr, update) in all_updates { + let mut accounts: Vec<(H256, BalSynthesisItem)> = Vec::with_capacity(prepared.len()); + for (addr, item) in prepared { let hashed = keccak(addr); - if let Some(info) = &update.info - && let Some(code) = &update.code + if let Some(ch) = item.code_hash + && let Some(ref code) = item.code { - code_updates.push((info.code_hash, code.clone())); + code_updates.push((ch, code.clone())); } - accounts.push((hashed, update)); + accounts.push((hashed, item)); } + // Warm parent state-trie pages for all touched accounts in parallel before + // Stage B / Stage C race for them. This replaces the prefetch that the old + // streaming path got for free via `bal_to_account_updates`. + accounts + .par_iter() + .try_for_each(|(hashed_address, _)| -> Result<(), StoreError> { + let state_trie = self.storage.open_state_trie(parent_state_root)?; + let _ = state_trie.get(hashed_address.as_bytes())?; + Ok(()) + })?; + // === Stage B: Parallel per-account storage root computation === // Sort by storage weight (descending) for greedy bin packing. // Every item with real Stage B work MUST have weight >= 1: the greedy // algorithm does `bin_weights[min] += weight`, so weight-0 items never // change the bin weight and `min_by_key` keeps returning the same bin, - // piling ALL of them into a single worker. Removed accounts are cheap - // individually (just push EMPTY_TRIE_HASH) but must still be distributed. + // piling ALL of them into a single worker. + // Synthesis never sets `removed`/`removed_storage`, so weight is purely + // based on storage slot count. let mut work_indices: Vec<(usize, usize)> = accounts .iter() .enumerate() - .map(|(i, (_, update))| { - let weight = - if update.removed || update.removed_storage || !update.added_storage.is_empty() - { - 1.max(update.added_storage.len()) - } else { - 0 - }; + .map(|(i, (_, item))| { + let weight = if !item.added_storage.is_empty() { + 1.max(item.added_storage.len()) + } else { + 0 + }; (i, weight) }) .collect(); @@ -959,42 +990,32 @@ impl Blockchain { let state_trie = self.storage.open_state_trie(parent_state_root)?; for idx in bin { - let (hashed_address, update) = &accounts_ref[idx]; - let has_storage_changes = update.removed - || update.removed_storage - || !update.added_storage.is_empty(); - if !has_storage_changes { - continue; - } - - if update.removed { - results.push(( - idx, - *EMPTY_TRIE_HASH, - vec![(Nibbles::default(), vec![RLP_NULL])], - )); + let (hashed_address, item) = &accounts_ref[idx]; + if item.added_storage.is_empty() { continue; } - let mut trie = if update.removed_storage { - Trie::new_temp() - } else { - let storage_root = - match state_trie.get(hashed_address.as_bytes())? { - Some(rlp) => { - AccountState::decode(&rlp)?.storage_root - } - None => *EMPTY_TRIE_HASH, - }; - self.storage.open_storage_trie( - *hashed_address, - parent_state_root, - storage_root, - )? + let storage_root = match state_trie + .get(hashed_address.as_bytes())? + { + Some(rlp) => AccountState::decode(&rlp)?.storage_root, + None => *EMPTY_TRIE_HASH, }; - - for (key, value) in &update.added_storage { - let hashed_key = keccak(key); + let mut trie = self.storage.open_storage_trie( + *hashed_address, + parent_state_root, + storage_root, + )?; + + // Pre-hash and sort by trie path so per-slot inserts + // walk the node arena in order, improving cache locality. + let mut hashed_storage: Vec<(H256, U256)> = item + .added_storage + .iter() + .map(|(k, v)| (keccak(k), *v)) + .collect(); + hashed_storage.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + for (hashed_key, value) in &hashed_storage { if value.is_zero() { trie.remove(hashed_key.as_bytes())?; } else { @@ -1033,12 +1054,14 @@ impl Blockchain { // Build per-shard work items let mut shards: Vec> = (0..NUM_WORKERS).map(|_| Vec::new()).collect(); - for (idx, (hashed_address, update)) in accounts.iter().enumerate() { + for (idx, (hashed_address, item)) in accounts.iter().enumerate() { let bucket = (hashed_address.as_fixed_bytes()[0] >> 4) as usize; shards[bucket].push(BalStateWorkItem { hashed_address: *hashed_address, - info: update.info.clone(), - removed: update.removed, + nonce: item.nonce, + balance: item.balance, + code_hash: item.code_hash, + removed: false, storage_root: storage_roots[idx], }); } @@ -1083,10 +1106,14 @@ impl Blockchain { if item.removed { account_state = AccountState::default(); } else { - if let Some(ref info) = item.info { - account_state.nonce = info.nonce; - account_state.balance = info.balance; - account_state.code_hash = info.code_hash; + if let Some(n) = item.nonce { + account_state.nonce = n; + } + if let Some(b) = item.balance { + account_state.balance = b; + } + if let Some(ch) = item.code_hash { + account_state.code_hash = ch; } if let Some(storage_root) = item.storage_root { account_state.storage_root = storage_root; @@ -1133,15 +1160,12 @@ impl Blockchain { *EMPTY_TRIE_HASH }; - Ok(( - AccountUpdatesList { - state_trie_hash, - state_updates, - storage_updates, - code_updates, - }, - accumulated_updates, - )) + Ok(AccountUpdatesList { + state_trie_hash, + state_updates, + storage_updates, + code_updates, + }) } fn collapse_root_node( @@ -2019,11 +2043,12 @@ impl Blockchain { start_instant, block_validated_instant, exec_merkle_start, + merkle_start_instant, exec_end_instant, merkle_end_instant, exec_merkle_end_instant, stored_instant, - ]: [Instant; 7], + ]: [Instant; 8], ) { let total_ms = stored_instant.duration_since(start_instant).as_secs_f64() * 1000.0; if total_ms == 0.0 { @@ -2122,6 +2147,11 @@ impl Blockchain { "after exec" }; + let merkle_start_delay_ms = merkle_start_instant + .duration_since(exec_merkle_start) + .as_secs_f64() + * 1000.0; + info!("{}", header); info!( " |- validate: {:>7.2} ms ({:>2}%){}", @@ -2136,7 +2166,7 @@ impl Blockchain { bottleneck_marker("exec") ); info!( - " |- merkle: {:>7.2} ms ({:>2}%){} [concurrent: {:.2} ms, drain: {:.2} ms, overlap: {:.0}%, queue: {}]", + " |- merkle: {:>7.2} ms ({:>2}%){} [concurrent: {:.2} ms, drain: {:.2} ms, overlap: {:.0}%, queue: {}, start_delay: {:.2} ms]", merkle_drain_ms, pct(merkle_drain_ms), bottleneck_marker("merkle"), @@ -2144,6 +2174,7 @@ impl Blockchain { merkle_drain_ms, overlap_pct, merkle_queue_length, + merkle_start_delay_ms, ); info!( " |- store: {:>7.2} ms ({:>2}%){}", diff --git a/crates/common/types/block_access_list.rs b/crates/common/types/block_access_list.rs index fb595e30e8c..570554aeea5 100644 --- a/crates/common/types/block_access_list.rs +++ b/crates/common/types/block_access_list.rs @@ -1,5 +1,5 @@ use bytes::{BufMut, Bytes}; -use ethereum_types::{Address, H256, U256}; +use ethereum_types::{Address, BigEndianHash, H256, U256}; use ethrex_rlp::{ decode::RLPDecode, encode::{RLPEncode, encode_length, list_length}, @@ -11,6 +11,7 @@ use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet}; use crate::constants::{EMPTY_BLOCK_ACCESS_LIST_HASH, SYSTEM_ADDRESS}; +use crate::types::Code; use crate::utils::keccak; /// Encode a slice of items in sorted order without cloning. @@ -1591,6 +1592,81 @@ impl BlockAccessListRecorder { } } +/// Per-field delta for a single account, synthesized directly from a [`BlockAccessList`]. +/// +/// Each optional field is `Some` only when the BAL records a change for that field. +/// Fields absent from the BAL are left as `None` so that Stage C writes only the +/// deltas it knows about, without fabricating defaults for unchanged state. +#[derive(Debug, Clone, Default)] +pub struct BalSynthesisItem { + pub address: Address, + pub balance: Option, + pub nonce: Option, + pub code_hash: Option, + pub code: Option, + pub added_storage: FxHashMap, +} + +/// Converts a [`BlockAccessList`] into a per-account map of field-level deltas. +/// +/// Accounts that appear only via `storage_reads` (no balance/nonce/code/storage +/// changes) are omitted: Stage B weight is 0, Stage C field writes all no-op, +/// and the witness builder captures them from `logger.state_accessed`. +pub fn synthesize_bal_updates(bal: &BlockAccessList) -> FxHashMap { + let mut result = FxHashMap::default(); + + for account in bal.accounts() { + // Skip accounts with no actual changes (storage_reads only). + if account.balance_changes.is_empty() + && account.nonce_changes.is_empty() + && account.code_changes.is_empty() + && account.storage_changes.is_empty() + { + continue; + } + + let balance = account.balance_changes.last().map(|c| c.post_balance); + let nonce = account.nonce_changes.last().map(|c| c.post_nonce); + let code = account.code_changes.last().map(|c| { + let hash = keccak(&c.new_code); + Code::from_bytecode_unchecked(c.new_code.clone(), hash) + }); + let code_hash = code.as_ref().map(|c| c.hash); + + let mut added_storage: FxHashMap = FxHashMap::default(); + for sc in &account.storage_changes { + debug_assert!( + !sc.slot_changes.is_empty(), + "SlotChange has empty slot_changes; canonical BAL ordering requires at least one entry" + ); + if sc.slot_changes.is_empty() { + continue; + } + let key = H256::from_uint(&sc.slot); + let value = sc + .slot_changes + .last() + .expect("slot_changes non-empty: checked above") + .post_value; + added_storage.insert(key, value); + } + + result.insert( + account.address, + BalSynthesisItem { + address: account.address, + balance, + nonce, + code_hash, + code, + added_storage, + }, + ); + } + + result +} + #[cfg(test)] mod decode_tests { use super::*; @@ -1676,3 +1752,244 @@ mod decode_tests { ); } } + +#[cfg(test)] +mod synthesize_tests { + use super::*; + use bytes::Bytes; + use ethereum_types::Address; + + fn addr(b: u8) -> Address { + let mut a = Address::zero(); + a.0[19] = b; + a + } + + fn make_bal(account: AccountChanges) -> BlockAccessList { + BlockAccessList::from_accounts(vec![account]) + } + + /// Accounts with only `storage_reads` must be skipped entirely. + #[test] + fn synthesize_skips_read_only_account() { + let mut account = AccountChanges::new(addr(1)); + account.storage_reads = vec![U256::from(42)]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + assert!( + result.is_empty(), + "expected empty map for read-only account" + ); + } + + /// A single storage write with no other deltas. + #[test] + fn synthesize_pure_storage_write() { + let sc = + SlotChange::with_changes(U256::from(5), vec![StorageChange::new(0, U256::from(42))]); + let mut account = AccountChanges::new(addr(2)); + account.storage_changes = vec![sc]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(2)).expect("expected entry"); + assert!(item.balance.is_none()); + assert!(item.nonce.is_none()); + assert!(item.code_hash.is_none()); + assert!(item.code.is_none()); + let key = H256::from_uint(&U256::from(5)); + assert_eq!(item.added_storage.get(&key), Some(&U256::from(42))); + } + + /// Balance-only change: nonce, code, and storage must be None/empty. + /// Regression case for partial-info corruption (Blocker 1). + #[test] + fn synthesize_balance_only_no_nonce_no_code() { + let mut account = AccountChanges::new(addr(3)); + account.balance_changes = vec![BalanceChange::new(2, U256::from(100))]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(3)).expect("expected entry"); + assert_eq!(item.balance, Some(U256::from(100))); + assert!(item.nonce.is_none()); + assert!(item.code_hash.is_none()); + assert!(item.code.is_none()); + assert!(item.added_storage.is_empty()); + } + + /// Nonce-only change. + #[test] + fn synthesize_nonce_only() { + let mut account = AccountChanges::new(addr(4)); + account.nonce_changes = vec![NonceChange::new(2, 7)]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(4)).expect("expected entry"); + assert!(item.balance.is_none()); + assert_eq!(item.nonce, Some(7)); + assert!(item.code_hash.is_none()); + assert!(item.code.is_none()); + assert!(item.added_storage.is_empty()); + } + + /// Code-only change: code_hash must equal keccak of the bytecode. + #[test] + fn synthesize_code_only() { + let bytecode = Bytes::from_static(b"\xff\x00"); + let mut account = AccountChanges::new(addr(5)); + account.code_changes = vec![CodeChange::new(2, bytecode.clone())]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(5)).expect("expected entry"); + assert!(item.balance.is_none()); + assert!(item.nonce.is_none()); + let expected_hash = keccak(&bytecode); + assert_eq!(item.code_hash, Some(expected_hash)); + assert!(item.code.is_some()); + assert_eq!(item.code.as_ref().unwrap().bytecode, bytecode); + assert!(item.added_storage.is_empty()); + } + + /// When multiple balance changes exist, the last one wins. + #[test] + fn synthesize_takes_last_balance() { + let mut account = AccountChanges::new(addr(6)); + account.balance_changes = vec![ + BalanceChange::new(1, U256::from(50)), + BalanceChange::new(5, U256::from(200)), + ]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(6)).expect("expected entry"); + assert_eq!(item.balance, Some(U256::from(200))); + } + + /// When multiple nonce changes exist, the last one wins. + #[test] + fn synthesize_takes_last_nonce() { + let mut account = AccountChanges::new(addr(7)); + account.nonce_changes = vec![NonceChange::new(1, 3), NonceChange::new(5, 9)]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(7)).expect("expected entry"); + assert_eq!(item.nonce, Some(9)); + } + + /// When multiple code changes exist, the last one determines code_hash and code. + #[test] + fn synthesize_takes_last_code_and_hashes() { + let first = Bytes::from_static(b"\x60\x00"); + let last = Bytes::from_static(b"\xff\x00"); + let mut account = AccountChanges::new(addr(8)); + account.code_changes = vec![CodeChange::new(1, first), CodeChange::new(5, last.clone())]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(8)).expect("expected entry"); + let expected_hash = keccak(&last); + assert_eq!(item.code_hash, Some(expected_hash)); + assert_eq!(item.code.as_ref().unwrap().bytecode, last); + } + + /// When a slot has multiple StorageChanges, the last post_value wins. + #[test] + fn synthesize_slot_last_post_value() { + let sc = SlotChange::with_changes( + U256::from(10), + vec![ + StorageChange::new(0, U256::from(1)), + StorageChange::new(7, U256::from(99)), + ], + ); + let mut account = AccountChanges::new(addr(9)); + account.storage_changes = vec![sc]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(9)).expect("expected entry"); + let key = H256::from_uint(&U256::from(10)); + assert_eq!(item.added_storage.get(&key), Some(&U256::from(99))); + } + + /// A storage write ending in zero must be kept (Stage B routes to trie.remove). + #[test] + fn synthesize_zero_storage_kept() { + let sc = SlotChange::with_changes(U256::from(3), vec![StorageChange::new(0, U256::zero())]); + let mut account = AccountChanges::new(addr(10)); + account.storage_changes = vec![sc]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(10)).expect("expected entry"); + let key = H256::from_uint(&U256::from(3)); + assert_eq!( + item.added_storage.get(&key), + Some(&U256::zero()), + "zero-value storage must be present so Stage B can call trie.remove" + ); + } + + /// A SlotChange with empty slot_changes triggers the debug_assert in debug builds. + /// In release builds the defensive `continue` silently skips the slot. + /// This test covers the debug-build path: the assert fires as expected. + #[test] + #[cfg_attr( + debug_assertions, + should_panic(expected = "SlotChange has empty slot_changes") + )] + fn synthesize_skips_when_slot_changes_empty() { + let empty_sc = SlotChange::new(U256::from(1)); + let mut account = AccountChanges::new(addr(11)); + account.storage_changes = vec![empty_sc]; + // Add a balance change so the account itself is not skipped. + account.balance_changes = vec![BalanceChange::new(1, U256::from(5))]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + // Release-only path: outer entry present, but the empty slot is absent. + let item = result.get(&addr(11)).expect("expected outer entry"); + let key = H256::from_uint(&U256::from(1)); + assert!( + !item.added_storage.contains_key(&key), + "slot with empty slot_changes must not appear in added_storage" + ); + } + + /// Account creation: all four optionals populated, code_hash matches keccak. + #[test] + fn synthesize_creation() { + let bytecode = Bytes::from_static(b"\x60\x80\x60\x40"); + let mut account = AccountChanges::new(addr(12)); + account.balance_changes = vec![BalanceChange::new(1, U256::from(1000))]; + account.nonce_changes = vec![NonceChange::new(1, 1)]; + account.code_changes = vec![CodeChange::new(1, bytecode.clone())]; + let sc = + SlotChange::with_changes(U256::from(0), vec![StorageChange::new(2, U256::from(7))]); + account.storage_changes = vec![sc]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(12)).expect("expected entry"); + assert_eq!(item.balance, Some(U256::from(1000))); + assert_eq!(item.nonce, Some(1)); + let expected_hash = keccak(&bytecode); + assert_eq!(item.code_hash, Some(expected_hash)); + assert!(item.code.is_some()); + assert_eq!(item.code.as_ref().unwrap().bytecode, bytecode); + let key = H256::from_uint(&U256::zero()); + assert_eq!(item.added_storage.get(&key), Some(&U256::from(7))); + } + + /// EIP-6780 same-tx-created selfdestruct: only balance=0 is recorded. + /// Stage C writes balance=0 and leaves pre-state nonce/code intact. + /// EIP-161 removes the account only if pre-state nonce was 0 and code was empty + /// (i.e. a fresh account created in the same block). Otherwise trie keeps the + /// entry with balance=0 + original nonce/code, matching the streaming flow. + #[test] + fn synthesize_selfdestruct_collapses() { + let mut account = AccountChanges::new(addr(13)); + account.balance_changes = vec![BalanceChange::new(5, U256::zero())]; + let bal = make_bal(account); + let result = synthesize_bal_updates(&bal); + let item = result.get(&addr(13)).expect("expected entry"); + assert_eq!(item.balance, Some(U256::zero())); + assert!(item.nonce.is_none()); + assert!(item.code_hash.is_none()); + assert!(item.code.is_none()); + assert!(item.added_storage.is_empty()); + } +} diff --git a/crates/common/types/mod.rs b/crates/common/types/mod.rs index 3950ffcb95f..a732159b90f 100644 --- a/crates/common/types/mod.rs +++ b/crates/common/types/mod.rs @@ -21,6 +21,7 @@ pub use account::*; pub use account_update::*; pub use blobs_bundle::*; pub use block::*; +pub use block_access_list::{BalSynthesisItem, synthesize_bal_updates}; pub use constants::*; pub use fork_id::*; pub use genesis::*; diff --git a/crates/vm/backends/levm/mod.rs b/crates/vm/backends/levm/mod.rs index 861ef981e54..26b459401a7 100644 --- a/crates/vm/backends/levm/mod.rs +++ b/crates/vm/backends/levm/mod.rs @@ -334,11 +334,14 @@ impl LEVM { )) } + /// `merkleizer` is `Some` on the streaming (non-BAL) path; the BAL validation path + /// passes `None` because the caller merkleizes optimistically from the input BAL and + /// the EVM-side `bal_to_account_updates` send is then redundant work. pub fn execute_block_pipeline( block: &Block, db: &mut GeneralizedDatabase, vm_type: VMType, - merkleizer: Sender>, + merkleizer: Option>>, queue_length: &AtomicUsize, crypto: &dyn Crypto, header_bal: Option<&BlockAccessList>, @@ -387,7 +390,7 @@ impl LEVM { db, vm_type, bal, - &merkleizer, + merkleizer.as_ref(), queue_length, system_seed, crypto, @@ -524,7 +527,10 @@ impl LEVM { )); } - // Sequential path (existing code, for block production and non-Amsterdam) + // Sequential path (existing code, for block production and non-Amsterdam). + // The non-BAL caller always provides a Sender; the BAL path returned above. + let merkleizer = merkleizer + .expect("sequential execution path requires a merkleizer Sender (non-BAL caller)"); if is_amsterdam { db.enable_bal_recording(); // Set index 0 for pre-execution phase (system contracts) @@ -1000,7 +1006,7 @@ impl LEVM { db: &mut GeneralizedDatabase, vm_type: VMType, bal: &BlockAccessList, - merkleizer: &Sender>, + merkleizer: Option<&Sender>>, queue_length: &AtomicUsize, system_seed: Arc, crypto: &dyn Crypto, @@ -1029,13 +1035,16 @@ impl LEVM { "execute_block_parallel invoked on non-Amsterdam block" ); - // 1. Convert BAL → AccountUpdates and send to merkleizer (single batch) - // This covers ALL state changes: system calls, txs, withdrawals. - let account_updates = Self::bal_to_account_updates(bal, store.as_ref())?; - merkleizer - .send(account_updates) - .map_err(|e| EvmError::Custom(format!("merkleizer send failed: {e}")))?; - queue_length.fetch_add(1, Ordering::Relaxed); + // 1. Convert BAL → AccountUpdates and send to merkleizer (single batch). + // Skipped when the caller merkleizes optimistically from the input BAL; the + // conversion is then redundant work (and does pre-state reads we don't need). + if let Some(merkleizer) = merkleizer { + let account_updates = Self::bal_to_account_updates(bal, store.as_ref())?; + merkleizer + .send(account_updates) + .map_err(|e| EvmError::Custom(format!("merkleizer send failed: {e}")))?; + queue_length.fetch_add(1, Ordering::Relaxed); + } // Build a checklist of all BAL storage_reads. Entries are removed as they // are actually read during execution phases. Anything left over is extraneous. diff --git a/crates/vm/backends/mod.rs b/crates/vm/backends/mod.rs index df19f90da13..ae73d2d8a08 100644 --- a/crates/vm/backends/mod.rs +++ b/crates/vm/backends/mod.rs @@ -109,7 +109,7 @@ impl Evm { pub fn execute_block_pipeline( &mut self, block: &Block, - merkleizer: Sender>, + merkleizer: Option>>, queue_length: &AtomicUsize, bal: Option<&BlockAccessList>, ) -> Result<(BlockExecutionResult, Option), EvmError> {