4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,10 @@

## Perf

### 2026-05-14

- Reduce BAL parallel-path overhead: overlap merkleization with execution, memoize per-BAL code derivation, swap `CachingDatabase` `RwLock<HashMap>` for `DashMap` to remove rayon-worker contention, and move per-tx BAL validation inside the exec closure [#6639](https://github.com/lambdaclass/ethrex/pull/6639)
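The lock-swap part of this entry reflects a general pattern. A minimal std-only sketch of the before-shape (one `RwLock` guarding the whole cache, so a cache miss serializes every rayon worker on a single lock even for disjoint keys), with the assumed `DashMap` replacement described in comments; names here are illustrative, not the ethrex `CachingDatabase` internals:

```rust
use std::collections::HashMap;
use std::sync::RwLock;

// Before-shape: a single RwLock around the whole map. Reads scale, but any
// miss takes the write lock and stalls every other worker at once.
struct CodeCache {
    inner: RwLock<HashMap<u64, Vec<u8>>>,
}

impl CodeCache {
    fn get_or_compute(&self, key: u64, compute: impl FnOnce() -> Vec<u8>) -> Vec<u8> {
        if let Some(hit) = self.inner.read().unwrap().get(&key) {
            return hit.clone();
        }
        let mut map = self.inner.write().unwrap();
        map.entry(key).or_insert_with(compute).clone()
    }
}
// With DashMap the field becomes `DashMap<u64, Vec<u8>>` and the call site
// `self.inner.entry(key).or_insert_with(compute).clone()`: the map is sharded
// internally, so workers touching different shards no longer contend on one
// lock word.
```

The behavior is identical; only the granularity of locking changes.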

### 2026-04-27

- Reduce peak disk usage during snap sync by moving SST files into the temp DB instead of copying [#6532](https://github.com/lambdaclass/ethrex/pull/6532)
1 change: 1 addition & 0 deletions Cargo.lock


12 changes: 5 additions & 7 deletions cmd/ethrex/cli.rs
@@ -973,13 +973,11 @@ pub async fn import_blocks_bench(
_ => warn!("Failed to add block {number} with hash {hash:#x}"),
})?;

// TODO: replace this
// This sleep is because we have a background process writing to disk the last layer
// And until it's done we can't execute the new block
// Because this wants to compare against running a real node in terms of reported performance
// It takes less than 500ms, so this is good enough, but we should report the performance
// without taking into account that wait.
tokio::time::sleep(Duration::from_millis(500)).await;
// Wait for the trie-update worker's Phase 2 (disk write of bottom-most
// diff layer) and Phase 3 (in-memory layer removal) for the block just
// applied to drain. Keeps the next block's per-block timer from
// absorbing the previous block's background persistence cost.
store.wait_for_persistence_idle().await?;
}

// Make head canonical and label all special blocks correctly.
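The fixed-sleep removal above follows a general shape: hand the block to a background persister, then block on an ack signal instead of sleeping a worst-case bound, so the next block's timer excludes the previous block's persistence cost. A minimal sketch with std threads and channels (`run_blocks` and the ack protocol are illustrative, not the ethrex `Store` API):

```rust
use std::sync::mpsc;
use std::thread;

// Returns the blocks in the order their persistence drained.
fn run_blocks(n: u64) -> Vec<u64> {
    let (job_tx, job_rx) = mpsc::channel::<u64>();
    let (ack_tx, ack_rx) = mpsc::channel::<u64>();
    // Stand-in for the trie-update worker: Phase 2 (disk write of the
    // bottom-most diff layer) and Phase 3 (in-memory layer removal) would
    // happen per job; it acks once the job has fully drained.
    let worker = thread::spawn(move || {
        for block in job_rx {
            ack_tx.send(block).expect("caller is waiting");
        }
    });
    let mut persisted = Vec::new();
    for block in 0..n {
        // ... execute and apply `block`, then start the per-block timer for
        // the next iteration only after persistence goes idle ...
        job_tx.send(block).expect("worker is alive");
        // Wait-until-idle replaces a fixed 500ms sleep.
        persisted.push(ack_rx.recv().expect("worker acks each job"));
    }
    drop(job_tx); // close the channel so the worker's loop ends
    worker.join().unwrap();
    persisted
}
```

The blocking `recv` is what keeps background persistence out of the next block's measured time without over- or under-waiting.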
48 changes: 26 additions & 22 deletions crates/blockchain/blockchain.rs
@@ -859,36 +859,40 @@ impl Blockchain {
const NUM_WORKERS: usize = 16;
let parent_state_root = parent_header.state_root;

// === Stage A: Drain + accumulate all AccountUpdates ===
// BAL guarantees completeness, so we block until execution finishes.
let mut all_updates: FxHashMap<Address, AccountUpdate> = FxHashMap::default();
for updates in rx {
let current_length = queue_length.fetch_sub(1, Ordering::Acquire);
*max_queue_length = current_length.max(*max_queue_length);
for update in updates {
match all_updates.entry(update.address) {
Entry::Vacant(e) => {
e.insert(update);
}
Entry::Occupied(mut e) => {
e.get_mut().merge(update);
}
}
// === Stage A: receive the single BAL-derived batch ===
// execute_block_parallel calls bal_to_account_updates BEFORE the rayon tx
// loop and sends exactly one Vec<AccountUpdate>. Receiving once (instead of
// draining until channel close = exec end) lets Stage B's parallel storage
// roots overlap with parallel exec instead of serializing after it.
//
// BAL accounts are unique by address (one entry per touched address), so
// no merge step is needed — skip the FxHashMap detour entirely.
let updates: Vec<AccountUpdate> = match rx.recv() {
Ok(updates) => {
let current_length = queue_length.fetch_sub(1, Ordering::Acquire);
*max_queue_length = current_length.max(*max_queue_length);
updates
}
Err(_) => {
// Channel closed without a message — execution failed before
// bal_to_account_updates ran. Return empty work so the exec
// error surfaces in execution_result rather than being masked.
Vec::new()
}
Comment on lines +876 to +881
**P2: `Err(_)` path continues through Stage C (16 trie opens)**

When the channel is closed without a message (execution failure before `bal_to_account_updates`), the function returns `Vec::new()` and falls through to Stage C, which unconditionally spawns 16 threads to open the parent state trie even though all shards will have no items. Returning an empty `AccountUpdatesList` early in the `Err` arm would avoid this overhead without changing the visible behaviour, since the execution error surfaces via `execution_result?` regardless.


};
Comment on lines +870 to +882
**P2: Single-recv invariant has no defensive check**

`handle_merkleization_bal` now consumes exactly one message and then never touches `rx` again. If `execute_block_parallel` is ever modified to send a second batch, the extra message will sit in the unbounded channel and be silently dropped — the merkleizer will proceed with only the first batch, producing a wrong state root with no error. A defensive `debug_assert!(rx.try_recv().is_err(), "expected exactly one batch from execute_block_parallel")` after the `Ok` arm would catch any accidental protocol change during development.



// Extract witness accumulator before consuming updates
// Witness accumulator (clone since we move `updates` into Stage B below).
let accumulated_updates = if self.options.precompute_witnesses {
Some(all_updates.values().cloned().collect::<Vec<_>>())
Some(updates.clone())
} else {
None
};

// Extract code updates and build work items with pre-hashed addresses
// Build work items with pre-hashed addresses + extract code updates.
let mut code_updates: Vec<(H256, Code)> = Vec::new();
let mut accounts: Vec<(H256, AccountUpdate)> = Vec::with_capacity(all_updates.len());
for (addr, update) in all_updates {
let hashed = keccak(addr);
let mut accounts: Vec<(H256, AccountUpdate)> = Vec::with_capacity(updates.len());
for update in updates {
let hashed = keccak(update.address);
if let Some(info) = &update.info
&& let Some(code) = &update.code
{
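Stage A's receive-once protocol, including the `Err` fallback and the `debug_assert` suggested in review, can be exercised in isolation with `std::sync::mpsc` (the `Batch` type and function name are illustrative, not ethrex's):

```rust
use std::sync::mpsc;

// Illustrative batch type; not ethrex's AccountUpdate.
type Batch = Vec<u32>;

// Mirrors Stage A: receive exactly one BAL-derived batch; treat a closed
// channel as "execution failed upstream" and hand back empty work so the
// error can surface at the execution-result check instead.
fn recv_single_batch(rx: &mpsc::Receiver<Batch>) -> Batch {
    match rx.recv() {
        Ok(updates) => {
            // Defensive single-recv invariant from the second review comment:
            // any second batch would otherwise be silently dropped.
            debug_assert!(
                rx.try_recv().is_err(),
                "expected exactly one batch from the sender"
            );
            updates
        }
        // Sender dropped without sending a message.
        Err(_) => Vec::new(),
    }
}
```

Receiving once rather than draining until channel close is what lets the storage-root work overlap with execution instead of serializing after it.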
64 changes: 64 additions & 0 deletions crates/guest-program/bin/openvm/Cargo.lock


58 changes: 58 additions & 0 deletions crates/guest-program/bin/risc0/Cargo.lock

