diff --git a/Cargo.lock b/Cargo.lock index 8b8d3a7ccaf..f6fcdc605da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3973,6 +3973,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "ethrex-evm" +version = "11.0.0" +dependencies = [ + "bytes", + "clap", + "ethrex-blockchain", + "ethrex-common", + "ethrex-crypto", + "ethrex-levm", + "ethrex-storage", + "ethrex-vm", + "eyre", + "hex", + "regex", + "rustc-hash 2.1.2", + "secp256k1", + "serde", + "serde_json", + "tokio", + "walkdir", +] + [[package]] name = "ethrex-guest-program" version = "11.0.0" diff --git a/Cargo.toml b/Cargo.toml index c12dab3811e..476e691c280 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "benches", "cmd/ethrex", + "cmd/ethrex-evm", "crates/blockchain", "crates/blockchain/dev", "crates/common", @@ -132,6 +133,8 @@ tower-http = { version = "0.6.2", features = ["cors"] } indexmap = { version = "2.11.4" } k256 = "0.13.4" anyhow = "1.0.86" +regex = "1" +walkdir = "2" rocksdb = { version = "0.24.0", default-features = false, features = [ "bindgen-runtime", diff --git a/Makefile b/Makefile index 059c814c4a1..81c567a41cd 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,7 @@ dev: ## 🏃 Run the ethrex client in DEV_MODE with the InMemory Engine --dev \ --datadir memory -ETHEREUM_PACKAGE_REVISION := e4b330579580477814cfaebb004e354f7eb396f4 +ETHEREUM_PACKAGE_REVISION := 71b02f6e4a57ad19629c729cb2989e7f868866d2 ETHEREUM_PACKAGE_DIR := ethereum-package checkout-ethereum-package: ## 📦 Checkout specific Ethereum package revision diff --git a/cmd/ethrex-evm/Cargo.toml b/cmd/ethrex-evm/Cargo.toml new file mode 100644 index 00000000000..6e1edee7481 --- /dev/null +++ b/cmd/ethrex-evm/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "ethrex-evm" +version.workspace = true +edition.workspace = true +authors.workspace = true +documentation.workspace = true +license.workspace = true + +[[bin]] +name = "ethrex-evm" +path = "src/main.rs" + +[dependencies] +ethrex-blockchain = { workspace = true 
} +ethrex-common = { workspace = true } +ethrex-crypto = { workspace = true } +ethrex-storage = { workspace = true } +ethrex-vm = { workspace = true } +ethrex-levm = { workspace = true } + +bytes.workspace = true +clap = { workspace = true } +serde = { workspace = true } +serde_json.workspace = true +hex.workspace = true +eyre.workspace = true +rustc-hash.workspace = true +regex.workspace = true +walkdir.workspace = true +secp256k1 = { workspace = true } +# add_initial_state is async, so we need a tokio runtime +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } + +[dev-dependencies] +ethrex-common = { workspace = true } +ethrex-levm = { workspace = true } +ethrex-storage = { workspace = true } +hex.workspace = true +serde_json.workspace = true diff --git a/cmd/ethrex-evm/README.md b/cmd/ethrex-evm/README.md new file mode 100644 index 00000000000..53147e7d20a --- /dev/null +++ b/cmd/ethrex-evm/README.md @@ -0,0 +1,116 @@ +# ethrex-evm + +A standalone EVM CLI for the `ethrex` execution client, intended as a +drop-in differential-fuzzing target for +[`holiman/goevmlab`](https://github.com/holiman/goevmlab). + +The binary exposes one subcommand: `statetest`. It accepts the exact +invocation goevmlab's `evms/geth.go` adapter uses, reads +GeneralStateTest JSON, runs each `(fork, subtest)` through LEVM, and +streams an EIP-3155 trace plus a `{"stateRoot": "0x..."}` terminator +on stderr. + +## Build + +```bash +cargo build -p ethrex-evm --bin ethrex-evm --release +# binary at target/release/ethrex-evm +``` + +## Usage + +```bash +ethrex-evm statetest --trace --trace.format=json \ + --trace.nomemory=true --trace.noreturndata=true \ + path/to/StateTest.json +``` + +Stdin batch mode (one path per line, EOF or blank line terminates): + +```bash +echo "path/to/test1.json +path/to/test2.json" | ethrex-evm statetest --trace --trace.format=json +``` + +### Flags + +| Flag | Default | Notes | +|---|---|---| +| `--trace` | off | Enable EIP-3155 streaming. 
Bare boolean. | +| `--trace.format` | `json` | Only `json` accepted; other values exit 1. | +| `--trace.nomemory` | `true` | Suppress `memory` in steps. | +| `--trace.memory` | `false` | Opt-in alias for the inverse. | +| `--trace.nostack` | `false` | Suppress `stack`. | +| `--trace.noreturndata` | `true` | Suppress `returnData`. | +| `--trace.nostorage` | `false` | Suppress storage diffs. | +| `--statetest.fork` | _all forks_ | Limit to one fork (e.g. `Prague`). | +| `--statetest.index` | _all subtests_ | Limit to one subtest by index. | +| `--run` | _match all_ | Regex applied to test names. | + +### EIP-3155 line schema + +Each opcode step is one `\n`-terminated JSON object: + +```json +{"pc":4,"op":1,"gas":"0x2540be3fa","gasCost":"0x3","memSize":0,"stack":["0x1","0x1"],"depth":1,"refund":0,"opName":"ADD"} +``` + +| Field | Encoding | Notes | +|---|---|---| +| `pc` | number | Program counter, decimal | +| `op` | number | Raw byte value (e.g. `96` for PUSH1) | +| `opName` | string | Mnemonic; fallback `"opcode 0xNN not defined"` | +| `gas` | hex string | Gas remaining before opcode | +| `gasCost` | hex string | Charged for this opcode | +| `memSize` | number | Bytes | +| `stack` | array of hex strings | Bottom-first; omitted when `--trace.nostack=true` | +| `memory` | hex string | Single contiguous blob; omitted unless enabled | +| `returnData` | hex string | Omitted unless enabled | +| `depth` | number | Call depth (1 = top) | +| `refund` | number | Refund counter | +| `error` | string | Present iff the step errored | + +Summary line (after the last opcode): + +```json +{"output":"","gasUsed":"0x...","error":"..."} +``` + +State-root terminator (after the summary): + +```json +{"stateRoot": "0x<64 hex chars>"} +``` + +The literal colon-space in `"stateRoot": "` is required for goevmlab's +`ParseStateRoot` byte search. 
+ +## Supported transaction shapes + +GeneralStateTest vectors using any of these execute end-to-end: + +- Legacy / EIP-1559 / EIP-2930 (envelope unified via `EIP1559Transaction`). +- EIP-4844 blob txs (`blobVersionedHashes`, `maxFeePerBlobGas`, `currentExcessBlobGas`). +- EIP-7702 setcode txs (`authorizationList` with `v` or `yParity`). +- Vectors that ship a pre-derived `sender` field instead of `secretKey`. + +## goevmlab integration + +`ethrex-evm` is binary-compatible with goevmlab's invocation contract. +To register it as a fuzzing target in a goevmlab fork: + +1. Build the binary: `cargo build -p ethrex-evm --bin ethrex-evm --release`. +2. Add a goevmlab `evms/ethrex.go` adapter modeled after `evms/geth.go`, + pointing at the binary path. +3. Run goevmlab's state-fuzzer — it will diff `ethrex-evm`'s output + against the other configured clients. + +The upstream goevmlab adapter PR is tracked separately; this repo only +ships the binary. + +## Future work + +- `run` subcommand for raw-bytecode debugging. +- `t8n` subcommand. +- CI workflow running ethrex-evm against a goevmlab fuzz corpus nightly. +- Upstream `evms/ethrex.go` in goevmlab. 
diff --git a/cmd/ethrex-evm/src/lib.rs b/cmd/ethrex-evm/src/lib.rs new file mode 100644 index 00000000000..4a00e7207f3 --- /dev/null +++ b/cmd/ethrex-evm/src/lib.rs @@ -0,0 +1,6 @@ +pub mod statetest; + +pub use statetest::StatetestArgs; +pub use statetest::state_root::{ + build_generalized_db, compute_post_state_root, eoa_info, minimal_chain_config, setup_store, +}; diff --git a/cmd/ethrex-evm/src/main.rs b/cmd/ethrex-evm/src/main.rs new file mode 100644 index 00000000000..67fb569bd72 --- /dev/null +++ b/cmd/ethrex-evm/src/main.rs @@ -0,0 +1,23 @@ +use clap::{Parser, Subcommand}; + +use ethrex_evm::statetest::{StatetestArgs, runner}; + +fn main() -> eyre::Result<()> { + let cli = Cli::parse(); + match cli.command { + Command::Statetest(args) => runner::run(args), + } +} + +#[derive(Parser)] +#[command(name = "ethrex-evm", about = "EVM execution tool")] +struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Subcommand)] +enum Command { + /// Execute EF-style state tests and stream EIP-3155 traces to stderr. + Statetest(StatetestArgs), +} diff --git a/cmd/ethrex-evm/src/statetest/error_map.rs b/cmd/ethrex-evm/src/statetest/error_map.rs new file mode 100644 index 00000000000..88305aed841 --- /dev/null +++ b/cmd/ethrex-evm/src/statetest/error_map.rs @@ -0,0 +1,92 @@ +//! Maps LEVM [`VMError`] variants to the error strings emitted by geth's EVM. +//! +//! Strings are taken from `go-ethereum/core/vm/errors.go`. Keeping them +//! identical allows goevmlab diff tools to match traces across implementations. + +use ethrex_levm::errors::{ExceptionalHalt, TxValidationError, VMError}; + +/// Returns the geth-compatible error string for a LEVM [`VMError`]. +/// +/// For variants whose [`Display`] impl already matches the geth string exactly +/// (notably `StackUnderflow` and `StackOverflow` which were made geth-compatible +/// in Phase 4a), the Display output is used directly. 
+pub fn vm_error_to_geth_string(err: &VMError) -> String { + match err { + VMError::RevertOpcode => "execution reverted".to_owned(), + VMError::ExceptionalHalt(halt) => exceptional_halt_to_geth_string(halt), + VMError::TxValidation(tv) => tx_validation_to_geth_string(tv), + VMError::Internal(internal) => internal.to_string(), + } +} + +/// Maps `TxValidationError` variants to the strings geth emits from +/// `core/types/transaction.go` and `core/state_transition.go`. Variants without +/// a clear geth analog fall through to LEVM Display. +fn tx_validation_to_geth_string(tv: &TxValidationError) -> String { + match tv { + TxValidationError::IntrinsicGasTooLow + | TxValidationError::IntrinsicGasBelowFloorGasCost => "intrinsic gas too low".to_owned(), + TxValidationError::NonceMismatch { actual, expected } if actual < expected => { + "nonce too low".to_owned() + } + TxValidationError::NonceMismatch { .. } => "nonce too high".to_owned(), + TxValidationError::NonceIsMax => "nonce has max value".to_owned(), + TxValidationError::InsufficientAccountFunds => { + "insufficient funds for gas * price + value".to_owned() + } + TxValidationError::InsufficientMaxFeePerGas => { + "max fee per gas less than block base fee".to_owned() + } + TxValidationError::PriorityGreaterThanMaxFeePerGas { .. } => { + "max priority fee per gas higher than max fee per gas".to_owned() + } + TxValidationError::InsufficientMaxFeePerBlobGas { .. } => { + "max fee per blob gas less than block blob gas fee".to_owned() + } + TxValidationError::Type3TxPreFork => "blob tx used before Cancun".to_owned(), + TxValidationError::Type3TxZeroBlobs => "blobless blob transaction".to_owned(), + TxValidationError::Type3TxInvalidBlobVersionedHash => "invalid versioned hash".to_owned(), + TxValidationError::Type3TxBlobCountExceeded { .. 
} => "too many blobs".to_owned(), + TxValidationError::Type3TxContractCreation => { + "blob transaction is a contract creation".to_owned() + } + TxValidationError::Type4TxPreFork => "setcode tx used before Prague".to_owned(), + TxValidationError::Type4TxAuthorizationListIsEmpty => { + "EIP-7702 transaction with empty auth list".to_owned() + } + TxValidationError::Type4TxContractCreation => { + "setcode tx is a contract creation".to_owned() + } + TxValidationError::InitcodeSizeExceeded { .. } => "max initcode size exceeded".to_owned(), + TxValidationError::GasAllowanceExceeded { .. } => "gas limit reached".to_owned(), + TxValidationError::SenderNotEOA(_) => "sender not an eoa".to_owned(), + // Fall through to LEVM Display for variants without a clean geth analog. + TxValidationError::GasLimitPriceProductOverflow + | TxValidationError::TxMaxGasLimitExceeded { .. } => tv.to_string(), + } +} + +fn exceptional_halt_to_geth_string(halt: &ExceptionalHalt) -> String { + match halt { + // Phase 4a gave these variants a geth-compatible Display: use it directly. + ExceptionalHalt::StackUnderflow { .. } => halt.to_string(), + ExceptionalHalt::StackOverflow { .. } => halt.to_string(), + + ExceptionalHalt::OutOfGas => "out of gas".to_owned(), + ExceptionalHalt::InvalidJump => "invalid jump destination".to_owned(), + ExceptionalHalt::OpcodeNotAllowedInStaticContext => "write protection".to_owned(), + ExceptionalHalt::InvalidContractPrefix => { + "invalid code: must not begin with 0xef".to_owned() + } + // geth emits "invalid opcode: OPCODE_NAME"; without opcode-name info we + // emit the shorter form which is still valid per geth's error.go. 
+ ExceptionalHalt::InvalidOpcode => "invalid opcode".to_owned(), + ExceptionalHalt::AddressAlreadyOccupied => "contract address collision".to_owned(), + ExceptionalHalt::ContractOutputTooBig => "max code size exceeded".to_owned(), + ExceptionalHalt::OutOfBounds => "return data out of bounds".to_owned(), + ExceptionalHalt::VeryLargeNumber => "gas uint64 overflow".to_owned(), + // Precompile errors are not a top-level geth error string; fall through + // to LEVM Display which includes the precompile-specific message. + ExceptionalHalt::Precompile(_) => halt.to_string(), + } +} diff --git a/cmd/ethrex-evm/src/statetest/mod.rs b/cmd/ethrex-evm/src/statetest/mod.rs new file mode 100644 index 00000000000..a4d4ec41878 --- /dev/null +++ b/cmd/ethrex-evm/src/statetest/mod.rs @@ -0,0 +1,150 @@ +//! # `statetest` subcommand β€” design notes (Phase 3 spike) +//! +//! ## Phase 3.2 decision: Option B β€” inline types +//! +//! The `ef_tests-state` crate lives in `tooling/Cargo.toml`, which is a **separate Cargo +//! workspace** from the main `ethrex_2/Cargo.toml`. Depending on it from a main-workspace crate +//! would require either: +//! +//! - Adding a path dependency that crosses workspace boundaries (unsupported by Cargo without +//! workspace-level `[patch]` gymnastics), or +//! - Publishing the crate β€” which is not the case here. +//! +//! Additionally, `ef_tests-state` pulls in `revm` (v27) and `simd-json`, neither of which belongs +//! in the main workspace. Therefore **Option B (inline the required types)** is the correct +//! approach: we define the minimal `StateTest` parsing types we need inside this module rather +//! than taking on the transitive dependency. +//! +//! ## Phase 3.1 β€” exact API call sequence per subtest +//! +//! The call sequence the Phase 4 `statetest` CLI will follow for each (fork, subtest) pair: +//! +//! ```text +//! 1. Parse test JSON β†’ StateTest (types.rs inlined types, serde_json). +//! +//! 2. 
Build pre_state: FxHashMap<Address, Account> from StateTest::pre. +//! +//! 3. Build and execute the VM: +//! a. Construct a Genesis from pre_state (accounts → GenesisAccount alloc). +//! b. let mut store = Store::new("", EngineType::InMemory)?; +//! c. store.add_initial_state(genesis).await?; // async +//! d. let block_header = genesis.get_block().header; +//! e. let vm_db: DynVmDatabase = +//! Box::new(StoreVmDatabase::new(store.clone(), block_header)?); +//! f. let mut db = GeneralizedDatabase::new(Arc::new(vm_db)); +//! g. Build Environment + TxKind from the subtest transaction fields. +//! h. let mut vm = VM::new(env, &mut db); +//! i. Optionally attach streaming EIP-3155 tracer. +//! j. vm.execute(); +//! +//! 4. Extract state transitions: +//! let updates = LEVM::get_state_transitions(&mut db)?; +//! // LEVM::get_state_transitions is a *static function* on LEVM taking &mut GeneralizedDatabase. +//! // Source: crates/vm/backends/levm/mod.rs:2090 +//! +//! 5. Compute post-state root: +//! let root = compute_post_state_root(&pre_state, &updates)?; +//! // Internally: applies updates via Store::apply_account_updates_batch (sync, store.rs:1753) +//! // against the genesis block hash. Returns state_trie_hash from AccountUpdatesList. +//! +//! 6. Emit result: +//! println!("{}", serde_json::to_string(&PostStateOutput { state_root: root })?); +//! ``` +//! +//! ## Async boundary +//! +//! `Store::add_initial_state` is `async` (`store.rs:2110`), but +//! `Store::apply_account_updates_batch` is sync (`store.rs:1753`). Therefore +//! `compute_post_state_root` spins up a single-shot `tokio::runtime::Runtime` internally to drive +//! the async setup, then uses the sync trie update path. The public API remains a plain `fn`. +//! +//! ## Open questions discovered during the spike +//! +//! - None blocking Phase 4. `apply_account_updates_batch` is public and accessible without +//! any API changes. 
+ +pub mod error_map; +pub mod runner; +pub mod state_root; +pub mod types; + +use std::path::PathBuf; + +/// Arguments for the `statetest` subcommand. +/// +/// Flag names match geth's `cmd/evm/statetest` exactly so that goevmlab +/// can invoke this binary as a drop-in replacement. +/// +/// Authoritative reference: `go-ethereum/cmd/evm/main.go` flag definitions. +/// - `trace.nomemory` default: `true` (memory disabled by default) +/// - `trace.noreturndata` default: `true` (return data disabled by default) +/// - `trace.nostack` default: `false` (stack enabled by default) +/// - `trace.nostorage` default: `false` (storage enabled by default) +/// +/// All boolean flags use `num_args(0..=1)` so they accept both `--flag` and +/// `--flag=true` / `--flag=false` β€” matching goevmlab's invocation style which +/// passes e.g. `--trace.nomemory=true`. +#[derive(clap::Args, Debug, Clone)] +pub struct StatetestArgs { + /// Enable EIP-3155 structured-logging trace output on stderr. + /// Bare boolean (no value accepted); presence β‡’ true, absence β‡’ false. + /// This is what goevmlab passes: `statetest --trace `. + #[arg(long = "trace", action = clap::ArgAction::SetTrue)] + pub trace: bool, + + /// Trace output format. + /// Only `"json"` is currently supported; other values cause exit(1). + /// Geth default: `"json"`. + #[arg(long = "trace.format", default_value = "json")] + pub trace_format: String, + + /// Include memory in each trace step (opt-in; geth default: disabled). + #[arg(long = "trace.memory", default_value = "false", num_args(0..=1), require_equals = false, value_parser = parse_bool)] + pub trace_memory: bool, + + /// Disable memory in trace output (opt-out alias; geth default: true). + #[arg(long = "trace.nomemory", default_value = "true", num_args(0..=1), require_equals = false, value_parser = parse_bool)] + pub trace_nomemory: bool, + + /// Disable stack in trace output (geth default: false). 
+ #[arg(long = "trace.nostack", default_value = "false", num_args(0..=1), require_equals = false, value_parser = parse_bool)] + pub trace_nostack: bool, + + /// Disable return data in trace output (geth default: true). + #[arg(long = "trace.noreturndata", default_value = "true", num_args(0..=1), require_equals = false, value_parser = parse_bool)] + pub trace_noreturndata: bool, + + /// Disable storage capture in trace output (geth default: false). + #[arg(long = "trace.nostorage", default_value = "false", num_args(0..=1), require_equals = false, value_parser = parse_bool)] + pub trace_nostorage: bool, + + /// Only run tests for the specified fork (e.g. `Prague`, `Cancun`). + #[arg(long = "statetest.fork")] + pub statetest_fork: Option, + + /// Only run the subtest at the given index (0-based). + #[arg(long = "statetest.index")] + pub statetest_index: Option, + + /// Regex filter applied to test names. + #[arg(long = "run")] + pub run: Option, + + /// Paths to JSON state-test files or directories. + /// When empty, paths are read from stdin (batch mode), one per line. + #[arg()] + pub paths: Vec, +} + +/// Parses `"true"` / `"false"` (case-insensitive) and bare flag invocations. +/// +/// When the flag is specified without a value (e.g. `--trace.nomemory`), clap +/// passes the `default_value` string. When specified as `--trace.nomemory=true` +/// goevmlab style, this parser handles both forms. +fn parse_bool(s: &str) -> Result { + match s.to_lowercase().as_str() { + "true" | "1" | "yes" => Ok(true), + "false" | "0" | "no" => Ok(false), + other => Err(format!("expected boolean (true/false), got: {other}")), + } +} diff --git a/cmd/ethrex-evm/src/statetest/runner.rs b/cmd/ethrex-evm/src/statetest/runner.rs new file mode 100644 index 00000000000..16d8645b83d --- /dev/null +++ b/cmd/ethrex-evm/src/statetest/runner.rs @@ -0,0 +1,547 @@ +//! Execution loop for the `statetest` subcommand. +//! +//! 
Reads JSON state-test files, runs each (fork, subtest) pair through LEVM, +//! and streams EIP-3155 trace lines + the goevmlab terminator to stderr. + +use std::{ + collections::BTreeMap, + io::{self, BufRead}, + path::{Path, PathBuf}, +}; + +use ethrex_common::{ + Address, H256, U256, + types::{ + Account, AccountInfo, Code, EIP1559Transaction, EIP7702Transaction, Fork, Genesis, + GenesisAccount, Transaction, TxKind, + tx_fields::{AccessList, AuthorizationTuple}, + }, +}; +use ethrex_crypto::NativeCrypto; +use ethrex_levm::{ + EVMConfig, Environment, + tracing::{LevmOpcodeTracer, OpcodeTracerConfig}, + utils::get_base_fee_per_blob_gas, + vm::{VM, VMType}, +}; +use ethrex_storage::{EngineType, Store}; +use ethrex_vm::backends; +use regex::Regex; +use rustc_hash::FxHashMap; +use walkdir::WalkDir; + +use crate::statetest::{ + StatetestArgs, + error_map::vm_error_to_geth_string, + state_root::{build_generalized_db, compute_post_state_root}, + types::{StateTestAccount, StateTestFile}, +}; + +/// Entry point for the `statetest` subcommand. +pub fn run(args: StatetestArgs) -> eyre::Result<()> { + // Validate trace format early β€” geth exits 1 on unsupported values. + if args.trace && args.trace_format != "json" { + eprintln!( + "unsupported trace format: {}; only \"json\" is supported", + args.trace_format + ); + std::process::exit(1); + } + + let files = collect_files(&args.paths)?; + + let run_re = Regex::new(args.run.as_deref().unwrap_or("")) + .map_err(|e| eyre::eyre!("invalid --run regex: {e}"))?; + + for file in &files { + run_file(file, &args, &run_re).map_err(|e| eyre::eyre!("file {}: {e}", file.display()))?; + } + + Ok(()) +} + +/// Collects `.json` files from paths. Directories are walked recursively. +/// When `paths` is empty, reads newline-separated paths from stdin. +fn collect_files(paths: &[PathBuf]) -> eyre::Result> { + if paths.is_empty() { + // Batch mode: read paths from stdin, one per line. 
+ let stdin = io::stdin(); + let mut files = Vec::new(); + for line in stdin.lock().lines() { + let line = line.map_err(|e| eyre::eyre!("reading stdin: {e}"))?; + let line = line.trim().to_owned(); + if line.is_empty() { + break; + } + files.extend(collect_from_path(Path::new(&line))); + } + Ok(files) + } else { + let mut files = Vec::new(); + for p in paths { + files.extend(collect_from_path(p)); + } + Ok(files) + } +} + +/// Returns all `.json` files under `path` (or just `path` if it is a file). +fn collect_from_path(path: &Path) -> Vec { + if path.is_dir() { + WalkDir::new(path) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| { + e.file_type().is_file() + && e.path().extension().and_then(|s| s.to_str()) == Some("json") + }) + .map(|e| e.path().to_owned()) + .collect() + } else { + vec![path.to_owned()] + } +} + +fn run_file(path: &Path, args: &StatetestArgs, run_re: &Regex) -> eyre::Result<()> { + let src = std::fs::read_to_string(path)?; + let file: StateTestFile = + serde_json::from_str(&src).map_err(|e| eyre::eyre!("parse error: {e}"))?; + + for (test_name, test) in &file { + if !run_re.is_match(test_name) { + continue; + } + for (fork_name, post_vectors) in &test.post { + if let Some(ref wanted_fork) = args.statetest_fork + && fork_name != wanted_fork + { + continue; + } + let fork = parse_fork(fork_name)?; + + for (idx, vector) in post_vectors.iter().enumerate() { + if let Some(wanted_idx) = args.statetest_index + && idx != wanted_idx + { + continue; + } + + run_subtest( + args, + &test.pre, + &test.env, + &test.transaction, + fork, + vector, + idx, + )?; + } + } + } + Ok(()) +} + +fn run_subtest( + args: &StatetestArgs, + pre: &BTreeMap, + env: &crate::statetest::types::TestEnv, + tx_template: &crate::statetest::types::TestTransaction, + fork: Fork, + vector: &crate::statetest::types::PostStateVector, + _idx: usize, +) -> eyre::Result<()> { + // Build pre-state as FxHashMap. 
+ let pre_state = build_pre_state(pre); + + // Build genesis and store for this subtest. + let genesis = build_genesis_from_pre(&pre_state, env); + let rt = tokio::runtime::Runtime::new()?; + let (store, _block_hash) = rt.block_on(async { + let mut store = + Store::new("./temp", EngineType::InMemory).map_err(|e| eyre::eyre!("{e}"))?; + store + .add_initial_state(genesis.clone()) + .await + .map_err(|e| eyre::eyre!("{e}"))?; + let block_hash = genesis.get_block().hash(); + Ok::<_, eyre::Error>((store, block_hash)) + })?; + + let mut db = build_generalized_db(store, &genesis)?; + + // Build the transaction from the template + subtest indexes. + let data = tx_template + .data + .get(vector.indexes.data) + .cloned() + .unwrap_or_default(); + let gas_limit = tx_template + .gas_limit + .get(vector.indexes.gas) + .copied() + .unwrap_or_default(); + let value = tx_template + .value + .get(vector.indexes.value) + .copied() + .unwrap_or(U256::zero()); + + let to = match tx_template.to { + Some(addr) => TxKind::Call(addr), + None => TxKind::Create, + }; + + let access_list: AccessList = parse_access_list(&tx_template.access_lists, vector.indexes.data); + + // Determine gas price / fee fields. + let (gas_price_u256, max_fee_per_gas_u256, max_priority_fee_per_gas_u256) = + compute_fee_fields(tx_template, env)?; + + // Resolve sender: use pre-derived address if present, otherwise derive from secret_key. + let sender: Address = match tx_template.sender { + Some(addr) => addr, + None => recover_sender(tx_template)?, + }; + + let blob_schedule = EVMConfig::canonical_values(fork); + let config = EVMConfig::new(fork, blob_schedule); + + let base_blob_fee_per_gas = get_base_fee_per_blob_gas(env.current_excess_blob_gas, &config) + .map_err(|e| eyre::eyre!("base blob fee: {e}"))?; + + // Build the authorization list for EIP-7702 transactions if present. 
+ let auth_list_resolved: Option> = + tx_template.authorization_list.as_ref().map(|list| { + list.iter() + .map(|t| AuthorizationTuple { + chain_id: U256::from(t.chain_id), + address: t.address, + nonce: t.nonce, + y_parity: t.v, + r_signature: t.r, + s_signature: t.s, + }) + .collect() + }); + + // Dispatch to EIP-7702 envelope when an authorization list is present; + // otherwise use EIP-1559. Blob hashes are passed via Environment (not the + // envelope), matching how levm_runner handles blob vectors. + let tx = match auth_list_resolved { + Some(list) => { + let call_to = match to { + TxKind::Call(addr) => addr, + TxKind::Create => { + return Err(eyre::eyre!( + "EIP-7702 setcode tx cannot be a contract creation" + )); + } + }; + Transaction::EIP7702Transaction(EIP7702Transaction { + to: call_to, + value, + data, + access_list, + authorization_list: list, + gas_limit, + ..Default::default() + }) + } + None => Transaction::EIP1559Transaction(EIP1559Transaction { + to, + value, + data, + access_list, + gas_limit, + ..Default::default() + }), + }; + + let levm_env = Environment { + origin: sender, + gas_limit, + config, + block_number: env.current_number, + coinbase: env.current_coinbase, + timestamp: env.current_timestamp, + prev_randao: env.current_random, + difficulty: env.current_difficulty, + slot_number: U256::zero(), + chain_id: U256::from(1), + base_fee_per_gas: env.current_base_fee.unwrap_or(U256::zero()), + base_blob_fee_per_gas, + gas_price: gas_price_u256, + block_excess_blob_gas: env.current_excess_blob_gas, + block_blob_gas_used: None, + tx_blob_hashes: tx_template + .blob_versioned_hashes + .clone() + .unwrap_or_default(), + tx_max_priority_fee_per_gas: max_priority_fee_per_gas_u256, + tx_max_fee_per_gas: max_fee_per_gas_u256, + tx_max_fee_per_blob_gas: tx_template.max_fee_per_blob_gas, + tx_nonce: tx_template.nonce, + block_gas_limit: env.current_gas_limit, + is_privileged: false, + fee_token: None, + disable_balance_check: false, + }; + + // Build 
tracer. + let mut tracer = if args.trace { + let cfg = OpcodeTracerConfig { + disable_stack: args.trace_nostack, + enable_memory: args.trace_memory, + disable_storage: args.trace_nostorage, + enable_return_data: !args.trace_noreturndata, + limit: 0, + }; + LevmOpcodeTracer::streaming(cfg, Box::new(std::io::stderr())) + } else { + LevmOpcodeTracer::disabled() + }; + + // Execute. + let call_tracer = ethrex_levm::tracing::LevmCallTracer::disabled(); + let exec_result = VM::new( + levm_env, + &mut db, + &tx, + call_tracer, + VMType::L1, + &NativeCrypto, + ) + .map_err(|e| eyre::eyre!("VM init: {e}")) + .map(|mut vm| { + vm.opcode_tracer = std::mem::replace(&mut tracer, LevmOpcodeTracer::disabled()); + let result = vm.execute(); + tracer = std::mem::replace(&mut vm.opcode_tracer, LevmOpcodeTracer::disabled()); + result + }); + + let (output, gas_used, error_str) = match &exec_result { + Ok(Ok(report)) => { + let err = match &report.result { + ethrex_levm::errors::TxResult::Revert(e) => Some(vm_error_to_geth_string(e)), + ethrex_levm::errors::TxResult::Success => None, + }; + (report.output.to_vec(), report.gas_spent, err) + } + Ok(Err(vm_err)) => { + let err_str = vm_error_to_geth_string(vm_err); + (vec![], 0, Some(err_str)) + } + Err(e) => { + return Err(eyre::eyre!("VM setup: {e}")); + } + }; + + tracer.flush_summary(&output, gas_used, error_str.as_deref())?; + + // Get state transitions and compute post-state root. + let updates = backends::levm::LEVM::get_state_transitions(&mut db) + .map_err(|e| eyre::eyre!("get_state_transitions: {e}"))?; + + let post_root = compute_post_state_root(&pre_state, &updates)?; + + // Always emit the state root terminator to stderr β€” goevmlab reads this line + // to detect test completion regardless of whether per-step tracing is enabled. + // Mirror: go-ethereum/cmd/evm/staterunner.go fmt.Fprintf(os.Stderr, ...). 
+ tracer.flush_state_root(post_root)?; + if !args.trace { + // When the tracer has no sink, write the terminator directly. + use ethrex_common::tracing::write_streaming_state_root; + use std::io::Write as _; + let mut stderr = std::io::stderr(); + write_streaming_state_root(&mut stderr, post_root) + .map_err(|e| eyre::eyre!("state root write: {e}"))?; + stderr + .flush() + .map_err(|e| eyre::eyre!("stderr flush: {e}"))?; + } + + if let Some(stream_err) = tracer.take_stream_error() { + return Err(eyre::eyre!("stream write error: {stream_err}")); + } + + Ok(()) +} + +/// Parses a fork name string into a [`Fork`] variant. +fn parse_fork(name: &str) -> eyre::Result { + // The statetest JSON uses geth's fork naming (e.g. "Prague", "Cancun"). + match name { + "Frontier" => Ok(Fork::Frontier), + "FrontierToHomesteadAt5" | "Homestead" => Ok(Fork::Homestead), + "HomesteadToDaoAt5" | "DaoFork" => Ok(Fork::DaoFork), + "EIP150" | "Tangerine" => Ok(Fork::Tangerine), + "EIP158" | "SpuriousDragon" => Ok(Fork::SpuriousDragon), + "Byzantium" => Ok(Fork::Byzantium), + "Constantinople" => Ok(Fork::Constantinople), + "ConstantinopleFix" | "Petersburg" => Ok(Fork::Petersburg), + "Istanbul" => Ok(Fork::Istanbul), + "MuirGlacier" => Ok(Fork::MuirGlacier), + "Berlin" => Ok(Fork::Berlin), + "London" => Ok(Fork::London), + "ArrowGlacier" => Ok(Fork::ArrowGlacier), + "GrayGlacier" => Ok(Fork::GrayGlacier), + "Merge" | "Paris" | "MergeEOF" => Ok(Fork::Paris), + "Shanghai" => Ok(Fork::Shanghai), + "Cancun" => Ok(Fork::Cancun), + "Prague" => Ok(Fork::Prague), + "Osaka" => Ok(Fork::Osaka), + other => Err(eyre::eyre!("unknown fork: {other}")), + } +} + +/// Converts a `BTreeMap` into `FxHashMap`. 
+fn build_pre_state(pre: &BTreeMap) -> FxHashMap { + let crypto = NativeCrypto; + pre.iter() + .map(|(addr, sta)| { + let code = Code::from_bytecode(sta.code.clone(), &crypto); + let code_hash = code.hash; + let storage: FxHashMap = sta + .storage + .iter() + .map(|(k, v)| (H256::from_slice(&k.to_big_endian()), *v)) + .collect(); + let account = Account { + info: AccountInfo { + balance: sta.balance, + nonce: sta.nonce, + code_hash, + }, + code, + storage, + }; + (*addr, account) + }) + .collect() +} + +/// Builds a [`Genesis`] from a pre-state map, using the block env for gas_limit. +fn build_genesis_from_pre( + pre_state: &FxHashMap, + env: &crate::statetest::types::TestEnv, +) -> Genesis { + let alloc: BTreeMap = pre_state + .iter() + .map(|(addr, account)| { + let storage: BTreeMap = account + .storage + .iter() + .map(|(k, v)| (U256::from_big_endian(k.as_bytes()), *v)) + .collect(); + let ga = GenesisAccount { + code: account.code.bytecode.clone(), + storage, + balance: account.info.balance, + nonce: account.info.nonce, + }; + (*addr, ga) + }) + .collect(); + + // Use the default ChainConfig (all forks inactive in the genesis header); + // LEVM's `EVMConfig` is what drives fork-specific behavior at exec time. + // Mirroring tooling/ef_tests/state's `Genesis::from(&EFTest)`. + Genesis { + alloc, + gas_limit: env.current_gas_limit, + coinbase: env.current_coinbase, + difficulty: env.current_difficulty, + mix_hash: env.current_random.unwrap_or_default(), + timestamp: env.current_timestamp, + base_fee_per_gas: env + .current_base_fee + .map(|v| v.try_into().unwrap_or(u64::MAX)), + ..Default::default() + } +} + +/// Computes (gas_price, max_fee_per_gas, max_priority_fee_per_gas) from the +/// transaction template and block environment. 
+fn compute_fee_fields( + tx: &crate::statetest::types::TestTransaction, + env: &crate::statetest::types::TestEnv, +) -> eyre::Result<(U256, Option, Option)> { + match tx.gas_price { + Some(price) => { + // Legacy / EIP-2930: effective gas price == gas_price. + Ok((price, tx.max_fee_per_gas, tx.max_priority_fee_per_gas)) + } + None => { + // EIP-1559: effective = min(max_fee, base_fee + priority). + let base_fee = env + .current_base_fee + .ok_or_else(|| eyre::eyre!("EIP-1559 tx but no currentBaseFee in env"))?; + let max_priority = tx + .max_priority_fee_per_gas + .ok_or_else(|| eyre::eyre!("EIP-1559 tx missing maxPriorityFeePerGas"))?; + let max_fee = tx + .max_fee_per_gas + .ok_or_else(|| eyre::eyre!("EIP-1559 tx missing maxFeePerGas"))?; + let effective = std::cmp::min(max_fee, base_fee + max_priority); + Ok((effective, Some(max_fee), Some(max_priority))) + } + } +} + +/// Parses the access_lists JSON array at the given data index into an [`AccessList`]. +/// +/// The `access_lists` field in the statetest JSON is an array-of-arrays; each +/// inner element is an access list for one `data` index. When the field is +/// absent or has no entry for this index we return an empty list. +fn parse_access_list(raw: &[serde_json::Value], data_idx: usize) -> AccessList { + let entry = match raw.get(data_idx) { + Some(v) => v, + None => return vec![], + }; + + // Each entry is an array of { "address": "0x...", "storageKeys": ["0x...",...] } + let items = match entry.as_array() { + Some(a) => a, + None => return vec![], + }; + + items + .iter() + .filter_map(|item| { + let addr_str = item["address"].as_str()?; + let addr: Address = addr_str.parse().ok()?; + let keys: Vec = item["storageKeys"] + .as_array() + .map(|ks| { + ks.iter() + .filter_map(|k| k.as_str()?.parse::().ok()) + .collect() + }) + .unwrap_or_default(); + Some((addr, keys)) + }) + .collect() +} + +/// Derives the sender address from the test's `secretKey`. 
+/// +/// EF statetests include the private key so the sender can be derived without +/// a signature: compute the uncompressed public key, keccak256 it, take +/// the last 20 bytes as the Ethereum address. Returns an error when +/// `secret_key` is `None` (the caller should use the `sender` field instead). +fn recover_sender(tx: &crate::statetest::types::TestTransaction) -> eyre::Result
{ + use ethrex_crypto::keccak::keccak_hash; + use secp256k1::{PublicKey, SECP256K1, SecretKey}; + + let key_bytes = tx + .secret_key + .ok_or_else(|| eyre::eyre!("no secretKey and no sender field in transaction"))?; + let sk = SecretKey::from_slice(key_bytes.as_bytes()) + .map_err(|e| eyre::eyre!("invalid secret key: {e}"))?; + let pubkey = PublicKey::from_secret_key(SECP256K1, &sk); + // Uncompressed public key: 65 bytes, first byte is 0x04 (prefix), skip it. + let uncompressed = pubkey.serialize_uncompressed(); + let hash = keccak_hash(&uncompressed[1..]); + // Address is the last 20 bytes of the 32-byte keccak hash. + Ok(Address::from_slice(&hash[12..])) +} diff --git a/cmd/ethrex-evm/src/statetest/state_root.rs b/cmd/ethrex-evm/src/statetest/state_root.rs new file mode 100644 index 00000000000..7692e60a218 --- /dev/null +++ b/cmd/ethrex-evm/src/statetest/state_root.rs @@ -0,0 +1,163 @@ +use std::{collections::BTreeMap, sync::Arc}; + +use ethrex_blockchain::vm::StoreVmDatabase; +use ethrex_common::{ + Address, H256, U256, + types::{Account, AccountInfo, AccountUpdate, Genesis, GenesisAccount}, +}; +use ethrex_levm::db::gen_db::GeneralizedDatabase; +use ethrex_storage::{EngineType, Store}; +use ethrex_vm::DynVmDatabase; +use rustc_hash::FxHashMap; + +/// Given a pre-state account map and a set of post-execution updates, returns +/// the post-state root by applying them to an in-memory Store. Returns +/// `eyre::Result` for caller-friendly error reporting. +/// +/// # How it works +/// +/// 1. Converts `pre_state` into a [`Genesis`] alloc and creates an in-memory [`Store`]. +/// 2. Calls `store.add_initial_state(genesis).await` (async) to commit the pre-state trie and +/// obtain the genesis block hash. +/// 3. Calls `store.apply_account_updates_batch(block_hash, updates)` (sync) to apply the +/// post-execution deltas and compute the new state root from the trie. +/// 4. Returns the `state_trie_hash` from the returned [`AccountUpdatesList`]. 
+/// +/// The tokio runtime is created and discarded inside this function; the public signature stays +/// synchronous for ergonomics. +pub fn compute_post_state_root( + pre_state: &FxHashMap, + updates: &[AccountUpdate], +) -> eyre::Result { + let genesis = build_genesis(pre_state); + + let rt = tokio::runtime::Runtime::new()?; + let (store, block_hash) = rt.block_on(async { + let mut store = + Store::new("./temp", EngineType::InMemory).map_err(|e| eyre::eyre!("{e}"))?; + store + .add_initial_state(genesis.clone()) + .await + .map_err(|e| eyre::eyre!("{e}"))?; + let block_hash = genesis.get_block().hash(); + Ok::<_, eyre::Error>((store, block_hash)) + })?; + + let result = store + .apply_account_updates_batch(block_hash, updates) + .map_err(|e| eyre::eyre!("{e}"))? + .ok_or_else(|| { + eyre::eyre!("apply_account_updates_batch: state trie not found for genesis block hash") + })?; + + Ok(result.state_trie_hash) +} + +/// Builds a minimal [`Genesis`] whose alloc matches the given account map. +/// +/// The chain config uses Cancun-era activation times of 0 so that the genesis block +/// header is valid across all common forks. The genesis timestamp is 0. +fn build_genesis(pre_state: &FxHashMap) -> Genesis { + let alloc: BTreeMap = pre_state + .iter() + .map(|(addr, account)| { + let genesis_account = account_to_genesis_account(account); + (*addr, genesis_account) + }) + .collect(); + + // Mirror tooling/ef_tests/state/types.rs `Genesis::from(&EFTest)`: leave + // the chain config at its `Default` (all-forks-inactive). LEVM's + // execution-time `EVMConfig` is what drives fork-specific behavior; a + // bespoke chain config here can leak Amsterdam/Prague checks into a + // Shanghai run (observed: +32 gas overhead on a 21000-gas transfer). + Genesis { + alloc, + gas_limit: 30_000_000, + ..Default::default() + } +} + +/// Converts an [`Account`] back to a [`GenesisAccount`] for inclusion in a genesis alloc. 
+fn account_to_genesis_account(account: &Account) -> GenesisAccount { + let storage: BTreeMap = account + .storage + .iter() + .map(|(k, v)| (U256::from_big_endian(k.as_bytes()), *v)) + .collect(); + + GenesisAccount { + code: account.code.bytecode.clone(), + storage, + balance: account.info.balance, + nonce: account.info.nonce, + } +} + +/// Returns a [`ChainConfig`] with all common forks activated at block 0 / timestamp 0. +/// +/// Exposed as `pub` so Phase 4 can reuse it when building a `Genesis` from a `StateTest`. +pub fn minimal_chain_config() -> ethrex_common::types::ChainConfig { + use ethrex_common::types::ChainConfig; + ChainConfig { + chain_id: 1, + homestead_block: Some(0), + dao_fork_block: Some(0), + dao_fork_support: true, + eip150_block: Some(0), + eip155_block: Some(0), + eip158_block: Some(0), + byzantium_block: Some(0), + constantinople_block: Some(0), + petersburg_block: Some(0), + istanbul_block: Some(0), + muir_glacier_block: Some(0), + berlin_block: Some(0), + london_block: Some(0), + arrow_glacier_block: Some(0), + gray_glacier_block: Some(0), + merge_netsplit_block: Some(0), + shanghai_time: Some(0), + cancun_time: Some(0), + prague_time: Some(0), + terminal_total_difficulty: Some(0), + terminal_total_difficulty_passed: true, + ..Default::default() + } +} + +/// Builds a [`GeneralizedDatabase`] from a genesis + in-memory store. +/// +/// This is a convenience used in tests to set up the pre-state for LEVM execution. +/// Phase 4 will use an equivalent inline construction when wiring the full VM pipeline. 
+pub fn build_generalized_db(store: Store, genesis: &Genesis) -> eyre::Result { + let block_header = genesis.get_block().header; + let vm_db: DynVmDatabase = + Box::new(StoreVmDatabase::new(store, block_header).map_err(|e| eyre::eyre!("{e}"))?); + Ok(GeneralizedDatabase::new(Arc::new(vm_db))) +} + +/// Thin wrapper used in tests: sets up an in-memory store from `pre_state` and returns both the +/// store and the genesis block hash, without consuming the tokio runtime. +pub async fn setup_store( + pre_state: &FxHashMap, +) -> eyre::Result<(Store, H256, Genesis)> { + let genesis = build_genesis(pre_state); + let mut store = Store::new("./temp", EngineType::InMemory).map_err(|e| eyre::eyre!("{e}"))?; + store + .add_initial_state(genesis.clone()) + .await + .map_err(|e| eyre::eyre!("{e}"))?; + let block_hash = genesis.get_block().hash(); + Ok((store, block_hash, genesis)) +} + +/// Returns a minimal [`AccountInfo`] with the given balance and default nonce/code_hash. +pub fn eoa_info(balance: u64) -> AccountInfo { + use ethrex_common::constants::EMPTY_KECCACK_HASH; + AccountInfo { + balance: U256::from(balance), + nonce: 0, + code_hash: *EMPTY_KECCACK_HASH, + } +} diff --git a/cmd/ethrex-evm/src/statetest/types.rs b/cmd/ethrex-evm/src/statetest/types.rs new file mode 100644 index 00000000000..ed70a1f9e70 --- /dev/null +++ b/cmd/ethrex-evm/src/statetest/types.rs @@ -0,0 +1,219 @@ +//! Minimal inlined types for parsing EIP-3155 `statetest` JSON files. +//! +//! These are inlined here (Option B) rather than imported from `tooling/ef_tests/state` +//! because that crate lives in a separate Cargo workspace and pulls in `revm` and `simd-json`. +//! Only the fields required for Phase 4 execution are included; unknown JSON fields are +//! silently ignored (no `#[serde(deny_unknown_fields)]`). 
+ +use std::collections::BTreeMap; + +use bytes::Bytes; +use ethrex_common::{ + Address, H256, U256, + types::{ChainConfig, GenesisAccount}, +}; +use serde::{Deserialize, Serialize}; + +/// Authorization tuple for EIP-7702 set-code transactions. +/// +/// Mirrors `EFTestAuthorizationListTuple` from `tooling/ef_tests/state/types.rs`. +/// Accepts both `"v"` and `"yParity"` JSON keys for the y-parity field because +/// older EF vectors used `"v"` while newer Prague vectors use `"yParity"`. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TestAuthTuple { + #[serde(deserialize_with = "ethrex_common::serde_utils::u64::deser_hex_or_dec_str")] + pub chain_id: u64, + pub address: Address, + #[serde(deserialize_with = "ethrex_common::serde_utils::u64::deser_hex_or_dec_str")] + pub nonce: u64, + #[serde(alias = "yParity", alias = "y_parity")] + pub v: U256, + pub r: U256, + pub s: U256, +} + +/// A single `statetest` JSON file (the outer map keyed by test name). +pub type StateTestFile = BTreeMap; + +/// One named state test containing pre-state, environment, transaction, and post-state vectors. +#[derive(Debug, Clone, Deserialize)] +pub struct StateTest { + /// Pre-execution account states. + pub pre: BTreeMap, + /// Block environment fields. + pub env: TestEnv, + /// Transaction template (gas / value fields are indexed per subtest). + pub transaction: TestTransaction, + /// Post-state vectors keyed by fork name, then subtest index. + pub post: BTreeMap>, +} + +/// An account entry in the `pre` section. 
+#[derive(Debug, Clone, Deserialize)] +pub struct StateTestAccount { + #[serde(default, with = "ethrex_common::serde_utils::bytes")] + pub code: Bytes, + #[serde(default)] + pub storage: BTreeMap, + #[serde(deserialize_with = "ethrex_common::serde_utils::u256::deser_hex_or_dec_str")] + pub balance: U256, + #[serde(default, with = "ethrex_common::serde_utils::u64::hex_str")] + pub nonce: u64, +} + +impl From<&StateTestAccount> for GenesisAccount { + fn from(a: &StateTestAccount) -> Self { + GenesisAccount { + code: a.code.clone(), + storage: a.storage.clone(), + balance: a.balance, + nonce: a.nonce, + } + } +} + +/// Block environment fields for a state test. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TestEnv { + pub current_coinbase: Address, + #[serde(deserialize_with = "ethrex_common::serde_utils::u256::deser_hex_or_dec_str")] + pub current_difficulty: U256, + #[serde(deserialize_with = "ethrex_common::serde_utils::u64::deser_hex_or_dec_str")] + pub current_gas_limit: u64, + #[serde(deserialize_with = "ethrex_common::serde_utils::u64::deser_hex_or_dec_str")] + pub current_number: u64, + #[serde(deserialize_with = "ethrex_common::serde_utils::u64::deser_hex_or_dec_str")] + pub current_timestamp: u64, + #[serde( + default, + deserialize_with = "ethrex_common::serde_utils::u256::deser_hex_str_opt" + )] + pub current_base_fee: Option, + #[serde(default)] + pub current_random: Option, + /// Excess blob gas for EIP-4844 blob fee computation. Present in Cancun+ vectors. + #[serde(default, deserialize_with = "deser_u64_hex_or_dec_opt")] + pub current_excess_blob_gas: Option, +} + +/// The transaction template for a state test. Indexes are per-subtest. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TestTransaction { + /// Per-subtest calldata bodies, each a hex-encoded byte string. 
Without + /// the custom deserializer, the default `Vec` parse would treat + /// `"0x"` as the literal ASCII bytes `'0','x'` (intrinsic-gas accounting + /// would then over-charge 32 gas for what should be empty calldata). + #[serde(deserialize_with = "deser_vec_hex_bytes")] + pub data: Vec, + #[serde(deserialize_with = "deser_vec_u64_hex_dec")] + pub gas_limit: Vec, + pub gas_price: Option, + #[serde(deserialize_with = "ethrex_common::serde_utils::u64::deser_hex_or_dec_str")] + pub nonce: u64, + /// Private key for sender derivation. Optional; some vectors supply `sender` directly. + #[serde(default)] + pub secret_key: Option, + /// Pre-derived sender address. When present, used directly without key derivation. + #[serde(default)] + pub sender: Option
, + pub to: Option
, + pub value: Vec, + pub max_fee_per_gas: Option, + pub max_priority_fee_per_gas: Option, + #[serde(default)] + pub access_lists: Vec, + /// EIP-7702 authorization list. Each entry delegates to a target address. + #[serde(default)] + pub authorization_list: Option>, + /// EIP-4844 blob versioned hashes for blob transactions. + #[serde(default)] + pub blob_versioned_hashes: Option>, + /// EIP-4844 max fee per blob gas. + #[serde(default)] + pub max_fee_per_blob_gas: Option, +} + +/// Deserializes a JSON array of hex strings (`["0x", "0xdeadbeef"]`) into a +/// `Vec`. `"0x"` decodes to an empty `Bytes`. +fn deser_vec_hex_bytes<'de, D: serde::Deserializer<'de>>(d: D) -> Result, D::Error> { + use serde::de::Error; + let raw: Vec = Vec::deserialize(d)?; + raw.into_iter() + .map(|s| { + let stripped = s.strip_prefix("0x").unwrap_or(&s); + hex::decode(stripped) + .map(Bytes::from) + .map_err(D::Error::custom) + }) + .collect() +} + +/// Deserializes a JSON array of hex-or-decimal strings (`["0x5208", "21000"]`) +/// into a `Vec`. EF tests encode per-subtest gas limits this way. +fn deser_vec_u64_hex_dec<'de, D: serde::Deserializer<'de>>(d: D) -> Result, D::Error> { + use serde::de::Error; + let raw: Vec = Vec::deserialize(d)?; + raw.into_iter() + .map(|s| { + let trimmed = s.trim_start_matches("0x"); + if trimmed.len() != s.len() { + u64::from_str_radix(trimmed, 16) + } else { + trimmed.parse::() + } + .map_err(D::Error::custom) + }) + .collect() +} + +/// A single post-state vector entry (one subtest). +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct PostStateVector { + /// Expected post-state root hash. + pub hash: H256, + /// Keccak of the RLP-encoded logs. + pub logs: H256, + #[serde(default)] + pub expect_exception: Option, + /// Indexes selecting which data/gas/value item from the transaction template to use. + pub indexes: SubtestIndexes, +} + +/// Indexes into the transaction template arrays. 
+#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct SubtestIndexes { + pub data: usize, + pub gas: usize, + pub value: usize, +} + +/// Deserializes an optional `u64` from a JSON string that is either `"0x..."` hex +/// or a decimal integer. Used for `currentExcessBlobGas`. +fn deser_u64_hex_or_dec_opt<'de, D: serde::Deserializer<'de>>( + d: D, +) -> Result, D::Error> { + use serde::de::Error; + let opt: Option = Option::deserialize(d)?; + match opt { + None => Ok(None), + Some(s) if s.is_empty() => Ok(None), + Some(s) => { + let trimmed = s.trim_start_matches("0x"); + let v = if trimmed.len() != s.len() { + u64::from_str_radix(trimmed, 16) + } else { + trimmed.parse::() + }; + v.map(Some).map_err(D::Error::custom) + } + } +} + +/// The chain config used when constructing Genesis from a StateTest. +#[allow(dead_code)] +pub fn default_statetest_chain_config() -> ChainConfig { + crate::statetest::state_root::minimal_chain_config() +} diff --git a/cmd/ethrex-evm/tests/error_map_tests.rs b/cmd/ethrex-evm/tests/error_map_tests.rs new file mode 100644 index 00000000000..b119a818682 --- /dev/null +++ b/cmd/ethrex-evm/tests/error_map_tests.rs @@ -0,0 +1,103 @@ +//! Unit tests for `statetest::error_map::vm_error_to_geth_string`. +//! +//! One assertion per mapped variant, verifying the exact geth error string. 
+ +use ethrex_evm::statetest::error_map::vm_error_to_geth_string; +use ethrex_levm::errors::{ExceptionalHalt, VMError}; + +#[test] +fn revert_opcode_maps_to_execution_reverted() { + let err = VMError::RevertOpcode; + assert_eq!(vm_error_to_geth_string(&err), "execution reverted"); +} + +#[test] +fn out_of_gas_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::OutOfGas); + assert_eq!(vm_error_to_geth_string(&err), "out of gas"); +} + +#[test] +fn stack_underflow_uses_display() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::StackUnderflow { + stack_len: 0, + required: 2, + }); + let s = vm_error_to_geth_string(&err); + // Phase 4a made Display geth-compatible: "stack underflow (N <=> M)" + assert!( + s.contains("stack underflow"), + "expected 'stack underflow' in: {s}" + ); + assert!(s.contains("0"), "expected stack_len in: {s}"); + assert!(s.contains("2"), "expected required in: {s}"); +} + +#[test] +fn stack_overflow_uses_display() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::StackOverflow { + stack_len: 1024, + limit: 1024, + }); + let s = vm_error_to_geth_string(&err); + // Phase 4a Display: "stack limit reached L (N)" + assert!( + s.contains("stack limit reached"), + "expected 'stack limit reached' in: {s}" + ); +} + +#[test] +fn invalid_jump_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::InvalidJump); + assert_eq!(vm_error_to_geth_string(&err), "invalid jump destination"); +} + +#[test] +fn static_context_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::OpcodeNotAllowedInStaticContext); + assert_eq!(vm_error_to_geth_string(&err), "write protection"); +} + +#[test] +fn invalid_contract_prefix_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::InvalidContractPrefix); + assert_eq!( + vm_error_to_geth_string(&err), + "invalid code: must not begin with 0xef" + ); +} + +#[test] +fn invalid_opcode_maps_correctly() { + let err = 
VMError::ExceptionalHalt(ExceptionalHalt::InvalidOpcode); + let s = vm_error_to_geth_string(&err); + assert!( + s.contains("invalid opcode"), + "expected 'invalid opcode' in: {s}" + ); +} + +#[test] +fn address_already_occupied_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::AddressAlreadyOccupied); + assert_eq!(vm_error_to_geth_string(&err), "contract address collision"); +} + +#[test] +fn contract_output_too_big_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::ContractOutputTooBig); + assert_eq!(vm_error_to_geth_string(&err), "max code size exceeded"); +} + +#[test] +fn out_of_bounds_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::OutOfBounds); + assert_eq!(vm_error_to_geth_string(&err), "return data out of bounds"); +} + +#[test] +fn very_large_number_maps_correctly() { + let err = VMError::ExceptionalHalt(ExceptionalHalt::VeryLargeNumber); + assert_eq!(vm_error_to_geth_string(&err), "gas uint64 overflow"); +} diff --git a/cmd/ethrex-evm/tests/state_root.rs b/cmd/ethrex-evm/tests/state_root.rs new file mode 100644 index 00000000000..cdeb4957c4c --- /dev/null +++ b/cmd/ethrex-evm/tests/state_root.rs @@ -0,0 +1,114 @@ +//! Integration tests for `compute_post_state_root`. +//! +//! The expected H256 in `test_compute_post_state_root_stable_value` is pinned +//! from a first successful run so any future change to +//! `apply_account_updates_batch` or state-trie hashing fails this test visibly. +//! See `EXPECTED_ROOT` below for the actual value and the recapture procedure. 
+ +use ethrex_common::{ + Address, H256, + constants::EMPTY_KECCACK_HASH, + types::{Account, AccountInfo, AccountUpdate, Code}, +}; +use ethrex_evm::compute_post_state_root; +use rustc_hash::FxHashMap; + +fn addr(byte: u8) -> Address { + let mut a = [0u8; 20]; + a[19] = byte; + Address::from(a) +} + +fn eoa(balance_wei: u64) -> Account { + Account { + info: AccountInfo { + balance: balance_wei.into(), + nonce: 0, + code_hash: *EMPTY_KECCACK_HASH, + }, + code: Code::from_bytecode(Default::default(), ðrex_common::NativeCrypto), + storage: Default::default(), + } +} + +/// Build the pre-state and a set of updates that simulate a transfer of 10 wei +/// from address 0x01 (balance 100) to new address 0x03. +/// +/// After the transfer: +/// - 0x01 balance: 90 +/// - 0x03 balance: 10 (new account) +fn build_transfer_scenario() -> (FxHashMap, Vec) { + // Pre-state: one EOA at 0x01 with 100 wei. + let mut pre_state = FxHashMap::default(); + pre_state.insert(addr(0x01), eoa(100)); + + // Updates: reduce 0x01 to 90, create 0x03 with 10. + let mut update_01 = AccountUpdate::new(addr(0x01)); + update_01.info = Some(AccountInfo { + balance: 90u64.into(), + nonce: 0, + code_hash: *EMPTY_KECCACK_HASH, + }); + + let mut update_03 = AccountUpdate::new(addr(0x03)); + update_03.info = Some(AccountInfo { + balance: 10u64.into(), + nonce: 0, + code_hash: *EMPTY_KECCACK_HASH, + }); + + let updates = vec![update_01, update_03]; + (pre_state, updates) +} + +#[test] +fn test_compute_post_state_root_determinism() { + let (pre_state, updates) = build_transfer_scenario(); + + let root1 = compute_post_state_root(&pre_state, &updates).expect("first call must succeed"); + let root2 = compute_post_state_root(&pre_state, &updates).expect("second call must succeed"); + + assert_eq!( + root1, root2, + "compute_post_state_root must be deterministic" + ); +} + +#[test] +fn test_compute_post_state_root_stable_value() { + // Expected root pinned from first run. 
+ // If this value changes, apply_account_updates_batch or the trie hashing has changed. + // + // To recapture: comment-out the assert below, run the test with --nocapture, + // observe the printed root, and paste it here. + // + // Pinned value (captured 2026-05-12): + const EXPECTED_ROOT: &str = + "0xbd7c0251e2981c57d98315641bda8051736eb8ac16705049fb2ae592af24e1c5"; + + let (pre_state, updates) = build_transfer_scenario(); + let root = compute_post_state_root(&pre_state, &updates) + .expect("compute_post_state_root must succeed"); + + // Print for easy recapture if the value changes. + eprintln!("post-state root = {root:?}"); + + let expected = EXPECTED_ROOT + .parse::() + .expect("EXPECTED_ROOT must be a valid 0x-prefixed H256 literal"); + assert_eq!(root, expected, "post-state root must match pinned value"); +} + +#[test] +fn test_empty_updates_is_deterministic() { + // Two calls with the same pre-state and no updates must produce the same root. + // This isn't a pre-state-root identity check (the function still runs the trie + // pipeline); it just pins idempotency of the empty-updates path. 
+ let mut pre_state = FxHashMap::default(); + pre_state.insert(addr(0x01), eoa(42)); + + let root1 = compute_post_state_root(&pre_state, &[]).expect("empty updates must succeed"); + let root2 = compute_post_state_root(&pre_state, &[]).expect("second call must succeed"); + + assert_eq!(root1, root2, "empty-updates root must be stable"); +} diff --git a/crates/blockchain/tracing.rs b/crates/blockchain/tracing.rs index 1730c3b1db4..17c511a7bd5 100644 --- a/crates/blockchain/tracing.rs +++ b/crates/blockchain/tracing.rs @@ -5,10 +5,11 @@ use std::{ use ethrex_common::{ H256, - tracing::{CallTrace, PrestateResult}, + tracing::{CallTrace, OpcodeTraceResult, PrestateResult}, types::Block, }; use ethrex_storage::Store; +use ethrex_vm::tracing::OpcodeTracerConfig; use ethrex_vm::{Evm, EvmError}; use crate::{Blockchain, error::ChainError, vm::StoreVmDatabase}; @@ -157,6 +158,66 @@ impl Blockchain { Ok(traces) } + /// Outputs the per-opcode (EIP-3155) trace for the given transaction. + /// May need to re-execute blocks in order to rebuild the transaction's prestate, up to the amount given by `reexec`. + pub async fn trace_transaction_opcodes( + &self, + tx_hash: H256, + reexec: u32, + timeout: Duration, + cfg: OpcodeTracerConfig, + ) -> Result { + let Some((_, block_hash, tx_index)) = + self.storage.get_transaction_location(tx_hash).await? + else { + return Err(ChainError::Custom("Transaction not Found".to_string())); + }; + let tx_index = tx_index as usize; + let Some(block) = self.storage.get_block_by_hash(block_hash).await? else { + return Err(ChainError::Custom("Block not Found".to_string())); + }; + let mut vm = self + .rebuild_parent_state(block.header.parent_hash, reexec) + .await?; + vm.rerun_block(&block, Some(tx_index))?; + timeout_trace_operation(timeout, move || vm.trace_tx_opcodes(&block, tx_index, cfg)).await + } + + /// Outputs the opcode (EIP-3155) trace for each transaction in the block along with + /// the transaction's hash. 
+ /// May need to re-execute blocks in order to rebuild the block's prestate, up to the amount + /// given by `reexec`. + /// Returns traces from oldest to newest transaction. + pub async fn trace_block_opcodes( + &self, + block: Block, + reexec: u32, + timeout: Duration, + cfg: OpcodeTracerConfig, + ) -> Result, ChainError> { + let mut vm = self + .rebuild_parent_state(block.header.parent_hash, reexec) + .await?; + vm.rerun_block(&block, Some(0))?; + let vm = Arc::new(Mutex::new(vm)); + let block = Arc::new(block); + let mut traces = vec![]; + for index in 0..block.body.transactions.len() { + let block = block.clone(); + let vm = vm.clone(); + let tx_hash = block.as_ref().body.transactions[index].hash(); + let cfg = cfg.clone(); + let result = timeout_trace_operation(timeout, move || { + vm.lock() + .map_err(|_| EvmError::Custom("Unexpected Runtime Error".to_string()))? + .trace_tx_opcodes(block.as_ref(), index, cfg) + }) + .await?; + traces.push((tx_hash, result)); + } + Ok(traces) + } + /// Rebuild the parent state for a block given its parent hash, returning an `Evm` instance with all changes cached /// Will re-execute all ancestor block's which's state is not stored up to a maximum given by `reexec` async fn rebuild_parent_state( diff --git a/crates/common/tracing.rs b/crates/common/tracing.rs index 94fdea12c29..a582862c126 100644 --- a/crates/common/tracing.rs +++ b/crates/common/tracing.rs @@ -125,3 +125,479 @@ pub struct PrePostState { fn is_zero_nonce(n: &u64) -> bool { *n == 0 } + +// ─── OpcodeTracer types ────────────────────────────────────────────────────── + +/// Per-opcode trace entry (EIP-3155 step content), emitted under the de-facto +/// cross-client `structLogger` wrapper. +/// +/// Wrapper keys: `{failed, gas, returnValue, structLogs}`. Per-step `gas`, +/// `gasCost`, `refund` are numeric; `op` is the opcode mnemonic string. 
+/// `memSize`, `returnData`, and `refund` are always emitted (an extension +/// beyond the common minimal step shape); consumers ignoring extra fields are +/// unaffected. +#[derive(Debug)] +pub struct OpcodeStep { + pub pc: u64, + /// Raw opcode byte value (e.g. 0x60 for PUSH1). Serialized as its mnemonic + /// string (`"PUSH1"`); unassigned bytes serialize as + /// `"opcode 0xNN not defined"`. + pub op: u8, + pub gas: u64, + pub gas_cost: u64, + /// Current memory size in bytes (always emitted). + pub mem_size: u64, + pub depth: u32, + /// Return data from the previous sub-call (always emitted; `"0x"` when disabled or empty). + pub return_data: bytes::Bytes, + /// Gas refund counter (always emitted). + pub refund: u64, + /// `Some(vec)` when stack capture is enabled (bottom-first); `None` when disabled (emits JSON null). + pub stack: Option>, + /// `Some(chunks)` when memory capture is enabled; `None` when disabled (field omitted). + pub memory: Option>, + /// `Some(map)` at SLOAD/SSTORE steps when storage capture is enabled (single entry); `None` otherwise. + pub storage: Option>, + pub error: Option, +} + +/// A 32-byte chunk of EVM memory, serialized as `"0x" + 64 lowercase hex chars`. +/// The *caller* zero-pads the last partial chunk before constructing this type. +#[derive(Debug)] +pub struct MemoryChunk(pub [u8; 32]); + +/// Top-level result returned by an opcode (EIP-3155) trace. +/// +/// Wraps per-step entries as `{failed, gas, returnValue, structLogs}` matching +/// the de-facto `debug_traceTransaction` response shape used across major +/// execution clients. +#[derive(Debug)] +pub struct OpcodeTraceResult { + pub gas_used: u64, + /// True iff the transaction completed without error. Serialized as the + /// inverted `failed` field on the wire. + pub pass: bool, + pub output: bytes::Bytes, + pub steps: Vec, +} + +// ─── Helpers ────────────────────────────────────────────────────────────── + +/// Returns the opcode mnemonic for `byte`. 
+/// +/// Known opcodes β†’ their uppercase name (`"PUSH1"`, `"ADD"`, `"INVALID"` for +/// 0xFE). Unassigned bytes β†’ `None`; callers wanting the conventional unknown +/// string should fall back to `format!("opcode 0x{:02x} not defined", byte)`. +pub fn opcode_name(byte: u8) -> Option<&'static str> { + match byte { + 0x00 => Some("STOP"), + 0x01 => Some("ADD"), + 0x02 => Some("MUL"), + 0x03 => Some("SUB"), + 0x04 => Some("DIV"), + 0x05 => Some("SDIV"), + 0x06 => Some("MOD"), + 0x07 => Some("SMOD"), + 0x08 => Some("ADDMOD"), + 0x09 => Some("MULMOD"), + 0x0A => Some("EXP"), + 0x0B => Some("SIGNEXTEND"), + 0x10 => Some("LT"), + 0x11 => Some("GT"), + 0x12 => Some("SLT"), + 0x13 => Some("SGT"), + 0x14 => Some("EQ"), + 0x15 => Some("ISZERO"), + 0x16 => Some("AND"), + 0x17 => Some("OR"), + 0x18 => Some("XOR"), + 0x19 => Some("NOT"), + 0x1A => Some("BYTE"), + 0x1B => Some("SHL"), + 0x1C => Some("SHR"), + 0x1D => Some("SAR"), + 0x1E => Some("CLZ"), + 0x20 => Some("KECCAK256"), + 0x30 => Some("ADDRESS"), + 0x31 => Some("BALANCE"), + 0x32 => Some("ORIGIN"), + 0x33 => Some("CALLER"), + 0x34 => Some("CALLVALUE"), + 0x35 => Some("CALLDATALOAD"), + 0x36 => Some("CALLDATASIZE"), + 0x37 => Some("CALLDATACOPY"), + 0x38 => Some("CODESIZE"), + 0x39 => Some("CODECOPY"), + 0x3A => Some("GASPRICE"), + 0x3B => Some("EXTCODESIZE"), + 0x3C => Some("EXTCODECOPY"), + 0x3D => Some("RETURNDATASIZE"), + 0x3E => Some("RETURNDATACOPY"), + 0x3F => Some("EXTCODEHASH"), + 0x40 => Some("BLOCKHASH"), + 0x41 => Some("COINBASE"), + 0x42 => Some("TIMESTAMP"), + 0x43 => Some("NUMBER"), + 0x44 => Some("PREVRANDAO"), + 0x45 => Some("GASLIMIT"), + 0x46 => Some("CHAINID"), + 0x47 => Some("SELFBALANCE"), + 0x48 => Some("BASEFEE"), + 0x49 => Some("BLOBHASH"), + 0x4A => Some("BLOBBASEFEE"), + 0x4B => Some("SLOTNUM"), + 0x50 => Some("POP"), + 0x51 => Some("MLOAD"), + 0x52 => Some("MSTORE"), + 0x53 => Some("MSTORE8"), + 0x54 => Some("SLOAD"), + 0x55 => Some("SSTORE"), + 0x56 => Some("JUMP"), + 0x57 => 
Some("JUMPI"), + 0x58 => Some("PC"), + 0x59 => Some("MSIZE"), + 0x5A => Some("GAS"), + 0x5B => Some("JUMPDEST"), + 0x5C => Some("TLOAD"), + 0x5D => Some("TSTORE"), + 0x5E => Some("MCOPY"), + 0x5F => Some("PUSH0"), + 0x60 => Some("PUSH1"), + 0x61 => Some("PUSH2"), + 0x62 => Some("PUSH3"), + 0x63 => Some("PUSH4"), + 0x64 => Some("PUSH5"), + 0x65 => Some("PUSH6"), + 0x66 => Some("PUSH7"), + 0x67 => Some("PUSH8"), + 0x68 => Some("PUSH9"), + 0x69 => Some("PUSH10"), + 0x6A => Some("PUSH11"), + 0x6B => Some("PUSH12"), + 0x6C => Some("PUSH13"), + 0x6D => Some("PUSH14"), + 0x6E => Some("PUSH15"), + 0x6F => Some("PUSH16"), + 0x70 => Some("PUSH17"), + 0x71 => Some("PUSH18"), + 0x72 => Some("PUSH19"), + 0x73 => Some("PUSH20"), + 0x74 => Some("PUSH21"), + 0x75 => Some("PUSH22"), + 0x76 => Some("PUSH23"), + 0x77 => Some("PUSH24"), + 0x78 => Some("PUSH25"), + 0x79 => Some("PUSH26"), + 0x7A => Some("PUSH27"), + 0x7B => Some("PUSH28"), + 0x7C => Some("PUSH29"), + 0x7D => Some("PUSH30"), + 0x7E => Some("PUSH31"), + 0x7F => Some("PUSH32"), + 0x80 => Some("DUP1"), + 0x81 => Some("DUP2"), + 0x82 => Some("DUP3"), + 0x83 => Some("DUP4"), + 0x84 => Some("DUP5"), + 0x85 => Some("DUP6"), + 0x86 => Some("DUP7"), + 0x87 => Some("DUP8"), + 0x88 => Some("DUP9"), + 0x89 => Some("DUP10"), + 0x8A => Some("DUP11"), + 0x8B => Some("DUP12"), + 0x8C => Some("DUP13"), + 0x8D => Some("DUP14"), + 0x8E => Some("DUP15"), + 0x8F => Some("DUP16"), + 0x90 => Some("SWAP1"), + 0x91 => Some("SWAP2"), + 0x92 => Some("SWAP3"), + 0x93 => Some("SWAP4"), + 0x94 => Some("SWAP5"), + 0x95 => Some("SWAP6"), + 0x96 => Some("SWAP7"), + 0x97 => Some("SWAP8"), + 0x98 => Some("SWAP9"), + 0x99 => Some("SWAP10"), + 0x9A => Some("SWAP11"), + 0x9B => Some("SWAP12"), + 0x9C => Some("SWAP13"), + 0x9D => Some("SWAP14"), + 0x9E => Some("SWAP15"), + 0x9F => Some("SWAP16"), + 0xA0 => Some("LOG0"), + 0xA1 => Some("LOG1"), + 0xA2 => Some("LOG2"), + 0xA3 => Some("LOG3"), + 0xA4 => Some("LOG4"), + 0xE6 => Some("DUPN"), + 0xE7 => 
Some("SWAPN"), + 0xE8 => Some("EXCHANGE"), + 0xF0 => Some("CREATE"), + 0xF1 => Some("CALL"), + 0xF2 => Some("CALLCODE"), + 0xF3 => Some("RETURN"), + 0xF4 => Some("DELEGATECALL"), + 0xF5 => Some("CREATE2"), + 0xFA => Some("STATICCALL"), + 0xFD => Some("REVERT"), + 0xFE => Some("INVALID"), + 0xFF => Some("SELFDESTRUCT"), + _ => None, + } +} + +/// Converts a `U256` to geth's `uint256.Int.Hex()` form: `"0x"` followed by +/// lowercase hex with leading zeros stripped. Zero β†’ `"0x0"` (not `"0x"`). +pub fn geth_uint256_hex(v: &U256) -> String { + if v.is_zero() { + return "0x0".to_string(); + } + // U256 words are little-endian; convert to big-endian bytes. + let bytes = crate::utils::u256_to_big_endian(*v); + let hex_str = hex::encode(bytes); + let stripped = hex_str.trim_start_matches('0'); + format!("0x{}", stripped) +} + +// ─── Serialize impls ────────────────────────────────────────────────────── + +impl serde::Serialize for MemoryChunk { + fn serialize(&self, serializer: S) -> Result { + serializer.serialize_str(&format!("0x{}", hex::encode(self.0))) + } +} + +impl serde::Serialize for OpcodeStep { + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeMap; + + // Base fields: pc, op, gas, gasCost, depth, stack, memSize, returnData, refund = 9 + // Optional: error, memory, storage + let mut field_count = 9; + if self.error.is_some() { + field_count += 1; + } + if self.memory.is_some() { + field_count += 1; + } + if self.storage.is_some() { + field_count += 1; + } + + let mut map = serializer.serialize_map(Some(field_count))?; + + map.serialize_entry("pc", &self.pc)?; + // op: emit the mnemonic string. Unknown bytes use the conventional + // "opcode 0xNN not defined" fallback. 
+ match opcode_name(self.op) { + Some(name) => map.serialize_entry("op", name)?, + None => map.serialize_entry("op", &format!("opcode 0x{:02x} not defined", self.op))?, + } + map.serialize_entry("gas", &self.gas)?; + map.serialize_entry("gasCost", &self.gas_cost)?; + map.serialize_entry("depth", &self.depth)?; + + // stack: Some β†’ array of hex strings; None β†’ JSON null (when disabled) + struct StackSerializer<'a>(&'a Option>); + impl serde::Serialize for StackSerializer<'_> { + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeSeq; + match self.0 { + None => serializer.serialize_none(), + Some(vec) => { + let mut seq = serializer.serialize_seq(Some(vec.len()))?; + for v in vec { + seq.serialize_element(&geth_uint256_hex(v))?; + } + seq.end() + } + } + } + } + map.serialize_entry("stack", &StackSerializer(&self.stack))?; + + map.serialize_entry("memSize", &self.mem_size)?; + map.serialize_entry( + "returnData", + &format!("0x{}", hex::encode(&self.return_data)), + )?; + map.serialize_entry("refund", &self.refund)?; + + if let Some(err) = &self.error { + map.serialize_entry("error", err)?; + } + + if let Some(mem) = &self.memory { + map.serialize_entry("memory", mem)?; + } + + if let Some(storage) = &self.storage { + struct StorageSerializer<'a>(&'a BTreeMap); + impl serde::Serialize for StorageSerializer<'_> { + fn serialize( + &self, + serializer: S, + ) -> Result { + use serde::ser::SerializeMap; + let mut m = serializer.serialize_map(Some(self.0.len()))?; + for (k, v) in self.0 { + let k_str = format!("0x{}", hex::encode(k.as_bytes())); + let v_str = format!("0x{}", hex::encode(v.as_bytes())); + m.serialize_entry(&k_str, &v_str)?; + } + m.end() + } + } + map.serialize_entry("storage", &StorageSerializer(storage))?; + } + + map.end() + } +} + +// ─── Streaming JSON helpers ─────────────────────────────────────────────────── + +/// Options controlling which optional fields are included in streaming JSON +/// output. 
The polarity is "disable"-prefixed to mirror the conventional +/// `--trace.nomemory` / `--trace.nostack` / `--trace.noreturndata` CLI flags. +#[derive(Debug, Clone, Default)] +pub struct StreamingOpts { + pub disable_stack: bool, + pub disable_memory: bool, + pub disable_storage: bool, + pub disable_return_data: bool, +} + +/// Emits one JSON object for `step` to `w`, terminated with `\n`. +/// +/// Field order matches the conventional streaming JSON shape used by EIP-3155 +/// compatible tracers: +/// `pc`, `op`, `gas`, `gasCost`, `memory`?, `memSize`, `stack`?, `returnData`?, +/// `depth`, `refund`, `opName`, `error`? +/// +/// `op` is a plain decimal integer. `gas`/`gasCost` are hex strings. `memSize` +/// and `refund` are plain decimal integers. The function writes directly to `w` +/// without intermediate allocation beyond the memory reassembly hex string. +pub fn write_streaming_step( + w: &mut W, + step: &OpcodeStep, + opts: &StreamingOpts, +) -> std::io::Result<()> { + write!(w, "{{\"pc\":{},\"op\":{}", step.pc, step.op)?; + write!(w, ",\"gas\":\"0x{:x}\"", step.gas)?; + write!(w, ",\"gasCost\":\"0x{:x}\"", step.gas_cost)?; + + // memory β€” single contiguous hex blob reassembled from chunks + let emit_memory = + !opts.disable_memory && step.memory.as_ref().map(|m| !m.is_empty()).unwrap_or(false); + if emit_memory && let Some(chunks) = &step.memory { + write!(w, ",\"memory\":\"0x")?; + for chunk in chunks { + write!(w, "{}", hex::encode(chunk.0))?; + } + write!(w, "\"")?; + } + + write!(w, ",\"memSize\":{}", step.mem_size)?; + + // stack β€” array of hex strings, bottom-first; omit when disabled or absent + if !opts.disable_stack + && let Some(stack) = &step.stack + { + write!(w, ",\"stack\":[")?; + for (i, v) in stack.iter().enumerate() { + if i > 0 { + write!(w, ",")?; + } + write!(w, "\"{}\"", geth_uint256_hex(v))?; + } + write!(w, "]")?; + } + + // returnData β€” omit when disabled or empty + if !opts.disable_return_data && !step.return_data.is_empty() 
{ + write!( + w, + ",\"returnData\":\"0x{}\"", + hex::encode(&step.return_data) + )?; + } + + write!(w, ",\"depth\":{}", step.depth)?; + write!(w, ",\"refund\":{}", step.refund)?; + + // opName + let op_name = match opcode_name(step.op) { + Some(name) => name.to_string(), + None => format!("opcode 0x{:02x} not defined", step.op), + }; + // Use serde_json to produce a correctly escaped JSON string value + let op_name_json = + serde_json::to_string(&op_name).map_err(|e| std::io::Error::other(e.to_string()))?; + write!(w, ",\"opName\":{}", op_name_json)?; + + // error β€” omit when None; use serde_json for correct escaping + if let Some(err) = &step.error { + let err_json = + serde_json::to_string(err).map_err(|e| std::io::Error::other(e.to_string()))?; + write!(w, ",\"error\":{}", err_json)?; + } + + writeln!(w, "}}") +} + +/// Emits the final summary JSON line to `w`, terminated with `\n`. +/// +/// Field order: `output`, `gasUsed`, `error`? +/// +/// `output` is lowercase hex without a `0x` prefix (empty bytes β†’ `""`). +/// `gasUsed` is a hex string (`"0x{:x}"`). `error` is omitted when `None`. +pub fn write_streaming_summary( + w: &mut W, + output: &[u8], + gas_used: u64, + error: Option<&str>, +) -> std::io::Result<()> { + write!( + w, + "{{\"output\":\"{}\",\"gasUsed\":\"0x{:x}\"", + hex::encode(output), + gas_used + )?; + if let Some(err) = error { + let err_json = + serde_json::to_string(err).map_err(|e| std::io::Error::other(e.to_string()))?; + write!(w, ",\"error\":{}", err_json)?; + } + writeln!(w, "}}") +} + +/// Emits a `stateRoot` JSON line to `w`, terminated with `\n`. +/// +/// The space after the colon is intentional β€” tooling that byte-parses this +/// line expects the literal string `"stateRoot": "` (with the space). 
+pub fn write_streaming_state_root( + w: &mut W, + state_root: ethereum_types::H256, +) -> std::io::Result<()> { + writeln!( + w, + "{{\"stateRoot\": \"0x{}\"}}", + hex::encode(state_root.as_bytes()) + ) +} + +impl serde::Serialize for OpcodeTraceResult { + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeMap; + let mut map = serializer.serialize_map(Some(4))?; + // `failed` is the inverse of `pass` β€” matches the conventional wire shape. + map.serialize_entry("failed", &!self.pass)?; + map.serialize_entry("gas", &self.gas_used)?; + map.serialize_entry("returnValue", &format!("0x{}", hex::encode(&self.output)))?; + map.serialize_entry("structLogs", &self.steps)?; + map.end() + } +} diff --git a/crates/networking/rpc/tracing.rs b/crates/networking/rpc/tracing.rs index 5c23745a457..76d9d7e8443 100644 --- a/crates/networking/rpc/tracing.rs +++ b/crates/networking/rpc/tracing.rs @@ -5,6 +5,7 @@ use ethrex_common::{ serde_utils, tracing::{CallTraceFrame, PrestateResult}, }; +use ethrex_vm::tracing::OpcodeTracerConfig; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -39,12 +40,25 @@ struct TraceConfig { reexec: Option, } +/// The tracer variant to use for a debug trace request. +/// +/// **Divergence from geth**: geth's default (when no `tracer` field is provided) is the +/// per-opcode tracer. ethrex keeps `CallTracer` as the default for compatibility with +/// Blockscout-style clients that rely on the no-tracer-specified β†’ callTracer behaviour. #[derive(Default, Deserialize)] #[serde(rename_all = "camelCase")] +// The wire-format names (`callTracer`, `prestateTracer`, `opcodeTracer`) are +// fixed by client convention; variants must keep the `Tracer` suffix to +// serialize correctly via `rename_all = "camelCase"`. 
+#[allow(clippy::enum_variant_names)] enum TracerType { #[default] CallTracer, PrestateTracer, + /// Per-opcode tracer emitting EIP-3155 step content under the de-facto + /// `structLogger` wrapper shape (`{failed, gas, returnValue, structLogs}`). + /// Selected via `"tracer": "opcodeTracer"`. + OpcodeTracer, } #[derive(Deserialize, Default)] @@ -171,6 +185,21 @@ impl RpcHandler for TraceTransactionRequest { PrestateResult::Diff(diff) => Ok(serde_json::to_value(diff)?), } } + TracerType::OpcodeTracer => { + let cfg: OpcodeTracerConfig = self + .trace_config + .tracer_config + .as_ref() + .map(|v| serde_json::from_value(v.clone())) + .transpose()? + .unwrap_or_default(); + let result = context + .blockchain + .trace_transaction_opcodes(self.tx_hash, reexec, timeout, cfg) + .await + .map_err(|err| RpcErr::Internal(err.to_string()))?; + Ok(serde_json::to_value(result)?) + } } } } @@ -282,6 +311,25 @@ impl RpcHandler for TraceBlockByNumberRequest { .collect::>()?; Ok(serde_json::to_value(block_trace)?) } + TracerType::OpcodeTracer => { + let cfg: OpcodeTracerConfig = self + .trace_config + .tracer_config + .as_ref() + .map(|v| serde_json::from_value(v.clone())) + .transpose()? + .unwrap_or_default(); + let opcode_traces = context + .blockchain + .trace_block_opcodes(block, reexec, timeout, cfg) + .await + .map_err(|err| RpcErr::Internal(err.to_string()))?; + let block_trace: BlockTrace<_> = opcode_traces + .into_iter() + .map(|(hash, result)| (hash, result).into()) + .collect(); + Ok(serde_json::to_value(block_trace)?) 
+ } } } } diff --git a/crates/vm/backends/levm/tracing.rs b/crates/vm/backends/levm/tracing.rs index 10de316d90a..a5e316e5abc 100644 --- a/crates/vm/backends/levm/tracing.rs +++ b/crates/vm/backends/levm/tracing.rs @@ -1,12 +1,20 @@ use ethrex_common::constants::EMPTY_KECCACK_HASH; use ethrex_common::tracing::{PrePostState, PrestateAccountState, PrestateResult, PrestateTrace}; use ethrex_common::types::{Block, Transaction}; -use ethrex_common::{Address, BigEndianHash, H256, U256, tracing::CallTrace, types::BlockHeader}; +use ethrex_common::{ + Address, BigEndianHash, H256, U256, + tracing::{CallTrace, OpcodeTraceResult}, + types::BlockHeader, +}; use ethrex_crypto::Crypto; use ethrex_levm::account::{AccountStatus, LevmAccount}; use ethrex_levm::db::gen_db::CacheDB; use ethrex_levm::vm::VMType; -use ethrex_levm::{db::gen_db::GeneralizedDatabase, tracing::LevmCallTracer, vm::VM}; +use ethrex_levm::{ + db::gen_db::GeneralizedDatabase, + tracing::{LevmCallTracer, LevmOpcodeTracer, OpcodeTracerConfig}, + vm::VM, +}; use crate::{EvmError, backends::levm::LEVM}; @@ -91,6 +99,30 @@ impl LEVM { } } + /// Run transaction with opcode (EIP-3155) tracer activated. + pub fn trace_tx_opcodes( + db: &mut GeneralizedDatabase, + block_header: &BlockHeader, + tx: &Transaction, + cfg: OpcodeTracerConfig, + vm_type: VMType, + crypto: &dyn Crypto, + ) -> Result { + let env = Self::setup_env( + tx, + tx.sender(crypto).map_err(|error| { + EvmError::Transaction(format!("Couldn't recover addresses with error: {error}")) + })?, + block_header, + db, + vm_type, + )?; + let mut vm = VM::new(env, db, tx, LevmCallTracer::disabled(), vm_type, crypto)?; + vm.opcode_tracer = LevmOpcodeTracer::new(cfg); + vm.execute()?; + Ok(vm.opcode_tracer.take_result()) + } + /// Run transaction with callTracer activated. 
pub fn trace_tx_calls( db: &mut GeneralizedDatabase, diff --git a/crates/vm/levm/Cargo.toml b/crates/vm/levm/Cargo.toml index 834650b5c4b..df3b71f0461 100644 --- a/crates/vm/levm/Cargo.toml +++ b/crates/vm/levm/Cargo.toml @@ -58,3 +58,4 @@ manual_saturating_arithmetic = "warn" [lib] path = "./src/lib.rs" + diff --git a/crates/vm/levm/src/call_frame.rs b/crates/vm/levm/src/call_frame.rs index 5468bd3171e..ae7e6d2f0fd 100644 --- a/crates/vm/levm/src/call_frame.rs +++ b/crates/vm/levm/src/call_frame.rs @@ -55,7 +55,10 @@ impl Stack { self.values .get_unchecked(self.offset..) .first_chunk::() - .ok_or(ExceptionalHalt::StackUnderflow)? + .ok_or(ExceptionalHalt::StackUnderflow { + stack_len: self.len(), + required: N, + })? }; // Due to previous error check in first_chunk, next_offset is guaranteed to be < STACK_LIMIT self.offset = next_offset; @@ -68,7 +71,10 @@ impl Stack { let value = *self .values .get(self.offset) - .ok_or(ExceptionalHalt::StackUnderflow)?; + .ok_or(ExceptionalHalt::StackUnderflow { + stack_len: self.len(), + required: 1, + })?; // The following operation can never overflow as both `self.offset` and N are within // STACK_LIMIT (1024). self.offset = self.offset.wrapping_add(1); @@ -84,7 +90,10 @@ impl Stack { let next_offset = self .offset .checked_sub(1) - .ok_or(ExceptionalHalt::StackOverflow)?; + .ok_or(ExceptionalHalt::StackOverflow { + stack_len: self.len(), + limit: STACK_LIMIT, + })?; // The following index cannot fail because `next_offset` has already been checked and // `self.offset` is known to be within `STACK_LIMIT`. @@ -112,7 +121,10 @@ impl Stack { let next_offset = self .offset .checked_sub(1) - .ok_or(ExceptionalHalt::StackOverflow)?; + .ok_or(ExceptionalHalt::StackOverflow { + stack_len: self.len(), + limit: STACK_LIMIT, + })?; // The following index cannot fail because `next_offset` has already been checked and // `self.offset` is known to be within `STACK_LIMIT`. 
@@ -154,7 +166,11 @@ impl Stack { let index = self.offset + N; if index >= self.values.len() { - return Err(ExceptionalHalt::StackUnderflow); + return Err(ExceptionalHalt::StackUnderflow { + stack_len: self.len(), + // swap requires top element plus the N-th element + required: N.saturating_add(1), + }); } #[expect(unsafe_code, reason = "self.offset always < STACK_LIMIT")] @@ -181,13 +197,20 @@ impl Stack { #[expect(clippy::arithmetic_side_effects)] let index = self.offset + N; if index >= self.values.len() { - return Err(ExceptionalHalt::StackUnderflow); + return Err(ExceptionalHalt::StackUnderflow { + stack_len: self.len(), + // dup requires N+1 items (0-indexed depth N means index N+1) + required: N.saturating_add(1), + }); } self.offset = self .offset .checked_sub(1) - .ok_or(ExceptionalHalt::StackOverflow)?; + .ok_or(ExceptionalHalt::StackOverflow { + stack_len: self.len(), + limit: STACK_LIMIT, + })?; #[expect(unsafe_code, reason = "index < size, offset-1 >= 0")] unsafe { diff --git a/crates/vm/levm/src/errors.rs b/crates/vm/levm/src/errors.rs index 4386848b0c2..8b45e23255f 100644 --- a/crates/vm/levm/src/errors.rs +++ b/crates/vm/levm/src/errors.rs @@ -54,10 +54,10 @@ impl From for VMError { #[derive(Debug, Clone, PartialEq, Eq, thiserror::Error, Serialize, Deserialize)] pub enum ExceptionalHalt { - #[error("Stack Underflow")] - StackUnderflow, - #[error("Stack Overflow")] - StackOverflow, + #[error("stack underflow ({stack_len} <=> {required})")] + StackUnderflow { stack_len: usize, required: usize }, + #[error("stack limit reached {limit} ({stack_len})")] + StackOverflow { stack_len: usize, limit: usize }, #[error("Invalid Jump")] InvalidJump, #[error("Opcode Not Allowed In Static Context")] diff --git a/crates/vm/levm/src/lib.rs b/crates/vm/levm/src/lib.rs index 625ef3a4b4d..9f2369f9e7b 100644 --- a/crates/vm/levm/src/lib.rs +++ b/crates/vm/levm/src/lib.rs @@ -75,6 +75,7 @@ pub mod gas_cost; pub mod hooks; pub mod memory; pub mod opcode_handlers; +pub mod 
opcode_tracer; pub mod opcodes; pub mod precompiles; pub mod tracing; diff --git a/crates/vm/levm/src/memory.rs b/crates/vm/levm/src/memory.rs index 4c763d8ff65..8132e813f92 100644 --- a/crates/vm/levm/src/memory.rs +++ b/crates/vm/levm/src/memory.rs @@ -65,6 +65,19 @@ impl Memory { self.len() == 0 } + /// Returns a copy of the live byte slice for this frame (from `current_base` to + /// `current_base + len`). Used by the struct-log tracer for memory capture. + pub fn live_bytes(&self) -> Vec { + if self.len == 0 { + return Vec::new(); + } + let buf = self.buffer.borrow(); + let end = self.current_base.saturating_add(self.len); + buf.get(self.current_base..end) + .map(<[u8]>::to_vec) + .unwrap_or_default() + } + /// Resizes the from the current base to fit the memory specified at new_memory_size. /// /// Note: new_memory_size is increased to the next 32 byte multiple. diff --git a/crates/vm/levm/src/opcode_handlers/dup.rs b/crates/vm/levm/src/opcode_handlers/dup.rs index 9523f20b5b8..a4d2dd4aca4 100644 --- a/crates/vm/levm/src/opcode_handlers/dup.rs +++ b/crates/vm/levm/src/opcode_handlers/dup.rs @@ -52,16 +52,25 @@ impl OpcodeHandler for OpDupNHandler { // Stack grows downwards, so we add the offset to get deeper elements // relative_offset is 1-indexed stack depth (17-235), convert to 0-indexed for array access // The n-th element (1-indexed) is at array index offset + (n-1) + let stack_len = vm.current_call_frame.stack.len(); + let required = usize::from(relative_offset); let absolute_offset = vm .current_call_frame .stack .offset .checked_add(usize::from(relative_offset).wrapping_sub(1)) - .ok_or(ExceptionalHalt::StackUnderflow)?; + .ok_or(ExceptionalHalt::StackUnderflow { + stack_len, + required, + })?; // Verify the offset is within stack bounds if absolute_offset >= vm.current_call_frame.stack.values.len() { - return Err(ExceptionalHalt::StackUnderflow.into()); + return Err(ExceptionalHalt::StackUnderflow { + stack_len, + required, + } + .into()); } 
#[expect(unsafe_code, reason = "bound already checked")] diff --git a/crates/vm/levm/src/opcode_handlers/exchange.rs b/crates/vm/levm/src/opcode_handlers/exchange.rs index e598dcdf6bc..b07188460ab 100644 --- a/crates/vm/levm/src/opcode_handlers/exchange.rs +++ b/crates/vm/levm/src/opcode_handlers/exchange.rs @@ -54,16 +54,25 @@ impl OpcodeHandler for OpSwapNHandler { // Stack grows downwards, so we add the offset to get deeper elements // SWAPN swaps top with the (n+1)th element where n = decoded relative_offset // The (n+1)th element (1-indexed) is at array index offset + n + let stack_len = vm.current_call_frame.stack.len(); + let required = usize::from(relative_offset).saturating_add(1); let absolute_offset = vm .current_call_frame .stack .offset .checked_add(usize::from(relative_offset)) - .ok_or(ExceptionalHalt::StackUnderflow)?; + .ok_or(ExceptionalHalt::StackUnderflow { + stack_len, + required, + })?; // Verify the offset is within stack bounds if absolute_offset >= STACK_LIMIT { - return Err(ExceptionalHalt::StackUnderflow.into()); + return Err(ExceptionalHalt::StackUnderflow { + stack_len, + required, + } + .into()); } let top_offset = vm.current_call_frame.stack.offset; @@ -131,17 +140,30 @@ impl OpcodeHandler for OpExchangeHandler { // Stack grows downwards, so we add the offsets to get deeper elements let absolute_offset = { let stack_offset = vm.current_call_frame.stack.offset; + let stack_len = vm.current_call_frame.stack.len(); + // EXCHANGE needs the deeper of the two elements, so required = max(r0, r1) + 1 + let required = usize::from(relative_offset.1).saturating_add(1); let q = stack_offset .checked_add(usize::from(relative_offset.0)) - .ok_or(ExceptionalHalt::StackUnderflow)?; + .ok_or(ExceptionalHalt::StackUnderflow { + stack_len, + required, + })?; let r = stack_offset .checked_add(usize::from(relative_offset.1)) - .ok_or(ExceptionalHalt::StackUnderflow)?; + .ok_or(ExceptionalHalt::StackUnderflow { + stack_len, + required, + })?; // Verify 
both offsets are within stack bounds if q >= STACK_LIMIT || r >= STACK_LIMIT { - return Err(ExceptionalHalt::StackUnderflow.into()); + return Err(ExceptionalHalt::StackUnderflow { + stack_len, + required, + } + .into()); } (q, r) diff --git a/crates/vm/levm/src/opcode_handlers/stack_memory_storage_flow.rs b/crates/vm/levm/src/opcode_handlers/stack_memory_storage_flow.rs index 7cf1fa89d03..52f19f6b6db 100644 --- a/crates/vm/levm/src/opcode_handlers/stack_memory_storage_flow.rs +++ b/crates/vm/levm/src/opcode_handlers/stack_memory_storage_flow.rs @@ -390,7 +390,7 @@ impl OpcodeHandler for OpJumpHandler { .increase_consumed_gas(gas_cost::JUMP)?; let target = vm.current_call_frame.stack.pop1()?; - jump(vm, target.try_into().unwrap_or(usize::MAX))?; + jump(vm, target.try_into().unwrap_or(usize::MAX), gas_cost::JUMP)?; Ok(OpcodeResult::Continue) } @@ -406,14 +406,22 @@ impl OpcodeHandler for OpJumpIHandler { let [target, condition] = *vm.current_call_frame.stack.pop()?; if !condition.is_zero() { - jump(vm, target.try_into().unwrap_or(usize::MAX))?; + jump(vm, target.try_into().unwrap_or(usize::MAX), gas_cost::JUMPI)?; } Ok(OpcodeResult::Continue) } } -fn jump(vm: &mut VM<'_>, target: usize) -> Result<(), VMError> { +/// Validate and take a jump. Fuses the destination JUMPDEST (advances PC past +/// it and charges its 1 gas inline) to save a dispatch cycle on the hot path. +/// +/// When the tracer is active we keep the fusion for performance and *synthesize* +/// a JUMPDEST entry in the trace log: `parent_gas_cost` is recorded as the +/// override for the parent JUMP/JUMPI step (so its `gasCost` doesn't absorb the +/// JUMPDEST charge), and the JUMPDEST step is pushed directly via +/// `synthesize_step` after the gas is charged. +fn jump(vm: &mut VM<'_>, target: usize, parent_gas_cost: u64) -> Result<(), VMError> { // Check target address validity. // - Target bytecode has to be a JUMPDEST. // - Target address must not be blacklisted (aka. 
the JUMPDEST must not be part of a literal). @@ -433,14 +441,97 @@ fn jump(vm: &mut VM<'_>, target: usize) -> Result<(), VMError> { .is_ok() }) { - // Update PC and skip the JUMPDEST instruction. - vm.current_call_frame.pc = target.wrapping_add(1); - vm.current_call_frame - .increase_consumed_gas(gas_cost::JUMPDEST)?; - + if vm.opcode_tracer.active { + // Override the parent JUMP/JUMPI's gasCost so the dispatch loop + // doesn't roll the upcoming JUMPDEST charge into it. + vm.opcode_tracer.last_opcode_gas_cost = Some(parent_gas_cost); + + // Capture the synthetic JUMPDEST step's state BEFORE charging its gas. + let synth = build_jumpdest_step(vm, target); + + // Fuse: charge JUMPDEST + advance PC past it. + vm.current_call_frame.pc = target.wrapping_add(1); + vm.current_call_frame + .increase_consumed_gas(gas_cost::JUMPDEST)?; + + vm.opcode_tracer.synthesize_step(synth); + } else { + // Hot path: fuse JUMP/JUMPI + JUMPDEST without any trace bookkeeping. + vm.current_call_frame.pc = target.wrapping_add(1); + vm.current_call_frame + .increase_consumed_gas(gas_cost::JUMPDEST)?; + } Ok(()) } else { // Target address is invalid. Err(ExceptionalHalt::InvalidJump.into()) } } + +/// Builds a synthetic JUMPDEST trace entry. Captures gas/stack/memory/storage +/// state at the moment of the call (i.e. *before* the JUMPDEST gas has been +/// charged), mirroring what `pre_step_capture` would have produced if JUMPDEST +/// were dispatched normally. 
+#[expect( + clippy::as_conversions, + reason = "pc/depth/mem_size bounded; fit in target types" +)] +fn build_jumpdest_step(vm: &VM<'_>, target: usize) -> ethrex_common::tracing::OpcodeStep { + use bytes::Bytes; + use ethrex_common::tracing::{MemoryChunk, OpcodeStep}; + + let cfg = &vm.opcode_tracer.cfg; + let gas = vm.current_call_frame.gas_remaining.max(0) as u64; + let depth = (vm.call_frames.len() as u32).saturating_add(1); + let refund = vm.substate.refunded_gas; + let mem_size = vm.current_call_frame.memory.len() as u64; + + let stack = if cfg.disable_stack { + None + } else { + Some(vm.collect_stack_for_trace()) + }; + + let memory = if cfg.enable_memory { + let bytes = vm.collect_memory_for_trace(); + if bytes.is_empty() { + Some(Vec::new()) + } else { + Some( + bytes + .chunks(32) + .map(|c| { + let mut arr = [0u8; 32]; + if let Some(dst) = arr.get_mut(..c.len()) { + dst.copy_from_slice(c); + } + MemoryChunk(arr) + }) + .collect(), + ) + } + } else { + None + }; + + let return_data = if cfg.enable_return_data { + vm.current_call_frame.sub_return_data.clone() + } else { + Bytes::new() + }; + + OpcodeStep { + pc: target as u64, + op: Opcode::JUMPDEST as u8, + gas, + gas_cost: gas_cost::JUMPDEST, + mem_size, + depth, + return_data, + refund, + stack, + memory, + storage: None, + error: None, + } +} diff --git a/crates/vm/levm/src/opcode_handlers/system.rs b/crates/vm/levm/src/opcode_handlers/system.rs index 70dd5faed6d..b48322f4134 100644 --- a/crates/vm/levm/src/opcode_handlers/system.rs +++ b/crates/vm/levm/src/opcode_handlers/system.rs @@ -132,6 +132,16 @@ impl OpcodeHandler for OpCallHandler { vm.increase_state_gas(STATE_GAS_NEW_ACCOUNT)?; } + // Struct-log: record the geth-compatible CALL gasCost. + // Geth's gasCost for CALL family = intrinsic_overhead + callGasTemp (forwarded gas + // WITHOUT stipend). LEVM's `gas_cost` already equals `call_gas_costs + gas_forwarded`, + // i.e. `intrinsic + callGasTemp`. 
Stipend is added later inside the child frame, after + // the tracer fires, so it is NOT part of the reported gasCost. + if vm.opcode_tracer.active { + let geth_cost = gas_cost.saturating_add(eip7702_gas_consumed); + vm.opcode_tracer.last_opcode_gas_cost = Some(geth_cost); + } + // Resize memory: this is necessary for multiple reasons: // - Make sure the memory is expanded. // - When there is return data, preallocate it because it won't be possible while the next @@ -228,6 +238,12 @@ impl OpcodeHandler for OpCallCodeHandler { .ok_or(ExceptionalHalt::OutOfGas)?, )?; + // Struct-log: geth-compatible CALLCODE gasCost (intrinsic + forwarded, no stipend). + if vm.opcode_tracer.active { + let geth_cost = gas_cost.saturating_add(eip7702_gas_consumed); + vm.opcode_tracer.last_opcode_gas_cost = Some(geth_cost); + } + // Resize memory: this is necessary for multiple reasons: // - Make sure the memory is expanded. // - When there is return data, preallocate it because it won't be possible while the next @@ -317,10 +333,16 @@ impl OpcodeHandler for OpDelegateCallHandler { .ok_or(ExceptionalHalt::OutOfGas)?, )?; + // Struct-log: geth-compatible DELEGATECALL gasCost (intrinsic + forwarded). + if vm.opcode_tracer.active { + let geth_cost = gas_cost.saturating_add(eip7702_gas_consumed); + vm.opcode_tracer.last_opcode_gas_cost = Some(geth_cost); + } + // Resize memory: this is necessary for multiple reasons: // - Make sure the memory is expanded. // - When there is return data, preallocate it because it won't be possible while the next - // call frame is active. + // call frame is available. vm.current_call_frame.memory.resize(new_memory_size)?; // Trace CALL operation. @@ -408,6 +430,12 @@ impl OpcodeHandler for OpStaticCallHandler { .ok_or(ExceptionalHalt::OutOfGas)?, )?; + // Struct-log: geth-compatible STATICCALL gasCost (intrinsic + forwarded). 
+ if vm.opcode_tracer.active { + let geth_cost = gas_cost.saturating_add(eip7702_gas_consumed); + vm.opcode_tracer.last_opcode_gas_cost = Some(geth_cost); + } + // Resize memory: this is necessary for multiple reasons: // - Make sure the memory is expanded. // - When there is return data, preallocate it because it won't be possible while the next @@ -477,13 +505,18 @@ impl OpcodeHandler for OpCreateHandler { let [value_in_wei, code_offset, code_len] = *vm.current_call_frame.stack.pop()?; let (code_len, code_offset) = size_offset_to_usize(code_len, code_offset)?; - vm.current_call_frame - .increase_consumed_gas(gas_cost::create( - calculate_memory_size(code_offset, code_len)?, - vm.current_call_frame.memory.len(), - code_len, - vm.env.config.fork, - )?)?; + let create_gas = gas_cost::create( + calculate_memory_size(code_offset, code_len)?, + vm.current_call_frame.memory.len(), + code_len, + vm.env.config.fork, + )?; + vm.current_call_frame.increase_consumed_gas(create_gas)?; + + // Struct-log: record the opcode-level gas before generic_create charges forwarded gas. + if vm.opcode_tracer.active { + vm.opcode_tracer.last_opcode_gas_cost = Some(create_gas); + } vm.generic_create(value_in_wei, code_offset, code_len, None) } @@ -502,13 +535,18 @@ impl OpcodeHandler for OpCreate2Handler { let [value_in_wei, code_offset, code_len, salt] = *vm.current_call_frame.stack.pop()?; let (code_len, code_offset) = size_offset_to_usize(code_len, code_offset)?; - vm.current_call_frame - .increase_consumed_gas(gas_cost::create_2( - calculate_memory_size(code_offset, code_len)?, - vm.current_call_frame.memory.len(), - code_len, - vm.env.config.fork, - )?)?; + let create2_gas = gas_cost::create_2( + calculate_memory_size(code_offset, code_len)?, + vm.current_call_frame.memory.len(), + code_len, + vm.env.config.fork, + )?; + vm.current_call_frame.increase_consumed_gas(create2_gas)?; + + // Struct-log: record the opcode-level gas before generic_create charges forwarded gas. 
+ if vm.opcode_tracer.active { + vm.opcode_tracer.last_opcode_gas_cost = Some(create2_gas); + } vm.generic_create(value_in_wei, code_offset, code_len, Some(salt)) } diff --git a/crates/vm/levm/src/opcode_tracer.rs b/crates/vm/levm/src/opcode_tracer.rs new file mode 100644 index 00000000000..b01041d906b --- /dev/null +++ b/crates/vm/levm/src/opcode_tracer.rs @@ -0,0 +1,393 @@ +use bytes::Bytes; +use ethrex_common::{ + H256, U256, + tracing::{ + MemoryChunk, OpcodeStep, OpcodeTraceResult, StreamingOpts, write_streaming_state_root, + write_streaming_step, write_streaming_summary, + }, +}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +/// Configuration for the per-opcode (EIP-3155) tracer. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(rename_all = "camelCase", default)] +pub struct OpcodeTracerConfig { + /// When true, stack values are not included in each step. + pub disable_stack: bool, + /// When true, memory contents are included in each step. + pub enable_memory: bool, + /// When true, storage diffs at SLOAD/SSTORE steps are not captured. + pub disable_storage: bool, + /// When true, return data from the previous sub-call is included. + pub enable_return_data: bool, + /// Maximum number of log entries to collect. 0 = unlimited. + pub limit: usize, +} + +/// Per-opcode (EIP-3155) tracer, emitted under the de-facto cross-client +/// `structLogger` wrapper shape. +/// +/// Use `LevmOpcodeTracer::disabled()` when tracing is not wanted; +/// the dispatch-loop guard is a single `if self.opcode_tracer.active` branch +/// with no other overhead on the fast path. +pub struct LevmOpcodeTracer { + /// Whether this tracer is active. + pub active: bool, + /// Configuration. + pub cfg: OpcodeTracerConfig, + /// Collected per-step entries. + pub logs: Vec, + /// Final output bytes (from RETURN / REVERT). + pub output: Bytes, + /// Top-level error string, if the transaction reverted. 
+ pub error: Option, + /// Gas used by the transaction. + pub gas_used: u64, + /// Explicit gas cost written by CALL/CALLCODE/DELEGATECALL/STATICCALL/CREATE/CREATE2 + /// handlers before invoking the child frame, and by `jump()` when JUMP/JUMPI is + /// fused with JUMPDEST under active tracing. The dispatch loop prefers this value + /// over the (incorrect) gas-diff that would include forwarded gas. + pub last_opcode_gas_cost: Option, + /// Index in `logs` of the entry that the next `finalize_step` should patch. + /// `Some(i)` is set by `pre_step_capture` after a push; `None` after the + /// `limit` cap is reached (so `finalize_step` is a no-op). Synthesized + /// steps (e.g. fused JUMPDEST) push directly without touching this index, + /// preserving the parent opcode's pending finalize target. + pub last_step_index: Option, + /// When `Some`, each finalized step is written to this sink and the entry is + /// dropped from `logs` (streaming mode, O(1) peak memory). When `None`, steps + /// accumulate in `logs` (RPC mode). Setting this makes the tracer non-Clone. + pub stream: Option>, + /// EIP-3155 emission options for the streaming sink. Mirrors `cfg` polarity- + /// inverted (enableβ†’disable) at construction. + pub stream_opts: StreamingOpts, + /// Counts steps that have been streamed (so cap checks include them). + pub streamed_count: u64, + /// Stores the last write error encountered when streaming. Cleared by + /// `take_stream_error`. 
+ pub stream_error: Option, +} + +impl std::fmt::Debug for LevmOpcodeTracer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LevmOpcodeTracer") + .field("active", &self.active) + .field("cfg", &self.cfg) + .field("logs", &self.logs) + .field("output", &self.output) + .field("error", &self.error) + .field("gas_used", &self.gas_used) + .field("last_opcode_gas_cost", &self.last_opcode_gas_cost) + .field("last_step_index", &self.last_step_index) + .field("stream", &self.stream.as_ref().map(|_| "")) + .field("stream_opts", &self.stream_opts) + .field("streamed_count", &self.streamed_count) + .field("stream_error", &self.stream_error) + .finish() + } +} + +impl LevmOpcodeTracer { + /// Returns an inactive tracer. No allocations; zero overhead on the hot path. + pub fn disabled() -> Self { + Self { + active: false, + cfg: OpcodeTracerConfig::default(), + logs: Vec::new(), + output: Bytes::new(), + error: None, + gas_used: 0, + last_opcode_gas_cost: None, + last_step_index: None, + stream: None, + stream_opts: StreamingOpts::default(), + streamed_count: 0, + stream_error: None, + } + } + + /// Returns an active tracer with the given config. Steps accumulate in + /// `logs` (RPC mode). + pub fn new(cfg: OpcodeTracerConfig) -> Self { + Self { + active: true, + cfg, + logs: Vec::new(), + output: Bytes::new(), + error: None, + gas_used: 0, + last_opcode_gas_cost: None, + last_step_index: None, + stream: None, + stream_opts: StreamingOpts::default(), + streamed_count: 0, + stream_error: None, + } + } + + /// Returns an active tracer that writes each finalized step directly to + /// `sink` (streaming mode). Peak memory is O(1) regardless of trace + /// length. The RPC `logs` accumulator is not used. 
+ pub fn streaming(cfg: OpcodeTracerConfig, sink: Box) -> Self { + let stream_opts = StreamingOpts { + disable_stack: cfg.disable_stack, + disable_memory: !cfg.enable_memory, + disable_storage: cfg.disable_storage, + disable_return_data: !cfg.enable_return_data, + }; + Self { + active: true, + cfg, + logs: Vec::new(), + output: Bytes::new(), + error: None, + gas_used: 0, + last_opcode_gas_cost: None, + last_step_index: None, + stream: Some(sink), + stream_opts, + streamed_count: 0, + stream_error: None, + } + } + + /// Captures pre-step state, building and buffering an `OpcodeStep` entry. + /// + /// Called BEFORE the opcode executes. `pc` must be the address of the + /// current opcode (before `advance_pc(1)`). + /// + /// `stack_view` must already be bottom-first (caller reverses LEVM's top-first + /// layout) and empty when `cfg.disable_stack` is true. + /// + /// `memory_view` is the live byte slice for the current frame (caller provides + /// this only when `cfg.enable_memory` is true; otherwise pass `&[]`). + /// + /// `storage_kv` is pre-fetched by the caller via `read_storage_for_trace`; it is + /// `None` for all opcodes except SLOAD/SSTORE (or when storage capture is disabled). + #[expect( + clippy::too_many_arguments, + reason = "all fields are required per-step state from the dispatch-loop hook" + )] + #[expect( + clippy::as_conversions, + clippy::arithmetic_side_effects, + reason = "streamed_count fits in usize on supported 64-bit targets; addition bounded by VM step count" + )] + pub fn pre_step_capture( + &mut self, + pc: u64, + opcode: u8, + gas: u64, + depth: u32, + refund: u64, + stack_view: &[U256], + memory_view: &[u8], + mem_size: u64, + return_data: &Bytes, + storage_kv: Option<(H256, H256)>, + ) { + // After a streaming write failure, stop accumulating β€” the caller is + // expected to surface `take_stream_error` and abort. Without this guard + // `logs` would silently grow into RPC-mode behavior on a stream sink. 
+ if self.stream_error.is_some() { + self.last_step_index = None; + return; + } + + // Enforce limit: stop appending once the cap is reached (counting both + // buffered and already-streamed steps). Clearing the patch index ensures + // `finalize_step` does not clobber the last retained step. + let total = self.streamed_count as usize + self.logs.len(); + if self.cfg.limit > 0 && total >= self.cfg.limit { + self.last_step_index = None; + return; + } + + // Stack: Some(vec) when capture enabled; None when disabled (emits JSON null). + let stack = if !self.cfg.disable_stack { + Some(stack_view.to_vec()) + } else { + None + }; + + // Memory: chunked 32-byte slices when enabled; field omitted otherwise. + // When enabled and memory is empty, emit `Some(vec![])` so the field + // stays present (an empty array signals "captured, just empty"). + let memory = if self.cfg.enable_memory { + if memory_view.is_empty() { + Some(vec![]) + } else { + let chunks = memory_view + .chunks(32) + .map(|c| { + let mut arr = [0u8; 32]; + if let Some(dst) = arr.get_mut(..c.len()) { + dst.copy_from_slice(c); + } + MemoryChunk(arr) + }) + .collect(); + Some(chunks) + } + } else { + None + }; + + // Storage: single-entry map for this step only (no accumulation). + let storage = storage_kv.map(|(key, value)| { + let mut m = BTreeMap::new(); + m.insert(key, value); + m + }); + + // returnData: actual bytes when enabled; empty Bytes otherwise. 
+ let return_data_field = if self.cfg.enable_return_data { + return_data.clone() + } else { + Bytes::new() + }; + + let log = OpcodeStep { + pc, + op: opcode, + gas, + gas_cost: 0, // patched in finalize_step + mem_size, + depth, + return_data: return_data_field, + refund, + stack, + memory, + storage, + error: None, // patched in finalize_step + }; + + self.last_step_index = Some(self.logs.len()); + self.logs.push(log); + } + + /// Patches the entry recorded by the most recent `pre_step_capture` with the + /// actual gas cost and any step-level error string. Called immediately after + /// the opcode handler returns. + /// + /// No-op when the most recent `pre_step_capture` did not push (limit reached). + /// Synthesized entries (e.g. fused JUMPDEST) push directly into `logs` without + /// updating `last_step_index`, so this still patches the correct parent entry. + /// + /// In streaming mode, flushes the patched entry AND any synthetic steps that + /// were appended after it (e.g. fused JUMPDEST) in order, then drops them + /// from `logs`. + #[expect( + clippy::as_conversions, + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + reason = "idx..end range is valid by construction; usizeβ†’u64 fits on 64-bit; step count addition bounded by limit" + )] + pub fn finalize_step(&mut self, gas_cost: u64, error: Option<&str>) { + let Some(idx) = self.last_step_index else { + return; + }; + if let Some(log) = self.logs.get_mut(idx) { + log.gas_cost = gas_cost; + log.error = error.map(str::to_owned); + } + + // Streaming mode: flush the patched parent step plus any synthetic steps + // appended after it (e.g. fused JUMPDEST), then drop them from `logs`. + if self.stream.is_some() { + let end = self.logs.len(); + for i in idx..end { + // Safety: we only enter this branch when stream is Some, and we + // reborrow inside the loop to satisfy the borrow checker. 
+ if let Some(sink) = self.stream.as_mut() { + match write_streaming_step(sink, &self.logs[i], &self.stream_opts) { + Ok(()) => {} + Err(e) => { + self.stream_error = Some(e); + self.stream = None; + // Truncate whatever we already iterated up to (i entries from idx). + let flushed = i - idx; + self.streamed_count += flushed as u64; + self.logs.truncate(idx); + self.last_step_index = None; + return; + } + } + } + } + let flushed = end - idx; + self.streamed_count += flushed as u64; + self.logs.truncate(idx); + self.last_step_index = None; + } + } + + /// Pushes a fully-formed synthetic step (used for fused JUMPDEST under JUMP/JUMPI). + /// + /// Does **not** update `last_step_index`, so the pending `finalize_step` for the + /// parent opcode continues to patch the parent's entry. The limit cap is honored + /// β€” synthetic pushes are dropped once `cfg.limit` is reached. + /// + /// In streaming mode the step is buffered in `logs` exactly like in RPC mode; + /// `finalize_step` then flushes both the parent and all following synthetic + /// steps in order, ensuring correct ordering in the output. + #[expect( + clippy::as_conversions, + clippy::arithmetic_side_effects, + reason = "streamed_count fits in usize on supported 64-bit targets; addition bounded by VM step count" + )] + pub fn synthesize_step(&mut self, step: OpcodeStep) { + // In streaming mode `logs` is truncated after every `finalize_step`, so + // a `logs.len()`-only check would never fire. Include `streamed_count` + // to honor the cap across both modes uniformly. + let total = self.streamed_count as usize + self.logs.len(); + if self.cfg.limit > 0 && total >= self.cfg.limit { + return; + } + self.logs.push(step); + } + + /// Assembles the final `OpcodeTraceResult` after the transaction finishes. 
+    pub fn take_result(&mut self) -> OpcodeTraceResult {
+        // Move the accumulated buffers out, leaving empty ones behind.
+        let steps = std::mem::take(&mut self.logs);
+        let output = std::mem::take(&mut self.output);
+        OpcodeTraceResult {
+            pass: self.error.is_none(),
+            gas_used: self.gas_used,
+            output,
+            steps,
+        }
+    }
+
+    /// Emits the closing `{output, gasUsed, error?}` summary line to the
+    /// streaming sink and then flushes the sink.
+    ///
+    /// Does nothing and returns `Ok(())` when no sink is attached.
+    ///
+    /// # Errors
+    /// Propagates any I/O error from writing the summary or flushing the sink.
+    pub fn flush_summary(
+        &mut self,
+        output: &[u8],
+        gas_used: u64,
+        error: Option<&str>,
+    ) -> std::io::Result<()> {
+        let Some(sink) = self.stream.as_mut() else {
+            return Ok(());
+        };
+        write_streaming_summary(sink, output, gas_used, error)?;
+        sink.flush()
+    }
+
+    /// Emits the `{"stateRoot": "0x..."}` line to the streaming sink and then
+    /// flushes the sink. Intended to be called by the statetest CLI after
+    /// `flush_summary`, for conventional streaming shape parity.
+    ///
+    /// Does nothing and returns `Ok(())` when no sink is attached.
+    ///
+    /// # Errors
+    /// Propagates any I/O error from writing the line or flushing the sink.
+    pub fn flush_state_root(&mut self, state_root: H256) -> std::io::Result<()> {
+        let Some(sink) = self.stream.as_mut() else {
+            return Ok(());
+        };
+        write_streaming_state_root(sink, state_root)?;
+        sink.flush()
+    }
+
+    /// Returns the last write error encountered during streaming, clearing the
+    /// stored error. The binary can check this after `vm.execute()` completes.
+ pub fn take_stream_error(&mut self) -> Option { + self.stream_error.take() + } +} diff --git a/crates/vm/levm/src/tracing.rs b/crates/vm/levm/src/tracing.rs index b10ddfba53d..5c3983a2a55 100644 --- a/crates/vm/levm/src/tracing.rs +++ b/crates/vm/levm/src/tracing.rs @@ -1,3 +1,4 @@ +pub use crate::opcode_tracer::{LevmOpcodeTracer, OpcodeTracerConfig}; use crate::{ errors::{ContextResult, InternalError, TxResult, VMError}, vm::VM, diff --git a/crates/vm/levm/src/vm.rs b/crates/vm/levm/src/vm.rs index df15dbc1d09..d1ef694b31e 100644 --- a/crates/vm/levm/src/vm.rs +++ b/crates/vm/levm/src/vm.rs @@ -13,6 +13,7 @@ use crate::{ hook::{Hook, get_hooks}, }, memory::Memory, + opcode_tracer::LevmOpcodeTracer, opcodes::OpCodeFn, precompiles::{ self, SIZE_PRECOMPILES_CANCUN, SIZE_PRECOMPILES_PRAGUE, SIZE_PRECOMPILES_PRE_CANCUN, @@ -21,7 +22,7 @@ use crate::{ }; use bytes::Bytes; use ethrex_common::{ - Address, H160, H256, U256, + Address, BigEndianHash, H160, H256, U256, tracing::CallType, types::{AccessListEntry, Code, Fork, Log, Transaction, fee_config::FeeConfig}, }; @@ -435,6 +436,8 @@ pub struct VM<'a> { pub storage_original_values: FxHashMap>, /// Call tracer for execution tracing. pub tracer: LevmCallTracer, + /// Opcode (EIP-3155) tracer. Disabled by default; zero overhead when inactive. + pub opcode_tracer: LevmOpcodeTracer, /// Debug mode for development diagnostics. pub debug_mode: DebugMode, /// Pool of reusable stacks to reduce allocations. @@ -481,6 +484,7 @@ impl<'a> VM<'a> { hooks: get_hooks(&vm_type), storage_original_values: FxHashMap::default(), tracer, + opcode_tracer: LevmOpcodeTracer::disabled(), debug_mode: DebugMode::disabled(), stack_pool: Vec::new(), vm_type, @@ -647,9 +651,63 @@ impl<'a> VM<'a> { let mut timings = crate::timings::OPCODE_TIMINGS.lock().expect("poison"); loop { + // Capture pc BEFORE advance_pc(1) β€” this is the address of the current opcode. 
+ let pc_of_current_op = self.current_call_frame.pc; let opcode = self.current_call_frame.next_opcode(); self.advance_pc(1)?; + // Hoist the active flag to avoid reading it twice per opcode. + let tracer_active = self.opcode_tracer.active; + + // Struct-log pre-step capture (single branch on the fast path when disabled). + let gas_before_op = if tracer_active { + #[expect( + clippy::as_conversions, + reason = "gas_remaining is i64; clamp to 0 before converting to u64" + )] + let gas_before = self.current_call_frame.gas_remaining.max(0) as u64; + #[expect( + clippy::as_conversions, + reason = "call depth bounded by STACK_LIMIT=1024, fits in u32" + )] + let depth = (self.call_frames.len() as u32).saturating_add(1); + let refund = self.substate.refunded_gas; + let stack_view = self.collect_stack_for_trace(); + let mem_view = self.collect_memory_for_trace(); + // mem_size always reflects actual memory size, regardless of enable_memory. + #[expect( + clippy::as_conversions, + reason = "memory size is bounded by gas; fits in u64" + )] + let mem_size_for_trace = self.current_call_frame.memory.len() as u64; + let storage_kv = self.read_storage_for_trace(opcode); + let return_data = if self.opcode_tracer.cfg.enable_return_data { + self.current_call_frame.sub_return_data.clone() + } else { + Bytes::new() + }; + #[expect( + clippy::as_conversions, + reason = "pc is usize, fits in u64 on supported targets" + )] + let pc_u64 = pc_of_current_op as u64; + self.opcode_tracer.pre_step_capture( + pc_u64, + opcode, + gas_before, + depth, + refund, + &stack_view, + &mem_view, + mem_size_for_trace, + &return_data, + storage_kv, + ); + gas_before + } else { + 0 + }; + #[cfg(feature = "perf_opcode_timings")] let opcode_time_start = std::time::Instant::now(); @@ -663,6 +721,25 @@ impl<'a> VM<'a> { timings.update(opcode, time); } + // Struct-log post-step: patch gas_cost and error into the buffered entry. 
+ if tracer_active { + #[expect( + clippy::as_conversions, + reason = "gas_remaining is i64; clamp to 0 before converting to u64" + )] + let gas_after = self.current_call_frame.gas_remaining.max(0) as u64; + // Prefer the explicit opcode-overhead cost written by CALL/CREATE handlers; + // fall back to the gas diff for all other opcodes. + let gas_cost = self + .opcode_tracer + .last_opcode_gas_cost + .take() + .unwrap_or_else(|| gas_before_op.saturating_sub(gas_after)); + let err_str = error.get().map(|e| e.to_string()); + self.opcode_tracer + .finalize_step(gas_cost, err_str.as_deref()); + } + let result = match op_result { OpcodeResult::Continue => continue, OpcodeResult::Halt => match error.take() { @@ -740,6 +817,17 @@ impl<'a> VM<'a> { self.tracer.exit_context(&ctx_result, true)?; + // Struct-log end-of-tx capture: record final output, gas used, and revert error. + // gas matches geth's `executionResult.Gas` which is post-refund (`receipt.GasUsed`). + if self.opcode_tracer.active { + self.opcode_tracer.output = ctx_result.output.clone(); + self.opcode_tracer.gas_used = ctx_result.gas_spent; + self.opcode_tracer.error = match ctx_result.result { + TxResult::Revert(ref err) => Some(err.to_string()), + _ => None, + }; + } + // Only include logs if transaction succeeded. When a transaction reverts, // no logs should be emitted (including EIP-7708 Transfer logs). let logs = if ctx_result.is_success() { @@ -762,6 +850,92 @@ impl<'a> VM<'a> { Ok(report) } + + // ── Struct-log helper methods ───────────────────────────────────────────── + + /// Collects the current stack in bottom-first order for struct-log emission. + /// + /// LEVM stack is top-first in memory (`values[offset]` = top), so we reverse + /// the active slice to produce the bottom-first wire format geth uses. + /// Returns an empty `Vec` when `cfg.disable_stack` is true. 
+    // NOTE(review): the element type of the returned `Vec` is missing from this
+    // copy of the patch; the caller passes the result as `&[U256]`, so it is
+    // presumably `U256` — confirm against the original file.
+    pub fn collect_stack_for_trace(&self) -> Vec {
+        use crate::constants::STACK_LIMIT;
+        if self.opcode_tracer.cfg.disable_stack {
+            return Vec::new();
+        }
+        let s = &self.current_call_frame.stack;
+        // offset <= STACK_LIMIT by stack invariant.
+        // `values[offset..STACK_LIMIT]` is the live region; reversing it yields
+        // bottom-first order. An out-of-range slice falls back to an empty Vec.
+        s.values
+            .get(s.offset..STACK_LIMIT)
+            .map(|slice| slice.iter().rev().copied().collect())
+            .unwrap_or_default()
+    }
+
+    /// Collects the live memory bytes for the current frame.
+    ///
+    /// Returns an empty `Vec` when `cfg.enable_memory` is false or memory is empty.
+    // NOTE(review): the element type of the returned `Vec` is missing from this
+    // copy of the patch; the caller passes the result as `&[u8]`, so it is
+    // presumably `u8` — confirm against the original file.
+    pub fn collect_memory_for_trace(&self) -> Vec {
+        if !self.opcode_tracer.cfg.enable_memory {
+            return Vec::new();
+        }
+        self.current_call_frame.memory.live_bytes()
+    }
+
+    /// Pre-reads the storage key/value for the current SLOAD or SSTORE opcode.
+    ///
+    /// Returns `None` when:
+    /// - `cfg.disable_storage` is set, or
+    /// - `opcode` is not SLOAD (0x54) or SSTORE (0x55), or
+    /// - the stack is empty (guard against underflow before the handler runs), or
+    /// - `opcode` is SSTORE and the stack holds fewer than two items, or
+    /// - the storage read fails for any reason (including `AccountNotFound` —
+    ///   the trace omits the entry rather than emitting an ambiguous zero).
+    ///
+    /// For SLOAD: key = `stack.top`; value = the *current* stored value read from the DB.
+    /// For SSTORE: key = `stack.top`, value = `stack[top-1]` (the new value being written).
+    pub fn read_storage_for_trace(&mut self, opcode: u8) -> Option<(H256, H256)> {
+        const SLOAD: u8 = 0x54;
+        const SSTORE: u8 = 0x55;
+
+        if self.opcode_tracer.cfg.disable_storage {
+            return None;
+        }
+        if opcode != SLOAD && opcode != SSTORE {
+            return None;
+        }
+
+        // Need at least one element on stack for SLOAD, two for SSTORE.
+        use crate::constants::STACK_LIMIT;
+        let offset = self.current_call_frame.stack.offset;
+        if offset >= STACK_LIMIT {
+            return None; // stack empty
+        }
+
+        // SLOAD/SSTORE operate on the call's storage context (`to`), not the code's
+        // address. Under DELEGATECALL/CALLCODE these differ.
+        let addr = self.current_call_frame.to;
+
+        // `values[offset]` is the top of the stack: the storage key for both opcodes.
+        let stack_values = &self.current_call_frame.stack.values;
+        let key_u256 = *stack_values.get(offset)?;
+        let key = BigEndianHash::from_uint(&key_u256);
+
+        if opcode == SLOAD {
+            // Omit the entry on any read failure (incl. account not yet cached);
+            // a zero value would be indistinguishable from a legitimate never-written slot.
+            let v = self.get_storage_value(addr, key).ok()?;
+            let value = BigEndianHash::from_uint(&v);
+            Some((key, value))
+        } else {
+            // SSTORE: need two stack elements.
+            let next_offset = offset.checked_add(1)?;
+            if next_offset >= STACK_LIMIT {
+                return None;
+            }
+            // values[offset+1] is the new value being written (second from top = stack[top-1]).
+            let value_u256 = *self.current_call_frame.stack.values.get(next_offset)?;
+            let value = BigEndianHash::from_uint(&value_u256);
+            Some((key, value))
+        }
+    }
 }

 impl Substate {
diff --git a/crates/vm/tracing.rs b/crates/vm/tracing.rs
index b2255ad71d0..bd342ac5805 100644
--- a/crates/vm/tracing.rs
+++ b/crates/vm/tracing.rs
@@ -1,6 +1,7 @@
 use crate::backends::levm::LEVM;
-use ethrex_common::tracing::{CallTrace, PrestateResult};
+use ethrex_common::tracing::{CallTrace, OpcodeTraceResult, PrestateResult};
 use ethrex_common::types::Block;
+pub use ethrex_levm::tracing::OpcodeTracerConfig;

 use crate::{Evm, EvmError};

@@ -63,6 +64,32 @@ impl Evm {
         )
     }

+    /// Executes a single tx and captures the per-opcode (EIP-3155) trace.
+    /// Assumes that the received state already contains changes from previous transactions.
+ pub fn trace_tx_opcodes( + &mut self, + block: &Block, + tx_index: usize, + cfg: OpcodeTracerConfig, + ) -> Result { + let tx = block + .body + .transactions + .get(tx_index) + .ok_or(EvmError::Custom( + "Missing Transaction for Trace".to_string(), + ))?; + + LEVM::trace_tx_opcodes( + &mut self.db, + &block.header, + tx, + cfg, + self.vm_type, + self.crypto.as_ref(), + ) + } + /// Reruns the given block, saving the changes on the state, doesn't output any results or receipts. /// If the optional argument `stop_index` is set, the run will stop just before executing the transaction at that index /// and won't process the withdrawals afterwards. diff --git a/fixtures/networks/default.yaml b/fixtures/networks/default.yaml index 0c77992d089..ea4193a8f70 100644 --- a/fixtures/networks/default.yaml +++ b/fixtures/networks/default.yaml @@ -11,21 +11,22 @@ participants: # cl_image: sigp/lighthouse:v8.0.0-rc.1 # validator_count: 32 - el_type: besu - el_image: ethpandaops/besu:main-142a5e6 + el_image: ethpandaops/besu:main-6d54451 cl_type: lighthouse - cl_image: sigp/lighthouse:v8.0.0-rc.1 + cl_image: sigp/lighthouse:v8.1.3 validator_count: 32 - el_type: geth - el_image: ethereum/client-go:v1.15.2 + el_image: ethereum/client-go:v1.17.3 cl_type: lighthouse - cl_image: sigp/lighthouse:v8.0.0-rc.1 + cl_image: sigp/lighthouse:v8.1.3 validator_count: 32 count: 1 - el_type: ethrex el_image: ethrex:local cl_type: lighthouse - cl_image: sigp/lighthouse:v8.0.0-rc.1 + cl_image: sigp/lighthouse:v8.1.3 validator_count: 32 + supernode: true # snooper_enabled: true ethereum_metrics_exporter_enabled: true diff --git a/test/tests/common/mod.rs b/test/tests/common/mod.rs index 748a09c9d7c..f01367c8e07 100644 --- a/test/tests/common/mod.rs +++ b/test/tests/common/mod.rs @@ -3,4 +3,5 @@ mod base64_tests; mod blobs_bundle_tests; mod rkyv_utils_tests; mod serde_utils_tests; +mod tracing_streaming_tests; mod utils_tests; diff --git a/test/tests/common/tracing_streaming_tests.rs 
b/test/tests/common/tracing_streaming_tests.rs new file mode 100644 index 00000000000..3f581a3b345 --- /dev/null +++ b/test/tests/common/tracing_streaming_tests.rs @@ -0,0 +1,213 @@ +//! Wire-format tests for the EIP-3155 streaming serializer in +//! `ethrex_common::tracing` β€” pins each per-step / summary / state-root field +//! against a captured `evm v1.17.3 run --json` reference. + +use bytes::Bytes; +use ethereum_types::{H256, U256}; +use ethrex_common::tracing::{ + MemoryChunk, OpcodeStep, StreamingOpts, write_streaming_state_root, write_streaming_step, + write_streaming_summary, +}; + +// Mirrors the third step of `evm v1.17.3 run --json 0x6001600101`, used to +// anchor byte-exact format parity with the geth reference output. +fn add_step() -> OpcodeStep { + OpcodeStep { + pc: 4, + op: 0x01, // ADD + gas: 9_999_999_994, // 0x2540be3fa + gas_cost: 3, + mem_size: 0, + depth: 1, + return_data: Bytes::new(), + refund: 0, + stack: Some(vec![U256::one(), U256::one()]), + memory: None, + storage: None, + error: None, + } +} + +// 1.4a β€” ADD step byte-exact match against the streaming format +#[test] +fn test_1_4a_streaming_add_step() { + let step = add_step(); + let opts = StreamingOpts::default(); + let mut buf = Vec::new(); + write_streaming_step(&mut buf, &step, &opts).unwrap(); + let output = String::from_utf8(buf).unwrap(); + let expected = "{\"pc\":4,\"op\":1,\"gas\":\"0x2540be3fa\",\"gasCost\":\"0x3\",\"memSize\":0,\"stack\":[\"0x1\",\"0x1\"],\"depth\":1,\"refund\":0,\"opName\":\"ADD\"}\n"; + assert_eq!(output, expected, "streaming ADD step mismatch"); +} + +// 1.4b β€” MSTORE step with memory enabled; memory reassembled as single hex blob +#[test] +fn test_1_4b_streaming_memory() { + let step = OpcodeStep { + pc: 0, + op: 0x52, // MSTORE + gas: 100, + gas_cost: 3, + mem_size: 64, + depth: 1, + return_data: Bytes::new(), + refund: 0, + stack: None, + memory: Some(vec![ + MemoryChunk([0u8; 32]), + MemoryChunk({ + let mut b = [0u8; 32]; + b[31] = 0x01; + b 
+ }), + ]), + storage: None, + error: None, + }; + let opts = StreamingOpts { + disable_stack: true, + disable_memory: false, + ..StreamingOpts::default() + }; + let mut buf = Vec::new(); + write_streaming_step(&mut buf, &step, &opts).unwrap(); + let output = String::from_utf8(buf).unwrap(); + // 32 zero bytes + 31 zero bytes + 0x01 + let expected_mem = format!("0x{}{}01", "00".repeat(32), "00".repeat(31)); + assert!( + output.contains(&format!("\"memory\":\"{}\"", expected_mem)), + "memory field mismatch, got: {output}" + ); + // confirm it is a single string, not an array + assert!( + !output.contains("\"memory\":["), + "memory must not be an array" + ); +} + +// 1.4c β€” REVERT step with error field +#[test] +fn test_1_4c_streaming_error() { + let step = OpcodeStep { + pc: 10, + op: 0xfd, // REVERT + gas: 500, + gas_cost: 0, + mem_size: 0, + depth: 1, + return_data: Bytes::new(), + refund: 0, + stack: Some(vec![]), + memory: None, + storage: None, + error: Some("execution reverted".to_string()), + }; + let opts = StreamingOpts::default(); + let mut buf = Vec::new(); + write_streaming_step(&mut buf, &step, &opts).unwrap(); + let output = String::from_utf8(buf).unwrap(); + assert!( + output.contains("\"error\":\"execution reverted\""), + "error field missing or wrong, got: {output}" + ); +} + +// 1.4d β€” summary line, no error +#[test] +fn test_1_4d_streaming_summary_no_error() { + let mut buf = Vec::new(); + write_streaming_summary(&mut buf, &[0xde, 0xad], 42, None).unwrap(); + let output = String::from_utf8(buf).unwrap(); + let expected = "{\"output\":\"dead\",\"gasUsed\":\"0x2a\"}\n"; + assert_eq!(output, expected, "summary no-error mismatch"); +} + +// 1.4e β€” summary line with error +#[test] +fn test_1_4e_streaming_summary_with_error() { + let mut buf = Vec::new(); + write_streaming_summary(&mut buf, &[], 0, Some("out of gas")).unwrap(); + let output = String::from_utf8(buf).unwrap(); + let expected = "{\"output\":\"\",\"gasUsed\":\"0x0\",\"error\":\"out of 
gas\"}\n"; + assert_eq!(output, expected, "summary with-error mismatch"); +} + +// 1.4f β€” disable_stack omits stack field +#[test] +fn test_1_4f_disable_stack() { + let step = add_step(); + let opts = StreamingOpts { + disable_stack: true, + ..StreamingOpts::default() + }; + let mut buf = Vec::new(); + write_streaming_step(&mut buf, &step, &opts).unwrap(); + let output = String::from_utf8(buf).unwrap(); + assert!( + !output.contains("\"stack\""), + "stack should be absent when disable_stack=true, got: {output}" + ); +} + +// 1.4g β€” unknown opcode 0xee +#[test] +fn test_1_4g_unknown_opcode() { + let step = OpcodeStep { + pc: 0, + op: 0xee, + gas: 100, + gas_cost: 0, + mem_size: 0, + depth: 1, + return_data: Bytes::new(), + refund: 0, + stack: Some(vec![]), + memory: None, + storage: None, + error: None, + }; + let opts = StreamingOpts::default(); + let mut buf = Vec::new(); + write_streaming_step(&mut buf, &step, &opts).unwrap(); + let output = String::from_utf8(buf).unwrap(); + assert!( + output.contains("\"op\":238"), + "op should be decimal 238 for 0xee, got: {output}" + ); + assert!( + output.contains("\"opName\":\"opcode 0xee not defined\""), + "opName mismatch for unknown opcode, got: {output}" + ); +} + +// 1.4h β€” write_streaming_state_root with H256::zero(); the colon-space is +// required because goevmlab does a literal byte search for `"stateRoot": "`. +#[test] +fn test_1_4h_state_root() { + let mut buf = Vec::new(); + write_streaming_state_root(&mut buf, H256::zero()).unwrap(); + let output = String::from_utf8(buf).unwrap(); + let expected = + "{\"stateRoot\": \"0x0000000000000000000000000000000000000000000000000000000000000000\"}\n"; + assert_eq!(output, expected, "state root line mismatch"); + assert!( + output.contains("\"stateRoot\": \""), + "must contain colon-space after stateRoot key" + ); +} + +// 1.5 β€” snapshot test: existing Serialize for OpcodeStep (legacy RPC shape). 
+// Pinning this guards against accidental drift in the RPC `debug_traceTransaction` +// wire format while we evolve the streaming serializer alongside it. +#[test] +fn test_1_5_legacy_rpc_serialize_snapshot() { + let step = add_step(); + let json = serde_json::to_string(&step).unwrap(); + // Legacy shape: op is mnemonic string, gas/gasCost/refund are numeric, + // memSize is numeric, returnData is "0x", stack is array of hex strings. + let expected = r#"{"pc":4,"op":"ADD","gas":9999999994,"gasCost":3,"depth":1,"stack":["0x1","0x1"],"memSize":0,"returnData":"0x","refund":0}"#; + assert_eq!( + json, expected, + "legacy RPC OpcodeStep serialization changed" + ); +} diff --git a/test/tests/levm/error_format_tests.rs b/test/tests/levm/error_format_tests.rs new file mode 100644 index 00000000000..6c7da66fa37 --- /dev/null +++ b/test/tests/levm/error_format_tests.rs @@ -0,0 +1,19 @@ +use ethrex_levm::errors::ExceptionalHalt; + +#[test] +fn stack_underflow_display_is_geth_compatible() { + let err = ExceptionalHalt::StackUnderflow { + stack_len: 2, + required: 3, + }; + assert_eq!(err.to_string(), "stack underflow (2 <=> 3)"); +} + +#[test] +fn stack_overflow_display_is_geth_compatible() { + let err = ExceptionalHalt::StackOverflow { + stack_len: 1024, + limit: 1024, + }; + assert_eq!(err.to_string(), "stack limit reached 1024 (1024)"); +} diff --git a/test/tests/levm/mod.rs b/test/tests/levm/mod.rs index 55b2325127e..7990e5dfb08 100644 --- a/test/tests/levm/mod.rs +++ b/test/tests/levm/mod.rs @@ -5,12 +5,15 @@ mod eip7702_tests; mod eip7708_tests; mod eip7778_tests; mod eip7928_tests; +mod error_format_tests; mod l2_fee_token_ratio_tests; mod l2_fee_token_tests; mod l2_gas_reservation_tests; mod l2_hook_tests; mod l2_privileged_tx_tests; mod memory_tests; +mod opcode_tracer_streaming_tests; +mod opcode_tracer_tests; mod precompile_tests; mod prestate_tracer_tests; mod stack_tests; diff --git a/test/tests/levm/opcode_tracer_streaming_tests.rs 
b/test/tests/levm/opcode_tracer_streaming_tests.rs new file mode 100644 index 00000000000..e3efa54bbda --- /dev/null +++ b/test/tests/levm/opcode_tracer_streaming_tests.rs @@ -0,0 +1,390 @@ +//! Tests for the streaming sink feature of `LevmOpcodeTracer` (Phase 2). +//! +//! These tests exercise `LevmOpcodeTracer::streaming()` directly, without +//! going through the full VM pipeline. All tests assert on the bytes written +//! to the sink rather than on internal state, matching the boundary used by +//! the EIP-3155 streaming shape. + +use bytes::Bytes; +use ethereum_types::H256; +use ethrex_common::{U256, tracing::OpcodeStep}; +use ethrex_levm::tracing::{LevmOpcodeTracer, OpcodeTracerConfig}; +use std::sync::{Arc, Mutex}; + +// ── Shared in-memory sink ───────────────────────────────────────────────────── + +/// A `Write` impl backed by a shared `Vec`, so both the sink (passed into +/// the tracer) and the test (asserting on content) can access the same buffer. +struct SharedSink(Arc>>); + +impl std::io::Write for SharedSink { + fn write(&mut self, b: &[u8]) -> std::io::Result { + self.0.lock().unwrap().extend_from_slice(b); + Ok(b.len()) + } + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +/// A sink that always fails on the first write. +struct FailingSink; + +impl std::io::Write for FailingSink { + fn write(&mut self, _b: &[u8]) -> std::io::Result { + Err(std::io::Error::new( + std::io::ErrorKind::BrokenPipe, + "test failure", + )) + } + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +// ── Helper constructors ─────────────────────────────────────────────────────── + +fn make_buf() -> (Arc>>, SharedSink) { + let buf = Arc::new(Mutex::new(Vec::new())); + let sink = SharedSink(Arc::clone(&buf)); + (buf, sink) +} + +fn default_cfg() -> OpcodeTracerConfig { + OpcodeTracerConfig::default() +} + +/// Builds a minimal `OpcodeStep` for the given opcode byte. Gas and gas_cost +/// are set to sentinel values so tests can assert on them. 
+fn make_step(op: u8, pc: u64, gas: u64) -> OpcodeStep {
+    OpcodeStep {
+        pc,
+        op,
+        gas,
+        gas_cost: 0, // patched by finalize_step
+        mem_size: 0,
+        depth: 1,
+        return_data: Bytes::new(),
+        refund: 0,
+        stack: Some(vec![]),
+        memory: None,
+        storage: None,
+        error: None,
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+/// 2.7a — basic streaming: one step is written to the sink, not kept in `logs`.
+#[test]
+fn test_2_7a_streaming_basic() {
+    let (buf, sink) = make_buf();
+    let mut tracer = LevmOpcodeTracer::streaming(default_cfg(), Box::new(sink));
+
+    // Simulate pre-step capture for ADD (0x01), pc=5, gas=1000.
+    tracer.pre_step_capture(
+        5,    // pc
+        0x01, // ADD
+        1000, // gas
+        1,    // depth
+        0,    // refund
+        &[],  // stack_view (no stack values for ADD pre-execution in this mini-test)
+        &[],  // memory_view
+        0,    // mem_size
+        &Bytes::new(),
+        None, // storage_kv
+    );
+    assert_eq!(tracer.logs.len(), 1, "step buffered before finalize");
+
+    tracer.finalize_step(3, None);
+
+    // After finalize in streaming mode, logs must be empty.
+    assert!(tracer.logs.is_empty(), "step flushed: logs must be empty");
+    assert_eq!(tracer.streamed_count, 1);
+
+    let bytes = buf.lock().unwrap();
+    let output = std::str::from_utf8(&bytes).expect("valid UTF-8");
+    // Must be exactly one newline-terminated line.
+    let lines: Vec<&str> = output.lines().collect();
+    assert_eq!(lines.len(), 1, "exactly one step line");
+
+    // Parse as JSON and check shape.
+    let v: serde_json::Value = serde_json::from_str(lines[0]).expect("valid JSON");
+    assert_eq!(v["pc"], serde_json::json!(5));
+    assert_eq!(v["op"], serde_json::json!(0x01_u64)); // streaming emits raw opcode byte
+    assert_eq!(v["gas"], serde_json::json!("0x3e8")); // 1000 hex
+    assert_eq!(v["gasCost"], serde_json::json!("0x3")); // patched gas_cost = 3
+    assert_eq!(v["depth"], serde_json::json!(1));
+
+    // Trailing newline must be present.
+    assert!(output.ends_with('\n'), "line must end with newline");
+}
+
+/// 2.7b — JUMP + synthetic JUMPDEST ordering: parent line before synth line.
+///
+/// Real flow: JUMP pre_step_capture → handler calls synthesize_step (JUMPDEST
+/// pushed into logs) → dispatch loop calls finalize_step(JUMP).
+/// finalize_step flushes logs[idx..] in order: JUMP first, then JUMPDEST.
+#[test]
+fn test_2_7b_synthetic_ordering() {
+    let (buf, sink) = make_buf();
+    let mut tracer = LevmOpcodeTracer::streaming(default_cfg(), Box::new(sink));
+
+    // 1. Simulate pre-step capture for JUMPI (0x57), pc=2.
+    tracer.pre_step_capture(
+        2,    // pc
+        0x57, // JUMPI
+        5000, // gas
+        1,
+        0,
+        &[U256::from(10), U256::from(1)], // stack: [target=10, cond=1]
+        &[],
+        0,
+        &Bytes::new(),
+        None,
+    );
+    let jumpi_idx = tracer.last_step_index.unwrap();
+
+    // 2. Handler calls synthesize_step for JUMPDEST (0x5b), pc=10.
+    let jumpdest = make_step(0x5b, 10, 4992); // gas after JUMPI charge
+    tracer.synthesize_step(jumpdest);
+
+    // Both parent and synthetic are buffered.
+    assert_eq!(tracer.logs.len(), 2);
+
+    // 3. Dispatch loop calls finalize_step for JUMPI.
+    tracer.finalize_step(8, None); // JUMP costs 8
+
+    // After flush, logs must be empty.
+    assert!(tracer.logs.is_empty());
+    assert_eq!(tracer.streamed_count, 2);
+
+    let bytes = buf.lock().unwrap();
+    let output = std::str::from_utf8(&bytes).expect("valid UTF-8");
+    let lines: Vec<&str> = output.lines().collect();
+    assert_eq!(lines.len(), 2, "two lines: JUMPI then JUMPDEST");
+
+    let jumpi_v: serde_json::Value = serde_json::from_str(lines[0]).expect("valid JSON line 0");
+    let jumpdest_v: serde_json::Value = serde_json::from_str(lines[1]).expect("valid JSON line 1");
+
+    // JUMPI is first.
+    assert_eq!(jumpi_v["pc"], serde_json::json!(2));
+    assert_eq!(jumpi_v["op"], serde_json::json!(0x57_u64)); // JUMPI
+    assert_eq!(jumpi_v["gasCost"], serde_json::json!("0x8")); // patched
+
+    // JUMPDEST is second.
+    assert_eq!(jumpdest_v["pc"], serde_json::json!(10));
+    assert_eq!(jumpdest_v["op"], serde_json::json!(0x5b_u64)); // JUMPDEST
+
+    // Verify jumpi_idx is correct.
+    let _ = jumpi_idx;
+}
+
+/// 2.7c — cap is honored across both real and synthetic steps.
+///
+/// With limit=2, the third pre_step_capture should be rejected.
+#[test]
+fn test_2_7c_cap_honored() {
+    let (buf, sink) = make_buf();
+    let cfg = OpcodeTracerConfig {
+        limit: 2,
+        ..Default::default()
+    };
+    let mut tracer = LevmOpcodeTracer::streaming(cfg, Box::new(sink));
+
+    // Step 1 — accepted.
+    tracer.pre_step_capture(0, 0x60, 1000, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(3, None);
+    assert_eq!(tracer.streamed_count, 1);
+
+    // Step 2 — accepted.
+    tracer.pre_step_capture(2, 0x60, 997, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(3, None);
+    assert_eq!(tracer.streamed_count, 2);
+
+    // Step 3 — rejected by cap (total == limit).
+    tracer.pre_step_capture(4, 0x01, 994, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    assert!(
+        tracer.last_step_index.is_none(),
+        "cap: pre_step should set last_step_index=None"
+    );
+    // finalize_step should be a no-op.
+    tracer.finalize_step(3, None);
+    assert_eq!(tracer.streamed_count, 2, "third step not counted");
+
+    let bytes = buf.lock().unwrap();
+    let output = std::str::from_utf8(&bytes).expect("UTF-8");
+    let lines: Vec<&str> = output.lines().collect();
+    assert_eq!(lines.len(), 2, "only 2 lines emitted");
+}
+
+/// 2.7d — write failure: stream set to None, error stored, subsequent calls are no-ops.
+#[test]
+fn test_2_7d_write_failure() {
+    let mut tracer = LevmOpcodeTracer::streaming(default_cfg(), Box::new(FailingSink));
+
+    tracer.pre_step_capture(0, 0x60, 1000, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    // finalize_step will attempt to write and fail on the first step.
+    tracer.finalize_step(3, None);
+
+    assert!(
+        tracer.stream.is_none(),
+        "stream must be cleared after write error"
+    );
+    assert_eq!(
+        tracer.streamed_count, 0,
+        "no entry should be counted as streamed when the first write fails"
+    );
+
+    // Take the error and confirm it's a single-shot accessor.
+    let err = tracer.take_stream_error();
+    assert!(err.is_some(), "error must be stored");
+    assert!(
+        tracer.take_stream_error().is_none(),
+        "error cleared after take"
+    );
+
+    // After failure, pre_step_capture must NOT keep accumulating into `logs`
+    // (otherwise a streaming tracer silently degrades into RPC-mode behavior).
+    // The post-`take` state still has stream=None, but stream_error was just
+    // taken — re-arm the failure marker by triggering another failed flush.
+    // Since the sink is gone, we simulate by directly verifying the early-out
+    // path: push a real step on a fresh streaming tracer that fails, then
+    // assert that AFTER failure `pre_step_capture` is a no-op.
+    let mut tracer2 = LevmOpcodeTracer::streaming(default_cfg(), Box::new(FailingSink));
+    tracer2.pre_step_capture(0, 0x60, 1000, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer2.finalize_step(3, None);
+    assert!(tracer2.stream_error.is_some());
+    // logs was truncated on flush; next pre_step_capture must not re-grow it.
+    tracer2.pre_step_capture(2, 0x01, 997, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    assert!(
+        tracer2.logs.is_empty(),
+        "pre_step_capture must be a no-op once a stream failure has occurred"
+    );
+    assert!(
+        tracer2.last_step_index.is_none(),
+        "last_step_index must be cleared after stream failure"
+    );
+}
+
+/// 2.7d-bis — `synthesize_step` honors the cap across streamed entries.
+///
+/// Regression for a missed `streamed_count` check: in streaming mode `logs` is
+/// emptied after every `finalize_step`, so a `logs.len()`-only cap would never
+/// fire and synthetic steps would keep leaking past the limit.
+#[test]
+fn test_2_7d_bis_synthesize_step_cap() {
+    let (buf, sink) = make_buf();
+    let cfg = OpcodeTracerConfig {
+        limit: 2,
+        ..Default::default()
+    };
+    let mut tracer = LevmOpcodeTracer::streaming(cfg, Box::new(sink));
+
+    // Stream two real steps to hit the cap.
+    tracer.pre_step_capture(0, 0x60, 1000, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(3, None);
+    tracer.pre_step_capture(2, 0x60, 997, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(3, None);
+    assert_eq!(tracer.streamed_count, 2);
+
+    // Synthesize one more — must be rejected by the cap.
+    let synth = OpcodeStep {
+        pc: 4,
+        op: 0x5b,
+        gas: 994,
+        gas_cost: 1,
+        mem_size: 0,
+        depth: 1,
+        return_data: Bytes::new(),
+        refund: 0,
+        stack: Some(vec![]),
+        memory: None,
+        storage: None,
+        error: None,
+    };
+    tracer.synthesize_step(synth);
+    assert!(
+        tracer.logs.is_empty(),
+        "synthetic step must be dropped once the cap is reached"
+    );
+
+    let bytes = buf.lock().unwrap();
+    assert_eq!(
+        std::str::from_utf8(&bytes).unwrap().lines().count(),
+        2,
+        "only 2 lines emitted; synthetic step did not slip past the cap"
+    );
+}
+
+/// 2.7e — flush_summary appends the summary line after step lines.
+#[test]
+fn test_2_7e_flush_summary() {
+    let (buf, sink) = make_buf();
+    let mut tracer = LevmOpcodeTracer::streaming(default_cfg(), Box::new(sink));
+
+    // Stream two steps.
+    tracer.pre_step_capture(0, 0x60, 1000, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(3, None);
+    tracer.pre_step_capture(2, 0x60, 997, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(3, None);
+    assert_eq!(tracer.streamed_count, 2);
+
+    tracer
+        .flush_summary(&[0xde, 0xad], 42, None)
+        .expect("flush_summary must succeed");
+
+    let bytes = buf.lock().unwrap();
+    let output = std::str::from_utf8(&bytes).expect("UTF-8");
+    let lines: Vec<&str> = output.lines().collect();
+    // 2 step lines + 1 summary line.
+    assert_eq!(lines.len(), 3);
+    assert_eq!(
+        lines[2], r#"{"output":"dead","gasUsed":"0x2a"}"#,
+        "summary line must match expected shape"
+    );
+}
+
+/// 2.7f — flush_state_root appends `{"stateRoot": "0x..."}` after summary.
+#[test]
+fn test_2_7f_flush_state_root() {
+    let (buf, sink) = make_buf();
+    let mut tracer = LevmOpcodeTracer::streaming(default_cfg(), Box::new(sink));
+
+    // One step.
+    tracer.pre_step_capture(0, 0x00, 1000, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(0, None);
+
+    tracer
+        .flush_summary(&[], 0, None)
+        .expect("flush_summary must succeed");
+    tracer
+        .flush_state_root(H256::zero())
+        .expect("flush_state_root must succeed");
+
+    let bytes = buf.lock().unwrap();
+    let output = std::str::from_utf8(&bytes).expect("UTF-8");
+    let lines: Vec<&str> = output.lines().collect();
+    // 1 step + summary + stateRoot.
+    assert_eq!(lines.len(), 3);
+
+    let last = lines[2];
+    assert_eq!(
+        last,
+        r#"{"stateRoot": "0x0000000000000000000000000000000000000000000000000000000000000000"}"#,
+        "stateRoot line must have colon-space and full zero hash"
+    );
+}
+
+/// 2.8 — disabled / RPC mode unchanged: logs accumulate, no sink.
+#[test]
+fn test_2_8_rpc_mode_unchanged() {
+    let mut tracer = LevmOpcodeTracer::new(default_cfg());
+
+    tracer.pre_step_capture(0, 0x60, 1000, 1, 0, &[], &[], 0, &Bytes::new(), None);
+    tracer.finalize_step(3, None);
+
+    assert_eq!(tracer.logs.len(), 1, "RPC mode: step buffered in logs");
+    assert!(tracer.stream.is_none(), "RPC mode: no sink");
+    assert_eq!(tracer.streamed_count, 0, "RPC mode: streamed_count stays 0");
+}
diff --git a/test/tests/levm/opcode_tracer_tests.rs b/test/tests/levm/opcode_tracer_tests.rs
new file mode 100644
index 00000000000..31661784d1f
--- /dev/null
+++ b/test/tests/levm/opcode_tracer_tests.rs
@@ -0,0 +1,285 @@
+//! End-to-end tests for the EIP-3155 `opcodeTracer`.
+//!
+//! Each test deploys a small bytecode through the full RPC pipeline
+//! (`LEVM::trace_tx_opcodes` -> `serde_json::to_value`) and asserts on the
+//! resulting JSON shape. Behaviour is verified at the wire-format boundary,
+//! not on internal Rust types. Per-step content is EIP-3155 (`pc`, `op`,
+//! `gas`, `gasCost`, `stack`, `depth`, `memory`, `storage`, `refund`,
+//! `memSize`, `returnData`), emitted under the de-facto cross-client
+//! `structLogger` wrapper.
+
+use super::test_db::TestDatabase;
+use bytes::Bytes;
+use ethrex_common::{
+    Address, U256,
+    types::{Account, BlockHeader, Code, EIP1559Transaction, Transaction, TxKind},
+};
+use ethrex_crypto::NativeCrypto;
+use ethrex_levm::db::gen_db::GeneralizedDatabase;
+use ethrex_levm::tracing::OpcodeTracerConfig;
+use ethrex_levm::vm::VMType;
+use ethrex_vm::backends::levm::LEVM;
+use once_cell::sync::OnceCell;
+use rustc_hash::FxHashMap;
+use serde_json::Value;
+use std::sync::Arc;
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+fn default_header() -> BlockHeader {
+    BlockHeader {
+        coinbase: Address::from_low_u64_be(0xCCC),
+        base_fee_per_gas: Some(1),
+        gas_limit: 30_000_000,
+        ..Default::default()
+    }
+}
+
+fn make_tx(contract: Address, sender: Address) -> Transaction {
+    Transaction::EIP1559Transaction(EIP1559Transaction {
+        chain_id: 1,
+        nonce: 0,
+        max_priority_fee_per_gas: 1,
+        max_fee_per_gas: 10,
+        gas_limit: 100_000,
+        to: TxKind::Call(contract),
+        value: U256::zero(),
+        data: Bytes::new(),
+        access_list: vec![],
+        signature_y_parity: false,
+        signature_r: U256::one(),
+        signature_s: U256::one(),
+        inner_hash: OnceCell::new(),
+        sender_cache: {
+            let cell = OnceCell::new();
+            let _ = cell.set(sender);
+            cell
+        },
+        cached_canonical: OnceCell::new(),
+    })
+}
+
+/// Runs `bytecode` under a contract account with `cfg` and returns the
+/// serialized `OpcodeTraceResult` as a `serde_json::Value`.
+fn trace_to_json(bytecode: Vec<u8>, cfg: OpcodeTracerConfig) -> Value {
+    let contract_addr = Address::from_low_u64_be(0xC000);
+    let sender_addr = Address::from_low_u64_be(0x1000);
+
+    let mut accounts = FxHashMap::default();
+    accounts.insert(
+        contract_addr,
+        Account::new(
+            U256::zero(),
+            Code::from_bytecode(Bytes::from(bytecode), &NativeCrypto),
+            1,
+            FxHashMap::default(),
+        ),
+    );
+    accounts.insert(
+        sender_addr,
+        Account::new(
+            U256::from(10u64) * U256::from(10u64).pow(U256::from(18)),
+            Code::default(),
+            0,
+            FxHashMap::default(),
+        ),
+    );
+
+    let mut db = GeneralizedDatabase::new(Arc::new(TestDatabase { accounts }));
+    let header = default_header();
+    let tx = make_tx(contract_addr, sender_addr);
+
+    let result = LEVM::trace_tx_opcodes(&mut db, &header, &tx, cfg, VMType::L1, &NativeCrypto)
+        .expect("trace should succeed");
+    serde_json::to_value(&result).expect("serialize")
+}
+
+// ── Tests ────────────────────────────────────────────────────────────────────
+
+/// `PUSH1 0x01 PUSH1 0x02 ADD STOP`
+///
+/// Pins the wrapper (`failed`/`gas`/`returnValue`/`structLogs`) and per-step
+/// fields: `op` string mnemonic, numeric `gas`/`gasCost`/`refund`, decimal
+/// `pc`/`memSize`/`depth`, bottom-first `stack`, always-present `returnData`.
+#[test]
+fn opcode_tracer_basic_execution() {
+    let bytecode = vec![0x60, 0x01, 0x60, 0x02, 0x01, 0x00];
+    let j = trace_to_json(bytecode, OpcodeTracerConfig::default());
+
+    assert_eq!(j["failed"], Value::Bool(false));
+    assert!(j["gas"].is_number(), "gas is a number");
+    assert_eq!(j["returnValue"], Value::String("0x".to_string()));
+
+    let steps = j["structLogs"].as_array().expect("structLogs is array");
+    assert_eq!(steps.len(), 4, "PUSH1 PUSH1 ADD STOP");
+
+    // PUSH1 0x01 — first step, empty stack pre-execution.
+    assert_eq!(steps[0]["pc"], Value::Number(0.into()));
+    assert_eq!(steps[0]["op"].as_str(), Some("PUSH1"));
+    assert!(steps[0]["gas"].is_number(), "gas is a number");
+    assert_eq!(steps[0]["gasCost"].as_u64(), Some(3));
+    assert_eq!(steps[0]["depth"].as_u64(), Some(1));
+    assert_eq!(steps[0]["refund"].as_u64(), Some(0));
+    assert_eq!(steps[0]["returnData"].as_str(), Some("0x"));
+    assert_eq!(steps[0]["memSize"].as_u64(), Some(0));
+    assert_eq!(steps[0]["stack"], Value::Array(vec![]));
+    assert!(steps[0].get("opName").is_none(), "opName field is removed");
+
+    // ADD — third step, stack bottom-first [0x1, 0x2] pre-execution.
+    assert_eq!(steps[2]["op"].as_str(), Some("ADD"));
+    let add_stack = steps[2]["stack"].as_array().expect("stack array");
+    assert_eq!(add_stack[0], Value::String("0x1".to_string()));
+    assert_eq!(add_stack[1], Value::String("0x2".to_string()));
+
+    // STOP — final step, stack collapsed to [0x3].
+    assert_eq!(steps[3]["op"].as_str(), Some("STOP"));
+    let stop_stack = steps[3]["stack"].as_array().expect("stack array");
+    assert_eq!(stop_stack, &vec![Value::String("0x3".to_string())]);
+}
+
+/// `PUSH1 0x2a PUSH1 0x01 SSTORE STOP`
+///
+/// SSTORE step's `storage` map must be a **single-entry** object (no
+/// accumulation across the transaction). Non-SLOAD/SSTORE steps omit the
+/// field entirely.
+#[test]
+fn opcode_tracer_sstore_single_entry_storage() {
+    let bytecode = vec![0x60, 0x2a, 0x60, 0x01, 0x55, 0x00];
+    let j = trace_to_json(bytecode, OpcodeTracerConfig::default());
+    let steps = j["structLogs"].as_array().expect("structLogs");
+    assert_eq!(steps.len(), 4);
+
+    // PUSH1 / PUSH1 — no storage field.
+    assert!(steps[0].get("storage").is_none());
+    assert!(steps[1].get("storage").is_none());
+
+    // SSTORE — exactly one entry, key=0x01, value=0x2a.
+    let sstore = &steps[2];
+    assert_eq!(sstore["op"].as_str(), Some("SSTORE"));
+    let storage = sstore["storage"].as_object().expect("storage object");
+    assert_eq!(storage.len(), 1, "single entry, no accumulation");
+    let key = format!("0x{:0>64}", "1");
+    let val = format!("0x{:0>64}", "2a");
+    assert_eq!(
+        storage.get(&key).and_then(Value::as_str),
+        Some(val.as_str())
+    );
+
+    // STOP — no storage field.
+    assert!(steps[3].get("storage").is_none());
+}
+
+/// `PUSH1 0x20 PUSH1 0x00 MSTORE STOP` with `enableMemory=true`
+///
+/// Memory grows by one 32-byte word after MSTORE. The STOP step (captured
+/// after MSTORE executes) carries `memory: ["0x000...0020"]` and `memSize: 32`.
+#[test]
+fn opcode_tracer_memory_capture_when_enabled() {
+    let bytecode = vec![0x60, 0x20, 0x60, 0x00, 0x52, 0x00];
+    let cfg = OpcodeTracerConfig {
+        enable_memory: true,
+        ..Default::default()
+    };
+    let j = trace_to_json(bytecode, cfg);
+    let steps = j["structLogs"].as_array().expect("structLogs");
+
+    let stop = steps.last().expect("at least one step");
+    assert_eq!(stop["op"].as_str(), Some("STOP"));
+    assert_eq!(stop["memSize"].as_u64(), Some(32));
+    let mem = stop["memory"].as_array().expect("memory array");
+    assert_eq!(mem.len(), 1);
+    let expected = format!("0x{:0>64}", "20");
+    assert_eq!(mem[0].as_str(), Some(expected.as_str()));
+}
+
+/// `MSTORE8 + STATICCALL 0x04 (identity) + STOP` with `enableReturnData=true`
+///
+/// Identity precompile echoes its input. After STATICCALL returns, the
+/// subsequent STOP step surfaces `returnData: "0x01"`.
+#[test]
+fn opcode_tracer_return_data_capture_when_enabled() {
+    let bytecode = vec![
+        0x60, 0x01, 0x60, 0x00, 0x53, // PUSH1 0x01 PUSH1 0x00 MSTORE8
+        0x60, 0x01, 0x60, 0x00, // retLen=1 retOff=0
+        0x60, 0x01, 0x60, 0x00, // argsLen=1 argsOff=0
+        0x60, 0x04, // identity precompile addr
+        0x5a, 0xfa, // GAS STATICCALL
+        0x00, // STOP
+    ];
+    let cfg = OpcodeTracerConfig {
+        enable_return_data: true,
+        ..Default::default()
+    };
+    let j = trace_to_json(bytecode, cfg);
+    let steps = j["structLogs"].as_array().expect("structLogs");
+
+    let stop = steps.last().expect("at least one step");
+    assert_eq!(stop["op"].as_str(), Some("STOP"));
+    assert_eq!(stop["returnData"].as_str(), Some("0x01"));
+}
+
+/// `PUSH1 0x01 PUSH1 0x02 ADD STOP` with `disableStack=true`
+///
+/// When stack capture is off, the field is JSON `null` — neither omitted nor an
+/// empty array. The field is always present; its value signals "disabled".
+#[test]
+fn opcode_tracer_stack_disabled_is_null() {
+    let bytecode = vec![0x60, 0x01, 0x60, 0x02, 0x01, 0x00];
+    let cfg = OpcodeTracerConfig {
+        disable_stack: true,
+        ..Default::default()
+    };
+    let j = trace_to_json(bytecode, cfg);
+    let steps = j["structLogs"].as_array().expect("structLogs");
+
+    for step in steps {
+        assert_eq!(
+            step["stack"],
+            Value::Null,
+            "stack must serialize as JSON null when disabled"
+        );
+    }
+}
+
+/// `PUSH1 0x04 JUMP JUMPDEST STOP`
+///
+/// Verifies the fused JUMP + JUMPDEST optimization synthesizes a JUMPDEST trace
+/// entry: the JUMP step's `gasCost` is exactly 8 (not 9, which would include
+/// the absorbed JUMPDEST charge), and a JUMPDEST step follows it with
+/// `gasCost = 1`.
+#[test]
+fn opcode_tracer_jumpdest_synthesized_after_jump() {
+    // pc=0: PUSH1 0x04
+    // pc=2: JUMP
+    // pc=3: INVALID (padding, never executed)
+    // pc=4: JUMPDEST
+    // pc=5: STOP
+    let bytecode = vec![0x60, 0x04, 0x56, 0xfe, 0x5b, 0x00];
+    let j = trace_to_json(bytecode, OpcodeTracerConfig::default());
+    let steps = j["structLogs"].as_array().expect("structLogs");
+
+    assert_eq!(steps.len(), 4, "PUSH1 / JUMP / JUMPDEST / STOP");
+
+    assert_eq!(steps[0]["op"].as_str(), Some("PUSH1"));
+
+    assert_eq!(steps[1]["op"].as_str(), Some("JUMP"));
+    assert_eq!(
+        steps[1]["gasCost"].as_u64(),
+        Some(8),
+        "JUMP gasCost must not absorb the JUMPDEST charge"
+    );
+
+    assert_eq!(steps[2]["op"].as_str(), Some("JUMPDEST"));
+    assert_eq!(steps[2]["pc"].as_u64(), Some(4));
+    assert_eq!(steps[2]["gasCost"].as_u64(), Some(1));
+    assert_eq!(steps[2]["depth"].as_u64(), Some(1));
+    // Gas remaining at JUMPDEST = gas at JUMP minus JUMP's 8.
+    let jump_gas = steps[1]["gas"].as_u64().expect("JUMP gas");
+    let jumpdest_gas = steps[2]["gas"].as_u64().expect("JUMPDEST gas");
+    assert_eq!(jumpdest_gas, jump_gas - 8);
+
+    assert_eq!(steps[3]["op"].as_str(), Some("STOP"));
+    // STOP gas reflects the JUMPDEST charge having been consumed.
+    let stop_gas = steps[3]["gas"].as_u64().expect("STOP gas");
+    assert_eq!(stop_gas, jumpdest_gas - 1);
+}