From 4ba46ee89aec311f35040e0d8f4c54b4f4b6c077 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Fri, 26 Jun 2026 01:32:39 +0200 Subject: [PATCH 1/4] feat(chain): immediate RPC failover for reads and writes (multi-RPC) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On any retryable RPC error (429/5xx/network), multi-RPC adapters now fail over to the next configured endpoint IMMEDIATELY instead of burning ~7.5s of same-endpoint retry. Reads and writes both. Single-RPC is unchanged (per-endpoint retries=5, #894). - Multi-RPC: per-endpoint FetchRequest retries=0 and the ethers FallbackProvider REMOVED. All reads route through explicit readWithFailover / contractReadWithFailover loops over the bare this.providers[]. (The FallbackProvider's quorum:1 only advances on a 4s STALL, not a fast error, so retries=0 alone would have broken read failover — the explicit loop is what makes immediate failover real.) - Writes: a bounded set-retry inside sendSignedTransactionAndWait re-broadcasts the SAME signed tx across endpoints (signer-free seam, so re-sign is structurally impossible; WAL onBroadcast fires once upstream; the per-wallet KeyedSerializer lock is held across all passes). - OBS-1: V10 populate/sign fails over via populateAndSignAcrossProviders with a SHARED forcedReapprove latch (#888) — exactly one forced approve per publish across every endpoint, zero new on-chain txs from failover. - Per-attempt stall caps: 4s for point reads; RPC_LOG_SCAN_TIMEOUT_MS=30s for wide getLogs/queryFilter scans via a multiAttemptTimeoutMs opt that caps multi-RPC attempts ONLY — single-RPC stays uncapped so a cold-start backfill getLogs can't permanently stall the publisher poller (#894). - Process-wide host-only failover logging extended to reads. Tx-safety: 9 invariants + 3 OBS-1 guards verified on the real dispatch seam (no re-sign, WAL once, single shared approve latch). Chain suite 701 pass / 1 skip (S3 circuit-breaker staged as a fast-follow). Full runtime build green. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_012DGBSrmN6Y8Vsgb7x46amp --- packages/chain/src/evm-adapter-ack-sign.ts | 27 +- packages/chain/src/evm-adapter-base.ts | 506 +++++++++++++----- packages/chain/src/evm-adapter-constants.ts | 32 ++ .../chain/src/evm-adapter-context-graph.ts | 38 +- packages/chain/src/evm-adapter-conviction.ts | 40 +- packages/chain/src/evm-adapter-events.ts | 59 +- packages/chain/src/evm-adapter-identity.ts | 16 +- packages/chain/src/evm-adapter-publish.ts | 76 ++- .../chain/src/evm-adapter-random-sampling.ts | 16 +- packages/chain/src/evm-adapter-rpc.ts | 32 +- .../chain/src/evm-adapter-storage-reads.ts | 24 +- packages/chain/src/rpc-failover-log.ts | 14 +- .../chain/test/bounded-retry-fetch.test.ts | 103 ++++ packages/chain/test/evm-adapter.unit.test.ts | 127 ++++- .../test/getmaxkanumberforauthor.unit.test.ts | 69 ++- packages/chain/test/loopback-rpc-harness.ts | 102 ++++ .../chain/test/mock-adapter-parity.test.ts | 9 + .../test/multi-rpc-provider-shape.test.ts | 26 +- .../test/multi-rpc-read-failover.test.ts | 202 +++++++ .../test/multi-rpc-write-failover.test.ts | 129 +++++ .../test/readwithfailover-loop.unit.test.ts | 216 ++++++++ .../v10-update-ack-digest-parity.unit.test.ts | 11 +- .../write-tx-safety-invariants.unit.test.ts | 311 +++++++++++ 23 files changed, 1949 insertions(+), 236 deletions(-) create mode 100644 packages/chain/test/bounded-retry-fetch.test.ts create mode 100644 packages/chain/test/loopback-rpc-harness.ts create mode 100644 packages/chain/test/multi-rpc-read-failover.test.ts create mode 100644 packages/chain/test/multi-rpc-write-failover.test.ts create mode 100644 packages/chain/test/readwithfailover-loop.unit.test.ts create mode 100644 packages/chain/test/write-tx-safety-invariants.unit.test.ts diff --git a/packages/chain/src/evm-adapter-ack-sign.ts b/packages/chain/src/evm-adapter-ack-sign.ts index 5fc46c26be..9702252b5b 100644 --- a/packages/chain/src/evm-adapter-ack-sign.ts +++ b/packages/chain/src/evm-adapter-ack-sign.ts @@ -37,7 +37,11 @@ export class AckSignMethods extends EVMChainAdapterBase { 'Verify cannot enforce ACK quorum without a real chain read — fix the adapter wiring or pass an explicit override.', ); } - const value = Number(await this.contracts.parametersStorage.minimumRequiredSignatures()); + const value = Number(await this.contractReadWithFailover( + 'parametersStorage.minimumRequiredSignatures', + this.contracts.parametersStorage, + (c) => c.minimumRequiredSignatures(), + )); this.cachedMinRequiredSignatures = { value, cachedAt: now }; return value; } @@ -52,7 +56,9 @@ export class AckSignMethods extends EVMChainAdapterBase { 'Verify path cannot enforce sharding-table eligibility without it.', ); } - return Boolean(await storage.nodeExists(identityId)); + return Boolean(await this.contractReadWithFailover( + 'shardingTableStorage.nodeExists', storage, (c) => c.nodeExists(identityId), + )); } /** @@ -80,16 +86,18 @@ export class AckSignMethods extends EVMChainAdapterBase { if (!identityStorage) return { valid: false, reason: 'rpc-error' }; const keyHash = ethers.keccak256(ethers.solidityPacked(['address'], [recoveredAddress])); - const hasPurpose: boolean = await identityStorage.keyHasPurpose( - claimedIdentityId, - keyHash, - OPERATIONAL_KEY_PURPOSE, + const hasPurpose: boolean = await this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(claimedIdentityId, keyHash, OPERATIONAL_KEY_PURPOSE), ); if (!hasPurpose) return { valid: false, reason: 'key-not-registered' }; const shardingTableStorage = await this.resolveContract('ShardingTableStorage'); if (!shardingTableStorage) return { valid: false, reason: 'rpc-error' }; - const inST: boolean = Boolean(await shardingTableStorage.nodeExists(claimedIdentityId)); + const inST: boolean = Boolean(await this.contractReadWithFailover( + 'shardingTableStorage.nodeExists', shardingTableStorage, + (c) => c.nodeExists(claimedIdentityId), + )); if (!inST) return { valid: false, reason: 'not-in-sharding-table' }; return { valid: true }; } catch { @@ -122,7 +130,10 @@ export class AckSignMethods extends EVMChainAdapterBase { if (!identityStorage) return false; const keyHash = ethers.keccak256(ethers.solidityPacked(['address'], [recoveredAddress])); - return identityStorage.keyHasPurpose(claimedIdentityId, keyHash, OPERATIONAL_KEY_PURPOSE); + return this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(claimedIdentityId, keyHash, OPERATIONAL_KEY_PURPOSE), + ); } async signACKDigest(digest: Uint8Array): Promise<{ r: Uint8Array; vs: Uint8Array } | undefined> { diff --git a/packages/chain/src/evm-adapter-base.ts b/packages/chain/src/evm-adapter-base.ts index 54b0809d83..af6655464c 100644 --- a/packages/chain/src/evm-adapter-base.ts +++ b/packages/chain/src/evm-adapter-base.ts @@ -11,7 +11,7 @@ * the external public API is unchanged. */ -import { JsonRpcProvider, FallbackProvider, Wallet, Contract, ethers } from 'ethers'; +import { JsonRpcProvider, Wallet, Contract, ethers } from 'ethers'; import { createFilterErrorSilencer, installFilterNotFoundConsoleSuppressor, formatProviderError } from './filter-error-silencer.js'; import type { FilterErrorSilencer } from './filter-error-silencer.js'; import { DEFAULT_APPROVAL_POLICY } from './chain-adapter.js'; @@ -27,7 +27,7 @@ import { ChainRpcTransportError, createRpcTimeoutError } from './chain-rpc-trans import { computeApprovalAction, effectivePublishAllowance, V10_PUBLISH_ONCHAIN_MIN_ALLOWANCE } from './evm-adapter-allowance.js'; import { formatProviderContext } from './evm-adapter-types.js'; import type { ContractCache, EVMAdapterConfig } from './evm-adapter-types.js'; -import { RPC_READ_STALL_TIMEOUT_MS, DEFAULT_RANDOM_SAMPLING_HUB_REFRESH_MS, RPC_BROADCAST_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_TIMEOUT_MS, RPC_RECEIPT_POLL_INTERVAL_MS, RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, ADMIN_KEY_PURPOSE, OPERATIONAL_KEY_PURPOSE, PUBLISHER_FUNDING_CACHE_TTL_MS } from './evm-adapter-constants.js'; +import { RPC_READ_STALL_TIMEOUT_MS, DEFAULT_RANDOM_SAMPLING_HUB_REFRESH_MS, RPC_BROADCAST_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_TIMEOUT_MS, RPC_RECEIPT_POLL_INTERVAL_MS, RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, RPC_ENDPOINT_SET_RETRIES, RPC_ENDPOINT_SET_RETRY_BACKOFF_MS, ADMIN_KEY_PURPOSE, OPERATIONAL_KEY_PURPOSE, PUBLISHER_FUNDING_CACHE_TTL_MS } from './evm-adapter-constants.js'; /** * Maps a Hub-registered contract name to the function that invalidates @@ -317,7 +317,16 @@ export class EVMChainAdapterBase { readonly chainId: string; - protected readonly provider: JsonRpcProvider | FallbackProvider; + /** + * The bare primary RPC provider (== `primaryProvider`). The nominal runner + * that signers, boot-bound contract handles, and the Hub-rotation event + * subscription bind to — NOT the read-failover surface. Every read reconnects + * to a per-endpoint provider via `readWithFailover`; the `FallbackProvider` + * was removed (see the constructor). Kept as a distinct field name for the + * binding sites; reads must never call `this.provider.()` directly + * (route through `readWithFailover`). + */ + protected readonly provider: JsonRpcProvider; protected readonly primaryProvider: JsonRpcProvider; @@ -597,26 +606,37 @@ export class EVMChainAdapterBase { // (gossip-publish-handler / finalization-handler `verifyOnChain`). Batching // is a transport optimisation only — disabling it is semantically inert and // does not change the number of `eth_getLogs` operations issued. + // Immediate-failover (R1): per-endpoint retries are 0 when ≥2 endpoints are + // configured, so the FIRST retryable failure propagates at once and the + // explicit per-provider failover loops (reads: `readWithFailover`; writes: + // `sendContractTransaction` / broadcast / receipt / the V10 populate loop) + // advance to the next endpoint immediately instead of burning ~7.5s of + // same-endpoint backoff on an endpoint we already know is failing. A + // single-RPC node keeps the bounded `RPC_REQUEST_MAX_RETRIES` retry (its + // only resilience; #894) via the default. See `boundedRetryFetchRequest`. + const perEndpointRetries = this.rpcUrls.length > 1 ? 0 : undefined; this.providers = this.rpcUrls.map( - (url) => new JsonRpcProvider(boundedRetryFetchRequest(url), undefined, { + (url) => new JsonRpcProvider(boundedRetryFetchRequest(url, perEndpointRetries), undefined, { cacheTimeout: -1, polling: true, batchMaxCount: 1, }), ); this.primaryProvider = this.providers[0]; - this.provider = this.providers.length === 1 - ? this.primaryProvider - : new FallbackProvider( - this.providers.map((provider, index) => ({ - provider, - priority: index + 1, - stallTimeout: RPC_READ_STALL_TIMEOUT_MS, - weight: 1, - })), - undefined, - { quorum: 1 }, - ); + // No `FallbackProvider`: reads route through `readWithFailover` over the bare + // `this.providers[]` for TRUE immediate failover. ethers' quorum:1 + // FallbackProvider threw a fast error straight to the caller WITHOUT + // consulting a backup (it advanced only on a ~4s stall) — empirically + // unreliable read failover even with per-endpoint retries > 0 — and its + // sub-providers shared this same `this.providers[]` array, so the + // multi-RPC `retries=0` above would have disabled its staller-based failover + // anyway. Removing it also eliminates the sticky `_lastFatalError` / + // one-shot `#initialSync` latch. `this.provider` is now just the bare + // primary: the nominal runner that signers, boot-bound contract handles, and + // the Hub-rotation event subscription bind to. Every actual READ reconnects + // to the loop provider (`readWithFailover`) and every WRITE reconnects + // per-endpoint explicitly, so this binding is never the failover surface. + this.provider = this.primaryProvider; const providerContext = formatProviderContext(config); // PR-8: install the filter-not-found silencer. Without this, RPC // nodes that GC filters faster than ethers' polling cadence @@ -792,6 +812,142 @@ export class EVMChainAdapterBase { return null; } + /** + * R1 read-failover primitive. Runs `fn` against the FIRST configured RPC + * provider; on a RETRYABLE error (429 / 5xx / network — `isRetryableRpcError`) + * advances IMMEDIATELY to the next provider, logging each hop host-only via + * `noteRpcFailover`. A NON-retryable error (a real revert / decoded custom + * error / bad argument) throws at once — failing over a deterministic chain + * error would be pointless and could mask it. When every provider is + * exhausted, throw the typed `RPC_ENDPOINTS_EXHAUSTED` transport error so the + * HTTP boundary maps it to a bounded 503 (the same contract the write loops + + * `init()` use). + * + * This MIRRORS the write loops (`sendContractTransaction`, + * `broadcastSignedTransactionWithFailover`) and the in-tree scan failover + * (`queryEventLogsPage`): explicit per-provider iteration over the BARE + * `this.providers[i]`, NOT a `FallbackProvider`. ethers' quorum:1 + * `FallbackProvider` throws a fast error to the caller WITHOUT consulting a + * backup (it advances only on a ~4s stall), so it never delivered reliable + * read failover — which is why reads route through this explicit loop instead. + * + * A per-attempt `withTimeout` makes a HUNG (pending, not erroring) backend + * yield to the next. This is a NEW hard per-attempt deadline — NOT a + * re-creation of the removed `FallbackProvider`'s `stallTimeout`. That stall + * was a PARALLELIZE trigger (it raced a backup against a slow backend and took + * whichever answered first); it NEVER aborted a healthy-but-slow read. Here the + * deadline DOES abort the attempt and fail it over, so it must be sized to the + * read: + * - MULTI-RPC: every attempt is hard-capped — `RPC_READ_STALL_TIMEOUT_MS` + * (4s) by default, which suits a POINT read (a >4s point read IS a hung + * backend). A WIDE read that legitimately runs longer — a topic-filtered + * `eth_getLogs` over a multi-thousand-block window — MUST raise the cap via + * `opts.multiAttemptTimeoutMs` (e.g. `RPC_LOG_SCAN_TIMEOUT_MS`). Otherwise + * the 4s deadline aborts a slow-but-healthy scan, fails it over across every + * endpoint, and throws a spurious `RPC_ENDPOINTS_EXHAUSTED` (for the event + * poller that escapes before the cursor advances → a permanent stall). + * - SINGLE-RPC: NO cap by default — a slow-but-valid read (init() Hub lookups, + * a wide getLogs) is not killed, and `boundedRetryFetchRequest`'s + * in-FetchRequest retry remains the only bound (#894). `multiAttemptTimeoutMs` + * does NOT cap single-RPC (there is nothing to fail over to). + * Callers that want a HARD bound on EVERY attempt INCLUDING single-RPC — e.g. + * fail-open funding reads that must not stall selection — pass + * `opts.attemptTimeoutMs`, which caps ALL attempts. + * Exhaustion still carries the typed code even for one provider. + * + * `fn` receives the active provider so callers route the actual read through + * it: a direct provider read as `p => p.getCode(addr)`, a contract VIEW read + * as `p => contract.connect(p).someView(args)` (a view-only reconnect — the + * returned Contract is bound to the bare provider; `this.contracts.*` is + * untouched). `fn` MUST be a pure read — no signing / broadcast / WAL side + * effect — because it may execute on more than one provider; writes use the + * dedicated write loops, never this. + */ + protected async readWithFailover( + label: string, + fn: (provider: JsonRpcProvider) => Promise, + opts?: { attemptTimeoutMs?: number; multiAttemptTimeoutMs?: number }, + ): Promise { + let lastRetryable: unknown; + for (let i = 0; i < this.providers.length; i += 1) { + const isLast = i === this.providers.length - 1; + // Per-attempt hard deadline (see the method doc — NOT the old FallbackProvider + // stallTimeout, which parallelized a backup rather than aborting a read): + // - MULTI-RPC: cap every attempt — `attemptTimeoutMs` if given, else + // `multiAttemptTimeoutMs` (raised for WIDE log scans so a slow-but-healthy + // getLogs isn't aborted), else the 4s point-read default. + // - SINGLE-RPC: uncapped UNLESS `attemptTimeoutMs` is given (fail-open + // funding reads); `multiAttemptTimeoutMs` never caps single-RPC (#894 — + // nothing to fail over to). + const capMs = opts?.attemptTimeoutMs + ?? (this.providers.length > 1 + ? (opts?.multiAttemptTimeoutMs ?? RPC_READ_STALL_TIMEOUT_MS) + : undefined); + try { + const attempt = fn(this.providers[i]); + return await (capMs == null + ? attempt + : withTimeout(attempt, capMs, `${label} via RPC #${i + 1}`)); + } catch (err) { + if (!isRetryableRpcError(err)) throw err; + lastRetryable = err; + if (!isLast) { + noteRpcFailover(label, this.rpcUrls[i], err, this.rpcUrls[i + 1]); + } + } + } + if (lastRetryable) noteRpcExhaustion(label, this.rpcUrls); + // Single provider → carry the typed code but keep the original message + // byte-identical (there is no second endpoint, so the raw message reads + // cleaner and any message-inspecting caller keeps seeing it). Multiple + // providers → the host-only "all endpoints" aggregate (never full URLs — + // a configured rpcUrl may carry an API key and this message can reach HTTP + // clients via response paths that echo err.message). Mirrors the write + // preparation loop's single-vs-multi message handling. + const message = this.providers.length <= 1 + ? errorMessage(lastRetryable) + : `${label} read failed on all configured RPC endpoints ` + + `(${this.rpcUrls.map(rpcHost).join(', ')}): ${errorMessage(lastRetryable)}`; + throw new ChainRpcTransportError('RPC_ENDPOINTS_EXHAUSTED', message, { + cause: lastRetryable, + rpcUrls: this.rpcUrls, + }); + } + + /** + * Convenience wrapper over `readWithFailover` for a CONTRACT VIEW read: runs + * `fn` against `contract` reconnected to each provider in turn (so the view + * call executes on the bare per-provider runner, with failover), leaving the + * boot-bound `this.contracts.*` handle untouched. The `.connect(p) as Contract` + * cast (ethers' `BaseContract.connect` loses the dynamic-method index + * signature) lives here once instead of at every call site. Same purity + * contract as `readWithFailover` — `fn` MUST be a pure view read, never a + * state-changing send (writes go through the dedicated write loops). + */ + /** + * Reconnect a contract / signer `handle` to `runner` so a per-endpoint + * read or populate executes against that runner (the failover mechanism). + * Every production handle is an ethers `Contract` / `Wallet` with `.connect`; + * if a handle cannot be rebound (no `.connect`), degrade GRACEFULLY to the + * handle AS-IS rather than throwing. In production that path is never taken; + * it only applies to bare test doubles, where it lets the read still execute + * against the stub. + */ + protected withRunner(handle: T, runner: JsonRpcProvider | Wallet): T { + return typeof (handle as { connect?: unknown })?.connect === 'function' + ? (handle as unknown as { connect(r: unknown): T }).connect(runner) + : handle; + } + + protected contractReadWithFailover( + label: string, + contract: Contract, + fn: (c: Contract) => Promise, + opts?: { attemptTimeoutMs?: number; multiAttemptTimeoutMs?: number }, + ): Promise { + return this.readWithFailover(label, (p) => fn(this.withRunner(contract, p)), opts); + } + protected async waitForReceiptWithFailover( txHash: string, label: string, @@ -852,13 +1008,33 @@ export class EVMChainAdapterBase { tokenAmount: bigint, reapproveLabel: string, ): Promise<{ signedTx: string; txHash: string }> { + // OBS-1 + G-OBS1b (LOCKED layering): the V10 populate+sign reads + // (eth_estimateGas, eth_getTransactionCount nonce, fee data) fail over + // endpoint-to-endpoint via the SHARED `populateAndSignAcrossProviders` loop + // (so a 429ing primary can't fail-fast the publish), while the #888 + // stale-allowance recovery stays a strict ONE-SHOT: + // - OUTER (this loop) owns the SINGLE `forcedReapprove` latch + the ONE + // `ensureV10ApproveTrac(force=true)` call. + // - INNER = `populateAndSignAcrossProviders`, iterating the bare providers. + // A `TooLowAllowance` revert is a CALL_EXCEPTION (non-retryable), so the inner + // loop does NOT fail over on it — it propagates straight up HERE. Because the + // latch lives at the OUTER scope (NEVER reset per endpoint), a stale-allowance + // read fires AT MOST ONE forced approve per publish no matter how many + // endpoints the inner loop tried — immediate failover introduces ZERO new + // on-chain txs. Only the ONE returned `{signedTx,txHash}` is broadcast (INV-1); + // the whole thing runs inside the per-wallet `KeyedSerializer` from + // `dispatchSerializedV10Write`, preserving nonce monotonicity (#953) and + // staying strictly pre-broadcast / pre-WAL. let forcedReapprove = false; for (;;) { try { - const populated = await (kaContract as any)[method].populateTransaction( - methodParams, + return await this.populateAndSignAcrossProviders( + kaContract, + method, + [methodParams], + signer, + `V10 ${method}`, ); - return await this.signPopulatedTransaction(signer, populated); } catch (err) { enrichEvmError(err); if (!forcedReapprove && isTooLowAllowanceError(err)) { @@ -875,9 +1051,9 @@ export class EVMChainAdapterBase { reapproveLabel, true, ); - continue; + continue; // re-run the WHOLE inner per-provider populate loop, allowance now in place } - throw err; + throw err; // any other error, or a SECOND TooLowAllowance, propagates } } } @@ -887,7 +1063,34 @@ export class EVMChainAdapterBase { txHash: string, label: string, ): Promise { - await this.broadcastSignedTransactionWithFailover(signedTx, txHash, label); + // S2 bounded set-retry, scoped to the BROADCAST phase ONLY. After a full + // per-endpoint broadcast pass exhausts with a retryable error (e.g. a brief + // window where every endpoint 429s), re-broadcast the SAME + // already-signed/already-WAL-checkpointed tx up to RPC_ENDPOINT_SET_RETRIES + // extra full passes with a short backoff, then surface the error. + // + // tx-safety: this seam is SIGNER-FREE — it only has `{signedTx,txHash}`, so + // re-signing (a new nonce / a second distinct tx) is STRUCTURALLY impossible + // (INV-2). Re-broadcasting the byte-identical signed tx is idempotent + // (`isKnownTransactionError` → treated as accepted), and the WAL `onBroadcast` + // already fired exactly once UPSTREAM in `dispatchSerializedV10Write` before + // this method was entered (INV-3/INV-6). The whole thing runs inside the + // per-wallet KeyedSerializer lock for the V10 path, so the lock is held + // across the retries (no concurrent same-wallet send slips in). We do NOT + // re-broadcast on a slow-to-mine receipt — `waitForReceiptWithFailover` owns + // its own per-poll failover + 180s deadline — so lock-hold stays bounded. + for (let pass = 0; ; pass += 1) { + try { + await this.broadcastSignedTransactionWithFailover(signedTx, txHash, label); + break; + } catch (err) { + if (isRetryableRpcError(err) && pass < RPC_ENDPOINT_SET_RETRIES) { + await sleep(RPC_ENDPOINT_SET_RETRY_BACKOFF_MS); + continue; + } + throw err; + } + } return this.waitForReceiptWithFailover(txHash, label); } @@ -946,31 +1149,39 @@ export class EVMChainAdapterBase { return this.sendSignedTransactionAndWait(signedTx, txHash, label); } - protected async sendContractTransaction( + /** + * Per-endpoint populate+sign loop SHARED by `sendContractTransaction` and the + * V10 publish/update path (`populateAndSignV10WithAllowanceRecovery`). + * Iterates `this.providers[i]`, connects the signer + contract to each, + * populates (gas/nonce/chainId reads, with optional OOG-buffer gas estimation) + * + signs, and returns the FIRST successful `{signedTx,txHash}`. Advances ONLY + * on `isRetryableRpcError`; a NON-retryable error (a decoded revert — e.g. + * `TooLowAllowance`) propagates AT ONCE so the caller can react (the V10 + * allowance-recovery loop depends on TooLowAllowance surfacing HERE, never + * being failed over). Exhaustion across every endpoint → typed + * `RPC_ENDPOINTS_EXHAUSTED` (single provider keeps the original message + * byte-identical; multiple providers get the host-only aggregate). + * + * STRICTLY pre-broadcast: it signs exactly ONCE on the winning provider and + * does NOT broadcast or fire the WAL — the caller broadcasts the single + * returned signed tx via `sendSignedTransactionAndWait`. This seam is what + * keeps the #684/#241 WAL split intact (onBroadcast sits between sign and + * broadcast), so the V10 path reuses THIS helper instead of routing through + * `sendContractTransaction` (which broadcasts internally). + */ + protected async populateAndSignAcrossProviders( contract: Contract, method: string, args: readonly unknown[], signer: Wallet, label: string, - // Optional gas headroom for methods whose on-chain gas cost depends on - // per-block randomness. ethers fills `gasLimit` from a single - // `eth_estimateGas` with NO margin, but that estimate runs against the - // CURRENT block while the tx is mined in a LATER block with different - // `prevrandao`/`blockhash`/`timestamp`. If the mined block's entropy - // drives a more expensive code path than the estimate's, the tx runs - // out of gas and reverts with empty (`0x`) data. `RandomSampling.createChallenge` - // is exactly this case (weighted CG draw + historical blockhash access): - // observed estimate-vs-execution spread is small here but unbounded in - // production with many CGs/KCs. When set, we estimate once and inflate - // the limit by `gasLimitBufferBps` basis points so the drift can't OOG. opts?: { gasLimitBufferBps?: number }, - ): Promise { + ): Promise<{ signedTx: string; txHash: string }> { let lastRetryable: unknown; for (let i = 0; i < this.providers.length; i += 1) { - const rpcSigner = signer.connect(this.providers[i]); - let prepared: { signedTx: string; txHash: string } | undefined; + const rpcSigner = this.withRunner(signer, this.providers[i]); try { - const connected = contract.connect(rpcSigner) as any; + const connected = this.withRunner(contract, rpcSigner) as any; const populated = await withTimeout( connected[method].populateTransaction(...args) as Promise, RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, @@ -986,20 +1197,15 @@ export class EVMChainAdapterBase { populated.gasLimit = (est * BigInt(10_000 + opts.gasLimitBufferBps)) / 10_000n; } catch (estErr) { // A RETRYABLE estimate failure must not silently drop the OOG - // headroom: if another RPC is left, re-throw so the outer loop - // fails over to it (it may estimate fine and apply the buffer). - // Swallowing here would sign against the failing provider with no - // headroom and could reintroduce the exact OOG this guards - // against (Codex review). Only on the LAST provider — or for a - // non-retryable estimate error, where failover can't help — do we - // fall back to ethers' own unbuffered estimate during signing. + // headroom: if another RPC is left, re-throw so the loop fails over + // to it (it may estimate fine and apply the buffer). Only on the LAST + // provider — or for a non-retryable estimate error, where failover + // can't help — fall back to ethers' own unbuffered estimate during + // signing, leaving a breadcrumb so a recurring OOG isn't a mystery. const hasMoreProviders = i < this.providers.length - 1; if (isRetryableRpcError(estErr) && hasMoreProviders) { throw estErr; } - // Best-effort fallback, but DON'T swallow silently: leave a - // breadcrumb that the headroom was never applied so a recurring - // intermittent OOG isn't a mystery. console.warn( `[chain] ${label}: buffered gas estimation failed; falling back to ` + `ethers' unbuffered estimate (no OOG headroom applied): ` + @@ -1007,7 +1213,7 @@ export class EVMChainAdapterBase { ); } } - prepared = await withTimeout( + return await withTimeout( this.signPopulatedTransaction(rpcSigner, populated), RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, `${label} transaction signing via RPC #${i + 1}`, @@ -1018,31 +1224,19 @@ export class EVMChainAdapterBase { if (i < this.providers.length - 1) { noteRpcFailover(`${label} preparation`, this.rpcUrls[i], err, this.rpcUrls[i + 1]); } - continue; } - if (!prepared) continue; - return this.sendSignedTransactionAndWait(prepared.signedTx, prepared.txHash, label); } if (lastRetryable) noteRpcExhaustion(`${label} preparation`, this.rpcUrls); - // A retryable error from the only configured RPC is still an "endpoints - // exhausted" condition: downstream classifiers (e.g. - // `/api/context-graph/register` → `classifyRegisterContextGraphError`) - // key the transient-outage 503 off the `RPC_ENDPOINTS_EXHAUSTED` code, so - // the code MUST be present even for a single-provider adapter (Codex - // PR #901). What we must NOT do for one provider is REWRITE the - // `.message` into the multi-endpoint "failed on all endpoints (url1, - // url2): ..." aggregate — there is no second endpoint, so the original - // message (e.g. a plain `connect ECONNREFUSED`) reads cleaner and any - // message-inspecting caller keeps seeing it verbatim. So: single provider - // → carry the code on a new error but keep the message byte-identical; - // multiple providers → the aggregated "all endpoints" message is - // meaningful and is asserted by evm-adapter.unit.test.ts. + // Single provider → carry the code on a new error but keep the message + // byte-identical (no second endpoint, so the raw message reads cleaner and + // any message-inspecting caller keeps seeing it). Multiple providers → the + // HOST-ONLY aggregate (never full URLs — a configured rpcUrl may carry an API + // key and this message reaches HTTP clients via response paths that echo + // err.message, e.g. the create+publish 207 tail). Asserted by + // evm-adapter.unit.test.ts. const message = this.providers.length <= 1 ? errorMessage(lastRetryable) : `${label} transaction preparation failed on all configured RPC endpoints ` + - // HOST-ONLY: a configured rpcUrl may carry an API key and this message - // is surfaced to HTTP clients via response paths that echo err.message - // (e.g. the create+publish 207 tail), so never embed full RPC URLs. `(${this.rpcUrls.map(rpcHost).join(', ')}): ${errorMessage(lastRetryable)}`; throw new ChainRpcTransportError('RPC_ENDPOINTS_EXHAUSTED', message, { cause: lastRetryable, @@ -1050,6 +1244,34 @@ export class EVMChainAdapterBase { }); } + protected async sendContractTransaction( + contract: Contract, + method: string, + args: readonly unknown[], + signer: Wallet, + label: string, + // Optional gas headroom for methods whose on-chain gas cost depends on + // per-block randomness. ethers fills `gasLimit` from a single + // `eth_estimateGas` with NO margin, but that estimate runs against the + // CURRENT block while the tx is mined in a LATER block with different + // `prevrandao`/`blockhash`/`timestamp`. If the mined block's entropy + // drives a more expensive code path than the estimate's, the tx runs + // out of gas and reverts with empty (`0x`) data. `RandomSampling.createChallenge` + // is exactly this case (weighted CG draw + historical blockhash access): + // observed estimate-vs-execution spread is small here but unbounded in + // production with many CGs/KCs. When set, we estimate once and inflate + // the limit by `gasLimitBufferBps` basis points so the drift can't OOG. + opts?: { gasLimitBufferBps?: number }, + ): Promise { + // Populate+sign with per-endpoint failover (shared with the V10 path), then + // broadcast+confirm the single signed tx. Split so `onBroadcast` (the WAL + // checkpoint) can sit between sign and broadcast for the V10 callers. + const { signedTx, txHash } = await this.populateAndSignAcrossProviders( + contract, method, args, signer, label, opts, + ); + return this.sendSignedTransactionAndWait(signedTx, txHash, label); + } + /** * V10 approval gate shared by `publishV10` and `updateV10`. * @@ -1081,9 +1303,10 @@ export class EVMChainAdapterBase { ): Promise { if (!this.contracts.token) return; const tokenWithSigner = this.contracts.token.connect(signer) as Contract; - const currentAllowance: bigint = await tokenWithSigner.allowance( - signer.address, - kav10Address, + const currentAllowance: bigint = await this.contractReadWithFailover( + 'token.allowance', + tokenWithSigner, + (c) => c.allowance(signer.address, kav10Address), ); const { needsApprove, targetAllowance } = computeApprovalAction( this.approvalPolicy, @@ -1168,10 +1391,11 @@ export class EVMChainAdapterBase { // recovery poll indefinitely. `withTimeout` rejects after // `RPC_READ_STALL_TIMEOUT_MS`, which the catch below treats as a // not-yet-visible read and backs off (same as a thrown read error). - current = (await withTimeout( - token.allowance(owner, spender), - RPC_READ_STALL_TIMEOUT_MS, + current = (await this.contractReadWithFailover( 'allowance visibility poll', + token, + (c) => c.allowance(owner, spender), + { attemptTimeoutMs: RPC_READ_STALL_TIMEOUT_MS }, )) as bigint; } catch { // Transient read failure / stall timeout — treat as not-yet-visible @@ -1223,7 +1447,10 @@ export class EVMChainAdapterBase { } else { authorized = []; for (const signer of ordered) { - if (await this.contracts.contextGraphs.isAuthorizedPublisher(contextGraphId, signer.address)) { + if (await this.contractReadWithFailover( + 'contextGraphs.isAuthorizedPublisher', this.contracts.contextGraphs, + (c) => c.isAuthorizedPublisher(contextGraphId, signer.address), + )) { authorized.push(signer); } } @@ -1371,10 +1598,12 @@ export class EVMChainAdapterBase { private async readNativeBalance(address: string): Promise { try { - return await withTimeout( - this.provider.getBalance(address), - RPC_READ_STALL_TIMEOUT_MS, + return await this.readWithFailover( 'publish wallet native balance', + (p) => p.getBalance(address), + // Fail-open funding read: keep a HARD per-attempt cap even on the + // last / single provider so a hung RPC can't stall wallet selection. + { attemptTimeoutMs: RPC_READ_STALL_TIMEOUT_MS }, ); } catch { return null; @@ -1385,10 +1614,9 @@ export class EVMChainAdapterBase { const token = this.contracts.token; if (!token) return null; // no token contract: TRAC does not gate selection try { - return (await withTimeout( - token.balanceOf(address), - RPC_READ_STALL_TIMEOUT_MS, - 'publish wallet TRAC balance', + return (await this.contractReadWithFailover( + 'token.balanceOf', token, (c) => c.balanceOf(address), + { attemptTimeoutMs: RPC_READ_STALL_TIMEOUT_MS }, )) as bigint; } catch { return null; @@ -1467,7 +1695,10 @@ export class EVMChainAdapterBase { // No ContextGraphs surface ⇒ every operational wallet is a candidate // (mirrors nextAuthorizedSigner); otherwise only authorized wallets are // viable reroutes. - if (contextGraphs && !(await contextGraphs.isAuthorizedPublisher(contextGraphId, s.address))) return false; + if (contextGraphs && !(await this.contractReadWithFailover( + 'contextGraphs.isAuthorizedPublisher', contextGraphs, + (c) => c.isAuthorizedPublisher(contextGraphId, s.address), + ))) return false; return this.isWalletPublishFundable(s.address, await this.getWalletFunding(s.address), requiredTracWei); }), ); @@ -1534,10 +1765,9 @@ export class EVMChainAdapterBase { identityId: bigint, address: string, ): Promise { - return identityStorage.keyHasPurpose( - identityId, - this.walletKeyHash(address), - ADMIN_KEY_PURPOSE, + return this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(identityId, this.walletKeyHash(address), ADMIN_KEY_PURPOSE), ); } @@ -1546,10 +1776,9 @@ export class EVMChainAdapterBase { identityId: bigint, address: string, ): Promise { - return identityStorage.keyHasPurpose( - identityId, - this.walletKeyHash(address), - OPERATIONAL_KEY_PURPOSE, + return this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(identityId, this.walletKeyHash(address), OPERATIONAL_KEY_PURPOSE), ); } @@ -1562,7 +1791,11 @@ export class EVMChainAdapterBase { protected async resolveContract(name: string, abiName?: string): Promise { let address: string; try { - address = await this.contracts.hub.getContractAddress(name); + address = await this.contractReadWithFailover( + `Hub.getContractAddress(${name})`, + this.contracts.hub, + (c) => c.getContractAddress(name), + ); } catch (err) { if (this.isContractMissingRevert(err)) { throw new Error(`Contract "${name}" not found in Hub at ${this.hubAddress}`, { cause: err }); @@ -1578,7 +1811,11 @@ export class EVMChainAdapterBase { protected async resolveAssetStorage(name: string, abiName?: string): Promise { let address: string; try { - address = await this.contracts.hub.getAssetStorageAddress(name); + address = await this.contractReadWithFailover( + `Hub.getAssetStorageAddress(${name})`, + this.contracts.hub, + (c) => c.getAssetStorageAddress(name), + ); } catch (err) { if (this.isContractMissingRevert(err)) { throw new Error(`Asset storage "${name}" not found in Hub at ${this.hubAddress}`, { cause: err }); @@ -1721,7 +1958,11 @@ export class EVMChainAdapterBase { await this.startHubRotationListener(); - const tokenAddress: string = this.tokenAddress ?? await this.contracts.hub.getContractAddress('Token'); + const tokenAddress: string = this.tokenAddress ?? await this.contractReadWithFailover( + 'Hub.getContractAddress(Token)', + this.contracts.hub, + (c) => c.getContractAddress('Token'), + ); if (tokenAddress !== ethers.ZeroAddress) { this.contracts.token = new Contract( tokenAddress, @@ -1747,7 +1988,7 @@ export class EVMChainAdapterBase { } protected async getBlockTimestamp(blockNumber: number): Promise { - const block = await this.provider.getBlock(blockNumber); + const block = await this.readWithFailover('getBlock', (p) => p.getBlock(blockNumber)); return block?.timestamp ?? 0; } @@ -1758,7 +1999,9 @@ export class EVMChainAdapterBase { async getIdentityId(): Promise { await this.init(); const identityStorage = await this.getIdentityStorage(); - const id: bigint = await identityStorage.getIdentityId(this.signer.address); + const id: bigint = await this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(this.signer.address), + ); return id; } @@ -1821,28 +2064,51 @@ export class EVMChainAdapterBase { let bareRevert: unknown; const getMax = (storage as any).getMaxKaNumberForAuthor; if (typeof getMax?.staticCall === 'function') { - try { - const max = await getMax.staticCall(normalized); - return BigInt(max); - } catch (err) { - // Ordering invariant: isKaHighWaterViewUnavailable runs first and calls - // enrichEvmError(err), which only rewrites a message carrying decodable - // `data=0x…` + the literal "unknown custom error" — neither present on a - // bare "missing revert data" (data=null) error — so it leaves the shape - // isKaHighWaterBareRevert keys on untouched. Preserve that if - // enrichEvmError's rewrite rules change. - if (isKaHighWaterViewUnavailable(err)) { - // Unambiguous absent-view shape → fall through to the bounded scan. - } else if (isKaHighWaterBareRevert(err)) { - bareRevert = err; // confirm against the deployed bytecode below - } else { - throw err; // transient RPC / decoded revert → never crawl + // This view read is NOT routed through `readWithFailover` because its + // error shapes are semantically overloaded: a `BAD_DATA`/`CALL_EXCEPTION` + // here means "the deployed contract lacks the selector (pre-10.0.4)", + // which `isRetryableRpcError` would mis-classify as a transient (BAD_DATA + // is generically retryable) and fail over on — turning a deterministic + // absent-view answer into a spurious `RPC_ENDPOINTS_EXHAUSTED`. So we run + // an explicit per-endpoint loop that distinguishes the two: absent-view + // shapes are DETERMINISTIC across endpoints (don't fail over — hand to the + // pre-10.0.4 classification below), and ONLY a genuine transient RPC error + // advances to the next endpoint. + for (let i = 0; i < this.providers.length; i += 1) { + const isLast = i === this.providers.length - 1; + try { + const connectedGetMax = + (this.withRunner(storage, this.providers[i]) as Contract).getMaxKaNumberForAuthor; + const max = await connectedGetMax.staticCall(normalized); + return BigInt(max); + } catch (err) { + // Ordering invariant: isKaHighWaterViewUnavailable runs first and calls + // enrichEvmError(err), which only rewrites a message carrying decodable + // `data=0x…` + the literal "unknown custom error" — neither present on a + // bare "missing revert data" (data=null) error — so it leaves the shape + // isKaHighWaterBareRevert keys on untouched. Preserve that if + // enrichEvmError's rewrite rules change. + if (isKaHighWaterViewUnavailable(err)) { + break; // unambiguous absent-view shape → fall through to the bounded scan + } + if (isKaHighWaterBareRevert(err)) { + bareRevert = err; // confirm against the deployed bytecode below + break; + } + if (isRetryableRpcError(err) && !isLast) { + noteRpcFailover('getMaxKaNumberForAuthor staticCall', this.rpcUrls[i], err, this.rpcUrls[i + 1]); + continue; // genuine transient → fail over to the next endpoint + } + throw err; // decoded revert, or a transient on the last endpoint → never crawl } } } const storageAddress = await contractAddress(storage); - const code = await this.provider.getCode(storageAddress); + const code = await this.readWithFailover( + 'DKGKnowledgeAssets getCode', + (p) => p.getCode(storageAddress), + ); if (!code || code === '0x') { throw new Error(`DKGKnowledgeAssets resolved to ${storageAddress}, but no contract code is deployed there.`); } @@ -2246,7 +2512,7 @@ export class EVMChainAdapterBase { if (EVMChainAdapterBase.preflightCacheFresh(this.cachedChainId, now)) { return this.cachedChainId!.value; } - const network = await this.provider.getNetwork(); + const network = await this.readWithFailover('getNetwork (chainId)', (p) => p.getNetwork()); this.cachedChainId = { value: network.chainId, cachedAt: now }; return network.chainId; } @@ -2263,7 +2529,7 @@ export class EVMChainAdapterBase { */ async hasContractCode(address: string): Promise { try { - const code = await this.provider.getCode(address); + const code = await this.readWithFailover('hasContractCode getCode', (p) => p.getCode(address)); return code !== undefined && code !== null && code !== '0x' && code.length > 2; } catch { return false; @@ -2305,9 +2571,9 @@ export class EVMChainAdapterBase { ); } if (this.contracts.contextGraphs) { - const authorized = await this.contracts.contextGraphs.isAuthorizedPublisher( - params.contextGraphId, - selected.address, + const authorized = await this.contractReadWithFailover( + 'contextGraphs.isAuthorizedPublisher', this.contracts.contextGraphs, + (c) => c.isAuthorizedPublisher(params.contextGraphId, selected.address), ); if (!authorized) { throw new Error( @@ -2590,14 +2856,14 @@ export class EVMChainAdapterBase { } async getBlockNumber(): Promise { - return this.provider.getBlockNumber(); + return this.readWithFailover('getBlockNumber', (p) => p.getBlockNumber()); } getProvider(): JsonRpcProvider { return this.primaryProvider; } - getReadProvider(): JsonRpcProvider | FallbackProvider { + getReadProvider(): JsonRpcProvider { return this.provider; } diff --git a/packages/chain/src/evm-adapter-constants.ts b/packages/chain/src/evm-adapter-constants.ts index aadf3e7b77..4cd9ecf8bf 100644 --- a/packages/chain/src/evm-adapter-constants.ts +++ b/packages/chain/src/evm-adapter-constants.ts @@ -44,6 +44,21 @@ export const MAX_PROBE_AGE_MS = 30_000; export const RPC_READ_STALL_TIMEOUT_MS = 4_000; +/** + * Per-attempt deadline for WIDE `eth_getLogs` reads (the `evm-adapter-events.ts` + * `queryFilter` scans, which run over `[fromBlock ?? 0, toBlock]` — up to the + * event poller's 9,000-block window, and the full chain on a cold-start + * backfill). These legitimately take tens of seconds on a busy/slow chain, so + * the 4s `RPC_READ_STALL_TIMEOUT_MS` point-read cap would abort a healthy scan + * and fail it over across every endpoint → spurious `RPC_ENDPOINTS_EXHAUSTED` + * (which, in the poller, escapes before the cursor advances → a permanent + * stall). 30s still hard-bounds a genuinely hung backend on a multi-RPC node; + * it is passed as `multiAttemptTimeoutMs`, so single-RPC stays uncapped (#894). + * Larger than `KA_HIGH_WATER_PAGE_TIMEOUT_MS` (15s) because that bounds smaller + * 2,000-block pages, whereas this covers the wider 9,000-block poller window. + */ +export const RPC_LOG_SCAN_TIMEOUT_MS = 30_000; + export const RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS = 10_000; export const RPC_BROADCAST_ATTEMPT_TIMEOUT_MS = 10_000; @@ -54,6 +69,23 @@ export const RPC_RECEIPT_POLL_INTERVAL_MS = 2_000; export const RPC_RECEIPT_TIMEOUT_MS = 180_000; +/** + * Bounded "retry the whole endpoint set" for the BROADCAST phase (S2). After a + * full per-endpoint broadcast pass exhausts with a retryable error (e.g. a brief + * window where ALL endpoints 429), `sendSignedTransactionAndWait` re-broadcasts + * the SAME already-signed/already-WAL-checkpointed tx up to this many extra full + * passes, with `RPC_ENDPOINT_SET_RETRY_BACKOFF_MS` between passes, before + * surfacing `RPC_ENDPOINTS_EXHAUSTED`. Default `1` (one extra pass) keeps the + * "ride out a brief all-down blip" property without per-endpoint latency; `0` + * fails fast on the first full-pass exhaustion. Re-broadcasting the identical + * signed tx is idempotent (`isKnownTransactionError`), so this cannot double- + * submit or change the nonce — it is scoped to broadcast ONLY (receipt waiting + * owns its own deadline), so total lock-hold stays bounded. + */ +export const RPC_ENDPOINT_SET_RETRIES = 1; + +export const RPC_ENDPOINT_SET_RETRY_BACKOFF_MS = 500; + export const ADMIN_KEY_PURPOSE = 1; export const OPERATIONAL_KEY_PURPOSE = 2; diff --git a/packages/chain/src/evm-adapter-context-graph.ts b/packages/chain/src/evm-adapter-context-graph.ts index 4c2e3b1a34..f2e20a3493 100644 --- a/packages/chain/src/evm-adapter-context-graph.ts +++ b/packages/chain/src/evm-adapter-context-graph.ts @@ -238,7 +238,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async isContextGraphActiveOnChain(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - return Boolean(await cgs.isContextGraphActive(contextGraphId)); + return Boolean(await this.contractReadWithFailover( + 'cgStorage.isContextGraphActive', cgs, (c) => c.isContextGraphActive(contextGraphId), + )); } async createOnChainContextGraph(params: CreateOnChainContextGraphParams): Promise { @@ -470,7 +472,7 @@ export class ContextGraphMethods extends EVMChainAdapterBase { // Unreachable below (kept for type-completeness until the mirror is removed); // the unsupported-mirror guard above throws before any on-chain side effect. - const v10ChainId = (await this.provider.getNetwork()).chainId; + const v10ChainId = (await this.readWithFailover('getNetwork (chainId)', (p) => p.getNetwork())).chainId; const v10KavAddress = await this.contracts.knowledgeAssetsLifecycle!.getAddress(); const authorTypedData = buildAuthorAttestationTypedData({ chainId: v10ChainId, @@ -514,21 +516,27 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async getKAContextGraphId(kaId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const cgId: bigint = await cgs.kaToContextGraph(kaId); + const cgId: bigint = await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', cgs, (c) => c.kaToContextGraph(kaId), + ); return BigInt(cgId); } async getContextGraphKCCount(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const count: bigint = await cgs.getContextGraphKaCount(contextGraphId); + const count: bigint = await this.contractReadWithFailover( + 'cgStorage.getContextGraphKaCount', cgs, (c) => c.getContextGraphKaCount(contextGraphId), + ); return BigInt(count); } async getContextGraphKCAt(contextGraphId: bigint, index: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const kaId: bigint = await cgs.getContextGraphKaAt(contextGraphId, index); + const kaId: bigint = await this.contractReadWithFailover( + 'cgStorage.getContextGraphKaAt', cgs, (c) => c.getContextGraphKaAt(contextGraphId, index), + ); return BigInt(kaId); } @@ -544,11 +552,15 @@ export class ContextGraphMethods extends EVMChainAdapterBase { await this.init(); const cgs = this.requireContextGraphStorage(); try { - const raw: bigint = BigInt(await cgs.getAccessPolicy(contextGraphId)); + const raw: bigint = BigInt(await this.contractReadWithFailover( + 'cgStorage.getAccessPolicy', cgs, (c) => c.getAccessPolicy(contextGraphId), + )); return Number(raw); } catch (primaryErr) { try { - const cg = await cgs.getContextGraph(contextGraphId); + const cg = await this.contractReadWithFailover( + 'cgStorage.getContextGraph', cgs, (c) => c.getContextGraph(contextGraphId), + ); const raw = cg?.accessPolicy ?? (Array.isArray(cg) ? cg[5] : undefined); @@ -581,7 +593,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { }> { await this.init(); const cgs = this.requireContextGraphStorage(); - const result = await cgs.getPublishPolicy(contextGraphId); + const result = await this.contractReadWithFailover( + 'cgStorage.getPublishPolicy', cgs, (c) => c.getPublishPolicy(contextGraphId), + ); // Ethers v6 returns named tuple as both array and object access; // destructure positionally to stay robust against ABI naming // changes. @@ -609,7 +623,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async getContextGraphParticipantAgents(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const raw: string[] = await cgs.getParticipantAgents(contextGraphId); + const raw: string[] = await this.contractReadWithFailover( + 'cgStorage.getParticipantAgents', cgs, (c) => c.getParticipantAgents(contextGraphId), + ); return raw.map((addr: string) => ethers.getAddress(addr)); } @@ -633,7 +649,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async getContextGraphNameHash(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const raw: string = await cgs.getNameHash(contextGraphId); + const raw: string = await this.contractReadWithFailover( + 'cgStorage.getNameHash', cgs, (c) => c.getNameHash(contextGraphId), + ); if (!raw || raw === ethers.ZeroHash) return null; return raw.toLowerCase(); } diff --git a/packages/chain/src/evm-adapter-conviction.ts b/packages/chain/src/evm-adapter-conviction.ts index c85f41e277..491cb4f083 100644 --- a/packages/chain/src/evm-adapter-conviction.ts +++ b/packages/chain/src/evm-adapter-conviction.ts @@ -39,7 +39,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction if (!this.contracts.dkgPublishingConvictionNFT) return 0n; if (!ethers.isAddress(agent)) return 0n; try { - const id: bigint = await this.contracts.dkgPublishingConvictionNFT.agentToAccountId(agent); + const id: bigint = await this.contractReadWithFailover( + 'pcaNFT.agentToAccountId', this.contracts.dkgPublishingConvictionNFT, (c) => c.agentToAccountId(agent), + ); return BigInt(id); } catch (err: any) { if (err?.code === 'CALL_EXCEPTION') return 0n; @@ -56,7 +58,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // (committedTRAC, createdAtEpoch, expiresAtEpoch, createdAtTimestamp, // expiresAtTimestamp, lockDurationEpochs, discountBps, // lastSettledWindow, fullySwept). Pull index 5. - const tuple = await this.contracts.dkgPublishingConvictionNFT.accounts(accountId); + const tuple = await this.contractReadWithFailover( + 'pcaNFT.accounts', this.contracts.dkgPublishingConvictionNFT, (c) => c.accounts(accountId), + ); const lock = tuple[5]; return Number(lock); } catch (err: any) { @@ -110,7 +114,7 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // wall clock first to mirror the contract exactly — otherwise the SDK // would coerce, then fall through to full-price direct spend. if (info.expiresAtTimestamp > 0) { - const latestBlock = await this.provider.getBlock('latest'); + const latestBlock = await this.readWithFailover('conviction getBlock', (p) => p.getBlock('latest')); const nowTs = latestBlock ? Number(latestBlock.timestamp) : Math.floor(Date.now() / 1000); if (nowTs >= info.expiresAtTimestamp) return false; } @@ -124,10 +128,12 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction if (!this.contracts.chronos) { this.contracts.chronos = await this.resolveContract('Chronos'); } - const currentEpoch: bigint = BigInt(await this.contracts.chronos.getCurrentEpoch()); - const remaining: bigint = await this.contracts.dkgPublishingConvictionNFT.getRemainingAllowance( - accountId, - currentEpoch, + const currentEpoch: bigint = BigInt(await this.contractReadWithFailover( + 'chronos.getCurrentEpoch', this.contracts.chronos, (c) => c.getCurrentEpoch(), + )); + const remaining: bigint = await this.contractReadWithFailover( + 'pcaNFT.getRemainingAllowance', this.contracts.dkgPublishingConvictionNFT, + (c) => c.getRemainingAllowance(accountId, currentEpoch), ); return BigInt(remaining) >= discountedCost; } catch (err: any) { @@ -139,7 +145,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction async getPublishingConvictionAccountOwner(accountId: bigint): Promise { await this.init(); const nft = await this.resolveContract('DKGPublishingConvictionNFT'); - const owner = await nft.ownerOf(accountId); + const owner = await this.contractReadWithFailover( + 'pcaNFT.ownerOf', nft, (c) => c.ownerOf(accountId), + ); return ethers.getAddress(owner); } @@ -203,7 +211,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // createAccount() does transferFrom(msg.sender → stakingStorage, // committedTRAC) — the signer must allow the NFT to pull the TRAC. if (this.contracts.token) { - const allowance: bigint = await this.contracts.token.allowance(this.signer.address, nftAddress); + const allowance: bigint = await this.contractReadWithFailover( + 'token.allowance', this.contracts.token, (c) => c.allowance(this.signer.address, nftAddress), + ); if (allowance < committedTRAC) { await this.sendContractTransaction( this.contracts.token, @@ -259,7 +269,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // for a genuine account-missing revert so the route can disambiguate. if (!this.contracts.dkgPublishingConvictionNFT) throw new PcaUnavailableError(); try { - const t = await this.contracts.dkgPublishingConvictionNFT.getAccountInfo(accountId); + const t = await this.contractReadWithFailover( + 'pcaNFT.getAccountInfo', this.contracts.dkgPublishingConvictionNFT, (c) => c.getAccountInfo(accountId), + ); return { owner: ethers.getAddress(t[0]), committedTRAC: BigInt(t[1]), @@ -286,7 +298,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction const nft = this.requireConvictionNFT(); const nftAddress = await nft.getAddress(); if (this.contracts.token) { - const allowance: bigint = await this.contracts.token.allowance(this.signer.address, nftAddress); + const allowance: bigint = await this.contractReadWithFailover( + 'token.allowance', this.contracts.token, (c) => c.allowance(this.signer.address, nftAddress), + ); if (allowance < amount) { await this.sendContractTransaction( this.contracts.token, @@ -358,7 +372,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction if (!this.contracts.dkgPublishingConvictionNFT) return false; if (!ethers.isAddress(agent)) return false; try { - return Boolean(await this.contracts.dkgPublishingConvictionNFT.isAgent(accountId, agent)); + return Boolean(await this.contractReadWithFailover( + 'pcaNFT.isAgent', this.contracts.dkgPublishingConvictionNFT, (c) => c.isAgent(accountId, agent), + )); } catch (err: any) { if (err?.code === 'CALL_EXCEPTION') return false; throw err; diff --git a/packages/chain/src/evm-adapter-events.ts b/packages/chain/src/evm-adapter-events.ts index 3691753733..e8139caaec 100644 --- a/packages/chain/src/evm-adapter-events.ts +++ b/packages/chain/src/evm-adapter-events.ts @@ -12,6 +12,14 @@ import { EVMChainAdapterBase } from './evm-adapter-base.js'; import { ethers } from 'ethers'; import type { EventFilter, ChainEvent } from './chain-adapter.js'; +import { RPC_LOG_SCAN_TIMEOUT_MS } from './evm-adapter-constants.js'; + +// Wide `eth_getLogs` reads (over `[fromBlock ?? 0, toBlock]`, up to the poller's +// 9,000-block window / a cold-start full backfill) legitimately exceed the 4s +// point-read cap, so they fail over only after RPC_LOG_SCAN_TIMEOUT_MS on a +// multi-RPC node (single-RPC stays uncapped, #894). Without this a slow-but- +// healthy scan would abort, exhaust every endpoint, and stall the event poller. +const LOG_SCAN_OPTS = { multiAttemptTimeoutMs: RPC_LOG_SCAN_TIMEOUT_MS } as const; export class EventsMethods extends EVMChainAdapterBase { // ===================================================================== @@ -31,7 +39,11 @@ export class EventsMethods extends EVMChainAdapterBase { continue; } const eventFilter = storage.filters.KnowledgeBatchCreated(); - const logs = await storage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.contractReadWithFailover( + 'kasV9.queryFilter(KnowledgeBatchCreated)', storage, + (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), + LOG_SCAN_OPTS, + ); for (const log of logs) { const parsed = storage.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -58,7 +70,11 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.ContextGraphExpanded(); - const logs = await cgStorage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.contractReadWithFailover( + 'cgStorage.queryFilter(ContextGraphExpanded)', cgStorage, + (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), + LOG_SCAN_OPTS, + ); for (const log of logs) { const parsed = cgStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -86,7 +102,11 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.KnowledgeAssetRegisteredToContextGraph(); - const logs = await cgStorage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.contractReadWithFailover( + 'cgStorage.queryFilter(KnowledgeAssetRegisteredToContextGraph)', cgStorage, + (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), + LOG_SCAN_OPTS, + ); for (const log of logs) { const parsed = cgStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -133,7 +153,10 @@ export class EventsMethods extends EVMChainAdapterBase { : 'KnowledgeAssetCreated'; const kcFilter = kaStorage.filters[createEventName](); - const kcLogs = await kaStorage.queryFilter(kcFilter, fromB, toB); + const kcLogs = await this.contractReadWithFailover( + 'kas.queryFilter(KnowledgeAssetCreated)', kaStorage, (c) => c.queryFilter(kcFilter, fromB, toB), + LOG_SCAN_OPTS, + ); // Legacy mint range. `KnowledgeAssetsMinted` is still declared on the // greenfield ABI but never emitted by `createKnowledgeAsset`, so @@ -142,7 +165,10 @@ export class EventsMethods extends EVMChainAdapterBase { const mintByTx = new Map(); if (hasEvent('KnowledgeAssetsMinted')) { const mintFilter = kaStorage.filters.KnowledgeAssetsMinted(); - const mintLogs = await kaStorage.queryFilter(mintFilter, fromB, toB); + const mintLogs = await this.contractReadWithFailover( + 'kas.queryFilter(KnowledgeAssetsMinted)', kaStorage, (c) => c.queryFilter(mintFilter, fromB, toB), + LOG_SCAN_OPTS, + ); for (const ml of mintLogs) { const mp = kaStorage.interface.parseLog({ topics: [...ml.topics], data: ml.data }); if (mp) { @@ -164,7 +190,10 @@ export class EventsMethods extends EVMChainAdapterBase { if (isGreenfield) { try { const transferFilter = kaStorage.filters.Transfer(ethers.ZeroAddress); - const transferLogs = await kaStorage.queryFilter(transferFilter, fromB, toB); + const transferLogs = await this.contractReadWithFailover( + 'kas.queryFilter(Transfer)', kaStorage, (c) => c.queryFilter(transferFilter, fromB, toB), + LOG_SCAN_OPTS, + ); for (const tl of transferLogs) { const tp = kaStorage.interface.parseLog({ topics: [...tl.topics], data: tl.data }); if (tp && tp.args.tokenId != null) { @@ -223,7 +252,11 @@ export class EventsMethods extends EVMChainAdapterBase { const registry = this.contracts.contextGraphNameRegistry; if (registry) { const eventFilter = registry.filters.NameClaimed(); - const logs = await registry.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.contractReadWithFailover( + 'cgNameRegistry.queryFilter(NameClaimed)', registry, + (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), + LOG_SCAN_OPTS, + ); for (const log of logs) { const parsed = registry.interface.parseLog({ topics: [...log.topics], data: log.data }); if (parsed) { @@ -246,7 +279,11 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.ContextGraphCreated(); - const logs = await cgStorage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.contractReadWithFailover( + 'cgStorage.queryFilter(ContextGraphCreated)', cgStorage, + (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), + LOG_SCAN_OPTS, + ); for (const log of logs) { const parsed = cgStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); if (parsed) { @@ -279,7 +316,11 @@ export class EventsMethods extends EVMChainAdapterBase { const profileStorage = this.contracts.profileStorage; if (profileStorage) { const eventFilter = profileStorage.filters.RelayCapabilityUpdated(); - const logs = await profileStorage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.contractReadWithFailover( + 'profileStorage.queryFilter(RelayCapabilityUpdated)', profileStorage, + (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), + LOG_SCAN_OPTS, + ); for (const log of logs) { const parsed = profileStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); if (parsed) { diff --git a/packages/chain/src/evm-adapter-identity.ts b/packages/chain/src/evm-adapter-identity.ts index 8cda8d792f..6ce81d63d4 100644 --- a/packages/chain/src/evm-adapter-identity.ts +++ b/packages/chain/src/evm-adapter-identity.ts @@ -45,7 +45,9 @@ export class IdentityMethods extends EVMChainAdapterBase { } const onChainIds = await Promise.all( - uniqueAddresses.map((addr) => identityStorage.getIdentityId(addr).then(BigInt)), + uniqueAddresses.map((addr) => this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(addr), + ).then(BigInt)), ); const missing: string[] = []; for (let i = 0; i < uniqueAddresses.length; i++) { @@ -103,7 +105,9 @@ export class IdentityMethods extends EVMChainAdapterBase { if (!this.contracts.profileStorage) { throw new Error('getRelayCapable: ProfileStorage not deployed on this Hub.'); } - return Boolean(await this.contracts.profileStorage.getRelayCapable(identityId)); + return Boolean(await this.contractReadWithFailover( + 'profileStorage.getRelayCapable', this.contracts.profileStorage, (c) => c.getRelayCapable(identityId), + )); } async setRelayCapable(relayCapable: boolean): Promise { @@ -144,7 +148,9 @@ export class IdentityMethods extends EVMChainAdapterBase { if (cached !== undefined) return cached; await this.init(); const identityStorage = await this.resolveContract('IdentityStorage'); - const id: bigint = await identityStorage.getIdentityId(checksum); + const id: bigint = await this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(checksum), + ); if (id > 0n) { // Only memoise positive hits — a 0n result may flip to non-zero // once the operator registers, and we don't want to lock the @@ -215,7 +221,9 @@ export class IdentityMethods extends EVMChainAdapterBase { if (stakeAmount > 0n && this.contracts.token) { try { const stakingNFT = await this.resolveContract('DKGStakingConvictionNFT'); - const stakingV10Addr: string = await this.contracts.hub.getContractAddress('StakingV10'); + const stakingV10Addr: string = await this.contractReadWithFailover( + 'Hub.getContractAddress(StakingV10)', this.contracts.hub, (c) => c.getContractAddress('StakingV10'), + ); if (stakingV10Addr === ethers.ZeroAddress) { throw new Error('StakingV10 not registered in Hub — V10 staking unavailable'); } diff --git a/packages/chain/src/evm-adapter-publish.ts b/packages/chain/src/evm-adapter-publish.ts index 1186387285..56823ce1d7 100644 --- a/packages/chain/src/evm-adapter-publish.ts +++ b/packages/chain/src/evm-adapter-publish.ts @@ -61,7 +61,9 @@ export class PublishMethods extends EVMChainAdapterBase { const kaAddress = await ka.getAddress(); if (this.contracts.token && params.tokenAmount > 0n) { - const currentAllowance: bigint = await this.contracts.token.allowance(this.signer.address, kaAddress); + const currentAllowance: bigint = await this.contractReadWithFailover( + 'token.allowance', this.contracts.token, (c) => c.allowance(this.signer.address, kaAddress), + ); if (currentAllowance < params.tokenAmount) { await this.sendContractTransaction( this.contracts.token, @@ -180,12 +182,18 @@ export class PublishMethods extends EVMChainAdapterBase { let onChainPublisher: string | undefined; if (this.contracts.knowledgeAssetStorage) { try { - onChainPublisher = await this.contracts.knowledgeAssetStorage.getLatestMerkleRootPublisher(batchId); + onChainPublisher = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootPublisher', this.contracts.knowledgeAssetStorage, + (c) => c.getLatestMerkleRootPublisher(batchId), + ); } catch { /* not found in V10 storage */ } } if ((!onChainPublisher || onChainPublisher === ethers.ZeroAddress) && this.contracts.knowledgeAssetsStorage) { try { - onChainPublisher = await this.contracts.knowledgeAssetsStorage.getBatchPublisher(batchId); + onChainPublisher = await this.contractReadWithFailover( + 'kasV9.getBatchPublisher', this.contracts.knowledgeAssetsStorage, + (c) => c.getBatchPublisher(batchId), + ); } catch { /* not found in V9 storage */ } } if (!onChainPublisher || onChainPublisher.toLowerCase() !== publisherAddress.toLowerCase()) { @@ -208,7 +216,9 @@ export class PublishMethods extends EVMChainAdapterBase { if (!this.contracts.askStorage) { throw new Error('AskStorage not available'); } - const ask = await this.contracts.askStorage.getStakeWeightedAverageAsk(); + const ask = await this.contractReadWithFailover( + 'askStorage.getStakeWeightedAverageAsk', this.contracts.askStorage, (c) => c.getStakeWeightedAverageAsk(), + ); return (BigInt(ask) * publicByteSize * BigInt(epochs)) / 1024n; } @@ -225,9 +235,13 @@ export class PublishMethods extends EVMChainAdapterBase { if (!this.contracts.knowledgeAssetsStorage) return false; const storage = this.contracts.knowledgeAssetsStorage; - const count = await storage.getPublisherRangesCount(publisherAddress); + const count = await this.contractReadWithFailover( + 'kasV9.getPublisherRangesCount', storage, (c) => c.getPublisherRangesCount(publisherAddress), + ); for (let i = 0; i < Number(count); i++) { - const [startId, endId] = await storage.getPublisherRange(publisherAddress, i); + const [startId, endId] = await this.contractReadWithFailover( + 'kasV9.getPublisherRange', storage, (c) => c.getPublisherRange(publisherAddress, i), + ); if (startId <= startKAId && endId >= endKAId) return true; } return false; @@ -394,7 +408,9 @@ export class PublishMethods extends EVMChainAdapterBase { const kas = this.contracts.knowledgeAssetStorage; if (!kas) return 0n; try { - return BigInt(await kas.getTokenAmount(kaId)); + return BigInt(await this.contractReadWithFailover( + 'kas.getTokenAmount', kas, (c) => c.getTokenAmount(kaId), + )); } catch { // KA not yet in storage (would fail later on-chain anyway) — return 0. return 0n; @@ -412,7 +428,9 @@ export class PublishMethods extends EVMChainAdapterBase { let endEpoch = 0n; if (kas) { try { - const ctx = await kas.getKnowledgeAssetUpdateContext(params.kaId); + const ctx = await this.contractReadWithFailover( + 'kas.getKnowledgeAssetUpdateContext', kas, (c) => c.getKnowledgeAssetUpdateContext(params.kaId), + ); // Tuple shape from `DKGKnowledgeAssets.getKnowledgeAssetUpdateContext`: // (preUpdateMerkleRootCount, minted, byteSize, endEpoch, tokenAmount, isImmutable, preUpdateMerkleLeafCount) currentByteSize = BigInt(ctx[2]); @@ -433,7 +451,9 @@ export class PublishMethods extends EVMChainAdapterBase { } if (this.contracts.chronos) { try { - currentEpoch = BigInt(await this.contracts.chronos.getCurrentEpoch()); + currentEpoch = BigInt(await this.contractReadWithFailover( + 'chronos.getCurrentEpoch', this.contracts.chronos, (c) => c.getCurrentEpoch(), + )); } catch (err) { throw new Error( `Failed to read Chronos currentEpoch for update tokenAmount sizing: ${(err as Error).message}`, @@ -445,7 +465,9 @@ export class PublishMethods extends EVMChainAdapterBase { let growthCost = 0n; if (params.newByteSize > currentByteSize && this.contracts.askStorage) { try { - const ask = BigInt(await this.contracts.askStorage.getStakeWeightedAverageAsk()); + const ask = BigInt(await this.contractReadWithFailover( + 'askStorage.getStakeWeightedAverageAsk', this.contracts.askStorage, (c) => c.getStakeWeightedAverageAsk(), + )); const byteSizeGrowth = params.newByteSize - currentByteSize; if (remainingEpochs > 0n) { growthCost = (ask * byteSizeGrowth * remainingEpochs) / 1024n; @@ -497,7 +519,7 @@ export class PublishMethods extends EVMChainAdapterBase { const kas = this.contracts.knowledgeAssetStorage; const kav10Address = await this.contracts.knowledgeAssetsLifecycle.getAddress(); - const evmChainId = BigInt((await this.provider.getNetwork()).chainId); + const evmChainId = BigInt((await this.readWithFailover('getNetwork (chainId)', (p) => p.getNetwork())).chainId); const currentTokenAmount = await this.resolveCurrentTokenAmount(params.kaId); @@ -517,7 +539,10 @@ export class PublishMethods extends EVMChainAdapterBase { if (this.contracts.contextGraphStorage) { try { contextGraphId = BigInt( - await this.contracts.contextGraphStorage.kaToContextGraph(params.kaId), + await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', this.contracts.contextGraphStorage, + (c) => c.kaToContextGraph(params.kaId), + ), ); } catch { /* use 0 */ } } @@ -525,7 +550,9 @@ export class PublishMethods extends EVMChainAdapterBase { let preUpdateMerkleRootCount = 0n; if (kas) { try { - const roots: unknown[] = await kas.getMerkleRoots(params.kaId); + const roots: unknown[] = await this.contractReadWithFailover( + 'kas.getMerkleRoots', kas, (c) => c.getMerkleRoots(params.kaId), + ); preUpdateMerkleRootCount = BigInt(roots.length); } catch { /* use 0 */ } } @@ -584,7 +611,10 @@ export class PublishMethods extends EVMChainAdapterBase { if (this.contracts.contextGraphStorage) { try { contextGraphId = BigInt( - await this.contracts.contextGraphStorage.kaToContextGraph(params.kaId), + await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', this.contracts.contextGraphStorage, + (c) => c.kaToContextGraph(params.kaId), + ), ); } catch { /* use 0 */ } } @@ -592,7 +622,9 @@ export class PublishMethods extends EVMChainAdapterBase { let preUpdateMerkleRootCount = 0n; if (kas) { try { - const roots: unknown[] = await kas.getMerkleRoots(params.kaId); + const roots: unknown[] = await this.contractReadWithFailover( + 'kas.getMerkleRoots', kas, (c) => c.getMerkleRoots(params.kaId), + ); preUpdateMerkleRootCount = BigInt(roots.length); } catch { /* use 0 */ } } @@ -627,7 +659,9 @@ export class PublishMethods extends EVMChainAdapterBase { const kas = this.contracts.knowledgeAssetStorage; if (kas) { try { - const onChainPublisher: string = await kas.getLatestMerkleRootPublisher(params.kaId); + const onChainPublisher: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootPublisher', kas, (c) => c.getLatestMerkleRootPublisher(params.kaId), + ); if (onChainPublisher && onChainPublisher !== ethers.ZeroAddress) { signer = this.signerPool.find( (s) => s.address.toLowerCase() === onChainPublisher.toLowerCase(), @@ -648,7 +682,7 @@ export class PublishMethods extends EVMChainAdapterBase { const ka = this.contracts.knowledgeAssetsLifecycle.connect(signer) as Contract; const kav10Address = await this.contracts.knowledgeAssetsLifecycle.getAddress(); - const evmChainId = (await this.provider.getNetwork()).chainId; + const evmChainId = (await this.readWithFailover('getNetwork (chainId)', (p) => p.getNetwork())).chainId; const identityId = params.publisherNodeIdentityId ?? await this.getIdentityId(); @@ -689,7 +723,9 @@ export class PublishMethods extends EVMChainAdapterBase { let contextGraphId = 0n; if (contextGraphStorage) { try { - contextGraphId = BigInt(await contextGraphStorage.kaToContextGraph(params.kaId)); + contextGraphId = BigInt(await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', contextGraphStorage, (c) => c.kaToContextGraph(params.kaId), + )); } catch { /* use 0 */ } } @@ -697,7 +733,9 @@ export class PublishMethods extends EVMChainAdapterBase { let preUpdateMerkleRootCount = 0n; if (kas) { try { - const roots: unknown[] = await kas.getMerkleRoots(params.kaId); + const roots: unknown[] = await this.contractReadWithFailover( + 'kas.getMerkleRoots', kas, (c) => c.getMerkleRoots(params.kaId), + ); preUpdateMerkleRootCount = BigInt(roots.length); } catch { /* use 0 */ } } diff --git a/packages/chain/src/evm-adapter-random-sampling.ts b/packages/chain/src/evm-adapter-random-sampling.ts index 36820dd721..8cd6814cfa 100644 --- a/packages/chain/src/evm-adapter-random-sampling.ts +++ b/packages/chain/src/evm-adapter-random-sampling.ts @@ -75,7 +75,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { await this.init(); const identityStorage = await this.getIdentityStorage(); - const identityId: bigint = await identityStorage.getIdentityId(this.signer.address); + const identityId: bigint = await this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(this.signer.address), + ); return this.withHubStaleRetry(async () => { const { rs, rss } = await this.getRandomSampling(); @@ -126,7 +128,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { ); } - const challengeRaw = await rss.getNodeChallenge(identityId); + const challengeRaw = await this.contractReadWithFailover( + 'rss.getNodeChallenge', rss, (c) => c.getNodeChallenge(identityId), + ); const challenge = this.toNodeChallenge(challengeRaw); if (!challenge) { throw new Error( @@ -282,7 +286,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { async getNodeChallenge(identityId: bigint): Promise { await this.init(); const { rss } = await this.getRandomSampling(); - const raw = await rss.getNodeChallenge(identityId); + const raw = await this.contractReadWithFailover( + 'rss.getNodeChallenge', rss, (c) => c.getNodeChallenge(identityId), + ); return this.toNodeChallenge(raw); } @@ -293,7 +299,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { ): Promise { await this.init(); const { rss } = await this.getRandomSampling(); - const score: bigint = await rss.getNodeEpochProofPeriodScore(identityId, epoch, periodStartBlock); + const score: bigint = await this.contractReadWithFailover( + 'rss.getNodeEpochProofPeriodScore', rss, (c) => c.getNodeEpochProofPeriodScore(identityId, epoch, periodStartBlock), + ); return BigInt(score); } } diff --git a/packages/chain/src/evm-adapter-rpc.ts b/packages/chain/src/evm-adapter-rpc.ts index ccabe3eff5..3132f56b5e 100644 --- a/packages/chain/src/evm-adapter-rpc.ts +++ b/packages/chain/src/evm-adapter-rpc.ts @@ -69,16 +69,34 @@ export function resolveRpcUrls(rpcUrl: string, rpcUrls?: string[]): string[] { } /** - * Build a `FetchRequest` whose retry loop gives up after - * `RPC_REQUEST_MAX_RETRIES` retries. A bare string URL would use ethers' - * unbounded default; we install a bounded `retryFunc` instead. The bound is - * evaluated from `attempt` (per-request), so every request — no matter how - * long the node has been running — gets the same fresh retry budget. + * Build a `FetchRequest` whose retry loop gives up after `maxRetries` retries. + * A bare string URL would use ethers' unbounded default; we install a bounded + * `retryFunc` instead. The bound is evaluated from `attempt` (per-request), so + * every request — no matter how long the node has been running — gets the same + * fresh retry budget. + * + * `maxRetries` is chosen by the adapter from the configured endpoint count + * (`evm-adapter-base.ts` constructor): + * - **Multi-RPC (≥2 endpoints): `0`** — the FIRST retryable failure (429/5xx/ + * network) propagates immediately so the adapter's explicit per-provider + * read/write failover loops advance to the NEXT endpoint at once, instead of + * burning ~7.5s of same-endpoint backoff on an endpoint we already know is + * failing. With ≥2 endpoints the failover loop IS the resilience. + * - **Single-RPC: `RPC_REQUEST_MAX_RETRIES` (5)** — unchanged. There is + * nowhere to fail over to, so the bounded same-endpoint retry is the only + * resilience and rides out a transient blip while still surfacing a + * perpetual error as a bounded `RPC_ENDPOINTS_EXHAUSTED`→503 (#894). + * + * `maxRetries = 0` makes `retryFunc` return `false` on attempt 0 with NO sleep, + * so the failure surfaces synchronously to the failover loop. */ -export function boundedRetryFetchRequest(url: string): FetchRequest { +export function boundedRetryFetchRequest( + url: string, + maxRetries: number = RPC_REQUEST_MAX_RETRIES, +): FetchRequest { const req = new FetchRequest(url); req.retryFunc = async (_req, _response, attempt) => { - if (attempt >= RPC_REQUEST_MAX_RETRIES) return false; + if (attempt >= maxRetries) return false; await sleep(Math.min(500 * (attempt + 1), RPC_REQUEST_RETRY_BACKOFF_CAP_MS)); return true; }; diff --git a/packages/chain/src/evm-adapter-storage-reads.ts b/packages/chain/src/evm-adapter-storage-reads.ts index e8cd77fe03..d4135af374 100644 --- a/packages/chain/src/evm-adapter-storage-reads.ts +++ b/packages/chain/src/evm-adapter-storage-reads.ts @@ -31,42 +31,54 @@ export class StorageReadMethods extends EVMChainAdapterBase { async getLatestMerkleRoot(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const rootHex: string = await kas.getLatestMerkleRoot(kaId); + const rootHex: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRoot', kas, (c) => c.getLatestMerkleRoot(kaId), + ); return ethers.getBytes(rootHex); } async getMerkleLeafCount(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const count: bigint = BigInt(await kas.getMerkleLeafCount(kaId)); + const count: bigint = BigInt(await this.contractReadWithFailover( + 'kas.getMerkleLeafCount', kas, (c) => c.getMerkleLeafCount(kaId), + )); return Number(count); } async getCatalogRoot(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const rootHex: string = await kas.getCatalogRoot(kaId); + const rootHex: string = await this.contractReadWithFailover( + 'kas.getCatalogRoot', kas, (c) => c.getCatalogRoot(kaId), + ); return ethers.getBytes(rootHex); } async getCatalogLeafCount(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const count: bigint = BigInt(await kas.getCatalogLeafCount(kaId)); + const count: bigint = BigInt(await this.contractReadWithFailover( + 'kas.getCatalogLeafCount', kas, (c) => c.getCatalogLeafCount(kaId), + )); return Number(count); } async getLatestMerkleRootPublisher(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const publisher: string = await kas.getLatestMerkleRootPublisher(kaId); + const publisher: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootPublisher', kas, (c) => c.getLatestMerkleRootPublisher(kaId), + ); return publisher; } async getLatestMerkleRootAuthor(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const author: string = await kas.getLatestMerkleRootAuthor(kaId); + const author: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootAuthor', kas, (c) => c.getLatestMerkleRootAuthor(kaId), + ); return author; } } diff --git a/packages/chain/src/rpc-failover-log.ts b/packages/chain/src/rpc-failover-log.ts index f3ac854c5e..9e4f0464c9 100644 --- a/packages/chain/src/rpc-failover-log.ts +++ b/packages/chain/src/rpc-failover-log.ts @@ -1,9 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 /** - * Observability for multi-RPC WRITE failover. Records + logs the per-provider - * failovers in the EVM adapter's write loops (and the CLI stack in - * `cli-rpc.ts`), under three invariants the callers depend on: + * Observability for multi-RPC failover. Records + logs the per-provider + * failovers in BOTH the EVM adapter's write loops AND its read-failover loop + * (`readWithFailover` / `contractReadWithFailover` / the V10 populate loop), + * plus the CLI stack in `cli-rpc.ts`, under three invariants the callers + * depend on: * * 1. Dedup-gated logging — at most one line per `host|errorClass` per * `DEFAULT_DEDUP_WINDOW_MS` (5 min) with a suppressed-count rollup, so a @@ -17,8 +19,10 @@ * * Counters are a PROCESS-WIDE singleton: the daemon builds one adapter per * agent + per publisher wallet, and `/api/status` reads the aggregate via a - * direct getter import. Scope is WRITE failover only — read failover is - * internal to the ethers FallbackProvider. + * direct getter import. Scope covers BOTH read and write failover — reads route + * through the adapter's explicit `readWithFailover` loop (the ethers + * `FallbackProvider` was removed; see `evm-adapter-base.ts`), so both halves + * funnel their per-hop failovers and exhaustions through this module. */ import { errorCode, errorMessage, errorStatus } from './evm-adapter-errors.js'; diff --git a/packages/chain/test/bounded-retry-fetch.test.ts b/packages/chain/test/bounded-retry-fetch.test.ts new file mode 100644 index 0000000000..b6205781c1 --- /dev/null +++ b/packages/chain/test/bounded-retry-fetch.test.ts @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * T2 (immediate-RPC-failover): the per-endpoint retry budget contract. + * + * `boundedRetryFetchRequest` installs the `FetchRequest.retryFunc` that decides + * whether ethers retries a 429/5xx on the SAME endpoint. The whole + * immediate-failover change hinges on making that count configurable: + * - multi-RPC => maxRetries = 0 => give up on the first failure so the + * adapter's own failover advances to the next endpoint AT ONCE; + * - single-RPC => maxRetries = RPC_REQUEST_MAX_RETRIES (5) => bounded retry + * preserved (there is nowhere to fail over to — the #894 resilience). + * + * The existing ~170 adapter tests inject bare provider mocks and never touch + * this retryFunc at all, so the retry-COUNT contract is otherwise unproven. + * This drives the real `FetchRequest.retryFunc` directly — no network, no + * native deps — with fake timers so the backoff `sleep` is instant. + * + * The first block locks the CURRENT (single-arg) behaviour and is GREEN today. + * The `describe.skip` block specifies the parametrised `(url, maxRetries)` API + * ChainEngineer is adding — UN-SKIP it the moment that signature lands. Until + * then it is staged (not red) so the shared chain suite stays green during S1. + */ +import { describe, it, expect, vi, afterEach } from 'vitest'; +import type { FetchResponse } from 'ethers'; +import { boundedRetryFetchRequest } from '../src/evm-adapter-rpc.js'; + +const URL = 'https://rpc.example/key'; +// retryFunc ignores req/response; a bare stub satisfies the signature. +const RESP = undefined as unknown as FetchResponse; + +async function decideWithFakeTimers( + fn: NonNullable['retryFunc']>, + req: ReturnType, + attempt: number, +): Promise { + // The retry DECISION resolves only after the internal backoff sleep; drive + // fake timers so a "retry=true" verdict returns without real wall time. + const pending = fn(req, RESP, attempt); + await vi.advanceTimersByTimeAsync(2_000); + return pending; +} + +describe('boundedRetryFetchRequest — current single-arg retry budget (#894)', () => { + afterEach(() => vi.useRealTimers()); + + it('retries through attempt 4 and gives up at attempt 5 (RPC_REQUEST_MAX_RETRIES)', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL); + const retryFunc = req.retryFunc!; + expect(typeof retryFunc).toBe('function'); + + // attempts 0..4 → retry (true), after a bounded backoff sleep. + expect(await decideWithFakeTimers(retryFunc, req, 0)).toBe(true); + expect(await decideWithFakeTimers(retryFunc, req, 4)).toBe(true); + + // attempt 5 → give up (false), and WITHOUT sleeping (no timer advance). + expect(await retryFunc(req, RESP, 5)).toBe(false); + // ...still false past the cap. + expect(await retryFunc(req, RESP, 9)).toBe(false); + }); + + it('the give-up verdict at the cap is synchronous (no backoff sleep before failing over)', async () => { + // A single-RPC node must not add a stray sleep to its terminal give-up; + // the false at the cap resolves with no pending timer. + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL); + const verdict = req.retryFunc!(req, RESP, 5); + // No timer advance — if the impl slept here this would still be pending. + await expect(verdict).resolves.toBe(false); + }); +}); + +// The parametrised `(url, maxRetries)` contract — the core of immediate failover: +// maxRetries=0 fails over on the FIRST failure (multi-RPC), maxRetries=5 keeps +// the #894 budget (single-RPC). ChainEngineer landed the 2-arg signature +// (default = RPC_REQUEST_MAX_RETRIES, so single-arg callers are unchanged), so +// this runs live. +describe('boundedRetryFetchRequest(url, maxRetries) — parametrised failover budget', () => { + afterEach(() => vi.useRealTimers()); + + it('maxRetries=0 → gives up immediately at attempt 0 (multi-RPC: fail over at once, no backoff)', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL, 0); + // attempt 0 must be false AND synchronous — no per-endpoint backoff sleep. + // (If the impl slept here, this would stay pending with no timer advanced.) + await expect(req.retryFunc!(req, RESP, 0)).resolves.toBe(false); + }); + + it('maxRetries=5 → preserves the #894 budget (true through 4, false at 5)', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL, 5); + expect(await decideWithFakeTimers(req.retryFunc!, req, 0)).toBe(true); + expect(await decideWithFakeTimers(req.retryFunc!, req, 4)).toBe(true); + expect(await req.retryFunc!(req, RESP, 5)).toBe(false); + }); + + it('default maxRetries (no 2nd arg) equals the single-RPC #894 budget of 5', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL); // default = RPC_REQUEST_MAX_RETRIES + expect(await decideWithFakeTimers(req.retryFunc!, req, 4)).toBe(true); + expect(await req.retryFunc!(req, RESP, 5)).toBe(false); + }); +}); diff --git a/packages/chain/test/evm-adapter.unit.test.ts b/packages/chain/test/evm-adapter.unit.test.ts index 235b954c60..ab1bdeb3bb 100644 --- a/packages/chain/test/evm-adapter.unit.test.ts +++ b/packages/chain/test/evm-adapter.unit.test.ts @@ -1625,9 +1625,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { it('getEvmChainId issues exactly one provider.getNetwork call across repeat reads', async () => { const a = new EVMChainAdapter(minimalConfig()); const getNetwork = recorder(async () => ({ chainId: 31337n })); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. + (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; expect(await a.getEvmChainId()).toBe(31337n); expect(await a.getEvmChainId()).toBe(31337n); @@ -1669,9 +1673,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { const a = new EVMChainAdapter(minimalConfig()); let returned = 31337n; const getNetwork = recorder(async () => ({ chainId: returned })); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. + (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; expect(await a.getEvmChainId()).toBe(31337n); expect(getNetwork.calls).toHaveLength(1); @@ -1689,9 +1697,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { it('invalidatePublishPreflightCache forces a fresh read on next call', async () => { const a = new EVMChainAdapter(minimalConfig()); const getNetwork = recorder(async () => ({ chainId: 31337n })); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. + (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; await a.getEvmChainId(); await a.getEvmChainId(); @@ -1709,9 +1721,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { if (attempts === 1) throw new Error('rate limited'); return { chainId: 31337n }; }); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. + (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; await expect(a.getEvmChainId()).rejects.toThrow('rate limited'); // Second call should retry — failure was not memoised. @@ -2567,17 +2583,24 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign (a as any).provider.getBalance = recorder(async (addr: string) => nativeByAddr.get(String(addr).toLowerCase()) ?? ABUNDANT_WEI); + // R1: readTracBalance now reads via contractReadWithFailover → withRunner + // does `token.connect(p).balanceOf(addr)`, so the CONNECTED contract (what + // token.connect returns) must expose balanceOf — not just the top-level + // token. (Native getBalance still works: the helper mutates + // this.provider.getBalance and this.provider === this.providers[0], so the + // shared object is what readWithFailover reads.) + const balanceOf = recorder(async (addr: string) => + tracByAddr.get(String(addr).toLowerCase()) ?? ABUNDANT_WEI); const tokenWithSigner = { allowance: recorder(async (owner: string, _spender: string) => { return allowanceByOwner.get(owner.toLowerCase()) ?? 0n; }), approve: recorder(() => undefined), + balanceOf, }; (a as any).contracts.token = { connect: recorder(() => tokenWithSigner), - // Read path (`readTracBalance`) calls this.contracts.token.balanceOf. - balanceOf: recorder(async (addr: string) => - tracByAddr.get(String(addr).toLowerCase()) ?? ABUNDANT_WEI), + balanceOf, // kept for any direct (non-connected) top-level reader }; const populateSpy = recorder(async () => ({ @@ -2684,7 +2707,11 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign expect(approveSender).toBe(walletB); expect(signSpy.calls).toHaveLength(1); - expect(signSpy.calls[0][0]).toBe(walletB); + // R1/OBS-1: populateAndSignAcrossProviders signs on the per-provider runner + // (signer.connect(providers[i])) — same key/ADDRESS as walletB, new object. + // Assert the signer ADDRESS, not object identity (#870 "publish signed by + // walletB, no mid-flight rotation" invariant is preserved). + expect((signSpy.calls[0][0] as ethers.Wallet).address).toBe(walletB.address); }); it('publish path: when publisherAddress is omitted, round-robin signer is also the approve signer (no mid-flight rotation)', async () => { @@ -2710,7 +2737,8 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign expect(approveMethod).toBe('approve'); expect(approveArgs).toEqual([PARITY_KA_ADDRESS, 1n]); expect(approveSender).toBe(walletA); - expect(signSpy.calls[0][0]).toBe(walletA); + // R1/OBS-1: signer reconnected per-provider — assert ADDRESS not identity. + expect((signSpy.calls[0][0] as ethers.Wallet).address).toBe(walletA.address); }); it('update path: approve fires from the on-chain publisher wallet, NOT a round-robin pick from the pool', async () => { @@ -2741,11 +2769,13 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign (a as any).resolveCurrentTokenAmount = recorder(async () => 0n); (a as any).computeUpdateNewTokenAmount = recorder(async () => 0n); (a as any).getIdentityId = recorder(async () => 0n); - // `provider.getNetwork()` is called for chainId; stub it so the update - // path doesn't try to hit the placeholder RPC. - (a as any).provider = { - getNetwork: recorder(async () => ({ chainId: 31337n })), - }; + // `provider.getNetwork()` is called for chainId; stub it so the update path + // doesn't hit the placeholder RPC. R1: getEvmChainId reads via + // readWithFailover over this.providers[0] (=== this.provider), so MUTATE + // getNetwork on the shared object — REPLACING this.provider would orphan + // this.providers[0] (and the helper's getBalance mock) and the read would + // dial the dead RPC instead. + (a as any).provider.getNetwork = recorder(async () => ({ chainId: 31337n })); const updateParams: any = { kaId, @@ -2789,7 +2819,11 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign expect(approveLabel).toBe('approve V10 update TRAC'); expect(signSpy.calls).toHaveLength(1); - expect(signSpy.calls[0][0]).toBe(walletB); + // R1/OBS-1: populateAndSignAcrossProviders signs on the per-provider runner + // (signer.connect(providers[i])) — same key/ADDRESS as walletB, new object. + // Assert the signer ADDRESS, not object identity (#870 "publish signed by + // walletB, no mid-flight rotation" invariant is preserved). + expect((signSpy.calls[0][0] as ethers.Wallet).address).toBe(walletB.address); }); // ----------------------------------------------------------------------------- @@ -3440,6 +3474,61 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco expect(signSpy.calls).toEqual([]); }); + it('C6 (G-OBS1b): forces EXACTLY ONE approve across a provider-failover × TooLowAllowance interleaving (shared OUTER latch, not per-provider)', async () => { + // The case a PER-PROVIDER latch would double-fire: the inner per-provider + // populate loop fails over on provider #1's RETRYABLE 429, then provider #2 + // reverts TooLowAllowance (non-retryable → propagates to the OUTER recovery), + // which fires ONE forced approve and re-runs the WHOLE inner loop (now + // succeeds). The forcedReapprove latch lives at the recovery OUTER scope, so + // it fires exactly once no matter how many endpoints the inner loop tried — + // immediate failover introduces ZERO extra approve txs (INV-1 + G-OBS1b). + const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); + (a as any).providers = [{}, {}]; // two endpoints so the inner loop fails over + const r429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; }; + let call = 0; + const populate = recorder(async () => { + call += 1; + if (call === 1) throw r429(); // provider[0], pass 1 → retryable → fail over + if (call === 2) throw tooLowAllowanceRevert(); // provider[1], pass 1 → non-retryable → propagate + return { to: V10_KA_ADDRESS, data: '0xabcd' }; // provider[0], pass 2 (post-approve) → succeeds + }); + const kaContract = { publish: { populateTransaction: populate } }; + + const result = await (a as any).populateAndSignV10WithAllowanceRecovery( + signer, kaContract, 'publish', {}, V10_KA_ADDRESS, 0n, 'label', + ); + + expect(result).toEqual({ signedTx: '0xsigned', txHash: '0xhash' }); + expect(ensureSpy.calls).toHaveLength(1); // EXACTLY ONE forced approve across the failover + expect(ensureSpy.calls[0][4]).toBe(true); // force=true + expect(signSpy.calls).toHaveLength(1); // publish signed exactly once (INV-1) + expect(populate.calls).toHaveLength(3); // p0(429) → p1(TooLow) → [approve] → p0(ok) + }); + + it('OBS-1: a RETRYABLE populate failure fails over to the next provider and signs exactly once (no double-sign)', async () => { + // Plain OBS-1 populate failover (no allowance recovery): provider #1's + // populate is rate-limited, provider #2 populates fine → signed once on #2. + const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); + (a as any).providers = [{}, {}]; + const r429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; }; + let call = 0; + const populate = recorder(async () => { + call += 1; + if (call === 1) throw r429(); // provider[0] → fail over + return { to: V10_KA_ADDRESS, data: '0xabcd' }; // provider[1] → populates + }); + const kaContract = { publish: { populateTransaction: populate } }; + + const result = await (a as any).populateAndSignV10WithAllowanceRecovery( + signer, kaContract, 'publish', {}, V10_KA_ADDRESS, 0n, 'label', + ); + + expect(result).toEqual({ signedTx: '0xsigned', txHash: '0xhash' }); + expect(populate.calls).toHaveLength(2); // p0(429) → p1(ok) + expect(signSpy.calls).toHaveLength(1); // signed once, on the healthy provider + expect(ensureSpy.calls).toEqual([]); // no TooLowAllowance → no forced approve + }); + it('enriches the SECOND raw TooLowAllowance before throwing the one-shot failure', async () => { const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); const populate = recorder(async () => { throw rawTooLowAllowanceRevert(); }); diff --git a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts index d8b1d03923..df51750e7a 100644 --- a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts +++ b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts @@ -248,6 +248,49 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# expect(stale.getMaxKaNumberForAuthor.staticCall.calls).toEqual([]); }); + // R1 bespoke-loop boundary (base:2068): the view staticCall is NOT routed + // through readWithFailover because its error shapes are overloaded — a + // TRANSIENT error must fail over to the next endpoint, but a DETERMINISTIC + // absent-view (BAD_DATA/CALL_EXCEPTION) must NOT fail over (it's the same on + // every endpoint) and instead fall through to the pre-10.0.4 scan. + it('bespoke staticCall loop: a TRANSIENT 429 fails over to the next endpoint and answers from the view (no scan)', async () => { + const queryFilter = recorder(async () => []); + let attempt = 0; + const storage: any = { + getMaxKaNumberForAuthor: viewMock(async () => { + attempt += 1; + if (attempt === 1) { const e: any = new Error('429 too many requests'); e.status = 429; throw e; } + return 5n; + }), + filters: { KnowledgeAssetCreated: recorder(() => 'F') }, + queryFilter, + }; + const a = makeAdapter(storage, 100_000); + (a as any).providers = [{}, {}]; // two endpoints → the view read fails over + expect(await a.getMaxKaNumberForAuthor(AUTHOR)).toBe(5n); // served by endpoint #2 + expect(storage.getMaxKaNumberForAuthor.staticCall.calls).toHaveLength(2); // failed over once + expect(queryFilter.calls).toEqual([]); // the view answered → NEVER scans logs + }); + + it('bespoke staticCall loop: an ABSENT-view (BAD_DATA) is deterministic across endpoints — no failover, straight to the scan', async () => { + const badData: any = new Error(EMPTY_VIEW_RESULT); + badData.code = 'BAD_DATA'; + const queryFilter = recorder(async () => []); + const storage: any = { + target: '0x5555555555555555555555555555555555555555', + getMaxKaNumberForAuthor: viewMock(async () => { throw badData; }), + filters: { KnowledgeAssetCreated: recorder(() => 'F') }, + queryFilter, + }; + const a = makeAdapter(storage, 3_000); + const backend = () => ({ getBlockNumber: recorder(async () => 3_000), getCode: recorder(async () => '0x6000') }); + (a as any).providers = [backend(), backend()]; + expect(await a.getMaxKaNumberForAuthor(AUTHOR)).toBe(-1n); // degraded to the (empty) scan + // Absent-view is deterministic → BREAK after ONE attempt, never consult endpoint #2. + expect(storage.getMaxKaNumberForAuthor.staticCall.calls).toHaveLength(1); + expect(queryFilter.calls.length).toBeGreaterThan(0); // the scan ran instead + }); + it('rethrows malformed BAD_DATA instead of treating every decode failure as an absent view', async () => { const err: any = new Error( 'could not decode result data (value="0x1234", info={ method: "getMaxKaNumberForAuthor", signature: "getMaxKaNumberForAuthor(address)" }, code=BAD_DATA, version=6.16.0)', @@ -752,9 +795,18 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# }; const a = makeAdapter(storage, head); const code = (block?: number) => (block === undefined || block >= deployBlock ? '0x6000' : '0x'); - // b0 is reachable (head ok) but HANGS on getCode; the deploy-block search - // must time out its attempts and fail over to b1 rather than stalling. - const b0 = { getBlockNumber: recorder(async () => head), getCode: recorder(() => new Promise(() => {})) }; + // b0 is reachable (head ok) and serves the STANDALONE bytecode probe + // (block===undefined → readWithFailover, R1) but HANGS on the deploy-block + // SEARCH reads (block!==undefined); the search must time out its 3×4s + // attempts and fail over to b1 rather than stalling. (Hanging the + // standalone probe too would add a 4s readWithFailover hop and push the + // total past the 13s advance — out of scope for this "search fails over" + // assertion.) + const b0 = { + getBlockNumber: recorder(async () => head), + getCode: recorder((_a: string, block?: number) => + block === undefined ? Promise.resolve('0x6000') : new Promise(() => {})), + }; const b1 = { getBlockNumber: recorder(async () => head), getCode: recorder(async (_a: string, block?: number) => code(block)) }; (a as any).providers = [b0, b1]; @@ -1008,8 +1060,15 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# queryFilter, }; const a = makeAdapter(storage, 0); - // backend 0: completely down — getBlockNumber itself fails (a real, non-historical error) - const downBackend = { getBlockNumber: recorder(async () => { throw new Error('503 node is down'); }), getCode: recorder(() => undefined) }; + // backend 0: completely down — getBlockNumber AND getCode fail (a real, + // non-historical error). R1: the standalone bytecode probe now consults + // getCode via readWithFailover, so a down node must THROW (not return + // undefined, which would be read as a valid empty result and short-circuit + // before failover to the pruned-but-reachable backend). + const downBackend = { + getBlockNumber: recorder(async () => { throw new Error('503 node is down'); }), + getCode: recorder(async () => { throw new Error('503 node is down'); }), + }; // backend 1: reachable but pruned — historical getCode unavailable const prunedBackend = { getBlockNumber: recorder(async () => head), diff --git a/packages/chain/test/loopback-rpc-harness.ts b/packages/chain/test/loopback-rpc-harness.ts new file mode 100644 index 0000000000..cf0bf942e4 --- /dev/null +++ b/packages/chain/test/loopback-rpc-harness.ts @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Shared loopback JSON-RPC harness for the immediate-RPC-failover tests + * (T1 read failover, the real-provider WRITE failover gate, and the staged + * T3/T6). NOT a test file (no `.test.` suffix) so vitest does not run it. + * + * Spins real `node:http` JSON-RPC servers so tests drive REAL + * `ethers.JsonRpcProvider`s / `FetchRequest` / the adapter's failover loops — + * the thing the existing ~170 bare-object provider mocks bypass entirely. No + * Hardhat, no native deps; runs locally (`--ignore-scripts`) and in CI. + * + * Teardown discipline (REQUIRED by callers): under a perpetual 429 ethers keeps + * retrying on keep-alive sockets AFTER the awaited call rejects. Always + * `adapter.destroy()` then `rpc.close()` (which `closeAllConnections()` first) + * in afterEach, or the hook hangs past vitest's timeout — the known flaky-CI + * failure mode (see evm-adapter.unit.test.ts:1549). + */ +import { createServer, type Server } from 'node:http'; +import type { AddressInfo } from 'node:net'; + +/** chainId 31337 (matches the tests' `chainId: 'evm:31337'`). */ +export const CHAIN_ID_HEX = '0x7a69'; + +export interface LoopbackRpc { + url: string; + server: Server; + /** Per-JSON-RPC-method request counts, e.g. `hits('eth_chainId')`. */ + hits: (method: string) => number; + totalHits: () => number; + /** Force-close sockets then close the server (afterEach teardown). */ + close: () => Promise; +} + +export interface LoopbackOptions { + /** JSON-RPC methods that respond HTTP 429 (rate-limited). */ + throttle?: Iterable; + /** Override canned results per method (return a hex string). */ + results?: Record; +} + +const DEFAULT_RESULTS: Record = { + eth_chainId: CHAIN_ID_HEX, + eth_blockNumber: '0x10', + eth_getCode: '0x1234', + eth_call: '0x' + '00'.repeat(32), + eth_sendRawTransaction: '0x' + '11'.repeat(32), + eth_getTransactionReceipt: '', // '' → null result (receipt not yet mined) +}; + +/** + * Start a loopback JSON-RPC server. Methods in `throttle` answer HTTP 429; + * everything else returns a canned OK result. `eth_chainId`/`eth_blockNumber` + * are always answerable (unless throttled) so a healthy endpoint satisfies + * ethers' network detection. + */ +export async function startLoopbackRpc(options: LoopbackOptions = {}): Promise { + const throttle = new Set(options.throttle ?? []); + const results = { ...DEFAULT_RESULTS, ...(options.results ?? {}) }; + const counts = new Map(); + + const server = createServer((req, res) => { + let raw = ''; + req.on('data', (c) => { raw += c; }); + req.on('end', () => { + let body: unknown; + try { body = JSON.parse(raw); } catch { body = {}; } + const reqs = (Array.isArray(body) ? body : [body]) as Array<{ id: number; method: string }>; + let throttled = false; + const out: unknown[] = []; + for (const r of reqs) { + counts.set(r.method, (counts.get(r.method) ?? 0) + 1); + if (throttle.has(r.method)) { throttled = true; continue; } + const result = r.method in results ? results[r.method] : '0x'; + out.push({ jsonrpc: '2.0', id: r.id, result: result === '' ? null : result }); + } + if (throttled) { + res.writeHead(429, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + jsonrpc: '2.0', + id: reqs[0]?.id ?? null, + error: { code: -32005, message: 'rate limited' }, + })); + return; + } + res.setHeader('content-type', 'application/json'); + res.end(JSON.stringify(Array.isArray(body) ? out : out[0])); + }); + }); + + await new Promise((resolve) => server.listen(0, '127.0.0.1', () => resolve())); + const addr = server.address() as AddressInfo; + return { + url: `http://127.0.0.1:${addr.port}`, + server, + hits: (method) => counts.get(method) ?? 0, + totalHits: () => [...counts.values()].reduce((a, b) => a + b, 0), + close: async () => { + server.closeAllConnections?.(); + await new Promise((resolve) => server.close(() => resolve())); + }, + }; +} diff --git a/packages/chain/test/mock-adapter-parity.test.ts b/packages/chain/test/mock-adapter-parity.test.ts index 1e261bc47a..c0b803c856 100644 --- a/packages/chain/test/mock-adapter-parity.test.ts +++ b/packages/chain/test/mock-adapter-parity.test.ts @@ -116,6 +116,15 @@ const MOCK_EXEMPT_FROM_EVM = new Set([ 'getTransactionReceiptWithFailover', 'waitForReceiptWithFailover', 'signPopulatedTransaction', + // R1 immediate-RPC-failover read/populate plumbing: the per-provider read + // failover loop, its contract-view wrapper, the `.connect()` runner helper, + // and the populate+sign-across-providers loop. Protected EVM-only helpers over + // `this.providers[]` (the mock has no RPC provider pool), not ChainAdapter + // contract methods — same category as the write-failover helpers above. + 'readWithFailover', + 'contractReadWithFailover', + 'withRunner', + 'populateAndSignAcrossProviders', 'sendSignedTransactionAndWait', 'sendPopulatedTransaction', 'sendContractTransaction', diff --git a/packages/chain/test/multi-rpc-provider-shape.test.ts b/packages/chain/test/multi-rpc-provider-shape.test.ts index 9fb577a374..98007b6b72 100644 --- a/packages/chain/test/multi-rpc-provider-shape.test.ts +++ b/packages/chain/test/multi-rpc-provider-shape.test.ts @@ -7,9 +7,14 @@ const PK = '0x' + '1'.repeat(64); const HUB = '0x0000000000000000000000000000000000000001'; // Constructing the adapter is offline (providers are lazy / never dialled), so -// these assertions need no live RPC — they exercise the backwards-compat split -// in the constructor: 1 endpoint => bare JsonRpcProvider (identical to the -// pre-multi-RPC path), >1 endpoint => FallbackProvider failover. +// these assertions need no live RPC — they exercise the constructor's provider +// topology: 1 endpoint => bare JsonRpcProvider (identical to the pre-multi-RPC +// path); >1 endpoint => N bare JsonRpcProviders in `this.providers[]` with the +// bare PRIMARY exposed as the read provider. R1 removed the ethers +// FallbackProvider — reads fail over EXPLICITLY via `readWithFailover` over +// `this.providers[]` (the immediate-failover behaviour itself is covered by +// multi-rpc-read-failover.test.ts, so that coverage is NOT dropped here, only +// the now-removed FallbackProvider topology assertion is updated). describe('multi-RPC provider shape (backwards compatibility)', () => { it('a single rpcUrl yields a bare JsonRpcProvider (no FallbackProvider)', () => { const a = new EVMChainAdapter({ @@ -24,7 +29,7 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { expect(a.getRpcUrls()).toEqual(['http://127.0.0.1:1']); }); - it('multiple rpcUrls yield a FallbackProvider over all endpoints (primary first)', () => { + it('multiple rpcUrls build a bare-primary read provider + N providers for readWithFailover (no FallbackProvider)', () => { const a = new EVMChainAdapter({ rpcUrl: 'http://127.0.0.1:1', rpcUrls: ['http://127.0.0.1:2', 'http://127.0.0.1:3'], @@ -32,8 +37,19 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { privateKey: PK, allowNoAdminSigner: true, }); - expect(a.getReadProvider()).toBeInstanceOf(FallbackProvider); + // R1: getReadProvider() is the bare PRIMARY JsonRpcProvider — the + // FallbackProvider is gone; reads fail over explicitly via readWithFailover. + const read = a.getReadProvider(); + expect(read).toBeInstanceOf(JsonRpcProvider); + expect(read).not.toBeInstanceOf(FallbackProvider); + // All endpoints stay configured, primary first — the failover topology now + // lives in this.providers[] (one bare JsonRpcProvider per endpoint), which + // readWithFailover iterates. expect(a.getRpcUrls()).toEqual(['http://127.0.0.1:1', 'http://127.0.0.1:2', 'http://127.0.0.1:3']); + const providers = (a as unknown as { providers: unknown[] }).providers; + expect(providers).toHaveLength(3); + expect(providers.every((p) => p instanceof JsonRpcProvider)).toBe(true); + expect(providers.some((p) => p instanceof FallbackProvider)).toBe(false); }); it('dedupes a backup that repeats the primary (no redundant provider)', () => { diff --git a/packages/chain/test/multi-rpc-read-failover.test.ts b/packages/chain/test/multi-rpc-read-failover.test.ts new file mode 100644 index 0000000000..08e1354936 --- /dev/null +++ b/packages/chain/test/multi-rpc-read-failover.test.ts @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * T1 (immediate-RPC-failover): REAL multi-RPC READ failover over loopback + * JSON-RPC servers — the regression gate the existing suite cannot provide. + * + * Why this file exists: the ~170 adapter failover tests inject bare-object + * provider mocks (`(a as any).providers = [...]`) that bypass JsonRpcProvider / + * FetchRequest / the read path entirely, AND they only cover WRITES. The READ + * failover path (today: ethers `FallbackProvider`; after R1: an explicit + * `readWithFailover` loop over the bare providers) has ZERO real coverage. + * Empirically the current FallbackProvider read path does NOT reliably fail over + * on a fast 429 (it advances only on a 4s STALL; even at the default retry + * budget it can surface the primary's error with a healthy backup) — so R1 is a + * correctness fix, not just a latency win, and it needs a REAL-provider test. + * + * Harness: `startLoopbackRpc` (node:http, no Hardhat / native deps). Teardown + * MUST destroy() each adapter and close() each server (closeAllConnections) — a + * perpetual 429 keeps ethers retrying on keep-alive sockets after the call + * rejects, which otherwise hangs afterEach past vitest's hook timeout. + * + * The CONTROL tests (healthy endpoints) are GREEN today and prove the harness + + * the real-provider-over-loopback adapter works. The `describe.skip` TARGET + * blocks specify the immediate-failover behaviour R1 delivers. + * + * ── UN-SKIP PROTOCOL (HARD S1-ACCEPTANCE GATE, lead-mandated 2026-06-25) ── + * Trigger: the moment ChainEngineer signals reads (getEvmChainId / Hub + * getContractAddress / resolveContract / contract-views) route through + * `readWithFailover` over the bare `this.providers[]` with per-endpoint + * retries = 0 for multi-RPC, flip ALL `describe.skip` TARGET blocks below to + * live `describe`. They MUST then be GREEN — S1 is NOT complete until these are + * live-green (lead + TxSafetyReviewer require it at S1 sign-off; the skip is + * provably temporary, not optional). Kept skip ONLY while S1 is mid-flight so a + * transient red from an unrelated in-progress edit can't mask a real regression. + * VERIFIED 2026-06-25: with the targets un-skipped against ChainEngineer's S1 + * working tree these PASS (read 5/5, write 3/3) — the gate flips RED→GREEN + * exactly as designed (current code: getEvmChainId rejects SERVER_ERROR in ~64ms + * with a healthy backup = no failover). Do NOT weaken the assertions. + */ +import { describe, it, expect, afterEach, beforeEach } from 'vitest'; +import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { startLoopbackRpc, type LoopbackRpc } from './loopback-rpc-harness.js'; +import { getRpcFailoverStats, _resetRpcFailoverStatsForTest } from '../src/rpc-failover-log.js'; + +const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; +const HUB = '0x0000000000000000000000000000000000000001'; + +function minimalConfig(overrides: Partial = {}): EVMAdapterConfig { + return { + rpcUrl: 'http://127.0.0.1:1', + privateKey: DEPLOYER_PK, + hubAddress: HUB, + chainId: 'evm:31337', + allowNoAdminSigner: true, + ...overrides, + }; +} + +describe('multi-RPC read failover (real loopback providers)', () => { + const adapters: EVMChainAdapter[] = []; + const servers: LoopbackRpc[] = []; + function track(a: EVMChainAdapter): EVMChainAdapter { adapters.push(a); return a; } + function trackServer(s: LoopbackRpc): LoopbackRpc { servers.push(s); return s; } + + afterEach(async () => { + // Stop ethers' background retry loop / idle sockets FIRST, then close the + // servers, so close() resolves promptly instead of hanging on an in-flight + // 429 retry (the flaky-CI failure mode). + for (const a of adapters.splice(0)) { + try { a.destroy(); } catch { /* destroy() is idempotent */ } + } + for (const s of servers.splice(0)) await s.close(); + }); + + // ── CONTROL (GREEN today): the harness + real-provider adapter works ── + + it('control: a healthy 2-RPC adapter resolves chainId over real loopback providers', async () => { + const primary = trackServer(await startLoopbackRpc()); + const backup = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + await expect(a.getEvmChainId()).resolves.toBe(31337n); + expect(Date.now() - start).toBeLessThan(10_000); + // Two distinct endpoints really were wired (not deduped to one). + expect(a.getRpcUrls()).toEqual([primary.url, backup.url]); + }); + + it('control: a single-RPC adapter resolves chainId over a real loopback provider', async () => { + const only = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: only.url }))); + await expect(a.getEvmChainId()).resolves.toBe(31337n); + expect(only.hits('eth_chainId')).toBeGreaterThanOrEqual(1); + }); + + // ── TARGET (immediate failover) ────────────────────────────────────────── + // UN-SKIP when R1 routes reads through `readWithFailover` over the bare + // providers AND multi-RPC sets per-endpoint retries = 0. On the CURRENT code + // these are RED (the FallbackProvider read path does not immediately fail over + // a fast 429), which is exactly the regression this gate locks. Verified + // entrypoint: getEvmChainId() -> this.provider.getNetwork() (a "direct + // this.provider.*" read in the locked read surface). + // GATE-HAS-TEETH CHECK (QA, 2026-06-25): with this block un-skipped against the + // current code, getEvmChainId() REJECTS with SERVER_ERROR in ~64ms while the + // backup is healthy — i.e. no failover — so the first test fails exactly as a + // regression gate should. It must flip GREEN once R1 lands; do not weaken it. + describe('TARGET — immediate read failover (R1)', () => { + it('primary 429 on the read → served by the healthy backup, primary hit exactly once (no per-endpoint retry)', async () => { + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const backup = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + // The read is served by the backup despite the primary 429ing it. + await expect(a.getEvmChainId()).resolves.toBe(31337n); + const elapsed = Date.now() - start; + + // Immediate: the primary's eth_chainId is attempted exactly ONCE (no 5× + // same-endpoint backoff before failing over) and the whole read is fast. + expect(primary.hits('eth_chainId')).toBe(1); + expect(backup.hits('eth_chainId')).toBeGreaterThanOrEqual(1); + expect(elapsed).toBeLessThan(2_000); + }); + + it('all endpoints 429 → surfaces RPC_ENDPOINTS_EXHAUSTED (retryable), one attempt per endpoint', async () => { + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const backup = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + await expect(a.getEvmChainId()).rejects.toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED' }); + expect(Date.now() - start).toBeLessThan(3_000); + // Exactly one attempt per endpoint per pass (no 5× per-endpoint retry). + expect(primary.hits('eth_chainId')).toBe(1); + expect(backup.hits('eth_chainId')).toBe(1); + }); + }); + + // Direct unit of the new read-failover primitive. UN-SKIP when + // `readWithFailover` exists. Asserts the loop advances on a retryable error + // and serves from the next provider — the read-side mirror of the write loops. + describe('TARGET — readWithFailover primitive (R1)', () => { + it('advances to the next provider on a retryable error and serves its result', async () => { + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_getCode'] })); + const backup = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const readWithFailover = (a as unknown as { + readWithFailover: (label: string, fn: (p: { getCode: (addr: string) => Promise }) => Promise) => Promise; + }).readWithFailover; + + const code = await readWithFailover.call(a, 'getCode', (p) => p.getCode(HUB)); + expect(code).toBe('0x1234'); + expect(primary.hits('eth_getCode')).toBe(1); + expect(backup.hits('eth_getCode')).toBeGreaterThanOrEqual(1); + }); + }); + + // T6: read failover is OBSERVABLE and HOST-ONLY (no key leak) + bumps the + // process-wide /api/status counters. The rpc-failover-log unit test already + // proves rpcHost/noteRpcFailover are host-only in isolation; this proves the + // READ failover path actually routes through that logger end-to-end — the + // read-side W3 observability the old code lacked (logging was "WRITE failover + // only"). Part of the same hard S1-acceptance gate (un-skip with the others). + describe('TARGET — read failover observability is host-only (R1, T6)', () => { + beforeEach(() => { _resetRpcFailoverStatsForTest(); }); + afterEach(() => { _resetRpcFailoverStatsForTest(); }); + + it('a real read failover logs HOST-ONLY (no URL/key) and increments the failover counter', async () => { + // Primary URL carries a fake API-key path; the host-only logger must never + // emit it. The loopback server ignores the path and 429s the read. + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const backup = trackServer(await startLoopbackRpc()); + const primaryWithKey = `${primary.url}/v1/SECRET-API-KEY-abc123`; + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primaryWithKey, rpcUrls: [backup.url] }))); + + const warnings: string[] = []; + const origWarn = console.warn; + console.warn = ((...args: unknown[]) => { warnings.push(String(args[0])); }) as typeof console.warn; + try { + await expect(a.getEvmChainId()).resolves.toBe(31337n); + } finally { + console.warn = origWarn; + } + + // A failover line was emitted, host-only: contains the loopback host but + // NEVER the scheme or the API key embedded in the configured URL. + const failoverLines = warnings.filter((w) => w.includes('RPC failover')); + expect(failoverLines.length).toBeGreaterThanOrEqual(1); + const joined = failoverLines.join('\n'); + expect(joined).toContain('127.0.0.1'); + expect(joined).not.toContain('SECRET-API-KEY'); + expect(joined).not.toContain('://'); + + // Process-wide /api/status counters recorded the read failover, host-only. + const stats = getRpcFailoverStats(); + expect(stats.failovers).toBeGreaterThanOrEqual(1); + const hosts = Object.keys(stats.byEndpointHost); + expect(hosts.some((h) => h.startsWith('127.0.0.1'))).toBe(true); + expect(hosts.every((h) => !h.includes('SECRET-API-KEY') && !h.includes('://'))).toBe(true); + }); + }); +}); diff --git a/packages/chain/test/multi-rpc-write-failover.test.ts b/packages/chain/test/multi-rpc-write-failover.test.ts new file mode 100644 index 0000000000..1cf3f731e3 --- /dev/null +++ b/packages/chain/test/multi-rpc-write-failover.test.ts @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * INV-7 (writes): REAL multi-RPC WRITE failover over loopback JSON-RPC servers + * — the write-side mirror of T1, closing the same false-green. + * + * The ~170 existing write-failover tests inject bare-object providers + * (`(a as any).providers = [primary, backup]`) whose `broadcastTransaction` + * throws synchronously, so they prove the failover LOOP advances but can NEVER + * prove "no per-endpoint backoff": they bypass `boundedRetryFetchRequest` / + * `FetchRequest` entirely, so they pass identically whether the multi-RPC + * per-endpoint retry budget is 0 or 5. This drives a REAL `JsonRpcProvider`'s + * `broadcastTransaction` against loopback servers, so the failed primary's + * `eth_sendRawTransaction` is counted and "exactly once" actually means + * immediate failover. + * + * Note: ethers' `broadcastTransaction` re-parses the signed tx and asserts the + * node's returned hash equals the tx hash — so the accepting server must return + * the REAL hash of the signed tx (computed below), not a dummy. + * + * CONTROL (healthy primary) is GREEN today. The `describe.skip` TARGET is part of + * the HARD S1-acceptance gate (see multi-rpc-read-failover.test.ts header): + * UN-SKIP it the moment ChainEngineer wires multi-RPC provider construction to + * `boundedRetryFetchRequest(url, 0)`; it MUST then be GREEN (S1 not complete + * until live-green). On the current code (retry budget 5) the failed primary's + * broadcast is retried ~6× over ~7.5s before failover, so the "hit exactly once + * / bounded" assertions are RED today — exactly the immediate-failover + * regression this gate locks. VERIFIED 2026-06-25: un-skipped against S1 these + * PASS (3/3). Kept skip only while S1 is mid-flight. + */ +import { describe, it, expect, afterEach } from 'vitest'; +import { ethers } from 'ethers'; +import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { startLoopbackRpc, type LoopbackRpc } from './loopback-rpc-harness.js'; + +const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; +const HUB = '0x0000000000000000000000000000000000000001'; + +function minimalConfig(overrides: Partial = {}): EVMAdapterConfig { + return { + rpcUrl: 'http://127.0.0.1:1', + privateKey: DEPLOYER_PK, + hubAddress: HUB, + chainId: 'evm:31337', + allowNoAdminSigner: true, + ...overrides, + }; +} + +/** A real, offline-signed legacy tx + its canonical hash (no RPC contact). */ +async function signTx(): Promise<{ signedTx: string; txHash: string }> { + const wallet = new ethers.Wallet(DEPLOYER_PK); + const signedTx = await wallet.signTransaction({ + to: HUB, + nonce: 0, + gasLimit: 21_000, + gasPrice: 1_000_000_000, + value: 0, + chainId: 31337, + }); + return { signedTx, txHash: ethers.Transaction.from(signedTx).hash! }; +} + +describe('multi-RPC write failover (real loopback providers)', () => { + const adapters: EVMChainAdapter[] = []; + const servers: LoopbackRpc[] = []; + function track(a: EVMChainAdapter): EVMChainAdapter { adapters.push(a); return a; } + function trackServer(s: LoopbackRpc): LoopbackRpc { servers.push(s); return s; } + + afterEach(async () => { + for (const a of adapters.splice(0)) { + try { a.destroy(); } catch { /* idempotent */ } + } + for (const s of servers.splice(0)) await s.close(); + }); + + // ── CONTROL (GREEN today): real broadcast over a healthy loopback provider ── + it('control: broadcasts a real signed tx to a healthy primary (no failover)', async () => { + const { signedTx, txHash } = await signTx(); + const primary = trackServer(await startLoopbackRpc({ results: { eth_sendRawTransaction: txHash } })); + const backup = trackServer(await startLoopbackRpc({ results: { eth_sendRawTransaction: txHash } })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + await expect( + (a as any).broadcastSignedTransactionWithFailover(signedTx, txHash, 'unit write'), + ).resolves.toBeUndefined(); + expect(primary.hits('eth_sendRawTransaction')).toBe(1); + expect(backup.hits('eth_sendRawTransaction')).toBe(0); // healthy primary → never reached + }); + + // ── TARGET (immediate failover): un-skip after retries=0 on write providers ── + // RED today (retry budget 5 → primary broadcast retried ~6× over ~7.5s before + // failover); GREEN once multi-RPC providers are built with + // boundedRetryFetchRequest(url, 0). The SAME signed tx is re-broadcast to the + // backup (idempotent, INV-4); the failed primary is hit exactly once (INV-7). + describe('TARGET — immediate write failover (retries=0, R1)', () => { + it('primary 429 on broadcast → SAME tx accepted by backup, primary hit exactly once, bounded', async () => { + const { signedTx, txHash } = await signTx(); + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_sendRawTransaction'] })); + const backup = trackServer(await startLoopbackRpc({ results: { eth_sendRawTransaction: txHash } })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + await expect( + (a as any).broadcastSignedTransactionWithFailover(signedTx, txHash, 'unit write'), + ).resolves.toBeUndefined(); + const elapsed = Date.now() - start; + + // Immediate: the 429ing primary's eth_sendRawTransaction is attempted + // EXACTLY once (no 5× same-endpoint backoff) and the SAME raw tx is + // accepted by the backup. Bounded well under the 10s broadcast timeout. + expect(primary.hits('eth_sendRawTransaction')).toBe(1); + expect(backup.hits('eth_sendRawTransaction')).toBe(1); + expect(elapsed).toBeLessThan(2_000); + }); + + it('all endpoints 429 on broadcast → RPC_ENDPOINTS_EXHAUSTED, one attempt per endpoint', async () => { + const { signedTx, txHash } = await signTx(); + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_sendRawTransaction'] })); + const backup = trackServer(await startLoopbackRpc({ throttle: ['eth_sendRawTransaction'] })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + await expect( + (a as any).broadcastSignedTransactionWithFailover(signedTx, txHash, 'unit write'), + ).rejects.toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED' }); + expect(primary.hits('eth_sendRawTransaction')).toBe(1); + expect(backup.hits('eth_sendRawTransaction')).toBe(1); + }); + }); +}); diff --git a/packages/chain/test/readwithfailover-loop.unit.test.ts b/packages/chain/test/readwithfailover-loop.unit.test.ts new file mode 100644 index 0000000000..af30b93a60 --- /dev/null +++ b/packages/chain/test/readwithfailover-loop.unit.test.ts @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Read-failover LOOP LOGIC for R1's `readWithFailover` — the read-side mirror of + * the write-loop unit tests in evm-adapter.unit.test.ts. Drives the loop with + * bare-object `(a as any).providers = [...]` mocks (the same style the write + * loops use): cheap, fast, no HTTP server, exercises advance-on-retryable / + * exhaustion / single-RPC / non-retryable-throws / host-only logging. + * + * SCOPE NOTE (deliberate): bare-object mocks prove the loop CONTROL FLOW but NOT + * the "immediate / no per-endpoint backoff" property — they reject synchronously, + * bypassing boundedRetryFetchRequest / FetchRequest, so they pass identically + * whether per-endpoint retries is 0 or 5. The retries=0 IMMEDIACY guarantee is + * proven separately with REAL providers in multi-rpc-read-failover.test.ts. This + * file is necessary-but-not-sufficient on its own; the two together are the net. + */ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { isChainRpcTransportError } from '../src/chain-rpc-transport-error.js'; +import { getRpcFailoverStats, _resetRpcFailoverStatsForTest } from '../src/rpc-failover-log.js'; +import { RPC_LOG_SCAN_TIMEOUT_MS, RPC_READ_STALL_TIMEOUT_MS } from '../src/evm-adapter-constants.js'; + +const PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; +const HUB = '0x0000000000000000000000000000000000000001'; + +function recorder(impl: (...args: A) => R) { + const calls: A[] = []; + const fn = (...args: A): R => { calls.push(args); return impl(...args); }; + return Object.assign(fn, { calls }); +} + +function minimalConfig(overrides: Partial = {}): EVMAdapterConfig { + return { + rpcUrl: 'https://primary.example', + privateKey: PK, + hubAddress: HUB, + chainId: 'evm:31337', + allowNoAdminSigner: true, + ...overrides, + }; +} + +// The constructor builds REAL JsonRpcProviders over the (fake) URLs + wires +// error-listener network detection; destroy() them immediately so no socket / +// detection promise lingers ("Vite server not exiting" → flaky CI exit 1), THEN +// override `this.providers` with the bare-object mocks below. +function freshAdapter(cfg: EVMAdapterConfig): EVMChainAdapter { + const a = new EVMChainAdapter(cfg); + try { a.destroy(); } catch { /* idempotent; never dialled */ } + return a; +} + +const retryable429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; }; +const callExceptionErr = () => { const e = new Error('execution reverted'); (e as any).code = 'CALL_EXCEPTION'; return e; }; + +// readWithFailover is protected; reach it via the same `as any` convention the +// rest of the chain suite uses. `fn` receives the bare provider; we give each +// mock provider a single `read()` method. +function readWithFailover(a: EVMChainAdapter, fn: (p: any) => Promise, label = 'unit read'): Promise { + return (a as any).readWithFailover(label, fn); +} + +describe('readWithFailover — read-failover loop logic (bare-mock, R1)', () => { + beforeEach(() => { _resetRpcFailoverStatsForTest(); }); + afterEach(() => { _resetRpcFailoverStatsForTest(); }); + + it('advances to the next provider on a retryable error and serves its result', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: recorder(async () => { throw retryable429(); }) }; + const backup = { read: recorder(async () => 'served-by-backup') }; + (a as any).providers = [primary, backup]; + + await expect(readWithFailover(a, (p) => p.read())).resolves.toBe('served-by-backup'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toHaveLength(1); + }); + + it('exhausts ALL endpoints → ChainRpcTransportError RPC_ENDPOINTS_EXHAUSTED, one attempt each', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: recorder(async () => { throw retryable429(); }) }; + const backup = { read: recorder(async () => { throw retryable429(); }) }; + (a as any).providers = [primary, backup]; + + let thrown: any; + try { await readWithFailover(a, (p) => p.read()); } catch (e) { thrown = e; } + expect(thrown).toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED', rpcUrls: ['https://primary.example', 'https://backup.example'] }); + expect(isChainRpcTransportError(thrown)).toBe(true); + // HOST-ONLY aggregate message: names hosts, never the full https:// URL. + expect(thrown.message).toContain('primary.example'); + expect(thrown.message).not.toContain('https://'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toHaveLength(1); + }); + + it('single-RPC: a retryable failure still stamps RPC_ENDPOINTS_EXHAUSTED but keeps the original message verbatim', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); + const only = { read: recorder(async () => { throw new Error('connect ECONNREFUSED 127.0.0.1:8545'); }) }; + (a as any).providers = [only]; + + let thrown: any; + try { await readWithFailover(a, (p) => p.read()); } catch (e) { thrown = e; } + expect(thrown).toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED' }); + // No second endpoint → message stays byte-identical (no "all endpoints" aggregate). + expect(thrown.message).toBe('connect ECONNREFUSED 127.0.0.1:8545'); + expect(thrown.message).not.toContain('all configured RPC endpoints'); + expect(only.read.calls).toHaveLength(1); + }); + + it('does NOT fail over a deterministic non-retryable error (CALL_EXCEPTION) — throws it, backup untouched', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const err = callExceptionErr(); + const primary = { read: recorder(async () => { throw err; }) }; + const backup = { read: recorder(async () => 'should-not-be-reached') }; + (a as any).providers = [primary, backup]; + + await expect(readWithFailover(a, (p) => p.read())).rejects.toBe(err); + expect(backup.read.calls).toEqual([]); + }); + + it('logs each failover hop HOST-ONLY and bumps the process-wide counters', async () => { + const a = freshAdapter(minimalConfig({ + rpcUrl: 'https://primary.example/v1/SECRET-KEY', rpcUrls: ['https://backup.example'], + })); + const primary = { read: recorder(async () => { throw retryable429(); }) }; + const backup = { read: recorder(async () => 'ok') }; + (a as any).providers = [primary, backup]; + + const warnings: string[] = []; + const origWarn = console.warn; + console.warn = ((...args: unknown[]) => { warnings.push(String(args[0])); }) as typeof console.warn; + try { + await expect(readWithFailover(a, (p) => p.read())).resolves.toBe('ok'); + } finally { + console.warn = origWarn; + } + + const failoverLines = warnings.filter((w) => w.includes('RPC failover')); + expect(failoverLines.length).toBeGreaterThanOrEqual(1); + const joined = failoverLines.join('\n'); + expect(joined).toContain('primary.example'); + expect(joined).not.toContain('SECRET-KEY'); + expect(joined).not.toContain('://'); + + const stats = getRpcFailoverStats(); + expect(stats.failovers).toBeGreaterThanOrEqual(1); + expect(Object.keys(stats.byEndpointHost).every((h) => !h.includes('SECRET-KEY') && !h.includes('://'))).toBe(true); + }); +}); + +// Regression for the log-scan cap fix (adversarial-review find): the 4s +// point-read cap was aborting WIDE events.ts queryFilter/getLogs reads (9000-block +// poller ranges legitimately >4s) → threw RPC_ENDPOINTS_EXHAUSTED before the +// publisher poller advanced its cursor → permanent stall. Fix: +// `multiAttemptTimeoutMs` (caps MULTI-RPC attempts only) + RPC_LOG_SCAN_TIMEOUT_MS +// (30s) on the 9 events.ts reads; SINGLE-RPC stays uncapped (#894 — nothing to +// fail over to). Fake timers throughout — no real 5s/30s sleeps. +describe('readWithFailover — per-attempt cap (log-scan stall fix)', () => { + // A read whose promise resolves after `ms` of (fake) time. + const delayedRead = (ms: number, value: string) => + recorder(() => new Promise((resolve) => { setTimeout(() => resolve(value), ms); })); + + afterEach(() => { vi.useRealTimers(); }); + + it('MULTI-RPC: a wide read >4s but <30s COMPLETES on the primary with multiAttemptTimeoutMs (not aborted, no failover)', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: delayedRead(5_000, 'PRIMARY') }; + const backup = { read: recorder(async () => 'BACKUP') }; + (a as any).providers = [primary, backup]; + + const p = (a as any).readWithFailover('log scan', (pr: any) => pr.read(), { multiAttemptTimeoutMs: RPC_LOG_SCAN_TIMEOUT_MS }); + await vi.advanceTimersByTimeAsync(6_000); // past the 5s read, well under the 30s cap + expect(await p).toBe('PRIMARY'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toEqual([]); // completed on the primary → backup never consulted + }); + + it('MULTI-RPC: the SAME wide read under the DEFAULT point-read cap aborts at ~4s and fails over (proves the cap matters)', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: delayedRead(5_000, 'PRIMARY') }; + const backup = { read: recorder(async () => 'BACKUP') }; + (a as any).providers = [primary, backup]; + + const p = (a as any).readWithFailover('point read', (pr: any) => pr.read()); // no opt → RPC_READ_STALL_TIMEOUT_MS (4s) + await vi.advanceTimersByTimeAsync(RPC_READ_STALL_TIMEOUT_MS + 1_500); // primary times out at 4s → fail over + expect(await p).toBe('BACKUP'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toHaveLength(1); + }); + + it('SINGLE-RPC: multiAttemptTimeoutMs NEVER caps — a >30s healthy read still completes, no abort (#894)', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); // single endpoint + const only = { read: delayedRead(35_000, 'ONLY') }; + (a as any).providers = [only]; + + const p = (a as any).readWithFailover('log scan', (pr: any) => pr.read(), { multiAttemptTimeoutMs: RPC_LOG_SCAN_TIMEOUT_MS }); + await vi.advanceTimersByTimeAsync(36_000); // would exceed the 30s cap IF it applied to single-RPC + expect(await p).toBe('ONLY'); // uncapped → completes + expect(only.read.calls).toHaveLength(1); + }); + + it('precedence: attemptTimeoutMs caps EVEN single-RPC (fail-open funding reads) — an over-budget read aborts → exhausted', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); + const only = { read: delayedRead(5_000, 'ONLY') }; + (a as any).providers = [only]; + + const settled = (a as any).readWithFailover('funding', (pr: any) => pr.read(), { attemptTimeoutMs: 1_000 }) + .then((r: unknown) => r, (e: unknown) => e); + await vi.advanceTimersByTimeAsync(1_500); // exceeds the 1s hard cap → aborts; single → exhausted + const outcome: any = await settled; + expect(outcome.code).toBe('RPC_ENDPOINTS_EXHAUSTED'); + expect(only.read.calls).toHaveLength(1); + }); +}); diff --git a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts index cf7d13969f..b70167fade 100644 --- a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts +++ b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts @@ -46,9 +46,14 @@ function makeStubbedAdapter(opts: { }) { const a = new EVMChainAdapter(minimalConfig()); (a as any).initialized = true; - (a as any).provider = { - getNetwork: async () => ({ chainId: TEST_CHAIN_ID }), - }; + // R1: getEvmChainId reads chainId via readWithFailover over this.providers[0] + // (=== this.provider in prod). Set the mock on BOTH so the digest's chainId + // read resolves to the stub instead of dialling the placeholder RPC. (The + // contract stubs below have no `.connect`, so contractReadWithFailover's + // withRunner uses them directly — no change needed there.) + const provider = { getNetwork: async () => ({ chainId: TEST_CHAIN_ID }) }; + (a as any).provider = provider; + (a as any).providers = [provider]; (a as any).contracts.knowledgeAssetsLifecycle = { getAddress: async () => KAV10_ADDRESS, }; diff --git a/packages/chain/test/write-tx-safety-invariants.unit.test.ts b/packages/chain/test/write-tx-safety-invariants.unit.test.ts new file mode 100644 index 0000000000..7fe6939379 --- /dev/null +++ b/packages/chain/test/write-tx-safety-invariants.unit.test.ts @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Write-path TX-SAFETY invariants (TxSafetyReviewer's contract for the + * immediate-failover change). The existing ~170 adapter failover tests inject + * bare-object providers that bypass the real broadcast/receipt path — a FALSE + * GREEN for tx-safety. These drive the REAL + * `dispatchSerializedV10Write → sendSignedTransactionAndWait → + * broadcastSignedTransactionWithFailover / waitForReceiptWithFailover` path + * (the seam where the S2 set-retry will live), stubbing ONLY the providers' + * `broadcastTransaction`/`getTransactionReceipt`, so the sign / WAL call-counts + * are observable. This is the evm-adapter-nonce-serialization seam MINUS its + * `sendSignedTransactionAndWait` mock, so the failover (and future set-retry) is + * actually exercised. + * + * NOW (current write path): INV-1 single-sign across a broadcast-failover sweep, + * INV-3 WAL-once + ordering, INV-4 broadcast idempotency, INV-5 + * reverted-receipt-no-resubmit + non-retryable-no-failover. + * + * STAGED (describe.skip → un-skip when ChainEngineer's S2 set-retry lands inside + * sendSignedTransactionAndWait): INV-1/2/3 across the set-retry MULTI-PASS, INV-6 + * nonce/lock under set-retry. STAGED for S3 breaker: INV-8. The set-retry-multi + * assertions (buildSignedTx==1 / onBroadcast==1 regardless of pass count) are the + * headline regressions — they have teeth only once the set-retry exists. + */ +import { describe, it, expect, vi } from 'vitest'; +import { ethers } from 'ethers'; +import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { RPC_ENDPOINT_SET_RETRIES, RPC_ENDPOINT_SET_RETRY_BACKOFF_MS } from '../src/evm-adapter-constants.js'; + +const PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; +const HUB = '0x0000000000000000000000000000000000000001'; + +function recorder(impl: (...args: A) => R) { + const calls: A[] = []; + const fn = (...args: A): R => { calls.push(args); return impl(...args); }; + return Object.assign(fn, { calls }); +} + +function minimalConfig(overrides: Partial = {}): EVMAdapterConfig { + return { + rpcUrl: 'https://primary.example', + rpcUrls: ['https://backup.example'], + privateKey: PK, + hubAddress: HUB, + chainId: 'evm:31337', + allowNoAdminSigner: true, + ...overrides, + }; +} + +// The constructor builds REAL JsonRpcProviders over the (fake) URLs and wires +// error-listener network detection on them. We immediately destroy() those so +// no keep-alive socket / detection promise lingers ("Vite server not exiting" → +// intermittent CI exit 1), THEN the test overrides `this.providers` with stubs. +function freshAdapter(cfg: EVMAdapterConfig): EVMChainAdapter { + const a = new EVMChainAdapter(cfg); + try { a.destroy(); } catch { /* idempotent; never dialled */ } + return a; +} + +const SIGNED = '0xSIGNEDTX'; +const TXHASH = '0x' + '11'.repeat(32); +const fakeReceipt = (status: number) => + ({ hash: TXHASH, blockNumber: 1, index: 0, status, logs: [] }) as unknown as ethers.TransactionReceipt; +const retryable429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; }; +const neverNull = (pre: string): never => { throw new Error(`unexpected null receipt for ${pre}`); }; + +// Drive the REAL dispatch → send → broadcast/receipt path. `buildSignedTx` and +// `onBroadcast` are spied; only the providers are stubbed. +async function runDispatch(a: EVMChainAdapter, opts: { + onBroadcast?: (info: { txHash: string }) => Promise | void; + buildSignedTx?: () => Promise<{ signedTx: string; txHash: string }>; +}) { + const signer = new ethers.Wallet(PK); + const buildSignedTx = opts.buildSignedTx ?? (async () => ({ signedTx: SIGNED, txHash: TXHASH })); + return (a as any).dispatchSerializedV10Write(signer, 'publish', opts.onBroadcast, buildSignedTx, neverNull); +} + +describe('write-path tx-safety invariants (real dispatch/broadcast/receipt path)', () => { + it('INV-1: signs EXACTLY once across a broadcast-failover sweep (#1 429 → #2 accepts)', async () => { + const a = freshAdapter(minimalConfig()); + const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH })); + const onBroadcast = recorder(async () => undefined); + const primary = { + broadcastTransaction: recorder(async () => { throw retryable429(); }), + getTransactionReceipt: recorder(async () => null), + }; + const backup = { + broadcastTransaction: recorder(async () => ({ hash: TXHASH })), + getTransactionReceipt: recorder(async () => fakeReceipt(1)), + }; + (a as any).providers = [primary, backup]; + + const receipt = await runDispatch(a, { onBroadcast, buildSignedTx }); + expect(receipt.status).toBe(1); + // The signed tx is built ONCE even though broadcast failed over to #2. + expect(buildSignedTx.calls).toHaveLength(1); + expect(onBroadcast.calls).toHaveLength(1); + // The SAME signed raw tx was handed to both endpoints (no re-sign). + expect(primary.broadcastTransaction.calls).toEqual([[SIGNED]]); + expect(backup.broadcastTransaction.calls).toEqual([[SIGNED]]); + }); + + it('INV-3: WAL onBroadcast fires exactly once and STRICTLY before any broadcast', async () => { + const a = freshAdapter(minimalConfig({ rpcUrls: [] })); // single endpoint, happy path + const timeline: string[] = []; + const onBroadcast = recorder(async () => { timeline.push('wal'); }); + const only = { + broadcastTransaction: recorder(async () => { timeline.push('broadcast'); return { hash: TXHASH }; }), + getTransactionReceipt: recorder(async () => fakeReceipt(1)), + }; + (a as any).providers = [only]; + + await runDispatch(a, { onBroadcast }); + expect(onBroadcast.calls).toHaveLength(1); + expect(timeline[0]).toBe('wal'); + expect(timeline.indexOf('wal')).toBeLessThan(timeline.indexOf('broadcast')); + }); + + it('INV-4: broadcast idempotency — #1 transient error → #2 "already known" → success, single signed tx', async () => { + const a = freshAdapter(minimalConfig()); + const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH })); + const primary = { + broadcastTransaction: recorder(async () => { throw retryable429(); }), + getTransactionReceipt: recorder(async () => fakeReceipt(1)), + }; + const backup = { + broadcastTransaction: recorder(async () => { throw new Error('already known'); }), + getTransactionReceipt: recorder(async () => fakeReceipt(1)), + }; + (a as any).providers = [primary, backup]; + + const receipt = await runDispatch(a, { buildSignedTx }); + expect(receipt.status).toBe(1); + // "already known" on #2 is treated as accepted — no error surfaced. + // Exactly one signed tx; the SAME raw tx hit both endpoints (no 2nd distinct tx). + expect(buildSignedTx.calls).toHaveLength(1); + expect(primary.broadcastTransaction.calls).toEqual([[SIGNED]]); + expect(backup.broadcastTransaction.calls).toEqual([[SIGNED]]); + }); + + it('INV-5a: a mined REVERTED receipt (status=0) throws CALL_EXCEPTION with NO resubmit', async () => { + const a = freshAdapter(minimalConfig({ rpcUrls: [] })); + const only = { + broadcastTransaction: recorder(async () => ({ hash: TXHASH })), + getTransactionReceipt: recorder(async () => fakeReceipt(0)), // mined, reverted + }; + (a as any).providers = [only]; + + await expect(runDispatch(a, {})).rejects.toMatchObject({ code: 'CALL_EXCEPTION' }); + // Broadcast happened once; the revert must NOT trigger a resubmit. + expect(only.broadcastTransaction.calls).toHaveLength(1); + }); + + it('INV-5b: a non-retryable build/sign error throws BEFORE any broadcast or WAL checkpoint', async () => { + const a = freshAdapter(minimalConfig()); + const onBroadcast = recorder(async () => undefined); + const buildSignedTx = recorder(async () => { + const e = new Error('insufficient funds for gas'); (e as any).code = 'INSUFFICIENT_FUNDS'; throw e; + }); + const primary = { + broadcastTransaction: recorder(async () => ({ hash: TXHASH })), + getTransactionReceipt: recorder(async () => fakeReceipt(1)), + }; + (a as any).providers = [primary]; + + await expect(runDispatch(a, { onBroadcast, buildSignedTx })).rejects.toThrow(/insufficient funds/i); + // Fails closed: no WAL checkpoint, no broadcast. + expect(onBroadcast.calls).toEqual([]); + expect(primary.broadcastTransaction.calls).toEqual([]); + }); +}); + +// S2 set-retry lives INSIDE sendSignedTransactionAndWait (base:1073), wrapping +// broadcastSignedTransactionWithFailover ONLY, up to RPC_ENDPOINT_SET_RETRIES +// extra full passes with a fixed RPC_ENDPOINT_SET_RETRY_BACKOFF_MS sleep between +// (a non-injectable const → fake timers). It re-broadcasts the SAME +// {signedTx,txHash}; the seam is signer-free so re-sign is structurally +// impossible (INV-2). These are the headline tx-safety regressions. +describe('write-path tx-safety invariants — set-retry MULTI-PASS (S2)', () => { + // Spy broadcastSignedTransactionWithFailover (the PASS-count seam, per TxSafety: + // raw provider.broadcastTransaction is called providers.length× PER pass) while + // still running the real per-provider broadcast loop underneath. + function spyBroadcastPasses(a: EVMChainAdapter) { + const orig = (a as any).broadcastSignedTransactionWithFailover.bind(a); + const spy = recorder((...args: unknown[]) => orig(...args)); + (a as any).broadcastSignedTransactionWithFailover = spy; + return spy; + } + + it('INV-1/2/3 + C3: all-exhaust set-retry — signs once / WAL once / re-broadcasts the SAME tx for setRetries+1 passes', async () => { + vi.useFakeTimers(); + try { + const a = freshAdapter(minimalConfig()); + const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH })); + const onBroadcast = recorder(async () => undefined); + const throttled = () => ({ + broadcastTransaction: recorder(async (_raw: string) => { throw retryable429(); }), + getTransactionReceipt: recorder(async () => null), + }); + const p0 = throttled(); const p1 = throttled(); + (a as any).providers = [p0, p1]; + const broadcastPasses = spyBroadcastPasses(a); + + const settled = runDispatch(a, { onBroadcast, buildSignedTx }).then(() => 'ok', (e) => e); + await vi.advanceTimersByTimeAsync((RPC_ENDPOINT_SET_RETRIES + 1) * RPC_ENDPOINT_SET_RETRY_BACKOFF_MS + 50); + const outcome: any = await settled; + + expect(outcome.code).toBe('RPC_ENDPOINTS_EXHAUSTED'); + // PASS count = setRetries + 1 (one initial pass + the bounded retries). + expect(broadcastPasses.calls).toHaveLength(RPC_ENDPOINT_SET_RETRIES + 1); + expect(buildSignedTx.calls).toHaveLength(1); // INV-1/2: signed once across all passes + expect(onBroadcast.calls).toHaveLength(1); // INV-3: WAL fired once, never re-fired + // C3 (idempotency root): every broadcast got the byte-identical SIGNED tx. + const allRaw = [...p0.broadcastTransaction.calls, ...p1.broadcastTransaction.calls]; + expect(allRaw.length).toBeGreaterThan(1); + expect(allRaw.every((c) => c[0] === SIGNED)).toBe(true); + } finally { + vi.useRealTimers(); + } + }); + + it('INV-1/2: a later set-retry pass that succeeds yields the receipt with buildSignedTx still == 1', async () => { + vi.useFakeTimers(); + try { + const a = freshAdapter(minimalConfig()); + const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH })); + const onBroadcast = recorder(async () => undefined); + let attempt = 0; // shared across both providers: pass 0 = first 2 attempts (429), pass 1 accepts + const provider = () => ({ + broadcastTransaction: recorder(async (_raw: string) => { attempt += 1; if (attempt <= 2) throw retryable429(); return { hash: TXHASH }; }), + getTransactionReceipt: recorder(async () => fakeReceipt(1)), + }); + (a as any).providers = [provider(), provider()]; + const broadcastPasses = spyBroadcastPasses(a); + + const settled = runDispatch(a, { onBroadcast, buildSignedTx }).then((r) => r, (e) => e); + await vi.advanceTimersByTimeAsync((RPC_ENDPOINT_SET_RETRIES + 1) * RPC_ENDPOINT_SET_RETRY_BACKOFF_MS + 50); + const receipt: any = await settled; + + expect(receipt.status).toBe(1); + expect(broadcastPasses.calls).toHaveLength(2); // pass 0 (exhaust) + pass 1 (accept) + expect(buildSignedTx.calls).toHaveLength(1); + expect(onBroadcast.calls).toHaveLength(1); + } finally { + vi.useRealTimers(); + } + }); + + it('INV-5/C2: a mined REVERTED receipt does NOT trigger a set-retry re-broadcast (broadcast pass == 1)', async () => { + const a = freshAdapter(minimalConfig({ rpcUrls: [] })); + const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH })); + const only = { + broadcastTransaction: recorder(async () => ({ hash: TXHASH })), // broadcast SUCCEEDS + getTransactionReceipt: recorder(async () => fakeReceipt(0)), // but mined REVERTED + }; + (a as any).providers = [only]; + const broadcastPasses = spyBroadcastPasses(a); + + await expect(runDispatch(a, { buildSignedTx })).rejects.toMatchObject({ code: 'CALL_EXCEPTION' }); + // The set-retry wraps the BROADCAST phase only; the reverted receipt comes + // from waitForReceiptWithFailover OUTSIDE the loop → broadcast fired ONCE, + // no resubmit of a tx that already executed-and-reverted on chain. + expect(broadcastPasses.calls).toHaveLength(1); + expect(only.broadcastTransaction.calls).toHaveLength(1); + expect(buildSignedTx.calls).toHaveLength(1); + }); + + it('INV-6: the per-wallet lock is HELD across set-retry passes — a concurrent same-wallet write cannot sign until the first resolves', async () => { + vi.useFakeTimers(); + try { + const a = freshAdapter(minimalConfig({ rpcUrls: [] })); + const signer = new ethers.Wallet(PK); + const events: string[] = []; + let attempt = 0; + const provider = { + // Write-1: 429 on its first broadcast (forces a set-retry backoff), accepts next. + broadcastTransaction: recorder(async () => { attempt += 1; if (attempt === 1) throw retryable429(); return { hash: TXHASH }; }), + getTransactionReceipt: recorder(async () => fakeReceipt(1)), + }; + (a as any).providers = [provider]; + const w1build = recorder(async () => { events.push('w1:build'); return { signedTx: SIGNED, txHash: TXHASH }; }); + const w2build = recorder(async () => { events.push('w2:build'); return { signedTx: '0xW2', txHash: '0x' + '22'.repeat(32) }; }); + + const w1 = (a as any).dispatchSerializedV10Write(signer, 'publish', undefined, w1build, neverNull); + const w2 = (a as any).dispatchSerializedV10Write(signer, 'publish', undefined, w2build, neverNull); + + // Park write-1 mid-backoff: it has built + 429'd + is sleeping. Write-2 must + // NOT have started building (the per-wallet lock is held across the set-retry). + await vi.advanceTimersByTimeAsync(RPC_ENDPOINT_SET_RETRY_BACKOFF_MS / 2); + expect(w1build.calls).toHaveLength(1); + expect(w2build.calls).toHaveLength(0); + + await vi.advanceTimersByTimeAsync(RPC_ENDPOINT_SET_RETRY_BACKOFF_MS + 50); + await Promise.all([w1, w2]); + // Strict end-to-end serialization survives the set-retry. + expect(events).toEqual(['w1:build', 'w2:build']); + } finally { + vi.useRealTimers(); + } + }); +}); + +describe.skip('write-path tx-safety invariants — circuit breaker (S3, un-skip when breaker lands)', () => { + it('INV-8: with all hosts cooling-down, a WRITE still broadcasts to ≥1 endpoint (no zero-attempt exhaustion)', async () => { + // TODO(S3): mark every host cooling-down in the process-wide breaker, then a + // write must still attempt ≥1 broadcast (eligible-set never empties to a + // zero-attempt RPC_ENDPOINTS_EXHAUSTED). + expect(true).toBe(true); + }); +}); From df7dc3f48d408dbfe3a0275a282189aee7e451b5 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Fri, 26 Jun 2026 02:45:55 +0200 Subject: [PATCH 2/4] =?UTF-8?q?fix(chain):=20address=20PR=20#1335=20review?= =?UTF-8?q?=20=E2=80=94=20read-failover=20classifier,=20getReadProvider,?= =?UTF-8?q?=20withRunner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses 6 otReviewAgent comments (all read-path): - readWithFailover / contractReadWithFailover now take an optional `isRetryable` classifier (default isRetryableRpcError). getMaxKaNumberForAuthor's bespoke per-endpoint staticCall loop is replaced by a single readWithFailover call with an absent-view classifier — it now gets the per-attempt withTimeout it lacked (a hung primary fails over instead of hanging), with no copied loop/logging/exhaustion. The existing absent-view outer catch (scan / bytecode-confirm) is unchanged. [#1, #5] - contractReadWithFailover defaults to a new `isContractViewRetryable` (= isRetryableRpcError MINUS BAD_DATA): a contract-view BAD_DATA is a deterministic ABI/empty-decode error, not an RPC outage, so it now surfaces directly instead of failing over across endpoints and masking the real error as RPC_ENDPOINTS_EXHAUSTED (restores pre-PR FallbackProvider behavior). Scoped to view reads only; global isRetryableRpcError untouched (writes/cli/agent). [#2] - Removed the obsolete public getReadProvider() (zero callers; it returned the bare primary and silently bypassed multi-RPC read failover) + its mock-parity exemption + two mechanical test refs. [#3, #4] - Tightened withRunner: a concrete `.connect` probe (no cast-through-unknown / any on the production boundary) with the test-double passthrough as its own guarded branch. [#6] Tests: BAD_DATA-not-retryable-for-views (+ 429 contrast), opts.isRetryable precedence both ways, getMaxKaNumber hung-primary timeout-then-failover, and the preserved absent-view boundary. Chain suite 706 pass / 1 skip; full build green. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_012DGBSrmN6Y8Vsgb7x46amp --- packages/chain/src/evm-adapter-base.ts | 171 +++++++++++------- packages/chain/test/evm-adapter.unit.test.ts | 1 - .../test/getmaxkanumberforauthor.unit.test.ts | 45 ++++- .../chain/test/mock-adapter-parity.test.ts | 1 - .../test/multi-rpc-provider-shape.test.ts | 12 +- .../test/readwithfailover-loop.unit.test.ts | 69 +++++++ 6 files changed, 220 insertions(+), 79 deletions(-) diff --git a/packages/chain/src/evm-adapter-base.ts b/packages/chain/src/evm-adapter-base.ts index af6655464c..e50a98af44 100644 --- a/packages/chain/src/evm-adapter-base.ts +++ b/packages/chain/src/evm-adapter-base.ts @@ -307,6 +307,21 @@ async function contractAddress(contract: Contract): Promise { throw new Error('DKGKnowledgeAssets address is unavailable from the resolved contract handle.'); } +/** + * Failover classifier for CONTRACT VIEW reads (`contractReadWithFailover`'s + * default): the generic `isRetryableRpcError` transient set MINUS `BAD_DATA`. + * A view `BAD_DATA` ("could not decode result data") is a DETERMINISTIC + * client-side decode of an empty / wrong-shape return for the ABI type — not an + * RPC outage — so failing over would re-hit the same decode on every endpoint + * and mask it as `RPC_ENDPOINTS_EXHAUSTED`. The pre-PR FallbackProvider never + * failed over on a post-decode error; this restores that. (Direct provider reads + * — getCode/getBalance/getNetwork — never produce BAD_DATA, so they keep the + * unmodified `isRetryableRpcError`.) + */ +function isContractViewRetryable(err: unknown): boolean { + return isRetryableRpcError(err) && errorCode(err) !== 'BAD_DATA'; +} + export class EVMChainAdapterBase { /** See `ChainAdapter.deploymentId`. */ get deploymentId(): string { @@ -866,8 +881,21 @@ export class EVMChainAdapterBase { protected async readWithFailover( label: string, fn: (provider: JsonRpcProvider) => Promise, - opts?: { attemptTimeoutMs?: number; multiAttemptTimeoutMs?: number }, + opts?: { + attemptTimeoutMs?: number; + multiAttemptTimeoutMs?: number; + // Classifier for "should this error fail over to the next endpoint?". + // Defaults to the generic RPC-transient check; callers override it for + // reads whose error shapes carry domain meaning that must NOT be treated as + // a failover-worthy transient — e.g. a contract VIEW's `BAD_DATA` decode + // (deterministic, see `contractReadWithFailover`) or + // `getMaxKaNumberForAuthor`'s absent-view shapes. A non-retryable error is + // rethrown AT ONCE (the caller's own catch then classifies it), never + // failed over or masked as `RPC_ENDPOINTS_EXHAUSTED`. + isRetryable?: (err: unknown) => boolean; + }, ): Promise { + const isRetryable = opts?.isRetryable ?? isRetryableRpcError; let lastRetryable: unknown; for (let i = 0; i < this.providers.length; i += 1) { const isLast = i === this.providers.length - 1; @@ -889,7 +917,7 @@ export class EVMChainAdapterBase { ? attempt : withTimeout(attempt, capMs, `${label} via RPC #${i + 1}`)); } catch (err) { - if (!isRetryableRpcError(err)) throw err; + if (!isRetryable(err)) throw err; lastRetryable = err; if (!isLast) { noteRpcFailover(label, this.rpcUrls[i], err, this.rpcUrls[i + 1]); @@ -914,38 +942,64 @@ export class EVMChainAdapterBase { }); } + /** + * Reconnect a Contract / Wallet `handle` to `runner` so a per-endpoint read or + * populate executes against that runner (the failover mechanism). A production + * handle is always an ethers `Contract` / `Wallet`, both of which expose + * `.connect`, so it is rebound. A bare TEST DOUBLE may omit `.connect`; the + * guarded fallback — the ONLY branch that returns the handle un-rebound — + * passes the stub through so the read still executes against it. The + * `T extends Contract | Wallet` bound + the single explicit `connect` assertion + * (no cast through `unknown`) keep this off the arbitrary-`any` boundary. + */ + protected withRunner(handle: T, runner: JsonRpcProvider | Wallet): T { + // Production: an ethers Contract / Wallet — both expose `.connect`, which + // rebinds to `runner`. Their `connect()` return types (BaseContract / a + // wallet subtype) and param types (ContractRunner vs Provider) differ and are + // not statically `T`, so the handle is only PROBED for `.connect` (a concrete + // shape, NOT widened to `any` or cast through `unknown`) and the rebound + // result is asserted back to `T`. + const rebind = (handle as { connect?: (runner: never) => unknown }).connect; + if (typeof rebind !== 'function') { + // Bare TEST DOUBLE without `.connect` — the ONLY branch that returns the + // handle un-rebound — so the read still executes against the stub. + return handle; + } + return rebind.call(handle, runner as never) as T; + } + /** * Convenience wrapper over `readWithFailover` for a CONTRACT VIEW read: runs * `fn` against `contract` reconnected to each provider in turn (so the view * call executes on the bare per-provider runner, with failover), leaving the - * boot-bound `this.contracts.*` handle untouched. The `.connect(p) as Contract` - * cast (ethers' `BaseContract.connect` loses the dynamic-method index - * signature) lives here once instead of at every call site. Same purity - * contract as `readWithFailover` — `fn` MUST be a pure view read, never a - * state-changing send (writes go through the dedicated write loops). - */ - /** - * Reconnect a contract / signer `handle` to `runner` so a per-endpoint - * read or populate executes against that runner (the failover mechanism). - * Every production handle is an ethers `Contract` / `Wallet` with `.connect`; - * if a handle cannot be rebound (no `.connect`), degrade GRACEFULLY to the - * handle AS-IS rather than throwing. In production that path is never taken; - * it only applies to bare test doubles, where it lets the read still execute - * against the stub. + * boot-bound `this.contracts.*` handle untouched (the `.connect` cast — ethers' + * `BaseContract.connect` loses the dynamic-method index signature — lives in + * `withRunner` once instead of at every call site). Same purity contract as + * `readWithFailover` — `fn` MUST be a pure view read, never a state-changing + * send (writes go through the dedicated write loops). + * + * The default failover classifier is `isContractViewRetryable` (generic + * RPC-transient MINUS `BAD_DATA`): a view `BAD_DATA` ("could not decode result + * data") is a DETERMINISTIC client-side decode, not an RPC outage, so failing + * over would just re-hit the same decode on every endpoint and mask it as + * `RPC_ENDPOINTS_EXHAUSTED`. Pre-PR these reads went through the FallbackProvider, + * which never failed over on a post-decode error; this restores that. A caller + * may pass its own `isRetryable` to override. */ - protected withRunner(handle: T, runner: JsonRpcProvider | Wallet): T { - return typeof (handle as { connect?: unknown })?.connect === 'function' - ? (handle as unknown as { connect(r: unknown): T }).connect(runner) - : handle; - } - protected contractReadWithFailover( label: string, contract: Contract, fn: (c: Contract) => Promise, - opts?: { attemptTimeoutMs?: number; multiAttemptTimeoutMs?: number }, + opts?: { + attemptTimeoutMs?: number; + multiAttemptTimeoutMs?: number; + isRetryable?: (err: unknown) => boolean; + }, ): Promise { - return this.readWithFailover(label, (p) => fn(this.withRunner(contract, p)), opts); + return this.readWithFailover(label, (p) => fn(this.withRunner(contract, p)), { + ...opts, + isRetryable: opts?.isRetryable ?? isContractViewRetryable, + }); } protected async waitForReceiptWithFailover( @@ -2064,42 +2118,35 @@ export class EVMChainAdapterBase { let bareRevert: unknown; const getMax = (storage as any).getMaxKaNumberForAuthor; if (typeof getMax?.staticCall === 'function') { - // This view read is NOT routed through `readWithFailover` because its - // error shapes are semantically overloaded: a `BAD_DATA`/`CALL_EXCEPTION` - // here means "the deployed contract lacks the selector (pre-10.0.4)", - // which `isRetryableRpcError` would mis-classify as a transient (BAD_DATA - // is generically retryable) and fail over on — turning a deterministic - // absent-view answer into a spurious `RPC_ENDPOINTS_EXHAUSTED`. So we run - // an explicit per-endpoint loop that distinguishes the two: absent-view - // shapes are DETERMINISTIC across endpoints (don't fail over — hand to the - // pre-10.0.4 classification below), and ONLY a genuine transient RPC error - // advances to the next endpoint. - for (let i = 0; i < this.providers.length; i += 1) { - const isLast = i === this.providers.length - 1; - try { - const connectedGetMax = - (this.withRunner(storage, this.providers[i]) as Contract).getMaxKaNumberForAuthor; - const max = await connectedGetMax.staticCall(normalized); - return BigInt(max); - } catch (err) { - // Ordering invariant: isKaHighWaterViewUnavailable runs first and calls - // enrichEvmError(err), which only rewrites a message carrying decodable - // `data=0x…` + the literal "unknown custom error" — neither present on a - // bare "missing revert data" (data=null) error — so it leaves the shape - // isKaHighWaterBareRevert keys on untouched. Preserve that if - // enrichEvmError's rewrite rules change. - if (isKaHighWaterViewUnavailable(err)) { - break; // unambiguous absent-view shape → fall through to the bounded scan - } - if (isKaHighWaterBareRevert(err)) { - bareRevert = err; // confirm against the deployed bytecode below - break; - } - if (isRetryableRpcError(err) && !isLast) { - noteRpcFailover('getMaxKaNumberForAuthor staticCall', this.rpcUrls[i], err, this.rpcUrls[i + 1]); - continue; // genuine transient → fail over to the next endpoint - } - throw err; // decoded revert, or a transient on the last endpoint → never crawl + try { + // Route through `readWithFailover` (which gives the per-attempt stall + // timeout + endpoint failover for free) with a CUSTOM classifier: the + // absent-view shapes (`BAD_DATA` empty-`0x`, bare `CALL_EXCEPTION`) are + // DETERMINISTIC across endpoints and mean "pre-10.0.4 contract lacks the + // selector", so they are NON-retryable here — `readWithFailover` rethrows + // them straight to the catch below (→ scan / bytecode-confirm) instead of + // failing over and masking them as `RPC_ENDPOINTS_EXHAUSTED`. ONLY a + // genuine transient advances to the next endpoint. `isKaHighWaterViewUnavailable` + // runs FIRST (it enriches the error) so the ordering invariant the catch + // below also relies on is preserved. + const max = await this.readWithFailover( + 'DKGKnowledgeAssets.getMaxKaNumberForAuthor', + (p) => (this.withRunner(storage, p) as Contract).getMaxKaNumberForAuthor.staticCall(normalized), + { + isRetryable: (err) => + isRetryableRpcError(err) + && !isKaHighWaterViewUnavailable(err) + && !isKaHighWaterBareRevert(err), + }, + ); + return BigInt(max); + } catch (err) { + if (isKaHighWaterViewUnavailable(err)) { + // Unambiguous absent-view shape → fall through to the bounded scan. + } else if (isKaHighWaterBareRevert(err)) { + bareRevert = err; // confirm against the deployed bytecode below + } else { + throw err; // transient exhaustion / decoded revert → never crawl } } } @@ -2863,10 +2910,6 @@ export class EVMChainAdapterBase { return this.primaryProvider; } - getReadProvider(): JsonRpcProvider { - return this.provider; - } - getRpcUrls(): string[] { return [...this.rpcUrls]; } diff --git a/packages/chain/test/evm-adapter.unit.test.ts b/packages/chain/test/evm-adapter.unit.test.ts index ab1bdeb3bb..3448ed74e0 100644 --- a/packages/chain/test/evm-adapter.unit.test.ts +++ b/packages/chain/test/evm-adapter.unit.test.ts @@ -204,7 +204,6 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig()); expect(a.getProvider()).toBeDefined(); expect(typeof a.getProvider().getBlockNumber).toBe('function'); - expect(a.getReadProvider()).toBeDefined(); }); it('issues un-batched JSON-RPC requests (batchMaxCount=1) so a rate-limited read rejects on its own awaited promise — issue #939', async () => { diff --git a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts index df51750e7a..cfc62a60a4 100644 --- a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts +++ b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts @@ -20,6 +20,7 @@ import { describe, it, expect, vi } from 'vitest'; import { ethers } from 'ethers'; import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { RPC_READ_STALL_TIMEOUT_MS } from '../src/evm-adapter-constants.js'; function recorder(impl: (...args: A) => R) { const calls: A[] = []; @@ -248,12 +249,14 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# expect(stale.getMaxKaNumberForAuthor.staticCall.calls).toEqual([]); }); - // R1 bespoke-loop boundary (base:2068): the view staticCall is NOT routed - // through readWithFailover because its error shapes are overloaded — a - // TRANSIENT error must fail over to the next endpoint, but a DETERMINISTIC - // absent-view (BAD_DATA/CALL_EXCEPTION) must NOT fail over (it's the same on - // every endpoint) and instead fall through to the pre-10.0.4 scan. - it('bespoke staticCall loop: a TRANSIENT 429 fails over to the next endpoint and answers from the view (no scan)', async () => { + // The view staticCall now routes THROUGH readWithFailover (base:2132) with a + // CUSTOM classifier (isRetryableRpcError minus the absent-view / bareRevert + // shapes), giving it the per-attempt stall timeout + endpoint failover the old + // bespoke loop lacked — while preserving the boundary: a TRANSIENT error fails + // over to the next endpoint, but a DETERMINISTIC absent-view + // (BAD_DATA/CALL_EXCEPTION) is non-retryable -> rethrown straight to the catch + // -> the pre-10.0.4 scan (never failed over / masked as RPC_ENDPOINTS_EXHAUSTED). + it('getMaxKaNumber view (readWithFailover): a TRANSIENT 429 fails over to the next endpoint and answers from the view (no scan)', async () => { const queryFilter = recorder(async () => []); let attempt = 0; const storage: any = { @@ -272,7 +275,7 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# expect(queryFilter.calls).toEqual([]); // the view answered → NEVER scans logs }); - it('bespoke staticCall loop: an ABSENT-view (BAD_DATA) is deterministic across endpoints — no failover, straight to the scan', async () => { + it('getMaxKaNumber view (readWithFailover): an ABSENT-view (BAD_DATA) is deterministic across endpoints — no failover, straight to the scan', async () => { const badData: any = new Error(EMPTY_VIEW_RESULT); badData.code = 'BAD_DATA'; const queryFilter = recorder(async () => []); @@ -286,11 +289,37 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# const backend = () => ({ getBlockNumber: recorder(async () => 3_000), getCode: recorder(async () => '0x6000') }); (a as any).providers = [backend(), backend()]; expect(await a.getMaxKaNumberForAuthor(AUTHOR)).toBe(-1n); // degraded to the (empty) scan - // Absent-view is deterministic → BREAK after ONE attempt, never consult endpoint #2. + // Absent-view is non-retryable → rethrown after ONE attempt, endpoint #2 never consulted. expect(storage.getMaxKaNumberForAuthor.staticCall.calls).toHaveLength(1); expect(queryFilter.calls.length).toBeGreaterThan(0); // the scan ran instead }); + it('getMaxKaNumber view (readWithFailover): a HUNG primary staticCall times out (per-attempt cap the bespoke loop lacked) and fails over to the backup', async () => { + vi.useFakeTimers(); + try { + let attempt = 0; + const storage: any = { + getMaxKaNumberForAuthor: viewMock(() => + (attempt += 1) === 1 + ? new Promise(() => {}) // primary hangs forever + : Promise.resolve(7n)), // backup answers + filters: { KnowledgeAssetCreated: recorder(() => 'F') }, + queryFilter: recorder(async () => []), + }; + const a = makeAdapter(storage, 100_000); + (a as any).providers = [{}, {}]; + const p = a.getMaxKaNumberForAuthor(AUTHOR); + // The new readWithFailover cap aborts the hung primary at the 4s multi-RPC + // default and fails over (the old bespoke loop had NO per-attempt timeout → + // a hung backend stalled the whole resolution). + await vi.advanceTimersByTimeAsync(RPC_READ_STALL_TIMEOUT_MS + 500); + expect(await p).toBe(7n); // answered by the backup's staticCall + expect(storage.getMaxKaNumberForAuthor.staticCall.calls).toHaveLength(2); + } finally { + vi.useRealTimers(); + } + }); + it('rethrows malformed BAD_DATA instead of treating every decode failure as an absent view', async () => { const err: any = new Error( 'could not decode result data (value="0x1234", info={ method: "getMaxKaNumberForAuthor", signature: "getMaxKaNumberForAuthor(address)" }, code=BAD_DATA, version=6.16.0)', diff --git a/packages/chain/test/mock-adapter-parity.test.ts b/packages/chain/test/mock-adapter-parity.test.ts index c0b803c856..6daabd31f3 100644 --- a/packages/chain/test/mock-adapter-parity.test.ts +++ b/packages/chain/test/mock-adapter-parity.test.ts @@ -76,7 +76,6 @@ const MOCK_EXEMPT_FROM_EVM = new Set([ 'getContract', // resolves a Contract from the Hub — not applicable off-chain 'getBlockNumber', // the mock exposes its own block counter differently (advanceBlock) 'getProvider', // returns a JsonRpcProvider; mock has none - 'getReadProvider', // returns the EVM fallback read provider; mock has no RPC provider 'getSignerAddress', // mock exposes `signerAddress` as a field 'getSignerAddresses', // pool not applicable to mock 'getAuthorizedPublisherAddress', // pool-specific signer selection; mock has one signerAddress diff --git a/packages/chain/test/multi-rpc-provider-shape.test.ts b/packages/chain/test/multi-rpc-provider-shape.test.ts index 98007b6b72..ef5faaf797 100644 --- a/packages/chain/test/multi-rpc-provider-shape.test.ts +++ b/packages/chain/test/multi-rpc-provider-shape.test.ts @@ -23,7 +23,7 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { privateKey: PK, allowNoAdminSigner: true, }); - const read = a.getReadProvider(); + const read = a.getProvider(); expect(read).toBeInstanceOf(JsonRpcProvider); expect(read).not.toBeInstanceOf(FallbackProvider); expect(a.getRpcUrls()).toEqual(['http://127.0.0.1:1']); @@ -37,9 +37,11 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { privateKey: PK, allowNoAdminSigner: true, }); - // R1: getReadProvider() is the bare PRIMARY JsonRpcProvider — the - // FallbackProvider is gone; reads fail over explicitly via readWithFailover. - const read = a.getReadProvider(); + // R1: getProvider() is the bare PRIMARY JsonRpcProvider — the + // FallbackProvider is gone; reads fail over explicitly via readWithFailover + // over this.providers[] (getReadProvider() was removed as obsolete: there is + // no single read provider anymore). + const read = a.getProvider(); expect(read).toBeInstanceOf(JsonRpcProvider); expect(read).not.toBeInstanceOf(FallbackProvider); // All endpoints stay configured, primary first — the failover topology now @@ -61,7 +63,7 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { allowNoAdminSigner: true, }); // Collapses to a single unique endpoint -> no FallbackProvider. - expect(a.getReadProvider()).not.toBeInstanceOf(FallbackProvider); + expect(a.getProvider()).not.toBeInstanceOf(FallbackProvider); expect(a.getRpcUrls()).toEqual(['http://127.0.0.1:1']); }); }); diff --git a/packages/chain/test/readwithfailover-loop.unit.test.ts b/packages/chain/test/readwithfailover-loop.unit.test.ts index af30b93a60..5ae23149a8 100644 --- a/packages/chain/test/readwithfailover-loop.unit.test.ts +++ b/packages/chain/test/readwithfailover-loop.unit.test.ts @@ -214,3 +214,72 @@ describe('readWithFailover — per-attempt cap (log-scan stall fix)', () => { expect(only.read.calls).toHaveLength(1); }); }); + +// #2 review fix: contractReadWithFailover DEFAULTS its failover classifier to +// isContractViewRetryable = isRetryableRpcError MINUS BAD_DATA. A contract VIEW's +// BAD_DATA ("could not decode result data") is a DETERMINISTIC client-side decode, +// not an RPC outage — failing over would re-hit the same decode on every endpoint +// and mask it as RPC_ENDPOINTS_EXHAUSTED (the pre-PR FallbackProvider never failed +// over on a post-decode error). So BAD_DATA must surface DIRECTLY (no failover), +// while a real transient (429) still fails over; opts.isRetryable overrides it. +describe('contractReadWithFailover — view classifier (BAD_DATA non-retryable) + opts.isRetryable override', () => { + const badDataError = () => { + const e: any = new Error('could not decode result data (value="0x", code=BAD_DATA)'); + e.code = 'BAD_DATA'; + return e; + }; + + it('a BAD_DATA view decode is NON-retryable → surfaces DIRECTLY, NO failover (not masked as RPC_ENDPOINTS_EXHAUSTED)', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + (a as any).providers = [{}, {}]; + const bad = badDataError(); + const view = recorder(async () => { throw bad; }); + const contract = { view }; // no .connect → withRunner uses it as-is (same recorder per provider) + + await expect((a as any).contractReadWithFailover('someView', contract, (c: any) => c.view())) + .rejects.toBe(bad); // the ORIGINAL BAD_DATA, NOT a ChainRpcTransportError/RPC_ENDPOINTS_EXHAUSTED + expect(view.calls).toHaveLength(1); // deterministic → NO failover, backup never consulted + }); + + it('a real transient (429) on the SAME view path IS retryable → fails over to the next endpoint', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + (a as any).providers = [{}, {}]; + let n = 0; + const view = recorder(async () => { + n += 1; + if (n === 1) { const e: any = new Error('429 too many requests'); e.status = 429; throw e; } + return 'OK'; + }); + const contract = { view }; + + await expect((a as any).contractReadWithFailover('someView', contract, (c: any) => c.view())).resolves.toBe('OK'); + expect(view.calls).toHaveLength(2); // 429 is a genuine transient → failed over + }); + + it('an explicit opts.isRetryable OVERRIDES the isContractViewRetryable default (BAD_DATA then treated retryable → fails over)', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + (a as any).providers = [{}, {}]; + let n = 0; + const view = recorder(async () => { n += 1; if (n === 1) throw badDataError(); return 'OK'; }); + const contract = { view }; + + await expect( + (a as any).contractReadWithFailover('someView', contract, (c: any) => c.view(), { isRetryable: () => true }), + ).resolves.toBe('OK'); + expect(view.calls).toHaveLength(2); // default overridden → BAD_DATA failed over + }); + + it('readWithFailover honours a custom opts.isRetryable that makes a normally-retryable 429 NON-retryable (surfaces it, no failover)', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const err429 = (() => { const e: any = new Error('429 too many requests'); e.status = 429; return e; })(); + const primary = { read: recorder(async () => { throw err429; }) }; + const backup = { read: recorder(async () => 'BACKUP') }; + (a as any).providers = [primary, backup]; + + // Custom classifier: nothing is retryable → the 429 surfaces directly, no failover. + await expect((a as any).readWithFailover('t', (p: any) => p.read(), { isRetryable: () => false })) + .rejects.toBe(err429); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toEqual([]); + }); +}); From 8839a6b38922091cae6ba2ee578c2f64c34c5c37 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Fri, 26 Jun 2026 04:11:25 +0200 Subject: [PATCH 3/4] =?UTF-8?q?fix(chain):=20PR=20#1335=20review=20round?= =?UTF-8?q?=202=20=E2=80=94=20deprecate=20getReadProvider,=20split=20rebin?= =?UTF-8?q?d=20helpers,=20events=20scan=20helper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the reviewer's second round (7 items): - Restored getReadProvider() as @deprecated (returns the bare primary and does NOT fail over — callers should use the adapter read methods, which route through readWithFailover). Removing a public method from the published EVMChainAdapter export is a breaking change. [B-1] - Trimmed the large doc-comment blocks on readWithFailover and the V10 populate/broadcast helpers down to the stable contract (retry classifier, timeout semantics, purity), dropping the PR-stage/incident prose. [B-3] - Extracted queryFilterWithFailover in events.ts that bakes in the 30s LOG_SCAN_OPTS; all 9 event scans use it, so the wide-log timeout is owned once by the module instead of call-site discipline. [B-4] - Split withRunner into rebindContract + rebindSigner with a concrete `.connect` probe — no cast-through-unknown, no production branch for non-ethers test doubles. rebindSigner is byte-identical to the old signer path. [A/#6] - Migrated the affected tests to connect-capable mocks (a shared connectable() helper) so the real rebindContract/rebindSigner run; the failover test now uses provider-specific connect doubles and asserts the BACKUP-connected view is called (verifies per-provider rebinding). [B-2] Added a single-RPC bounded-retry-budget guard [B-5] and a listenForEvents wide-queryFilter log-scan-timeout test [B-6]. mock-parity exemptions updated for the new protected helpers (rebindContract/rebindSigner/queryFilterWithFailover). Chain suite 709 pass / 1 skip; full runtime build green. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_012DGBSrmN6Y8Vsgb7x46amp --- packages/chain/src/evm-adapter-base.ts | 251 +++++++----------- packages/chain/src/evm-adapter-events.ts | 71 ++--- packages/chain/test/evm-adapter.unit.test.ts | 87 +++--- .../test/getmaxkanumberforauthor.unit.test.ts | 12 +- .../chain/test/mock-adapter-parity.test.ts | 14 +- .../test/readwithfailover-loop.unit.test.ts | 146 ++++++++-- .../v10-update-ack-digest-parity.unit.test.ts | 28 +- 7 files changed, 340 insertions(+), 269 deletions(-) diff --git a/packages/chain/src/evm-adapter-base.ts b/packages/chain/src/evm-adapter-base.ts index e50a98af44..e2ba1720dc 100644 --- a/packages/chain/src/evm-adapter-base.ts +++ b/packages/chain/src/evm-adapter-base.ts @@ -828,55 +828,29 @@ export class EVMChainAdapterBase { } /** - * R1 read-failover primitive. Runs `fn` against the FIRST configured RPC - * provider; on a RETRYABLE error (429 / 5xx / network — `isRetryableRpcError`) - * advances IMMEDIATELY to the next provider, logging each hop host-only via - * `noteRpcFailover`. A NON-retryable error (a real revert / decoded custom - * error / bad argument) throws at once — failing over a deterministic chain - * error would be pointless and could mask it. When every provider is - * exhausted, throw the typed `RPC_ENDPOINTS_EXHAUSTED` transport error so the - * HTTP boundary maps it to a bounded 503 (the same contract the write loops + - * `init()` use). + * Per-endpoint read-failover primitive (the bare `this.providers[]`, no + * FallbackProvider). Runs `fn` against each provider in turn; on a RETRYABLE + * error advances to the next (host-only `noteRpcFailover` per hop) and, once + * all are exhausted, throws the typed `RPC_ENDPOINTS_EXHAUSTED` (→ bounded + * 503). A NON-retryable error is rethrown AT ONCE (failing over a deterministic + * chain error would only mask it). The default "retryable?" classifier is + * `isRetryableRpcError`; override it via `opts.isRetryable` for reads whose + * error shapes carry domain meaning (a contract view's `BAD_DATA`, + * `getMaxKaNumberForAuthor`'s absent-view). * - * This MIRRORS the write loops (`sendContractTransaction`, - * `broadcastSignedTransactionWithFailover`) and the in-tree scan failover - * (`queryEventLogsPage`): explicit per-provider iteration over the BARE - * `this.providers[i]`, NOT a `FallbackProvider`. ethers' quorum:1 - * `FallbackProvider` throws a fast error to the caller WITHOUT consulting a - * backup (it advances only on a ~4s stall), so it never delivered reliable - * read failover — which is why reads route through this explicit loop instead. + * The per-attempt `withTimeout` is a hard deadline that ABORTS and fails over a + * hung backend: + * - MULTI-RPC: every attempt capped at `RPC_READ_STALL_TIMEOUT_MS` (4s, suits + * a POINT read); a WIDE read (a multi-thousand-block `eth_getLogs`) raises + * it via `opts.multiAttemptTimeoutMs` so a slow-but-healthy scan isn't + * aborted + failed over into a spurious exhaustion. + * - SINGLE-RPC: uncapped (nothing to fail over to; #894) UNLESS + * `opts.attemptTimeoutMs` is given (a hard bound on EVERY attempt incl. + * single-RPC — fail-open funding reads that must not stall selection). * - * A per-attempt `withTimeout` makes a HUNG (pending, not erroring) backend - * yield to the next. This is a NEW hard per-attempt deadline — NOT a - * re-creation of the removed `FallbackProvider`'s `stallTimeout`. That stall - * was a PARALLELIZE trigger (it raced a backup against a slow backend and took - * whichever answered first); it NEVER aborted a healthy-but-slow read. Here the - * deadline DOES abort the attempt and fail it over, so it must be sized to the - * read: - * - MULTI-RPC: every attempt is hard-capped — `RPC_READ_STALL_TIMEOUT_MS` - * (4s) by default, which suits a POINT read (a >4s point read IS a hung - * backend). A WIDE read that legitimately runs longer — a topic-filtered - * `eth_getLogs` over a multi-thousand-block window — MUST raise the cap via - * `opts.multiAttemptTimeoutMs` (e.g. `RPC_LOG_SCAN_TIMEOUT_MS`). Otherwise - * the 4s deadline aborts a slow-but-healthy scan, fails it over across every - * endpoint, and throws a spurious `RPC_ENDPOINTS_EXHAUSTED` (for the event - * poller that escapes before the cursor advances → a permanent stall). - * - SINGLE-RPC: NO cap by default — a slow-but-valid read (init() Hub lookups, - * a wide getLogs) is not killed, and `boundedRetryFetchRequest`'s - * in-FetchRequest retry remains the only bound (#894). `multiAttemptTimeoutMs` - * does NOT cap single-RPC (there is nothing to fail over to). - * Callers that want a HARD bound on EVERY attempt INCLUDING single-RPC — e.g. - * fail-open funding reads that must not stall selection — pass - * `opts.attemptTimeoutMs`, which caps ALL attempts. - * Exhaustion still carries the typed code even for one provider. - * - * `fn` receives the active provider so callers route the actual read through - * it: a direct provider read as `p => p.getCode(addr)`, a contract VIEW read - * as `p => contract.connect(p).someView(args)` (a view-only reconnect — the - * returned Contract is bound to the bare provider; `this.contracts.*` is - * untouched). `fn` MUST be a pure read — no signing / broadcast / WAL side - * effect — because it may execute on more than one provider; writes use the - * dedicated write loops, never this. + * `fn` receives the active provider (`p => p.getCode(addr)`, or for a view + * `p => contract.connect(p).someView(args)`) and MUST be a PURE read — no + * sign / broadcast / WAL — since it may execute on more than one provider. */ protected async readWithFailover( label: string, @@ -884,14 +858,8 @@ export class EVMChainAdapterBase { opts?: { attemptTimeoutMs?: number; multiAttemptTimeoutMs?: number; - // Classifier for "should this error fail over to the next endpoint?". - // Defaults to the generic RPC-transient check; callers override it for - // reads whose error shapes carry domain meaning that must NOT be treated as - // a failover-worthy transient — e.g. a contract VIEW's `BAD_DATA` decode - // (deterministic, see `contractReadWithFailover`) or - // `getMaxKaNumberForAuthor`'s absent-view shapes. A non-retryable error is - // rethrown AT ONCE (the caller's own catch then classifies it), never - // failed over or masked as `RPC_ENDPOINTS_EXHAUSTED`. + // Override the "should this error fail over?" classifier (default + // `isRetryableRpcError`). isRetryable?: (err: unknown) => boolean; }, ): Promise { @@ -943,48 +911,27 @@ export class EVMChainAdapterBase { } /** - * Reconnect a Contract / Wallet `handle` to `runner` so a per-endpoint read or - * populate executes against that runner (the failover mechanism). A production - * handle is always an ethers `Contract` / `Wallet`, both of which expose - * `.connect`, so it is rebound. A bare TEST DOUBLE may omit `.connect`; the - * guarded fallback — the ONLY branch that returns the handle un-rebound — - * passes the stub through so the read still executes against it. The - * `T extends Contract | Wallet` bound + the single explicit `connect` assertion - * (no cast through `unknown`) keep this off the arbitrary-`any` boundary. + * Rebind a CONTRACT to `runner` (a provider for a view read, or a signer for a + * write populate) for one per-endpoint attempt, leaving the boot-bound + * `this.contracts.*` handle untouched. The `as Contract` recovers the + * dynamic-method index signature ethers' `BaseContract.connect` drops. */ - protected withRunner(handle: T, runner: JsonRpcProvider | Wallet): T { - // Production: an ethers Contract / Wallet — both expose `.connect`, which - // rebinds to `runner`. Their `connect()` return types (BaseContract / a - // wallet subtype) and param types (ContractRunner vs Provider) differ and are - // not statically `T`, so the handle is only PROBED for `.connect` (a concrete - // shape, NOT widened to `any` or cast through `unknown`) and the rebound - // result is asserted back to `T`. - const rebind = (handle as { connect?: (runner: never) => unknown }).connect; - if (typeof rebind !== 'function') { - // Bare TEST DOUBLE without `.connect` — the ONLY branch that returns the - // handle un-rebound — so the read still executes against the stub. - return handle; - } - return rebind.call(handle, runner as never) as T; + protected rebindContract(contract: Contract, runner: JsonRpcProvider | Wallet): Contract { + return contract.connect(runner) as Contract; + } + + /** Rebind a SIGNER to `provider` for one per-endpoint populate+sign attempt. */ + protected rebindSigner(signer: Wallet, provider: JsonRpcProvider): Wallet { + return signer.connect(provider); } /** - * Convenience wrapper over `readWithFailover` for a CONTRACT VIEW read: runs - * `fn` against `contract` reconnected to each provider in turn (so the view - * call executes on the bare per-provider runner, with failover), leaving the - * boot-bound `this.contracts.*` handle untouched (the `.connect` cast — ethers' - * `BaseContract.connect` loses the dynamic-method index signature — lives in - * `withRunner` once instead of at every call site). Same purity contract as - * `readWithFailover` — `fn` MUST be a pure view read, never a state-changing - * send (writes go through the dedicated write loops). - * - * The default failover classifier is `isContractViewRetryable` (generic - * RPC-transient MINUS `BAD_DATA`): a view `BAD_DATA` ("could not decode result - * data") is a DETERMINISTIC client-side decode, not an RPC outage, so failing - * over would just re-hit the same decode on every endpoint and mask it as - * `RPC_ENDPOINTS_EXHAUSTED`. Pre-PR these reads went through the FallbackProvider, - * which never failed over on a post-decode error; this restores that. A caller - * may pass its own `isRetryable` to override. + * `readWithFailover` for a CONTRACT VIEW read: runs `fn` against `contract` + * rebound to each provider in turn (failover), leaving `this.contracts.*` + * untouched. `fn` MUST be a pure view read. The default failover classifier is + * `isContractViewRetryable` (the transient set MINUS `BAD_DATA`, which on a + * view is a deterministic decode, not an outage, so it is rethrown rather than + * failed over and masked as exhaustion); a caller may pass its own `isRetryable`. */ protected contractReadWithFailover( label: string, @@ -996,7 +943,7 @@ export class EVMChainAdapterBase { isRetryable?: (err: unknown) => boolean; }, ): Promise { - return this.readWithFailover(label, (p) => fn(this.withRunner(contract, p)), { + return this.readWithFailover(label, (p) => fn(this.rebindContract(contract, p)), { ...opts, isRetryable: opts?.isRetryable ?? isContractViewRetryable, }); @@ -1039,19 +986,12 @@ export class EVMChainAdapterBase { } /** - * #888: populate + sign a V10 write tx with one-shot recovery for a - * stale-RPC `TooLowAllowance` revert, shared by BOTH V10 write paths - * (`createKnowledgeAssets` publish and `updateV10` — incl. metadata-only - * updates). ethers estimates gas while populating; on an internally - * load-balanced RPC that estimate can read a stale TRAC allowance and - * revert `TooLowAllowance` even though the approve above succeeded - * (post-approve propagation lag) or was skipped on a stale-high read of an - * allowance the prior write already consumed. This is strictly - * pre-broadcast (before the `onBroadcast` WAL checkpoint), so on that one - * revert we force a fresh approve up to the publish floor — confirming it - * is visible on the same read path via `ensureV10ApproveTrac(force=true)` — - * and retry populate+sign exactly once. Any other error, or a second - * `TooLowAllowance`, is enriched when possible and then propagated. + * #888: populate + sign a V10 write tx (shared by publish + update) with a + * one-shot recovery for a stale-RPC `TooLowAllowance` revert. Gas estimation + * during populate can read a stale TRAC allowance and revert even though the + * approve succeeded; this is strictly pre-broadcast, so on that ONE revert we + * force a fresh approve (`ensureV10ApproveTrac(force=true)`) and retry exactly + * once. Any other error, or a second `TooLowAllowance`, propagates. */ protected async populateAndSignV10WithAllowanceRecovery( signer: Wallet, @@ -1062,23 +1002,16 @@ export class EVMChainAdapterBase { tokenAmount: bigint, reapproveLabel: string, ): Promise<{ signedTx: string; txHash: string }> { - // OBS-1 + G-OBS1b (LOCKED layering): the V10 populate+sign reads - // (eth_estimateGas, eth_getTransactionCount nonce, fee data) fail over - // endpoint-to-endpoint via the SHARED `populateAndSignAcrossProviders` loop - // (so a 429ing primary can't fail-fast the publish), while the #888 - // stale-allowance recovery stays a strict ONE-SHOT: - // - OUTER (this loop) owns the SINGLE `forcedReapprove` latch + the ONE - // `ensureV10ApproveTrac(force=true)` call. - // - INNER = `populateAndSignAcrossProviders`, iterating the bare providers. - // A `TooLowAllowance` revert is a CALL_EXCEPTION (non-retryable), so the inner - // loop does NOT fail over on it — it propagates straight up HERE. Because the - // latch lives at the OUTER scope (NEVER reset per endpoint), a stale-allowance - // read fires AT MOST ONE forced approve per publish no matter how many - // endpoints the inner loop tried — immediate failover introduces ZERO new - // on-chain txs. Only the ONE returned `{signedTx,txHash}` is broadcast (INV-1); - // the whole thing runs inside the per-wallet `KeyedSerializer` from - // `dispatchSerializedV10Write`, preserving nonce monotonicity (#953) and - // staying strictly pre-broadcast / pre-WAL. + // Per-endpoint populate+sign failover lives in the shared + // `populateAndSignAcrossProviders` (so a 429ing primary can't fail-fast the + // publish); the #888 stale-allowance recovery stays a strict ONE-SHOT. OUTER + // (this loop) owns the SINGLE `forcedReapprove` latch + the lone forced + // approve; INNER iterates the bare providers. `TooLowAllowance` is a + // CALL_EXCEPTION (non-retryable), so the inner loop does NOT fail over on it — + // it propagates up here. The latch is never reset per endpoint, so at most + // ONE forced approve fires per publish regardless of endpoints tried. Only the + // one returned signed tx is broadcast; the whole thing runs inside the + // per-wallet `KeyedSerializer` (#953), strictly pre-broadcast / pre-WAL. let forcedReapprove = false; for (;;) { try { @@ -1117,22 +1050,15 @@ export class EVMChainAdapterBase { txHash: string, label: string, ): Promise { - // S2 bounded set-retry, scoped to the BROADCAST phase ONLY. After a full - // per-endpoint broadcast pass exhausts with a retryable error (e.g. a brief - // window where every endpoint 429s), re-broadcast the SAME - // already-signed/already-WAL-checkpointed tx up to RPC_ENDPOINT_SET_RETRIES - // extra full passes with a short backoff, then surface the error. - // - // tx-safety: this seam is SIGNER-FREE — it only has `{signedTx,txHash}`, so - // re-signing (a new nonce / a second distinct tx) is STRUCTURALLY impossible - // (INV-2). Re-broadcasting the byte-identical signed tx is idempotent - // (`isKnownTransactionError` → treated as accepted), and the WAL `onBroadcast` - // already fired exactly once UPSTREAM in `dispatchSerializedV10Write` before - // this method was entered (INV-3/INV-6). The whole thing runs inside the - // per-wallet KeyedSerializer lock for the V10 path, so the lock is held - // across the retries (no concurrent same-wallet send slips in). We do NOT - // re-broadcast on a slow-to-mine receipt — `waitForReceiptWithFailover` owns - // its own per-poll failover + 180s deadline — so lock-hold stays bounded. + // Bounded set-retry, BROADCAST phase ONLY: after a full per-endpoint + // broadcast pass exhausts with a retryable error (a brief all-endpoints-429), + // re-broadcast the SAME signed tx up to `RPC_ENDPOINT_SET_RETRIES` extra + // passes with a short backoff. tx-safe: this seam is SIGNER-FREE so re-signing + // is structurally impossible, re-broadcasting the byte-identical tx is + // idempotent (`isKnownTransactionError`), and the WAL `onBroadcast` already + // fired once upstream. The receipt wait is NOT re-broadcast (it owns its own + // poll + deadline), so lock-hold (held across the retries for the V10 path) + // stays bounded. for (let pass = 0; ; pass += 1) { try { await this.broadcastSignedTransactionWithFailover(signedTx, txHash, label); @@ -1205,23 +1131,18 @@ export class EVMChainAdapterBase { /** * Per-endpoint populate+sign loop SHARED by `sendContractTransaction` and the - * V10 publish/update path (`populateAndSignV10WithAllowanceRecovery`). - * Iterates `this.providers[i]`, connects the signer + contract to each, - * populates (gas/nonce/chainId reads, with optional OOG-buffer gas estimation) - * + signs, and returns the FIRST successful `{signedTx,txHash}`. Advances ONLY - * on `isRetryableRpcError`; a NON-retryable error (a decoded revert — e.g. - * `TooLowAllowance`) propagates AT ONCE so the caller can react (the V10 - * allowance-recovery loop depends on TooLowAllowance surfacing HERE, never - * being failed over). Exhaustion across every endpoint → typed - * `RPC_ENDPOINTS_EXHAUSTED` (single provider keeps the original message - * byte-identical; multiple providers get the host-only aggregate). + * V10 publish/update path. Iterates `this.providers[i]` (signer + contract + * rebound to each), populates (gas/nonce/chainId reads, optional OOG-buffer gas + * estimate) + signs, and returns the FIRST successful `{signedTx,txHash}`. + * Advances ONLY on `isRetryableRpcError`; a non-retryable error (a decoded + * revert — e.g. `TooLowAllowance`) propagates AT ONCE so the caller can react. + * Exhaustion → typed `RPC_ENDPOINTS_EXHAUSTED`. * - * STRICTLY pre-broadcast: it signs exactly ONCE on the winning provider and - * does NOT broadcast or fire the WAL — the caller broadcasts the single - * returned signed tx via `sendSignedTransactionAndWait`. This seam is what - * keeps the #684/#241 WAL split intact (onBroadcast sits between sign and - * broadcast), so the V10 path reuses THIS helper instead of routing through - * `sendContractTransaction` (which broadcasts internally). + * STRICTLY pre-broadcast: signs once on the winning provider, does NOT broadcast + * or fire the WAL — the caller broadcasts the single returned tx. This keeps the + * WAL split intact (onBroadcast between sign and broadcast), so the V10 path + * reuses THIS helper rather than `sendContractTransaction` (which broadcasts + * internally). */ protected async populateAndSignAcrossProviders( contract: Contract, @@ -1233,9 +1154,9 @@ export class EVMChainAdapterBase { ): Promise<{ signedTx: string; txHash: string }> { let lastRetryable: unknown; for (let i = 0; i < this.providers.length; i += 1) { - const rpcSigner = this.withRunner(signer, this.providers[i]); + const rpcSigner = this.rebindSigner(signer, this.providers[i]); try { - const connected = this.withRunner(contract, rpcSigner) as any; + const connected = this.rebindContract(contract, rpcSigner) as any; const populated = await withTimeout( connected[method].populateTransaction(...args) as Promise, RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, @@ -2131,7 +2052,7 @@ export class EVMChainAdapterBase { // below also relies on is preserved. const max = await this.readWithFailover( 'DKGKnowledgeAssets.getMaxKaNumberForAuthor', - (p) => (this.withRunner(storage, p) as Contract).getMaxKaNumberForAuthor.staticCall(normalized), + (p) => this.rebindContract(storage, p).getMaxKaNumberForAuthor.staticCall(normalized), { isRetryable: (err) => isRetryableRpcError(err) @@ -2910,6 +2831,18 @@ export class EVMChainAdapterBase { return this.primaryProvider; } + /** + * @deprecated Returns the bare PRIMARY provider, which does NOT fail over: the + * ethers `FallbackProvider` was removed and reads now route through the + * adapter's own read methods (`readWithFailover` over `this.providers[]`). Call + * those read methods instead, or `getProvider()` if you explicitly want the + * bare primary. Retained only for backward compatibility — this adapter is a + * published export, so removing a public method would be a breaking change. + */ + getReadProvider(): JsonRpcProvider { + return this.provider; + } + getRpcUrls(): string[] { return [...this.rpcUrls]; } diff --git a/packages/chain/src/evm-adapter-events.ts b/packages/chain/src/evm-adapter-events.ts index e8139caaec..fd64ad4a7e 100644 --- a/packages/chain/src/evm-adapter-events.ts +++ b/packages/chain/src/evm-adapter-events.ts @@ -26,6 +26,26 @@ export class EventsMethods extends EVMChainAdapterBase { // Events // ===================================================================== + /** + * A WIDE `eth_getLogs` scan with read-failover, baking in `LOG_SCAN_OPTS` so + * the wide-log multi-RPC timeout is owned HERE once (not by per-call-site + * discipline). Used by every `listenForEvents` branch below. + */ + private queryFilterWithFailover( + contract: ethers.Contract, + label: string, + eventFilter: ethers.ContractEventName, + fromBlock: ethers.BlockTag, + toBlock?: ethers.BlockTag, + ): Promise<(ethers.Log | ethers.EventLog)[]> { + return this.contractReadWithFailover( + label, + contract, + (c) => c.queryFilter(eventFilter, fromBlock, toBlock), + LOG_SCAN_OPTS, + ); + } + async *listenForEvents(filter: EventFilter): AsyncIterable { await this.init(); @@ -39,10 +59,8 @@ export class EventsMethods extends EVMChainAdapterBase { continue; } const eventFilter = storage.filters.KnowledgeBatchCreated(); - const logs = await this.contractReadWithFailover( - 'kasV9.queryFilter(KnowledgeBatchCreated)', storage, - (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), - LOG_SCAN_OPTS, + const logs = await this.queryFilterWithFailover( + storage, 'kasV9.queryFilter(KnowledgeBatchCreated)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, ); for (const log of logs) { @@ -70,10 +88,8 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.ContextGraphExpanded(); - const logs = await this.contractReadWithFailover( - 'cgStorage.queryFilter(ContextGraphExpanded)', cgStorage, - (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), - LOG_SCAN_OPTS, + const logs = await this.queryFilterWithFailover( + cgStorage, 'cgStorage.queryFilter(ContextGraphExpanded)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, ); for (const log of logs) { @@ -102,10 +118,8 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.KnowledgeAssetRegisteredToContextGraph(); - const logs = await this.contractReadWithFailover( - 'cgStorage.queryFilter(KnowledgeAssetRegisteredToContextGraph)', cgStorage, - (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), - LOG_SCAN_OPTS, + const logs = await this.queryFilterWithFailover( + cgStorage, 'cgStorage.queryFilter(KnowledgeAssetRegisteredToContextGraph)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, ); for (const log of logs) { @@ -153,9 +167,8 @@ export class EventsMethods extends EVMChainAdapterBase { : 'KnowledgeAssetCreated'; const kcFilter = kaStorage.filters[createEventName](); - const kcLogs = await this.contractReadWithFailover( - 'kas.queryFilter(KnowledgeAssetCreated)', kaStorage, (c) => c.queryFilter(kcFilter, fromB, toB), - LOG_SCAN_OPTS, + const kcLogs = await this.queryFilterWithFailover( + kaStorage, 'kas.queryFilter(KnowledgeAssetCreated)', kcFilter, fromB, toB, ); // Legacy mint range. `KnowledgeAssetsMinted` is still declared on the @@ -165,9 +178,8 @@ export class EventsMethods extends EVMChainAdapterBase { const mintByTx = new Map(); if (hasEvent('KnowledgeAssetsMinted')) { const mintFilter = kaStorage.filters.KnowledgeAssetsMinted(); - const mintLogs = await this.contractReadWithFailover( - 'kas.queryFilter(KnowledgeAssetsMinted)', kaStorage, (c) => c.queryFilter(mintFilter, fromB, toB), - LOG_SCAN_OPTS, + const mintLogs = await this.queryFilterWithFailover( + kaStorage, 'kas.queryFilter(KnowledgeAssetsMinted)', mintFilter, fromB, toB, ); for (const ml of mintLogs) { const mp = kaStorage.interface.parseLog({ topics: [...ml.topics], data: ml.data }); @@ -190,9 +202,8 @@ export class EventsMethods extends EVMChainAdapterBase { if (isGreenfield) { try { const transferFilter = kaStorage.filters.Transfer(ethers.ZeroAddress); - const transferLogs = await this.contractReadWithFailover( - 'kas.queryFilter(Transfer)', kaStorage, (c) => c.queryFilter(transferFilter, fromB, toB), - LOG_SCAN_OPTS, + const transferLogs = await this.queryFilterWithFailover( + kaStorage, 'kas.queryFilter(Transfer)', transferFilter, fromB, toB, ); for (const tl of transferLogs) { const tp = kaStorage.interface.parseLog({ topics: [...tl.topics], data: tl.data }); @@ -252,10 +263,8 @@ export class EventsMethods extends EVMChainAdapterBase { const registry = this.contracts.contextGraphNameRegistry; if (registry) { const eventFilter = registry.filters.NameClaimed(); - const logs = await this.contractReadWithFailover( - 'cgNameRegistry.queryFilter(NameClaimed)', registry, - (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), - LOG_SCAN_OPTS, + const logs = await this.queryFilterWithFailover( + registry, 'cgNameRegistry.queryFilter(NameClaimed)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, ); for (const log of logs) { const parsed = registry.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -279,10 +288,8 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.ContextGraphCreated(); - const logs = await this.contractReadWithFailover( - 'cgStorage.queryFilter(ContextGraphCreated)', cgStorage, - (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), - LOG_SCAN_OPTS, + const logs = await this.queryFilterWithFailover( + cgStorage, 'cgStorage.queryFilter(ContextGraphCreated)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, ); for (const log of logs) { const parsed = cgStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -316,10 +323,8 @@ export class EventsMethods extends EVMChainAdapterBase { const profileStorage = this.contracts.profileStorage; if (profileStorage) { const eventFilter = profileStorage.filters.RelayCapabilityUpdated(); - const logs = await this.contractReadWithFailover( - 'profileStorage.queryFilter(RelayCapabilityUpdated)', profileStorage, - (c) => c.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock), - LOG_SCAN_OPTS, + const logs = await this.queryFilterWithFailover( + profileStorage, 'profileStorage.queryFilter(RelayCapabilityUpdated)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, ); for (const log of logs) { const parsed = profileStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); diff --git a/packages/chain/test/evm-adapter.unit.test.ts b/packages/chain/test/evm-adapter.unit.test.ts index 3448ed74e0..286cfa46d8 100644 --- a/packages/chain/test/evm-adapter.unit.test.ts +++ b/packages/chain/test/evm-adapter.unit.test.ts @@ -36,6 +36,19 @@ beforeEach(() => { _resetRpcFailoverStatsForTest(); }); +// Round-2 review split withRunner → rebindContract (contract.connect(p), NO +// test-double fallback) so production handles MUST be .connect-able (they are — +// real ethers Contracts). These unit tests mock this.contracts.* as plain method +// objects; `connectable` makes a mock satisfy that boundary with a single-provider +// NO-OP self-rebind (connect returns the mock), so the REAL rebindContract runs +// (a genuine rebindContract regression would still be caught). Idempotent — +// leaves a MEANINGFUL .connect (e.g. token → tokenWithSigner) untouched. +function connectable(mock: T): T { + const m = mock as { connect?: unknown }; + if (m && typeof m.connect !== 'function') m.connect = () => mock; + return mock; +} + const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; const OTHER_PK = '0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b63b91100'; const ADMIN_PK = '0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a'; @@ -265,10 +278,10 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { publishAuthorityAccountId: 0n, })); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { + (a as any).contracts.contextGraphStorage = connectable({ getAccessPolicy, getContextGraph, - }; + }); await expect(a.getContextGraphAccessPolicy(6n)).resolves.toBe(1); expect(getAccessPolicy.calls.at(-1)).toEqual([6n]); @@ -278,7 +291,7 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { it('parses accessPolicy from tuple fallback results', async () => { const a = new EVMChainAdapter(minimalConfig()); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { + (a as any).contracts.contextGraphStorage = connectable({ getAccessPolicy: recorder(async () => { throw new Error('selector unavailable'); }), @@ -293,7 +306,7 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { ethers.ZeroAddress, 0n, ]), - }; + }); await expect(a.getContextGraphAccessPolicy(7n)).resolves.toBe(0); }); @@ -302,11 +315,11 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig()); const rpcError = new Error('rpc unavailable'); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { + (a as any).contracts.contextGraphStorage = connectable({ isContextGraphActive: recorder(async () => { throw rpcError; }), - }; + }); await expect(a.isContextGraphActiveOnChain(8n)).rejects.toThrow('rpc unavailable'); }); @@ -315,7 +328,7 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig()); const isContextGraphActive = recorder(async (_id: bigint) => false); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { isContextGraphActive }; + (a as any).contracts.contextGraphStorage = connectable({ isContextGraphActive }); await expect(a.isContextGraphActiveOnChain(9n)).resolves.toBe(false); expect(isContextGraphActive.calls.at(-1)).toEqual([9n]); @@ -338,11 +351,11 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { if (name === 'Token') throw new Error('Hub.Token should not be resolved when tokenAddress is configured'); return contractAddress; }); - (a as any).contracts.hub = { + (a as any).contracts.hub = connectable({ getContractAddress, getAssetStorageAddress: recorder(async () => assetStorageAddress), on: recorder(async () => undefined), - }; + }); await (a as any).init(); @@ -1026,12 +1039,12 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig({ additionalKeys: [OTHER_PK] })); const [firstAddress, secondAddress] = a.getSignerAddresses(); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphs = { + (a as any).contracts.contextGraphs = connectable({ isAuthorizedPublisher: recorder(async () => { await Promise.resolve(); return true; }), - }; + }); const [firstReserved, secondReserved] = await Promise.all([ a.getAuthorizedPublisherAddress(1n), @@ -1656,9 +1669,9 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { const minimumRequiredSignatures = recorder(async () => 3n); (a as unknown as { init: () => Promise }).init = async () => undefined; (a as unknown as { contracts: { parametersStorage: { minimumRequiredSignatures: () => Promise } } }).contracts = { - parametersStorage: { + parametersStorage: connectable({ minimumRequiredSignatures: minimumRequiredSignatures as unknown as () => Promise, - }, + }), }; expect(await a.getMinimumRequiredSignatures()).toBe(3); @@ -2030,10 +2043,12 @@ const V10_KA_ADDRESS = '0x' + 'aa'.repeat(20); // `allowance(...)` and connects it to the signer. `approve` itself goes through // the (stubbed) `sendContractTransaction`, so the recorder just needs to exist. function makeStubToken(allowance: bigint) { - const tokenWithSigner = { + // tokenWithSigner is read via contractReadWithFailover after token→signer + // rebind, so it too must be .connect-able (self no-op rebind). + const tokenWithSigner = connectable({ allowance: recorder(async (..._a: unknown[]) => allowance), approve: recorder(() => undefined), - }; + }); const tokenRoot = { connect: recorder((..._a: unknown[]) => tokenWithSigner), }; @@ -2590,13 +2605,13 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign // shared object is what readWithFailover reads.) const balanceOf = recorder(async (addr: string) => tracByAddr.get(String(addr).toLowerCase()) ?? ABUNDANT_WEI); - const tokenWithSigner = { + const tokenWithSigner = connectable({ allowance: recorder(async (owner: string, _spender: string) => { return allowanceByOwner.get(owner.toLowerCase()) ?? 0n; }), approve: recorder(() => undefined), balanceOf, - }; + }); (a as any).contracts.token = { connect: recorder(() => tokenWithSigner), balanceOf, // kept for any direct (non-connected) top-level reader @@ -2606,19 +2621,19 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign to: PARITY_KA_ADDRESS, data: '0xdeadbeef', })); - const kavContract = { + const kavContract = connectable({ getAddress: recorder(async () => PARITY_KA_ADDRESS), publish: { populateTransaction: populateSpy }, update: { populateTransaction: populateSpy }, - }; + }); (a as any).contracts.knowledgeAssetsLifecycle = { connect: recorder(() => kavContract), getAddress: recorder(async () => PARITY_KA_ADDRESS), }; - (a as any).contracts.contextGraphs = { + (a as any).contracts.contextGraphs = connectable({ isAuthorizedPublisher: recorder(async () => true), - }; + }); const sendSpy = recorder(async (..._a: unknown[]) => ({} as unknown)); (a as any).sendContractTransaction = sendSpy; @@ -2758,13 +2773,13 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign // Injected DI seams the update path needs in addition to the publish ones. const kaId = 42n; - (a as any).contracts.knowledgeAssetStorage = { + (a as any).contracts.knowledgeAssetStorage = connectable({ getLatestMerkleRootPublisher: recorder(async () => walletB.address), getMerkleRoots: recorder(async () => []), - }; - (a as any).contracts.contextGraphStorage = { + }); + (a as any).contracts.contextGraphStorage = connectable({ kaToContextGraph: recorder(async () => 0n), - }; + }); (a as any).resolveCurrentTokenAmount = recorder(async () => 0n); (a as any).computeUpdateNewTokenAmount = recorder(async () => 0n); (a as any).getIdentityId = recorder(async () => 0n); @@ -2912,7 +2927,7 @@ describe('createKnowledgeAssets — funding-aware wallet selection', () => { it('still throws "no authorized publisher" when no wallet is authorized (unchanged)', async () => { const { a } = makeMultiWalletV10Adapter(makeAllowanceByOwner()); - (a as any).contracts.contextGraphs = { isAuthorizedPublisher: recorder(async () => false) }; + (a as any).contracts.contextGraphs = connectable({ isAuthorizedPublisher: recorder(async () => false) }); await expect((a as any).nextAuthorizedSigner(CG)).rejects.toThrow(/No authorized publisher wallet/); }); @@ -3251,10 +3266,10 @@ describe('isTooLowAllowanceError (#888)', () => { function makeV10AdapterWithAllowanceSequence(values: bigint[]) { const a = new EVMChainAdapter(minimalConfig()); let i = 0; - const tokenWithSigner = { + const tokenWithSigner = connectable({ allowance: recorder(async () => values[Math.min(i++, values.length - 1)]), approve: recorder(() => undefined), - }; + }); const tokenRoot = { connect: recorder(() => tokenWithSigner) }; (a as any).contracts.token = tokenRoot; const sendSpy = recorder(async (..._a: unknown[]) => ({} as unknown)); @@ -3341,7 +3356,7 @@ describe('ensureV10ApproveTrac — forced re-approve + visibility poll (#888)', try { const a = new EVMChainAdapter(minimalConfig()); // allowance() returns a promise that never settles — a hung RPC read. - const token = { allowance: recorder(() => new Promise(() => {})) }; + const token = connectable({ allowance: recorder(() => new Promise(() => {})) }); const done = recorder(() => undefined); const poll = (a as any) .confirmAllowanceVisible(token, '0xowner', V10_KA_ADDRESS, 1n) @@ -3406,7 +3421,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco const populate = recorder(async () => ( populateQueue.shift() ?? (async () => ({ to: V10_KA_ADDRESS, data: '0xabcd' })) )()); - const kaContract = { [method]: { populateTransaction: populate } }; + const kaContract = connectable({ [method]: { populateTransaction: populate } }); const result = await (a as any).populateAndSignV10WithAllowanceRecovery( signer, @@ -3438,7 +3453,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco const populate = recorder(async () => ( populateQueue.shift() ?? (async () => ({ to: V10_KA_ADDRESS, data: '0xabcd' })) )()); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); const result = await (a as any).populateAndSignV10WithAllowanceRecovery( signer, @@ -3460,7 +3475,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco it('propagates a SECOND consecutive TooLowAllowance (recovery is one-shot, no infinite loop)', async () => { const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); const populate = recorder(async () => { throw tooLowAllowanceRevert(); }); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); await expect( (a as any).populateAndSignV10WithAllowanceRecovery( @@ -3491,7 +3506,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco if (call === 2) throw tooLowAllowanceRevert(); // provider[1], pass 1 → non-retryable → propagate return { to: V10_KA_ADDRESS, data: '0xabcd' }; // provider[0], pass 2 (post-approve) → succeeds }); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); const result = await (a as any).populateAndSignV10WithAllowanceRecovery( signer, kaContract, 'publish', {}, V10_KA_ADDRESS, 0n, 'label', @@ -3516,7 +3531,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco if (call === 1) throw r429(); // provider[0] → fail over return { to: V10_KA_ADDRESS, data: '0xabcd' }; // provider[1] → populates }); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); const result = await (a as any).populateAndSignV10WithAllowanceRecovery( signer, kaContract, 'publish', {}, V10_KA_ADDRESS, 0n, 'label', @@ -3531,7 +3546,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco it('enriches the SECOND raw TooLowAllowance before throwing the one-shot failure', async () => { const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); const populate = recorder(async () => { throw rawTooLowAllowanceRevert(); }); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); let thrown: any; try { @@ -3554,7 +3569,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco it('propagates an unrelated revert immediately without forcing a re-approve', async () => { const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); const populate = recorder(async () => { throw new Error('execution reverted: NotBatchPublisher()'); }); - const kaContract = { update: { populateTransaction: populate } }; + const kaContract = connectable({ update: { populateTransaction: populate } }); await expect( (a as any).populateAndSignV10WithAllowanceRecovery( diff --git a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts index cfc62a60a4..5d0d55f39f 100644 --- a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts +++ b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts @@ -22,6 +22,16 @@ import { ethers } from 'ethers'; import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; import { RPC_READ_STALL_TIMEOUT_MS } from '../src/evm-adapter-constants.js'; +// Round-2 review split withRunner → rebindContract (contract.connect(p), no +// test-double fallback). makeAdapter gives `storage` a .connect; `connectable` +// makes any OTHER connect-less contract mock satisfy the boundary with a +// single-provider NO-OP self-rebind, so the REAL rebindContract runs. +function connectable(mock: T): T { + const m = mock as { connect?: unknown }; + if (m && typeof m.connect !== 'function') m.connect = () => mock; + return mock; +} + function recorder(impl: (...args: A) => R) { const calls: A[] = []; const fn = (...args: A): R => { @@ -220,7 +230,7 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# // set `initialized = false` but the cached binding still points at the OLD // contract. The getter must `await this.init()` to re-resolve before // reading, or it answers from the pre-rotation DKGKnowledgeAssets. - const mkStorage = (n: bigint) => ({ + const mkStorage = (n: bigint) => connectable({ getMaxKaNumberForAuthor: viewMock(async () => n), filters: { KnowledgeAssetCreated: recorder(() => 'F') }, queryFilter: recorder(() => undefined), diff --git a/packages/chain/test/mock-adapter-parity.test.ts b/packages/chain/test/mock-adapter-parity.test.ts index 6daabd31f3..44b4890c71 100644 --- a/packages/chain/test/mock-adapter-parity.test.ts +++ b/packages/chain/test/mock-adapter-parity.test.ts @@ -76,6 +76,7 @@ const MOCK_EXEMPT_FROM_EVM = new Set([ 'getContract', // resolves a Contract from the Hub — not applicable off-chain 'getBlockNumber', // the mock exposes its own block counter differently (advanceBlock) 'getProvider', // returns a JsonRpcProvider; mock has none + 'getReadProvider', // @deprecated bare-primary accessor; mock has no RPC provider 'getSignerAddress', // mock exposes `signerAddress` as a field 'getSignerAddresses', // pool not applicable to mock 'getAuthorizedPublisherAddress', // pool-specific signer selection; mock has one signerAddress @@ -116,13 +117,16 @@ const MOCK_EXEMPT_FROM_EVM = new Set([ 'waitForReceiptWithFailover', 'signPopulatedTransaction', // R1 immediate-RPC-failover read/populate plumbing: the per-provider read - // failover loop, its contract-view wrapper, the `.connect()` runner helper, - // and the populate+sign-across-providers loop. Protected EVM-only helpers over - // `this.providers[]` (the mock has no RPC provider pool), not ChainAdapter - // contract methods — same category as the write-failover helpers above. + // failover loop, its contract-view wrapper, the event-log scan wrapper, the + // contract/signer rebind helpers, and the populate+sign-across-providers loop. + // Protected EVM-only helpers over `this.providers[]` (the mock has no RPC + // provider pool), not ChainAdapter contract methods — same category as the + // write-failover helpers above. 'readWithFailover', 'contractReadWithFailover', - 'withRunner', + 'queryFilterWithFailover', + 'rebindContract', + 'rebindSigner', 'populateAndSignAcrossProviders', 'sendSignedTransactionAndWait', 'sendPopulatedTransaction', diff --git a/packages/chain/test/readwithfailover-loop.unit.test.ts b/packages/chain/test/readwithfailover-loop.unit.test.ts index 5ae23149a8..0ccb061bf6 100644 --- a/packages/chain/test/readwithfailover-loop.unit.test.ts +++ b/packages/chain/test/readwithfailover-loop.unit.test.ts @@ -222,51 +222,62 @@ describe('readWithFailover — per-attempt cap (log-scan stall fix)', () => { // and mask it as RPC_ENDPOINTS_EXHAUSTED (the pre-PR FallbackProvider never failed // over on a post-decode error). So BAD_DATA must surface DIRECTLY (no failover), // while a real transient (429) still fails over; opts.isRetryable overrides it. -describe('contractReadWithFailover — view classifier (BAD_DATA non-retryable) + opts.isRetryable override', () => { +describe('contractReadWithFailover — per-provider rebinding + view classifier (B-2, #2/#3)', () => { const badDataError = () => { const e: any = new Error('could not decode result data (value="0x", code=BAD_DATA)'); e.code = 'BAD_DATA'; return e; }; - - it('a BAD_DATA view decode is NON-retryable → surfaces DIRECTLY, NO failover (not masked as RPC_ENDPOINTS_EXHAUSTED)', async () => { + // A contract whose `.connect(provider)` returns a PROVIDER-SPECIFIC view double + // (round-2 rebindContract = contract.connect(p), no fallback). This PROVES the + // failover loop rebinds to the BACKUP provider's contract — a regression that + // re-ran the primary-bound contract would call primaryView twice, never backupView. + const perProviderContract = (byProvider: (p: unknown) => { view: ReturnType }) => + ({ connect: (p: unknown) => byProvider(p) }) as any; + + it('a BAD_DATA view decode is NON-retryable → surfaces DIRECTLY, NO failover (backup-connected view never called)', async () => { const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); - (a as any).providers = [{}, {}]; + const p0 = {}; const p1 = {}; + (a as any).providers = [p0, p1]; const bad = badDataError(); - const view = recorder(async () => { throw bad; }); - const contract = { view }; // no .connect → withRunner uses it as-is (same recorder per provider) + const primaryView = recorder(async () => { throw bad; }); + const backupView = recorder(async () => 'BACKUP-RESULT'); + const contract = perProviderContract((p) => (p === p0 ? { view: primaryView } : { view: backupView })); await expect((a as any).contractReadWithFailover('someView', contract, (c: any) => c.view())) .rejects.toBe(bad); // the ORIGINAL BAD_DATA, NOT a ChainRpcTransportError/RPC_ENDPOINTS_EXHAUSTED - expect(view.calls).toHaveLength(1); // deterministic → NO failover, backup never consulted + expect(primaryView.calls).toHaveLength(1); // primary-connected view called once + expect(backupView.calls).toEqual([]); // deterministic → backup-connected view NEVER consulted }); - it('a real transient (429) on the SAME view path IS retryable → fails over to the next endpoint', async () => { + it('a real transient (429) IS retryable → REBINDS to and is served by the BACKUP provider\'s view', async () => { const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); - (a as any).providers = [{}, {}]; - let n = 0; - const view = recorder(async () => { - n += 1; - if (n === 1) { const e: any = new Error('429 too many requests'); e.status = 429; throw e; } - return 'OK'; - }); - const contract = { view }; - - await expect((a as any).contractReadWithFailover('someView', contract, (c: any) => c.view())).resolves.toBe('OK'); - expect(view.calls).toHaveLength(2); // 429 is a genuine transient → failed over + const p0 = {}; const p1 = {}; + (a as any).providers = [p0, p1]; + const primaryView = recorder(async () => { const e: any = new Error('429 too many requests'); e.status = 429; throw e; }); + const backupView = recorder(async () => 'BACKUP-RESULT'); + const contract = perProviderContract((p) => (p === p0 ? { view: primaryView } : { view: backupView })); + + await expect((a as any).contractReadWithFailover('someView', contract, (c: any) => c.view())).resolves.toBe('BACKUP-RESULT'); + // B-2: the loop rebound to the BACKUP provider's contract — proven by the + // result coming from backupView, and primaryView hit exactly once (not twice). + expect(primaryView.calls).toHaveLength(1); + expect(backupView.calls).toHaveLength(1); }); - it('an explicit opts.isRetryable OVERRIDES the isContractViewRetryable default (BAD_DATA then treated retryable → fails over)', async () => { + it('an explicit opts.isRetryable OVERRIDES the isContractViewRetryable default (BAD_DATA → rebinds to the BACKUP view)', async () => { const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); - (a as any).providers = [{}, {}]; - let n = 0; - const view = recorder(async () => { n += 1; if (n === 1) throw badDataError(); return 'OK'; }); - const contract = { view }; + const p0 = {}; const p1 = {}; + (a as any).providers = [p0, p1]; + const primaryView = recorder(async () => { throw badDataError(); }); + const backupView = recorder(async () => 'BACKUP-RESULT'); + const contract = perProviderContract((p) => (p === p0 ? { view: primaryView } : { view: backupView })); await expect( (a as any).contractReadWithFailover('someView', contract, (c: any) => c.view(), { isRetryable: () => true }), - ).resolves.toBe('OK'); - expect(view.calls).toHaveLength(2); // default overridden → BAD_DATA failed over + ).resolves.toBe('BACKUP-RESULT'); + expect(primaryView.calls).toHaveLength(1); + expect(backupView.calls).toHaveLength(1); // default overridden → BAD_DATA failed over to the backup view }); it('readWithFailover honours a custom opts.isRetryable that makes a normally-retryable 429 NON-retryable (surfaces it, no failover)', async () => { @@ -283,3 +294,86 @@ describe('contractReadWithFailover — view classifier (BAD_DATA non-retryable) expect(backup.read.calls).toEqual([]); }); }); + +// B-5 (#894 guard): the CONSTRUCTOR wires the per-endpoint FetchRequest retry +// budget from the endpoint count — SINGLE-RPC keeps the bounded retry (its only +// resilience; nothing to fail over to), MULTI-RPC uses 0 (the explicit adapter +// failover advances on the first error). A regression to perEndpointRetries=0 +// for a single endpoint would surface RPC_ENDPOINTS_EXHAUSTED on the first 429. +// We inspect the constructed provider's real retryFunc directly (deterministic + +// fast) rather than driving ~7.5s of real loopback backoff — the behavioural +// real-429 path is already covered by evm-adapter.unit.test.ts's perpetual-429 +// block; this pins the wiring robustly. +describe('constructor RPC-retry budget wiring (#894 single-RPC vs multi-RPC, B-5)', () => { + afterEach(() => { vi.useRealTimers(); }); + const retryFuncOf = (a: EVMChainAdapter) => + (a.getProvider() as unknown as { + _getConnection: () => { retryFunc?: (r: unknown, x: unknown, n: number) => Promise }; + })._getConnection().retryFunc!; + + it('SINGLE-RPC: the one provider keeps the bounded retry budget (retries, NOT 0)', async () => { + const a = new EVMChainAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); + try { + const retry = retryFuncOf(a); + expect(typeof retry).toBe('function'); + vi.useFakeTimers(); + const p0 = retry({}, {}, 0); await vi.advanceTimersByTimeAsync(2_000); expect(await p0).toBe(true); // retries + const p4 = retry({}, {}, 4); await vi.advanceTimersByTimeAsync(2_000); expect(await p4).toBe(true); // ...through the budget + expect(await retry({}, {}, 5)).toBe(false); // bounded → eventually RPC_ENDPOINTS_EXHAUSTED (#894) + } finally { + a.destroy(); + } + }); + + it('MULTI-RPC: each provider gives up at attempt 0 (retries=0) so the explicit failover advances at once', async () => { + const a = new EVMChainAdapter(minimalConfig({ rpcUrl: 'https://a.example', rpcUrls: ['https://b.example'] })); + try { + expect(await retryFuncOf(a)({}, {}, 0)).toBe(false); + } finally { + a.destroy(); + } + }); +}); + +// B-6: listenForEvents' wide eth_getLogs reads go through queryFilterWithFailover, +// which bakes in LOG_SCAN_OPTS (multiAttemptTimeoutMs = RPC_LOG_SCAN_TIMEOUT_MS, +// 30s) so a slow-but-healthy getLogs (a 9000-block poller range) isn't aborted by +// the 4s point-read cap. Exercises the REAL events.ts path (not readWithFailover +// directly) so dropping LOG_SCAN_OPTS from a branch would re-introduce the stall. +describe('listenForEvents — wide getLogs honours the 30s LOG_SCAN cap (B-6)', () => { + afterEach(() => { vi.useRealTimers(); }); + + it('a queryFilter that resolves at >4s but <30s COMPLETES (not aborted at the 4s point-read cap)', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + (a as any).initialized = true; // listenForEvents awaits init() first + (a as any).providers = [{}, {}]; // MULTI-RPC → the per-attempt cap applies + const log = { + topics: ['0x' + '00'.repeat(32)], data: '0x', blockNumber: 1, + transactionHash: '0x' + '11'.repeat(32), transactionIndex: 0, + }; + const parsed = { args: { batchId: 1n, publisher: '0x' + '22'.repeat(20), merkleRoot: '0x' + '33'.repeat(32), startKAId: 1n, endKAId: 1n } }; + // A wide getLogs that takes 5s (>4s point-read cap, <30s LOG_SCAN cap). + const queryFilter = recorder(() => new Promise((resolve) => { setTimeout(() => resolve([log]), 5_000); })); + const storage: any = { + connect: () => storage, // rebindContract(storage, p) = storage.connect(p) + filters: { KnowledgeBatchCreated: () => 'F' }, + interface: { parseLog: () => parsed }, + queryFilter, + }; + (a as any).contracts.knowledgeAssetsStorage = storage; + + const collected: Array<{ type: string }> = []; + const done = (async () => { + for await (const ev of a.listenForEvents({ eventTypes: ['KnowledgeBatchCreated'], fromBlock: 0 } as any)) { + collected.push(ev as { type: string }); + } + })(); + await vi.advanceTimersByTimeAsync(6_000); // past the 5s getLogs, under the 30s LOG_SCAN cap + await done; + + expect(queryFilter.calls).toHaveLength(1); // completed on the first endpoint, NOT aborted+failed-over + expect(collected).toHaveLength(1); + expect(collected[0].type).toBe('KnowledgeBatchCreated'); + }); +}); diff --git a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts index b70167fade..0bd140b2cd 100644 --- a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts +++ b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts @@ -17,6 +17,16 @@ import { ethers } from 'ethers'; import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; import { computeUpdateACKDigest } from '@origintrail-official/dkg-core'; +// Round-2 review split withRunner → rebindContract (contract.connect(p), no +// test-double fallback). These tests mock this.contracts.* as plain method +// objects; `connectable` makes a mock satisfy that boundary with a +// single-provider NO-OP self-rebind, so the REAL rebindContract runs. +function connectable(mock: T): T { + const m = mock as { connect?: unknown }; + if (m && typeof m.connect !== 'function') m.connect = () => mock; + return mock; +} + const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; const ADMIN_PK = '0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a'; const KAV10_ADDRESS = '0x000000000000000000000000000000000000c10a'; @@ -48,19 +58,19 @@ function makeStubbedAdapter(opts: { (a as any).initialized = true; // R1: getEvmChainId reads chainId via readWithFailover over this.providers[0] // (=== this.provider in prod). Set the mock on BOTH so the digest's chainId - // read resolves to the stub instead of dialling the placeholder RPC. (The - // contract stubs below have no `.connect`, so contractReadWithFailover's - // withRunner uses them directly — no change needed there.) + // read resolves to the stub instead of dialling the placeholder RPC. const provider = { getNetwork: async () => ({ chainId: TEST_CHAIN_ID }) }; (a as any).provider = provider; (a as any).providers = [provider]; - (a as any).contracts.knowledgeAssetsLifecycle = { + // The contract view reads go through contractReadWithFailover → rebindContract + // (contract.connect(p)), so the contract stubs must be .connect-able. + (a as any).contracts.knowledgeAssetsLifecycle = connectable({ getAddress: async () => KAV10_ADDRESS, - }; - (a as any).contracts.contextGraphStorage = { + }); + (a as any).contracts.contextGraphStorage = connectable({ kaToContextGraph: async () => opts.contextGraphId, - }; - (a as any).contracts.knowledgeAssetStorage = { + }); + (a as any).contracts.knowledgeAssetStorage = connectable({ getMerkleRoots: async () => new Array(Number(opts.preUpdateMerkleRootCount)).fill('0x00'), getTokenAmount: async () => opts.currentTokenAmount, // (preUpdateMerkleRootCount, minted, byteSize, endEpoch, tokenAmount, isImmutable, preUpdateMerkleLeafCount) @@ -73,7 +83,7 @@ function makeStubbedAdapter(opts: { false, 0n, ], - }; + }); return a; } From 8fa7e2bcc1f74f805f3994a451d960e9697af422 Mon Sep 17 00:00:00 2001 From: Jurij Skornik Date: Fri, 26 Jun 2026 12:53:12 +0200 Subject: [PATCH 4/4] test(chain): extract shared connectable() test helper (PR #1335 review nit) The connectable() mock-rebind helper was copied across evm-adapter.unit, getmaxkanumberforauthor.unit, and v10-update-ack-digest-parity.unit. Move it into a single packages/chain/test/connectable.ts and import it, removing the parallel copies that could drift. Round-3 review nit; test-only. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_012DGBSrmN6Y8Vsgb7x46amp --- packages/chain/test/connectable.ts | 19 +++++++++++++++++++ packages/chain/test/evm-adapter.unit.test.ts | 13 +------------ .../test/getmaxkanumberforauthor.unit.test.ts | 11 +---------- .../v10-update-ack-digest-parity.unit.test.ts | 11 +---------- 4 files changed, 22 insertions(+), 32 deletions(-) create mode 100644 packages/chain/test/connectable.ts diff --git a/packages/chain/test/connectable.ts b/packages/chain/test/connectable.ts new file mode 100644 index 0000000000..ebed7dc9bf --- /dev/null +++ b/packages/chain/test/connectable.ts @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Shared test helper (NOT a test file — no `.test.` suffix, so vitest skips it). + * + * The round-2 review split `withRunner` into `rebindContract` (contract.connect(p)) + * and `rebindSigner` (signer.connect(provider)) and removed the no-`.connect` + * test-double fallback, so production handles MUST be `.connect`-able (they are — + * real ethers `Contract`s). Unit tests, however, mock `this.contracts.*` as plain + * method objects. `connectable` makes such a mock satisfy that boundary with a + * single-provider NO-OP self-rebind (`connect` returns the mock itself), so the + * REAL `rebindContract`/`rebindSigner` runs against it — a genuine rebind + * regression is still caught. Idempotent: a mock that already carries a MEANINGFUL + * `.connect` (e.g. `token.connect(signer) → tokenWithSigner`) is left untouched. + */ +export function connectable(mock: T): T { + const m = mock as { connect?: unknown }; + if (m && typeof m.connect !== 'function') m.connect = () => mock; + return mock; +} diff --git a/packages/chain/test/evm-adapter.unit.test.ts b/packages/chain/test/evm-adapter.unit.test.ts index 286cfa46d8..312efde107 100644 --- a/packages/chain/test/evm-adapter.unit.test.ts +++ b/packages/chain/test/evm-adapter.unit.test.ts @@ -27,6 +27,7 @@ import { } from '../src/chain-adapter.js'; import { _resetRpcFailoverStatsForTest } from '../src/rpc-failover-log.js'; import { isChainRpcTransportError } from '../src/chain-rpc-transport-error.js'; +import { connectable } from './connectable.js'; // Isolate the process-wide RPC failover stats + dedup window before EVERY test // so a failover/exhaustion warning emitted by one test can't suppress (via the @@ -36,18 +37,6 @@ beforeEach(() => { _resetRpcFailoverStatsForTest(); }); -// Round-2 review split withRunner → rebindContract (contract.connect(p), NO -// test-double fallback) so production handles MUST be .connect-able (they are — -// real ethers Contracts). These unit tests mock this.contracts.* as plain method -// objects; `connectable` makes a mock satisfy that boundary with a single-provider -// NO-OP self-rebind (connect returns the mock), so the REAL rebindContract runs -// (a genuine rebindContract regression would still be caught). Idempotent — -// leaves a MEANINGFUL .connect (e.g. token → tokenWithSigner) untouched. -function connectable(mock: T): T { - const m = mock as { connect?: unknown }; - if (m && typeof m.connect !== 'function') m.connect = () => mock; - return mock; -} const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; const OTHER_PK = '0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b63b91100'; diff --git a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts index 5d0d55f39f..64d465d74d 100644 --- a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts +++ b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts @@ -21,16 +21,7 @@ import { describe, it, expect, vi } from 'vitest'; import { ethers } from 'ethers'; import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; import { RPC_READ_STALL_TIMEOUT_MS } from '../src/evm-adapter-constants.js'; - -// Round-2 review split withRunner → rebindContract (contract.connect(p), no -// test-double fallback). makeAdapter gives `storage` a .connect; `connectable` -// makes any OTHER connect-less contract mock satisfy the boundary with a -// single-provider NO-OP self-rebind, so the REAL rebindContract runs. -function connectable(mock: T): T { - const m = mock as { connect?: unknown }; - if (m && typeof m.connect !== 'function') m.connect = () => mock; - return mock; -} +import { connectable } from './connectable.js'; function recorder(impl: (...args: A) => R) { const calls: A[] = []; diff --git a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts index 0bd140b2cd..c1b5cd9a6e 100644 --- a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts +++ b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts @@ -16,16 +16,7 @@ import { describe, it, expect } from 'vitest'; import { ethers } from 'ethers'; import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; import { computeUpdateACKDigest } from '@origintrail-official/dkg-core'; - -// Round-2 review split withRunner → rebindContract (contract.connect(p), no -// test-double fallback). These tests mock this.contracts.* as plain method -// objects; `connectable` makes a mock satisfy that boundary with a -// single-provider NO-OP self-rebind, so the REAL rebindContract runs. -function connectable(mock: T): T { - const m = mock as { connect?: unknown }; - if (m && typeof m.connect !== 'function') m.connect = () => mock; - return mock; -} +import { connectable } from './connectable.js'; const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; const ADMIN_PK = '0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a';