diff --git a/packages/chain/src/evm-adapter-ack-sign.ts b/packages/chain/src/evm-adapter-ack-sign.ts index 5fc46c26b..9702252b5 100644 --- a/packages/chain/src/evm-adapter-ack-sign.ts +++ b/packages/chain/src/evm-adapter-ack-sign.ts @@ -37,7 +37,11 @@ export class AckSignMethods extends EVMChainAdapterBase { 'Verify cannot enforce ACK quorum without a real chain read — fix the adapter wiring or pass an explicit override.', ); } - const value = Number(await this.contracts.parametersStorage.minimumRequiredSignatures()); + const value = Number(await this.contractReadWithFailover( + 'parametersStorage.minimumRequiredSignatures', + this.contracts.parametersStorage, + (c) => c.minimumRequiredSignatures(), + )); this.cachedMinRequiredSignatures = { value, cachedAt: now }; return value; } @@ -52,7 +56,9 @@ export class AckSignMethods extends EVMChainAdapterBase { 'Verify path cannot enforce sharding-table eligibility without it.', ); } - return Boolean(await storage.nodeExists(identityId)); + return Boolean(await this.contractReadWithFailover( + 'shardingTableStorage.nodeExists', storage, (c) => c.nodeExists(identityId), + )); } /** @@ -80,16 +86,18 @@ export class AckSignMethods extends EVMChainAdapterBase { if (!identityStorage) return { valid: false, reason: 'rpc-error' }; const keyHash = ethers.keccak256(ethers.solidityPacked(['address'], [recoveredAddress])); - const hasPurpose: boolean = await identityStorage.keyHasPurpose( - claimedIdentityId, - keyHash, - OPERATIONAL_KEY_PURPOSE, + const hasPurpose: boolean = await this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(claimedIdentityId, keyHash, OPERATIONAL_KEY_PURPOSE), ); if (!hasPurpose) return { valid: false, reason: 'key-not-registered' }; const shardingTableStorage = await this.resolveContract('ShardingTableStorage'); if (!shardingTableStorage) return { valid: false, reason: 'rpc-error' }; - const inST: boolean = Boolean(await 
shardingTableStorage.nodeExists(claimedIdentityId)); + const inST: boolean = Boolean(await this.contractReadWithFailover( + 'shardingTableStorage.nodeExists', shardingTableStorage, + (c) => c.nodeExists(claimedIdentityId), + )); if (!inST) return { valid: false, reason: 'not-in-sharding-table' }; return { valid: true }; } catch { @@ -122,7 +130,10 @@ export class AckSignMethods extends EVMChainAdapterBase { if (!identityStorage) return false; const keyHash = ethers.keccak256(ethers.solidityPacked(['address'], [recoveredAddress])); - return identityStorage.keyHasPurpose(claimedIdentityId, keyHash, OPERATIONAL_KEY_PURPOSE); + return this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(claimedIdentityId, keyHash, OPERATIONAL_KEY_PURPOSE), + ); } async signACKDigest(digest: Uint8Array): Promise<{ r: Uint8Array; vs: Uint8Array } | undefined> { diff --git a/packages/chain/src/evm-adapter-base.ts b/packages/chain/src/evm-adapter-base.ts index 54b0809d8..e2ba1720d 100644 --- a/packages/chain/src/evm-adapter-base.ts +++ b/packages/chain/src/evm-adapter-base.ts @@ -11,7 +11,7 @@ * the external public API is unchanged. 
*/ -import { JsonRpcProvider, FallbackProvider, Wallet, Contract, ethers } from 'ethers'; +import { JsonRpcProvider, Wallet, Contract, ethers } from 'ethers'; import { createFilterErrorSilencer, installFilterNotFoundConsoleSuppressor, formatProviderError } from './filter-error-silencer.js'; import type { FilterErrorSilencer } from './filter-error-silencer.js'; import { DEFAULT_APPROVAL_POLICY } from './chain-adapter.js'; @@ -27,7 +27,7 @@ import { ChainRpcTransportError, createRpcTimeoutError } from './chain-rpc-trans import { computeApprovalAction, effectivePublishAllowance, V10_PUBLISH_ONCHAIN_MIN_ALLOWANCE } from './evm-adapter-allowance.js'; import { formatProviderContext } from './evm-adapter-types.js'; import type { ContractCache, EVMAdapterConfig } from './evm-adapter-types.js'; -import { RPC_READ_STALL_TIMEOUT_MS, DEFAULT_RANDOM_SAMPLING_HUB_REFRESH_MS, RPC_BROADCAST_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_TIMEOUT_MS, RPC_RECEIPT_POLL_INTERVAL_MS, RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, ADMIN_KEY_PURPOSE, OPERATIONAL_KEY_PURPOSE, PUBLISHER_FUNDING_CACHE_TTL_MS } from './evm-adapter-constants.js'; +import { RPC_READ_STALL_TIMEOUT_MS, DEFAULT_RANDOM_SAMPLING_HUB_REFRESH_MS, RPC_BROADCAST_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_ATTEMPT_TIMEOUT_MS, RPC_RECEIPT_TIMEOUT_MS, RPC_RECEIPT_POLL_INTERVAL_MS, RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, RPC_ENDPOINT_SET_RETRIES, RPC_ENDPOINT_SET_RETRY_BACKOFF_MS, ADMIN_KEY_PURPOSE, OPERATIONAL_KEY_PURPOSE, PUBLISHER_FUNDING_CACHE_TTL_MS } from './evm-adapter-constants.js'; /** * Maps a Hub-registered contract name to the function that invalidates @@ -307,6 +307,21 @@ async function contractAddress(contract: Contract): Promise { throw new Error('DKGKnowledgeAssets address is unavailable from the resolved contract handle.'); } +/** + * Failover classifier for CONTRACT VIEW reads (`contractReadWithFailover`'s + * default): the generic `isRetryableRpcError` transient set MINUS `BAD_DATA`. 
+ * A view `BAD_DATA` ("could not decode result data") is a DETERMINISTIC + * client-side decode of an empty / wrong-shape return for the ABI type — not an + * RPC outage — so failing over would re-hit the same decode on every endpoint + * and mask it as `RPC_ENDPOINTS_EXHAUSTED`. The pre-PR FallbackProvider never + * failed over on a post-decode error; this restores that. (Direct provider reads + * — getCode/getBalance/getNetwork — never produce BAD_DATA, so they keep the + * unmodified `isRetryableRpcError`.) + */ +function isContractViewRetryable(err: unknown): boolean { + return isRetryableRpcError(err) && errorCode(err) !== 'BAD_DATA'; +} + export class EVMChainAdapterBase { /** See `ChainAdapter.deploymentId`. */ get deploymentId(): string { @@ -317,7 +332,16 @@ export class EVMChainAdapterBase { readonly chainId: string; - protected readonly provider: JsonRpcProvider | FallbackProvider; + /** + * The bare primary RPC provider (== `primaryProvider`). The nominal runner + * that signers, boot-bound contract handles, and the Hub-rotation event + * subscription bind to — NOT the read-failover surface. Every read reconnects + * to a per-endpoint provider via `readWithFailover`; the `FallbackProvider` + * was removed (see the constructor). Kept as a distinct field name for the + * binding sites; reads must never call `this.provider.<method>()` directly + * (route through `readWithFailover`). + */ + protected readonly provider: JsonRpcProvider; protected readonly primaryProvider: JsonRpcProvider; @@ -597,26 +621,37 @@ export class EVMChainAdapterBase { // (gossip-publish-handler / finalization-handler `verifyOnChain`). Batching // is a transport optimisation only — disabling it is semantically inert and // does not change the number of `eth_getLogs` operations issued. 
+ // Immediate-failover (R1): per-endpoint retries are 0 when ≥2 endpoints are + // configured, so the FIRST retryable failure propagates at once and the + // explicit per-provider failover loops (reads: `readWithFailover`; writes: + // `sendContractTransaction` / broadcast / receipt / the V10 populate loop) + // advance to the next endpoint immediately instead of burning ~7.5s of + // same-endpoint backoff on an endpoint we already know is failing. A + // single-RPC node keeps the bounded `RPC_REQUEST_MAX_RETRIES` retry (its + // only resilience; #894) via the default. See `boundedRetryFetchRequest`. + const perEndpointRetries = this.rpcUrls.length > 1 ? 0 : undefined; this.providers = this.rpcUrls.map( - (url) => new JsonRpcProvider(boundedRetryFetchRequest(url), undefined, { + (url) => new JsonRpcProvider(boundedRetryFetchRequest(url, perEndpointRetries), undefined, { cacheTimeout: -1, polling: true, batchMaxCount: 1, }), ); this.primaryProvider = this.providers[0]; - this.provider = this.providers.length === 1 - ? this.primaryProvider - : new FallbackProvider( - this.providers.map((provider, index) => ({ - provider, - priority: index + 1, - stallTimeout: RPC_READ_STALL_TIMEOUT_MS, - weight: 1, - })), - undefined, - { quorum: 1 }, - ); + // No `FallbackProvider`: reads route through `readWithFailover` over the bare + // `this.providers[]` for TRUE immediate failover. ethers' quorum:1 + // FallbackProvider threw a fast error straight to the caller WITHOUT + // consulting a backup (it advanced only on a ~4s stall) — empirically + // unreliable read failover even with per-endpoint retries > 0 — and its + // sub-providers shared this same `this.providers[]` array, so the + // multi-RPC `retries=0` above would have disabled its staller-based failover + // anyway. Removing it also eliminates the sticky `_lastFatalError` / + // one-shot `#initialSync` latch. 
`this.provider` is now just the bare + // primary: the nominal runner that signers, boot-bound contract handles, and + // the Hub-rotation event subscription bind to. Every actual READ reconnects + // to the loop provider (`readWithFailover`) and every WRITE reconnects + // per-endpoint explicitly, so this binding is never the failover surface. + this.provider = this.primaryProvider; const providerContext = formatProviderContext(config); // PR-8: install the filter-not-found silencer. Without this, RPC // nodes that GC filters faster than ethers' polling cadence @@ -792,6 +827,128 @@ export class EVMChainAdapterBase { return null; } + /** + * Per-endpoint read-failover primitive (the bare `this.providers[]`, no + * FallbackProvider). Runs `fn` against each provider in turn; on a RETRYABLE + * error advances to the next (host-only `noteRpcFailover` per hop) and, once + * all are exhausted, throws the typed `RPC_ENDPOINTS_EXHAUSTED` (→ bounded + * 503). A NON-retryable error is rethrown AT ONCE (failing over a deterministic + * chain error would only mask it). The default "retryable?" classifier is + * `isRetryableRpcError`; override it via `opts.isRetryable` for reads whose + * error shapes carry domain meaning (a contract view's `BAD_DATA`, + * `getMaxKaNumberForAuthor`'s absent-view). + * + * The per-attempt `withTimeout` is a hard deadline that ABORTS and fails over a + * hung backend: + * - MULTI-RPC: every attempt capped at `RPC_READ_STALL_TIMEOUT_MS` (4s, suits + * a POINT read); a WIDE read (a multi-thousand-block `eth_getLogs`) raises + * it via `opts.multiAttemptTimeoutMs` so a slow-but-healthy scan isn't + * aborted + failed over into a spurious exhaustion. + * - SINGLE-RPC: uncapped (nothing to fail over to; #894) UNLESS + * `opts.attemptTimeoutMs` is given (a hard bound on EVERY attempt incl. + * single-RPC — fail-open funding reads that must not stall selection). 
+ * + * `fn` receives the active provider (`p => p.getCode(addr)`, or for a view + * `p => contract.connect(p).someView(args)`) and MUST be a PURE read — no + * sign / broadcast / WAL — since it may execute on more than one provider. + */ + protected async readWithFailover( + label: string, + fn: (provider: JsonRpcProvider) => Promise, + opts?: { + attemptTimeoutMs?: number; + multiAttemptTimeoutMs?: number; + // Override the "should this error fail over?" classifier (default + // `isRetryableRpcError`). + isRetryable?: (err: unknown) => boolean; + }, + ): Promise { + const isRetryable = opts?.isRetryable ?? isRetryableRpcError; + let lastRetryable: unknown; + for (let i = 0; i < this.providers.length; i += 1) { + const isLast = i === this.providers.length - 1; + // Per-attempt hard deadline (see the method doc — NOT the old FallbackProvider + // stallTimeout, which parallelized a backup rather than aborting a read): + // - MULTI-RPC: cap every attempt — `attemptTimeoutMs` if given, else + // `multiAttemptTimeoutMs` (raised for WIDE log scans so a slow-but-healthy + // getLogs isn't aborted), else the 4s point-read default. + // - SINGLE-RPC: uncapped UNLESS `attemptTimeoutMs` is given (fail-open + // funding reads); `multiAttemptTimeoutMs` never caps single-RPC (#894 — + // nothing to fail over to). + const capMs = opts?.attemptTimeoutMs + ?? (this.providers.length > 1 + ? (opts?.multiAttemptTimeoutMs ?? RPC_READ_STALL_TIMEOUT_MS) + : undefined); + try { + const attempt = fn(this.providers[i]); + return await (capMs == null + ? 
attempt + : withTimeout(attempt, capMs, `${label} via RPC #${i + 1}`)); + } catch (err) { + if (!isRetryable(err)) throw err; + lastRetryable = err; + if (!isLast) { + noteRpcFailover(label, this.rpcUrls[i], err, this.rpcUrls[i + 1]); + } + } + } + if (lastRetryable) noteRpcExhaustion(label, this.rpcUrls); + // Single provider → carry the typed code but keep the original message + // byte-identical (there is no second endpoint, so the raw message reads + // cleaner and any message-inspecting caller keeps seeing it). Multiple + // providers → the host-only "all endpoints" aggregate (never full URLs — + // a configured rpcUrl may carry an API key and this message can reach HTTP + // clients via response paths that echo err.message). Mirrors the write + // preparation loop's single-vs-multi message handling. + const message = this.providers.length <= 1 + ? errorMessage(lastRetryable) + : `${label} read failed on all configured RPC endpoints ` + + `(${this.rpcUrls.map(rpcHost).join(', ')}): ${errorMessage(lastRetryable)}`; + throw new ChainRpcTransportError('RPC_ENDPOINTS_EXHAUSTED', message, { + cause: lastRetryable, + rpcUrls: this.rpcUrls, + }); + } + + /** + * Rebind a CONTRACT to `runner` (a provider for a view read, or a signer for a + * write populate) for one per-endpoint attempt, leaving the boot-bound + * `this.contracts.*` handle untouched. The `as Contract` recovers the + * dynamic-method index signature ethers' `BaseContract.connect` drops. + */ + protected rebindContract(contract: Contract, runner: JsonRpcProvider | Wallet): Contract { + return contract.connect(runner) as Contract; + } + + /** Rebind a SIGNER to `provider` for one per-endpoint populate+sign attempt. 
*/ + protected rebindSigner(signer: Wallet, provider: JsonRpcProvider): Wallet { + return signer.connect(provider); + } + + /** + * `readWithFailover` for a CONTRACT VIEW read: runs `fn` against `contract` + * rebound to each provider in turn (failover), leaving `this.contracts.*` + * untouched. `fn` MUST be a pure view read. The default failover classifier is + * `isContractViewRetryable` (the transient set MINUS `BAD_DATA`, which on a + * view is a deterministic decode, not an outage, so it is rethrown rather than + * failed over and masked as exhaustion); a caller may pass its own `isRetryable`. + */ + protected contractReadWithFailover( + label: string, + contract: Contract, + fn: (c: Contract) => Promise, + opts?: { + attemptTimeoutMs?: number; + multiAttemptTimeoutMs?: number; + isRetryable?: (err: unknown) => boolean; + }, + ): Promise { + return this.readWithFailover(label, (p) => fn(this.rebindContract(contract, p)), { + ...opts, + isRetryable: opts?.isRetryable ?? isContractViewRetryable, + }); + } + protected async waitForReceiptWithFailover( txHash: string, label: string, @@ -829,19 +986,12 @@ export class EVMChainAdapterBase { } /** - * #888: populate + sign a V10 write tx with one-shot recovery for a - * stale-RPC `TooLowAllowance` revert, shared by BOTH V10 write paths - * (`createKnowledgeAssets` publish and `updateV10` — incl. metadata-only - * updates). ethers estimates gas while populating; on an internally - * load-balanced RPC that estimate can read a stale TRAC allowance and - * revert `TooLowAllowance` even though the approve above succeeded - * (post-approve propagation lag) or was skipped on a stale-high read of an - * allowance the prior write already consumed. 
This is strictly - * pre-broadcast (before the `onBroadcast` WAL checkpoint), so on that one - * revert we force a fresh approve up to the publish floor — confirming it - * is visible on the same read path via `ensureV10ApproveTrac(force=true)` — - * and retry populate+sign exactly once. Any other error, or a second - * `TooLowAllowance`, is enriched when possible and then propagated. + * #888: populate + sign a V10 write tx (shared by publish + update) with a + * one-shot recovery for a stale-RPC `TooLowAllowance` revert. Gas estimation + * during populate can read a stale TRAC allowance and revert even though the + * approve succeeded; this is strictly pre-broadcast, so on that ONE revert we + * force a fresh approve (`ensureV10ApproveTrac(force=true)`) and retry exactly + * once. Any other error, or a second `TooLowAllowance`, propagates. */ protected async populateAndSignV10WithAllowanceRecovery( signer: Wallet, @@ -852,13 +1002,26 @@ export class EVMChainAdapterBase { tokenAmount: bigint, reapproveLabel: string, ): Promise<{ signedTx: string; txHash: string }> { + // Per-endpoint populate+sign failover lives in the shared + // `populateAndSignAcrossProviders` (so a 429ing primary can't fail-fast the + // publish); the #888 stale-allowance recovery stays a strict ONE-SHOT. OUTER + // (this loop) owns the SINGLE `forcedReapprove` latch + the lone forced + // approve; INNER iterates the bare providers. `TooLowAllowance` is a + // CALL_EXCEPTION (non-retryable), so the inner loop does NOT fail over on it — + // it propagates up here. The latch is never reset per endpoint, so at most + // ONE forced approve fires per publish regardless of endpoints tried. Only the + // one returned signed tx is broadcast; the whole thing runs inside the + // per-wallet `KeyedSerializer` (#953), strictly pre-broadcast / pre-WAL. 
let forcedReapprove = false; for (;;) { try { - const populated = await (kaContract as any)[method].populateTransaction( - methodParams, + return await this.populateAndSignAcrossProviders( + kaContract, + method, + [methodParams], + signer, + `V10 ${method}`, ); - return await this.signPopulatedTransaction(signer, populated); } catch (err) { enrichEvmError(err); if (!forcedReapprove && isTooLowAllowanceError(err)) { @@ -875,9 +1038,9 @@ export class EVMChainAdapterBase { reapproveLabel, true, ); - continue; + continue; // re-run the WHOLE inner per-provider populate loop, allowance now in place } - throw err; + throw err; // any other error, or a SECOND TooLowAllowance, propagates } } } @@ -887,7 +1050,27 @@ export class EVMChainAdapterBase { txHash: string, label: string, ): Promise { - await this.broadcastSignedTransactionWithFailover(signedTx, txHash, label); + // Bounded set-retry, BROADCAST phase ONLY: after a full per-endpoint + // broadcast pass exhausts with a retryable error (a brief all-endpoints-429), + // re-broadcast the SAME signed tx up to `RPC_ENDPOINT_SET_RETRIES` extra + // passes with a short backoff. tx-safe: this seam is SIGNER-FREE so re-signing + // is structurally impossible, re-broadcasting the byte-identical tx is + // idempotent (`isKnownTransactionError`), and the WAL `onBroadcast` already + // fired once upstream. The receipt wait is NOT re-broadcast (it owns its own + // poll + deadline), so lock-hold (held across the retries for the V10 path) + // stays bounded. 
+ for (let pass = 0; ; pass += 1) { + try { + await this.broadcastSignedTransactionWithFailover(signedTx, txHash, label); + break; + } catch (err) { + if (isRetryableRpcError(err) && pass < RPC_ENDPOINT_SET_RETRIES) { + await sleep(RPC_ENDPOINT_SET_RETRY_BACKOFF_MS); + continue; + } + throw err; + } + } return this.waitForReceiptWithFailover(txHash, label); } @@ -946,31 +1129,34 @@ export class EVMChainAdapterBase { return this.sendSignedTransactionAndWait(signedTx, txHash, label); } - protected async sendContractTransaction( + /** + * Per-endpoint populate+sign loop SHARED by `sendContractTransaction` and the + * V10 publish/update path. Iterates `this.providers[i]` (signer + contract + * rebound to each), populates (gas/nonce/chainId reads, optional OOG-buffer gas + * estimate) + signs, and returns the FIRST successful `{signedTx,txHash}`. + * Advances ONLY on `isRetryableRpcError`; a non-retryable error (a decoded + * revert — e.g. `TooLowAllowance`) propagates AT ONCE so the caller can react. + * Exhaustion → typed `RPC_ENDPOINTS_EXHAUSTED`. + * + * STRICTLY pre-broadcast: signs once on the winning provider, does NOT broadcast + * or fire the WAL — the caller broadcasts the single returned tx. This keeps the + * WAL split intact (onBroadcast between sign and broadcast), so the V10 path + * reuses THIS helper rather than `sendContractTransaction` (which broadcasts + * internally). + */ + protected async populateAndSignAcrossProviders( contract: Contract, method: string, args: readonly unknown[], signer: Wallet, label: string, - // Optional gas headroom for methods whose on-chain gas cost depends on - // per-block randomness. ethers fills `gasLimit` from a single - // `eth_estimateGas` with NO margin, but that estimate runs against the - // CURRENT block while the tx is mined in a LATER block with different - // `prevrandao`/`blockhash`/`timestamp`. 
If the mined block's entropy - // drives a more expensive code path than the estimate's, the tx runs - // out of gas and reverts with empty (`0x`) data. `RandomSampling.createChallenge` - // is exactly this case (weighted CG draw + historical blockhash access): - // observed estimate-vs-execution spread is small here but unbounded in - // production with many CGs/KCs. When set, we estimate once and inflate - // the limit by `gasLimitBufferBps` basis points so the drift can't OOG. opts?: { gasLimitBufferBps?: number }, - ): Promise { + ): Promise<{ signedTx: string; txHash: string }> { let lastRetryable: unknown; for (let i = 0; i < this.providers.length; i += 1) { - const rpcSigner = signer.connect(this.providers[i]); - let prepared: { signedTx: string; txHash: string } | undefined; + const rpcSigner = this.rebindSigner(signer, this.providers[i]); try { - const connected = contract.connect(rpcSigner) as any; + const connected = this.rebindContract(contract, rpcSigner) as any; const populated = await withTimeout( connected[method].populateTransaction(...args) as Promise, RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, @@ -986,20 +1172,15 @@ export class EVMChainAdapterBase { populated.gasLimit = (est * BigInt(10_000 + opts.gasLimitBufferBps)) / 10_000n; } catch (estErr) { // A RETRYABLE estimate failure must not silently drop the OOG - // headroom: if another RPC is left, re-throw so the outer loop - // fails over to it (it may estimate fine and apply the buffer). - // Swallowing here would sign against the failing provider with no - // headroom and could reintroduce the exact OOG this guards - // against (Codex review). Only on the LAST provider — or for a - // non-retryable estimate error, where failover can't help — do we - // fall back to ethers' own unbuffered estimate during signing. + // headroom: if another RPC is left, re-throw so the loop fails over + // to it (it may estimate fine and apply the buffer). 
Only on the LAST + // provider — or for a non-retryable estimate error, where failover + // can't help — fall back to ethers' own unbuffered estimate during + // signing, leaving a breadcrumb so a recurring OOG isn't a mystery. const hasMoreProviders = i < this.providers.length - 1; if (isRetryableRpcError(estErr) && hasMoreProviders) { throw estErr; } - // Best-effort fallback, but DON'T swallow silently: leave a - // breadcrumb that the headroom was never applied so a recurring - // intermittent OOG isn't a mystery. console.warn( `[chain] ${label}: buffered gas estimation failed; falling back to ` + `ethers' unbuffered estimate (no OOG headroom applied): ` + @@ -1007,7 +1188,7 @@ export class EVMChainAdapterBase { ); } } - prepared = await withTimeout( + return await withTimeout( this.signPopulatedTransaction(rpcSigner, populated), RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS, `${label} transaction signing via RPC #${i + 1}`, @@ -1018,31 +1199,19 @@ export class EVMChainAdapterBase { if (i < this.providers.length - 1) { noteRpcFailover(`${label} preparation`, this.rpcUrls[i], err, this.rpcUrls[i + 1]); } - continue; } - if (!prepared) continue; - return this.sendSignedTransactionAndWait(prepared.signedTx, prepared.txHash, label); } if (lastRetryable) noteRpcExhaustion(`${label} preparation`, this.rpcUrls); - // A retryable error from the only configured RPC is still an "endpoints - // exhausted" condition: downstream classifiers (e.g. - // `/api/context-graph/register` → `classifyRegisterContextGraphError`) - // key the transient-outage 503 off the `RPC_ENDPOINTS_EXHAUSTED` code, so - // the code MUST be present even for a single-provider adapter (Codex - // PR #901). What we must NOT do for one provider is REWRITE the - // `.message` into the multi-endpoint "failed on all endpoints (url1, - // url2): ..." aggregate — there is no second endpoint, so the original - // message (e.g. 
a plain `connect ECONNREFUSED`) reads cleaner and any - // message-inspecting caller keeps seeing it verbatim. So: single provider - // → carry the code on a new error but keep the message byte-identical; - // multiple providers → the aggregated "all endpoints" message is - // meaningful and is asserted by evm-adapter.unit.test.ts. + // Single provider → carry the code on a new error but keep the message + // byte-identical (no second endpoint, so the raw message reads cleaner and + // any message-inspecting caller keeps seeing it). Multiple providers → the + // HOST-ONLY aggregate (never full URLs — a configured rpcUrl may carry an API + // key and this message reaches HTTP clients via response paths that echo + // err.message, e.g. the create+publish 207 tail). Asserted by + // evm-adapter.unit.test.ts. const message = this.providers.length <= 1 ? errorMessage(lastRetryable) : `${label} transaction preparation failed on all configured RPC endpoints ` + - // HOST-ONLY: a configured rpcUrl may carry an API key and this message - // is surfaced to HTTP clients via response paths that echo err.message - // (e.g. the create+publish 207 tail), so never embed full RPC URLs. `(${this.rpcUrls.map(rpcHost).join(', ')}): ${errorMessage(lastRetryable)}`; throw new ChainRpcTransportError('RPC_ENDPOINTS_EXHAUSTED', message, { cause: lastRetryable, @@ -1050,6 +1219,34 @@ export class EVMChainAdapterBase { }); } + protected async sendContractTransaction( + contract: Contract, + method: string, + args: readonly unknown[], + signer: Wallet, + label: string, + // Optional gas headroom for methods whose on-chain gas cost depends on + // per-block randomness. ethers fills `gasLimit` from a single + // `eth_estimateGas` with NO margin, but that estimate runs against the + // CURRENT block while the tx is mined in a LATER block with different + // `prevrandao`/`blockhash`/`timestamp`. 
If the mined block's entropy + // drives a more expensive code path than the estimate's, the tx runs + // out of gas and reverts with empty (`0x`) data. `RandomSampling.createChallenge` + // is exactly this case (weighted CG draw + historical blockhash access): + // observed estimate-vs-execution spread is small here but unbounded in + // production with many CGs/KCs. When set, we estimate once and inflate + // the limit by `gasLimitBufferBps` basis points so the drift can't OOG. + opts?: { gasLimitBufferBps?: number }, + ): Promise { + // Populate+sign with per-endpoint failover (shared with the V10 path), then + // broadcast+confirm the single signed tx. Split so `onBroadcast` (the WAL + // checkpoint) can sit between sign and broadcast for the V10 callers. + const { signedTx, txHash } = await this.populateAndSignAcrossProviders( + contract, method, args, signer, label, opts, + ); + return this.sendSignedTransactionAndWait(signedTx, txHash, label); + } + /** * V10 approval gate shared by `publishV10` and `updateV10`. * @@ -1081,9 +1278,10 @@ export class EVMChainAdapterBase { ): Promise { if (!this.contracts.token) return; const tokenWithSigner = this.contracts.token.connect(signer) as Contract; - const currentAllowance: bigint = await tokenWithSigner.allowance( - signer.address, - kav10Address, + const currentAllowance: bigint = await this.contractReadWithFailover( + 'token.allowance', + tokenWithSigner, + (c) => c.allowance(signer.address, kav10Address), ); const { needsApprove, targetAllowance } = computeApprovalAction( this.approvalPolicy, @@ -1168,10 +1366,11 @@ export class EVMChainAdapterBase { // recovery poll indefinitely. `withTimeout` rejects after // `RPC_READ_STALL_TIMEOUT_MS`, which the catch below treats as a // not-yet-visible read and backs off (same as a thrown read error). 
- current = (await withTimeout( - token.allowance(owner, spender), - RPC_READ_STALL_TIMEOUT_MS, + current = (await this.contractReadWithFailover( 'allowance visibility poll', + token, + (c) => c.allowance(owner, spender), + { attemptTimeoutMs: RPC_READ_STALL_TIMEOUT_MS }, )) as bigint; } catch { // Transient read failure / stall timeout — treat as not-yet-visible @@ -1223,7 +1422,10 @@ export class EVMChainAdapterBase { } else { authorized = []; for (const signer of ordered) { - if (await this.contracts.contextGraphs.isAuthorizedPublisher(contextGraphId, signer.address)) { + if (await this.contractReadWithFailover( + 'contextGraphs.isAuthorizedPublisher', this.contracts.contextGraphs, + (c) => c.isAuthorizedPublisher(contextGraphId, signer.address), + )) { authorized.push(signer); } } @@ -1371,10 +1573,12 @@ export class EVMChainAdapterBase { private async readNativeBalance(address: string): Promise { try { - return await withTimeout( - this.provider.getBalance(address), - RPC_READ_STALL_TIMEOUT_MS, + return await this.readWithFailover( 'publish wallet native balance', + (p) => p.getBalance(address), + // Fail-open funding read: keep a HARD per-attempt cap even on the + // last / single provider so a hung RPC can't stall wallet selection. 
+ { attemptTimeoutMs: RPC_READ_STALL_TIMEOUT_MS }, ); } catch { return null; @@ -1385,10 +1589,9 @@ export class EVMChainAdapterBase { const token = this.contracts.token; if (!token) return null; // no token contract: TRAC does not gate selection try { - return (await withTimeout( - token.balanceOf(address), - RPC_READ_STALL_TIMEOUT_MS, - 'publish wallet TRAC balance', + return (await this.contractReadWithFailover( + 'token.balanceOf', token, (c) => c.balanceOf(address), + { attemptTimeoutMs: RPC_READ_STALL_TIMEOUT_MS }, )) as bigint; } catch { return null; @@ -1467,7 +1670,10 @@ export class EVMChainAdapterBase { // No ContextGraphs surface ⇒ every operational wallet is a candidate // (mirrors nextAuthorizedSigner); otherwise only authorized wallets are // viable reroutes. - if (contextGraphs && !(await contextGraphs.isAuthorizedPublisher(contextGraphId, s.address))) return false; + if (contextGraphs && !(await this.contractReadWithFailover( + 'contextGraphs.isAuthorizedPublisher', contextGraphs, + (c) => c.isAuthorizedPublisher(contextGraphId, s.address), + ))) return false; return this.isWalletPublishFundable(s.address, await this.getWalletFunding(s.address), requiredTracWei); }), ); @@ -1534,10 +1740,9 @@ export class EVMChainAdapterBase { identityId: bigint, address: string, ): Promise { - return identityStorage.keyHasPurpose( - identityId, - this.walletKeyHash(address), - ADMIN_KEY_PURPOSE, + return this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(identityId, this.walletKeyHash(address), ADMIN_KEY_PURPOSE), ); } @@ -1546,10 +1751,9 @@ export class EVMChainAdapterBase { identityId: bigint, address: string, ): Promise { - return identityStorage.keyHasPurpose( - identityId, - this.walletKeyHash(address), - OPERATIONAL_KEY_PURPOSE, + return this.contractReadWithFailover( + 'identityStorage.keyHasPurpose', identityStorage, + (c) => c.keyHasPurpose(identityId, this.walletKeyHash(address), 
OPERATIONAL_KEY_PURPOSE), ); } @@ -1562,7 +1766,11 @@ export class EVMChainAdapterBase { protected async resolveContract(name: string, abiName?: string): Promise { let address: string; try { - address = await this.contracts.hub.getContractAddress(name); + address = await this.contractReadWithFailover( + `Hub.getContractAddress(${name})`, + this.contracts.hub, + (c) => c.getContractAddress(name), + ); } catch (err) { if (this.isContractMissingRevert(err)) { throw new Error(`Contract "${name}" not found in Hub at ${this.hubAddress}`, { cause: err }); @@ -1578,7 +1786,11 @@ export class EVMChainAdapterBase { protected async resolveAssetStorage(name: string, abiName?: string): Promise { let address: string; try { - address = await this.contracts.hub.getAssetStorageAddress(name); + address = await this.contractReadWithFailover( + `Hub.getAssetStorageAddress(${name})`, + this.contracts.hub, + (c) => c.getAssetStorageAddress(name), + ); } catch (err) { if (this.isContractMissingRevert(err)) { throw new Error(`Asset storage "${name}" not found in Hub at ${this.hubAddress}`, { cause: err }); @@ -1721,7 +1933,11 @@ export class EVMChainAdapterBase { await this.startHubRotationListener(); - const tokenAddress: string = this.tokenAddress ?? await this.contracts.hub.getContractAddress('Token'); + const tokenAddress: string = this.tokenAddress ?? await this.contractReadWithFailover( + 'Hub.getContractAddress(Token)', + this.contracts.hub, + (c) => c.getContractAddress('Token'), + ); if (tokenAddress !== ethers.ZeroAddress) { this.contracts.token = new Contract( tokenAddress, @@ -1747,7 +1963,7 @@ export class EVMChainAdapterBase { } protected async getBlockTimestamp(blockNumber: number): Promise { - const block = await this.provider.getBlock(blockNumber); + const block = await this.readWithFailover('getBlock', (p) => p.getBlock(blockNumber)); return block?.timestamp ?? 
0; } @@ -1758,7 +1974,9 @@ export class EVMChainAdapterBase { async getIdentityId(): Promise { await this.init(); const identityStorage = await this.getIdentityStorage(); - const id: bigint = await identityStorage.getIdentityId(this.signer.address); + const id: bigint = await this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(this.signer.address), + ); return id; } @@ -1822,27 +2040,43 @@ export class EVMChainAdapterBase { const getMax = (storage as any).getMaxKaNumberForAuthor; if (typeof getMax?.staticCall === 'function') { try { - const max = await getMax.staticCall(normalized); + // Route through `readWithFailover` (which gives the per-attempt stall + // timeout + endpoint failover for free) with a CUSTOM classifier: the + // absent-view shapes (`BAD_DATA` empty-`0x`, bare `CALL_EXCEPTION`) are + // DETERMINISTIC across endpoints and mean "pre-10.0.4 contract lacks the + // selector", so they are NON-retryable here — `readWithFailover` rethrows + // them straight to the catch below (→ scan / bytecode-confirm) instead of + // failing over and masking them as `RPC_ENDPOINTS_EXHAUSTED`. ONLY a + // genuine transient advances to the next endpoint. `isKaHighWaterViewUnavailable` + // runs FIRST (it enriches the error) so the ordering invariant the catch + // below also relies on is preserved. 
+ const max = await this.readWithFailover( + 'DKGKnowledgeAssets.getMaxKaNumberForAuthor', + (p) => this.rebindContract(storage, p).getMaxKaNumberForAuthor.staticCall(normalized), + { + isRetryable: (err) => + isRetryableRpcError(err) + && !isKaHighWaterViewUnavailable(err) + && !isKaHighWaterBareRevert(err), + }, + ); return BigInt(max); } catch (err) { - // Ordering invariant: isKaHighWaterViewUnavailable runs first and calls - // enrichEvmError(err), which only rewrites a message carrying decodable - // `data=0x…` + the literal "unknown custom error" — neither present on a - // bare "missing revert data" (data=null) error — so it leaves the shape - // isKaHighWaterBareRevert keys on untouched. Preserve that if - // enrichEvmError's rewrite rules change. if (isKaHighWaterViewUnavailable(err)) { // Unambiguous absent-view shape → fall through to the bounded scan. } else if (isKaHighWaterBareRevert(err)) { bareRevert = err; // confirm against the deployed bytecode below } else { - throw err; // transient RPC / decoded revert → never crawl + throw err; // transient exhaustion / decoded revert → never crawl } } } const storageAddress = await contractAddress(storage); - const code = await this.provider.getCode(storageAddress); + const code = await this.readWithFailover( + 'DKGKnowledgeAssets getCode', + (p) => p.getCode(storageAddress), + ); if (!code || code === '0x') { throw new Error(`DKGKnowledgeAssets resolved to ${storageAddress}, but no contract code is deployed there.`); } @@ -2246,7 +2480,7 @@ export class EVMChainAdapterBase { if (EVMChainAdapterBase.preflightCacheFresh(this.cachedChainId, now)) { return this.cachedChainId!.value; } - const network = await this.provider.getNetwork(); + const network = await this.readWithFailover('getNetwork (chainId)', (p) => p.getNetwork()); this.cachedChainId = { value: network.chainId, cachedAt: now }; return network.chainId; } @@ -2263,7 +2497,7 @@ export class EVMChainAdapterBase { */ async hasContractCode(address: 
string): Promise { try { - const code = await this.provider.getCode(address); + const code = await this.readWithFailover('hasContractCode getCode', (p) => p.getCode(address)); return code !== undefined && code !== null && code !== '0x' && code.length > 2; } catch { return false; @@ -2305,9 +2539,9 @@ export class EVMChainAdapterBase { ); } if (this.contracts.contextGraphs) { - const authorized = await this.contracts.contextGraphs.isAuthorizedPublisher( - params.contextGraphId, - selected.address, + const authorized = await this.contractReadWithFailover( + 'contextGraphs.isAuthorizedPublisher', this.contracts.contextGraphs, + (c) => c.isAuthorizedPublisher(params.contextGraphId, selected.address), ); if (!authorized) { throw new Error( @@ -2590,14 +2824,22 @@ export class EVMChainAdapterBase { } async getBlockNumber(): Promise { - return this.provider.getBlockNumber(); + return this.readWithFailover('getBlockNumber', (p) => p.getBlockNumber()); } getProvider(): JsonRpcProvider { return this.primaryProvider; } - getReadProvider(): JsonRpcProvider | FallbackProvider { + /** + * @deprecated Returns the bare PRIMARY provider, which does NOT fail over: the + * ethers `FallbackProvider` was removed and reads now route through the + * adapter's own read methods (`readWithFailover` over `this.providers[]`). Call + * those read methods instead, or `getProvider()` if you explicitly want the + * bare primary. Retained only for backward compatibility — this adapter is a + * published export, so removing a public method would be a breaking change. 
+ */ + getReadProvider(): JsonRpcProvider { return this.provider; } diff --git a/packages/chain/src/evm-adapter-constants.ts b/packages/chain/src/evm-adapter-constants.ts index aadf3e7b7..4cd9ecf8b 100644 --- a/packages/chain/src/evm-adapter-constants.ts +++ b/packages/chain/src/evm-adapter-constants.ts @@ -44,6 +44,21 @@ export const MAX_PROBE_AGE_MS = 30_000; export const RPC_READ_STALL_TIMEOUT_MS = 4_000; +/** + * Per-attempt deadline for WIDE `eth_getLogs` reads (the `evm-adapter-events.ts` + * `queryFilter` scans, which run over `[fromBlock ?? 0, toBlock]` — up to the + * event poller's 9,000-block window, and the full chain on a cold-start + * backfill). These legitimately take tens of seconds on a busy/slow chain, so + * the 4s `RPC_READ_STALL_TIMEOUT_MS` point-read cap would abort a healthy scan + * and fail it over across every endpoint → spurious `RPC_ENDPOINTS_EXHAUSTED` + * (which, in the poller, escapes before the cursor advances → a permanent + * stall). 30s still hard-bounds a genuinely hung backend on a multi-RPC node; + * it is passed as `multiAttemptTimeoutMs`, so single-RPC stays uncapped (#894). + * Larger than `KA_HIGH_WATER_PAGE_TIMEOUT_MS` (15s) because that bounds smaller + * 2,000-block pages, whereas this covers the wider 9,000-block poller window. + */ +export const RPC_LOG_SCAN_TIMEOUT_MS = 30_000; + export const RPC_TRANSACTION_POPULATION_ATTEMPT_TIMEOUT_MS = 10_000; export const RPC_BROADCAST_ATTEMPT_TIMEOUT_MS = 10_000; @@ -54,6 +69,23 @@ export const RPC_RECEIPT_POLL_INTERVAL_MS = 2_000; export const RPC_RECEIPT_TIMEOUT_MS = 180_000; +/** + * Bounded "retry the whole endpoint set" for the BROADCAST phase (S2). After a + * full per-endpoint broadcast pass exhausts with a retryable error (e.g. 
a brief + * window where ALL endpoints 429), `sendSignedTransactionAndWait` re-broadcasts + * the SAME already-signed/already-WAL-checkpointed tx up to this many extra full + * passes, with `RPC_ENDPOINT_SET_RETRY_BACKOFF_MS` between passes, before + * surfacing `RPC_ENDPOINTS_EXHAUSTED`. Default `1` (one extra pass) keeps the + * "ride out a brief all-down blip" property without per-endpoint latency; `0` + * fails fast on the first full-pass exhaustion. Re-broadcasting the identical + * signed tx is idempotent (`isKnownTransactionError`), so this cannot double- + * submit or change the nonce — it is scoped to broadcast ONLY (receipt waiting + * owns its own deadline), so total lock-hold stays bounded. + */ +export const RPC_ENDPOINT_SET_RETRIES = 1; + +export const RPC_ENDPOINT_SET_RETRY_BACKOFF_MS = 500; + export const ADMIN_KEY_PURPOSE = 1; export const OPERATIONAL_KEY_PURPOSE = 2; diff --git a/packages/chain/src/evm-adapter-context-graph.ts b/packages/chain/src/evm-adapter-context-graph.ts index 4c2e3b1a3..f2e20a349 100644 --- a/packages/chain/src/evm-adapter-context-graph.ts +++ b/packages/chain/src/evm-adapter-context-graph.ts @@ -238,7 +238,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async isContextGraphActiveOnChain(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - return Boolean(await cgs.isContextGraphActive(contextGraphId)); + return Boolean(await this.contractReadWithFailover( + 'cgStorage.isContextGraphActive', cgs, (c) => c.isContextGraphActive(contextGraphId), + )); } async createOnChainContextGraph(params: CreateOnChainContextGraphParams): Promise { @@ -470,7 +472,7 @@ export class ContextGraphMethods extends EVMChainAdapterBase { // Unreachable below (kept for type-completeness until the mirror is removed); // the unsupported-mirror guard above throws before any on-chain side effect. 
- const v10ChainId = (await this.provider.getNetwork()).chainId; + const v10ChainId = (await this.readWithFailover('getNetwork (chainId)', (p) => p.getNetwork())).chainId; const v10KavAddress = await this.contracts.knowledgeAssetsLifecycle!.getAddress(); const authorTypedData = buildAuthorAttestationTypedData({ chainId: v10ChainId, @@ -514,21 +516,27 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async getKAContextGraphId(kaId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const cgId: bigint = await cgs.kaToContextGraph(kaId); + const cgId: bigint = await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', cgs, (c) => c.kaToContextGraph(kaId), + ); return BigInt(cgId); } async getContextGraphKCCount(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const count: bigint = await cgs.getContextGraphKaCount(contextGraphId); + const count: bigint = await this.contractReadWithFailover( + 'cgStorage.getContextGraphKaCount', cgs, (c) => c.getContextGraphKaCount(contextGraphId), + ); return BigInt(count); } async getContextGraphKCAt(contextGraphId: bigint, index: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const kaId: bigint = await cgs.getContextGraphKaAt(contextGraphId, index); + const kaId: bigint = await this.contractReadWithFailover( + 'cgStorage.getContextGraphKaAt', cgs, (c) => c.getContextGraphKaAt(contextGraphId, index), + ); return BigInt(kaId); } @@ -544,11 +552,15 @@ export class ContextGraphMethods extends EVMChainAdapterBase { await this.init(); const cgs = this.requireContextGraphStorage(); try { - const raw: bigint = BigInt(await cgs.getAccessPolicy(contextGraphId)); + const raw: bigint = BigInt(await this.contractReadWithFailover( + 'cgStorage.getAccessPolicy', cgs, (c) => c.getAccessPolicy(contextGraphId), + )); return Number(raw); } catch (primaryErr) { try { - const cg = await 
cgs.getContextGraph(contextGraphId); + const cg = await this.contractReadWithFailover( + 'cgStorage.getContextGraph', cgs, (c) => c.getContextGraph(contextGraphId), + ); const raw = cg?.accessPolicy ?? (Array.isArray(cg) ? cg[5] : undefined); @@ -581,7 +593,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { }> { await this.init(); const cgs = this.requireContextGraphStorage(); - const result = await cgs.getPublishPolicy(contextGraphId); + const result = await this.contractReadWithFailover( + 'cgStorage.getPublishPolicy', cgs, (c) => c.getPublishPolicy(contextGraphId), + ); // Ethers v6 returns named tuple as both array and object access; // destructure positionally to stay robust against ABI naming // changes. @@ -609,7 +623,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async getContextGraphParticipantAgents(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const raw: string[] = await cgs.getParticipantAgents(contextGraphId); + const raw: string[] = await this.contractReadWithFailover( + 'cgStorage.getParticipantAgents', cgs, (c) => c.getParticipantAgents(contextGraphId), + ); return raw.map((addr: string) => ethers.getAddress(addr)); } @@ -633,7 +649,9 @@ export class ContextGraphMethods extends EVMChainAdapterBase { async getContextGraphNameHash(contextGraphId: bigint): Promise { await this.init(); const cgs = this.requireContextGraphStorage(); - const raw: string = await cgs.getNameHash(contextGraphId); + const raw: string = await this.contractReadWithFailover( + 'cgStorage.getNameHash', cgs, (c) => c.getNameHash(contextGraphId), + ); if (!raw || raw === ethers.ZeroHash) return null; return raw.toLowerCase(); } diff --git a/packages/chain/src/evm-adapter-conviction.ts b/packages/chain/src/evm-adapter-conviction.ts index c85f41e27..491cb4f08 100644 --- a/packages/chain/src/evm-adapter-conviction.ts +++ b/packages/chain/src/evm-adapter-conviction.ts @@ -39,7 +39,9 @@ export 
class ConvictionMethods extends EVMChainAdapterBase implements Conviction if (!this.contracts.dkgPublishingConvictionNFT) return 0n; if (!ethers.isAddress(agent)) return 0n; try { - const id: bigint = await this.contracts.dkgPublishingConvictionNFT.agentToAccountId(agent); + const id: bigint = await this.contractReadWithFailover( + 'pcaNFT.agentToAccountId', this.contracts.dkgPublishingConvictionNFT, (c) => c.agentToAccountId(agent), + ); return BigInt(id); } catch (err: any) { if (err?.code === 'CALL_EXCEPTION') return 0n; @@ -56,7 +58,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // (committedTRAC, createdAtEpoch, expiresAtEpoch, createdAtTimestamp, // expiresAtTimestamp, lockDurationEpochs, discountBps, // lastSettledWindow, fullySwept). Pull index 5. - const tuple = await this.contracts.dkgPublishingConvictionNFT.accounts(accountId); + const tuple = await this.contractReadWithFailover( + 'pcaNFT.accounts', this.contracts.dkgPublishingConvictionNFT, (c) => c.accounts(accountId), + ); const lock = tuple[5]; return Number(lock); } catch (err: any) { @@ -110,7 +114,7 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // wall clock first to mirror the contract exactly — otherwise the SDK // would coerce, then fall through to full-price direct spend. if (info.expiresAtTimestamp > 0) { - const latestBlock = await this.provider.getBlock('latest'); + const latestBlock = await this.readWithFailover('conviction getBlock', (p) => p.getBlock('latest')); const nowTs = latestBlock ? 
Number(latestBlock.timestamp) : Math.floor(Date.now() / 1000); if (nowTs >= info.expiresAtTimestamp) return false; } @@ -124,10 +128,12 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction if (!this.contracts.chronos) { this.contracts.chronos = await this.resolveContract('Chronos'); } - const currentEpoch: bigint = BigInt(await this.contracts.chronos.getCurrentEpoch()); - const remaining: bigint = await this.contracts.dkgPublishingConvictionNFT.getRemainingAllowance( - accountId, - currentEpoch, + const currentEpoch: bigint = BigInt(await this.contractReadWithFailover( + 'chronos.getCurrentEpoch', this.contracts.chronos, (c) => c.getCurrentEpoch(), + )); + const remaining: bigint = await this.contractReadWithFailover( + 'pcaNFT.getRemainingAllowance', this.contracts.dkgPublishingConvictionNFT, + (c) => c.getRemainingAllowance(accountId, currentEpoch), ); return BigInt(remaining) >= discountedCost; } catch (err: any) { @@ -139,7 +145,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction async getPublishingConvictionAccountOwner(accountId: bigint): Promise { await this.init(); const nft = await this.resolveContract('DKGPublishingConvictionNFT'); - const owner = await nft.ownerOf(accountId); + const owner = await this.contractReadWithFailover( + 'pcaNFT.ownerOf', nft, (c) => c.ownerOf(accountId), + ); return ethers.getAddress(owner); } @@ -203,7 +211,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // createAccount() does transferFrom(msg.sender → stakingStorage, // committedTRAC) — the signer must allow the NFT to pull the TRAC. 
if (this.contracts.token) { - const allowance: bigint = await this.contracts.token.allowance(this.signer.address, nftAddress); + const allowance: bigint = await this.contractReadWithFailover( + 'token.allowance', this.contracts.token, (c) => c.allowance(this.signer.address, nftAddress), + ); if (allowance < committedTRAC) { await this.sendContractTransaction( this.contracts.token, @@ -259,7 +269,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction // for a genuine account-missing revert so the route can disambiguate. if (!this.contracts.dkgPublishingConvictionNFT) throw new PcaUnavailableError(); try { - const t = await this.contracts.dkgPublishingConvictionNFT.getAccountInfo(accountId); + const t = await this.contractReadWithFailover( + 'pcaNFT.getAccountInfo', this.contracts.dkgPublishingConvictionNFT, (c) => c.getAccountInfo(accountId), + ); return { owner: ethers.getAddress(t[0]), committedTRAC: BigInt(t[1]), @@ -286,7 +298,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction const nft = this.requireConvictionNFT(); const nftAddress = await nft.getAddress(); if (this.contracts.token) { - const allowance: bigint = await this.contracts.token.allowance(this.signer.address, nftAddress); + const allowance: bigint = await this.contractReadWithFailover( + 'token.allowance', this.contracts.token, (c) => c.allowance(this.signer.address, nftAddress), + ); if (allowance < amount) { await this.sendContractTransaction( this.contracts.token, @@ -358,7 +372,9 @@ export class ConvictionMethods extends EVMChainAdapterBase implements Conviction if (!this.contracts.dkgPublishingConvictionNFT) return false; if (!ethers.isAddress(agent)) return false; try { - return Boolean(await this.contracts.dkgPublishingConvictionNFT.isAgent(accountId, agent)); + return Boolean(await this.contractReadWithFailover( + 'pcaNFT.isAgent', this.contracts.dkgPublishingConvictionNFT, (c) => c.isAgent(accountId, agent), + )); } catch 
(err: any) { if (err?.code === 'CALL_EXCEPTION') return false; throw err; diff --git a/packages/chain/src/evm-adapter-events.ts b/packages/chain/src/evm-adapter-events.ts index 369175373..fd64ad4a7 100644 --- a/packages/chain/src/evm-adapter-events.ts +++ b/packages/chain/src/evm-adapter-events.ts @@ -12,12 +12,40 @@ import { EVMChainAdapterBase } from './evm-adapter-base.js'; import { ethers } from 'ethers'; import type { EventFilter, ChainEvent } from './chain-adapter.js'; +import { RPC_LOG_SCAN_TIMEOUT_MS } from './evm-adapter-constants.js'; + +// Wide `eth_getLogs` reads (over `[fromBlock ?? 0, toBlock]`, up to the poller's +// 9,000-block window / a cold-start full backfill) legitimately exceed the 4s +// point-read cap, so they fail over only after RPC_LOG_SCAN_TIMEOUT_MS on a +// multi-RPC node (single-RPC stays uncapped, #894). Without this a slow-but- +// healthy scan would abort, exhaust every endpoint, and stall the event poller. +const LOG_SCAN_OPTS = { multiAttemptTimeoutMs: RPC_LOG_SCAN_TIMEOUT_MS } as const; export class EventsMethods extends EVMChainAdapterBase { // ===================================================================== // Events // ===================================================================== + /** + * A WIDE `eth_getLogs` scan with read-failover, baking in `LOG_SCAN_OPTS` so + * the wide-log multi-RPC timeout is owned HERE once (not by per-call-site + * discipline). Used by every `listenForEvents` branch below. 
+ */ + private queryFilterWithFailover( + contract: ethers.Contract, + label: string, + eventFilter: ethers.ContractEventName, + fromBlock: ethers.BlockTag, + toBlock?: ethers.BlockTag, + ): Promise<(ethers.Log | ethers.EventLog)[]> { + return this.contractReadWithFailover( + label, + contract, + (c) => c.queryFilter(eventFilter, fromBlock, toBlock), + LOG_SCAN_OPTS, + ); + } + async *listenForEvents(filter: EventFilter): AsyncIterable { await this.init(); @@ -31,7 +59,9 @@ export class EventsMethods extends EVMChainAdapterBase { continue; } const eventFilter = storage.filters.KnowledgeBatchCreated(); - const logs = await storage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.queryFilterWithFailover( + storage, 'kasV9.queryFilter(KnowledgeBatchCreated)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, + ); for (const log of logs) { const parsed = storage.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -58,7 +88,9 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.ContextGraphExpanded(); - const logs = await cgStorage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.queryFilterWithFailover( + cgStorage, 'cgStorage.queryFilter(ContextGraphExpanded)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, + ); for (const log of logs) { const parsed = cgStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -86,7 +118,9 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.KnowledgeAssetRegisteredToContextGraph(); - const logs = await cgStorage.queryFilter(eventFilter, filter.fromBlock ?? 
0, filter.toBlock); + const logs = await this.queryFilterWithFailover( + cgStorage, 'cgStorage.queryFilter(KnowledgeAssetRegisteredToContextGraph)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, + ); for (const log of logs) { const parsed = cgStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); @@ -133,7 +167,9 @@ export class EventsMethods extends EVMChainAdapterBase { : 'KnowledgeAssetCreated'; const kcFilter = kaStorage.filters[createEventName](); - const kcLogs = await kaStorage.queryFilter(kcFilter, fromB, toB); + const kcLogs = await this.queryFilterWithFailover( + kaStorage, 'kas.queryFilter(KnowledgeAssetCreated)', kcFilter, fromB, toB, + ); // Legacy mint range. `KnowledgeAssetsMinted` is still declared on the // greenfield ABI but never emitted by `createKnowledgeAsset`, so @@ -142,7 +178,9 @@ export class EventsMethods extends EVMChainAdapterBase { const mintByTx = new Map(); if (hasEvent('KnowledgeAssetsMinted')) { const mintFilter = kaStorage.filters.KnowledgeAssetsMinted(); - const mintLogs = await kaStorage.queryFilter(mintFilter, fromB, toB); + const mintLogs = await this.queryFilterWithFailover( + kaStorage, 'kas.queryFilter(KnowledgeAssetsMinted)', mintFilter, fromB, toB, + ); for (const ml of mintLogs) { const mp = kaStorage.interface.parseLog({ topics: [...ml.topics], data: ml.data }); if (mp) { @@ -164,7 +202,9 @@ export class EventsMethods extends EVMChainAdapterBase { if (isGreenfield) { try { const transferFilter = kaStorage.filters.Transfer(ethers.ZeroAddress); - const transferLogs = await kaStorage.queryFilter(transferFilter, fromB, toB); + const transferLogs = await this.queryFilterWithFailover( + kaStorage, 'kas.queryFilter(Transfer)', transferFilter, fromB, toB, + ); for (const tl of transferLogs) { const tp = kaStorage.interface.parseLog({ topics: [...tl.topics], data: tl.data }); if (tp && tp.args.tokenId != null) { @@ -223,7 +263,9 @@ export class EventsMethods extends EVMChainAdapterBase { const registry = 
this.contracts.contextGraphNameRegistry; if (registry) { const eventFilter = registry.filters.NameClaimed(); - const logs = await registry.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.queryFilterWithFailover( + registry, 'cgNameRegistry.queryFilter(NameClaimed)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, + ); for (const log of logs) { const parsed = registry.interface.parseLog({ topics: [...log.topics], data: log.data }); if (parsed) { @@ -246,7 +288,9 @@ export class EventsMethods extends EVMChainAdapterBase { const cgStorage = this.contracts.contextGraphStorage; if (cgStorage) { const eventFilter = cgStorage.filters.ContextGraphCreated(); - const logs = await cgStorage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.queryFilterWithFailover( + cgStorage, 'cgStorage.queryFilter(ContextGraphCreated)', eventFilter, filter.fromBlock ?? 0, filter.toBlock, + ); for (const log of logs) { const parsed = cgStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); if (parsed) { @@ -279,7 +323,9 @@ export class EventsMethods extends EVMChainAdapterBase { const profileStorage = this.contracts.profileStorage; if (profileStorage) { const eventFilter = profileStorage.filters.RelayCapabilityUpdated(); - const logs = await profileStorage.queryFilter(eventFilter, filter.fromBlock ?? 0, filter.toBlock); + const logs = await this.queryFilterWithFailover( + profileStorage, 'profileStorage.queryFilter(RelayCapabilityUpdated)', eventFilter, filter.fromBlock ?? 
0, filter.toBlock, + ); for (const log of logs) { const parsed = profileStorage.interface.parseLog({ topics: [...log.topics], data: log.data }); if (parsed) { diff --git a/packages/chain/src/evm-adapter-identity.ts b/packages/chain/src/evm-adapter-identity.ts index 8cda8d792..6ce81d63d 100644 --- a/packages/chain/src/evm-adapter-identity.ts +++ b/packages/chain/src/evm-adapter-identity.ts @@ -45,7 +45,9 @@ export class IdentityMethods extends EVMChainAdapterBase { } const onChainIds = await Promise.all( - uniqueAddresses.map((addr) => identityStorage.getIdentityId(addr).then(BigInt)), + uniqueAddresses.map((addr) => this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(addr), + ).then(BigInt)), ); const missing: string[] = []; for (let i = 0; i < uniqueAddresses.length; i++) { @@ -103,7 +105,9 @@ export class IdentityMethods extends EVMChainAdapterBase { if (!this.contracts.profileStorage) { throw new Error('getRelayCapable: ProfileStorage not deployed on this Hub.'); } - return Boolean(await this.contracts.profileStorage.getRelayCapable(identityId)); + return Boolean(await this.contractReadWithFailover( + 'profileStorage.getRelayCapable', this.contracts.profileStorage, (c) => c.getRelayCapable(identityId), + )); } async setRelayCapable(relayCapable: boolean): Promise { @@ -144,7 +148,9 @@ export class IdentityMethods extends EVMChainAdapterBase { if (cached !== undefined) return cached; await this.init(); const identityStorage = await this.resolveContract('IdentityStorage'); - const id: bigint = await identityStorage.getIdentityId(checksum); + const id: bigint = await this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(checksum), + ); if (id > 0n) { // Only memoise positive hits — a 0n result may flip to non-zero // once the operator registers, and we don't want to lock the @@ -215,7 +221,9 @@ export class IdentityMethods extends EVMChainAdapterBase { if 
(stakeAmount > 0n && this.contracts.token) { try { const stakingNFT = await this.resolveContract('DKGStakingConvictionNFT'); - const stakingV10Addr: string = await this.contracts.hub.getContractAddress('StakingV10'); + const stakingV10Addr: string = await this.contractReadWithFailover( + 'Hub.getContractAddress(StakingV10)', this.contracts.hub, (c) => c.getContractAddress('StakingV10'), + ); if (stakingV10Addr === ethers.ZeroAddress) { throw new Error('StakingV10 not registered in Hub — V10 staking unavailable'); } diff --git a/packages/chain/src/evm-adapter-publish.ts b/packages/chain/src/evm-adapter-publish.ts index 118638728..56823ce1d 100644 --- a/packages/chain/src/evm-adapter-publish.ts +++ b/packages/chain/src/evm-adapter-publish.ts @@ -61,7 +61,9 @@ export class PublishMethods extends EVMChainAdapterBase { const kaAddress = await ka.getAddress(); if (this.contracts.token && params.tokenAmount > 0n) { - const currentAllowance: bigint = await this.contracts.token.allowance(this.signer.address, kaAddress); + const currentAllowance: bigint = await this.contractReadWithFailover( + 'token.allowance', this.contracts.token, (c) => c.allowance(this.signer.address, kaAddress), + ); if (currentAllowance < params.tokenAmount) { await this.sendContractTransaction( this.contracts.token, @@ -180,12 +182,18 @@ export class PublishMethods extends EVMChainAdapterBase { let onChainPublisher: string | undefined; if (this.contracts.knowledgeAssetStorage) { try { - onChainPublisher = await this.contracts.knowledgeAssetStorage.getLatestMerkleRootPublisher(batchId); + onChainPublisher = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootPublisher', this.contracts.knowledgeAssetStorage, + (c) => c.getLatestMerkleRootPublisher(batchId), + ); } catch { /* not found in V10 storage */ } } if ((!onChainPublisher || onChainPublisher === ethers.ZeroAddress) && this.contracts.knowledgeAssetsStorage) { try { - onChainPublisher = await 
this.contracts.knowledgeAssetsStorage.getBatchPublisher(batchId); + onChainPublisher = await this.contractReadWithFailover( + 'kasV9.getBatchPublisher', this.contracts.knowledgeAssetsStorage, + (c) => c.getBatchPublisher(batchId), + ); } catch { /* not found in V9 storage */ } } if (!onChainPublisher || onChainPublisher.toLowerCase() !== publisherAddress.toLowerCase()) { @@ -208,7 +216,9 @@ export class PublishMethods extends EVMChainAdapterBase { if (!this.contracts.askStorage) { throw new Error('AskStorage not available'); } - const ask = await this.contracts.askStorage.getStakeWeightedAverageAsk(); + const ask = await this.contractReadWithFailover( + 'askStorage.getStakeWeightedAverageAsk', this.contracts.askStorage, (c) => c.getStakeWeightedAverageAsk(), + ); return (BigInt(ask) * publicByteSize * BigInt(epochs)) / 1024n; } @@ -225,9 +235,13 @@ export class PublishMethods extends EVMChainAdapterBase { if (!this.contracts.knowledgeAssetsStorage) return false; const storage = this.contracts.knowledgeAssetsStorage; - const count = await storage.getPublisherRangesCount(publisherAddress); + const count = await this.contractReadWithFailover( + 'kasV9.getPublisherRangesCount', storage, (c) => c.getPublisherRangesCount(publisherAddress), + ); for (let i = 0; i < Number(count); i++) { - const [startId, endId] = await storage.getPublisherRange(publisherAddress, i); + const [startId, endId] = await this.contractReadWithFailover( + 'kasV9.getPublisherRange', storage, (c) => c.getPublisherRange(publisherAddress, i), + ); if (startId <= startKAId && endId >= endKAId) return true; } return false; @@ -394,7 +408,9 @@ export class PublishMethods extends EVMChainAdapterBase { const kas = this.contracts.knowledgeAssetStorage; if (!kas) return 0n; try { - return BigInt(await kas.getTokenAmount(kaId)); + return BigInt(await this.contractReadWithFailover( + 'kas.getTokenAmount', kas, (c) => c.getTokenAmount(kaId), + )); } catch { // KA not yet in storage (would fail later on-chain 
anyway) — return 0. return 0n; @@ -412,7 +428,9 @@ export class PublishMethods extends EVMChainAdapterBase { let endEpoch = 0n; if (kas) { try { - const ctx = await kas.getKnowledgeAssetUpdateContext(params.kaId); + const ctx = await this.contractReadWithFailover( + 'kas.getKnowledgeAssetUpdateContext', kas, (c) => c.getKnowledgeAssetUpdateContext(params.kaId), + ); // Tuple shape from `DKGKnowledgeAssets.getKnowledgeAssetUpdateContext`: // (preUpdateMerkleRootCount, minted, byteSize, endEpoch, tokenAmount, isImmutable, preUpdateMerkleLeafCount) currentByteSize = BigInt(ctx[2]); @@ -433,7 +451,9 @@ export class PublishMethods extends EVMChainAdapterBase { } if (this.contracts.chronos) { try { - currentEpoch = BigInt(await this.contracts.chronos.getCurrentEpoch()); + currentEpoch = BigInt(await this.contractReadWithFailover( + 'chronos.getCurrentEpoch', this.contracts.chronos, (c) => c.getCurrentEpoch(), + )); } catch (err) { throw new Error( `Failed to read Chronos currentEpoch for update tokenAmount sizing: ${(err as Error).message}`, @@ -445,7 +465,9 @@ export class PublishMethods extends EVMChainAdapterBase { let growthCost = 0n; if (params.newByteSize > currentByteSize && this.contracts.askStorage) { try { - const ask = BigInt(await this.contracts.askStorage.getStakeWeightedAverageAsk()); + const ask = BigInt(await this.contractReadWithFailover( + 'askStorage.getStakeWeightedAverageAsk', this.contracts.askStorage, (c) => c.getStakeWeightedAverageAsk(), + )); const byteSizeGrowth = params.newByteSize - currentByteSize; if (remainingEpochs > 0n) { growthCost = (ask * byteSizeGrowth * remainingEpochs) / 1024n; @@ -497,7 +519,7 @@ export class PublishMethods extends EVMChainAdapterBase { const kas = this.contracts.knowledgeAssetStorage; const kav10Address = await this.contracts.knowledgeAssetsLifecycle.getAddress(); - const evmChainId = BigInt((await this.provider.getNetwork()).chainId); + const evmChainId = BigInt((await this.readWithFailover('getNetwork 
(chainId)', (p) => p.getNetwork())).chainId); const currentTokenAmount = await this.resolveCurrentTokenAmount(params.kaId); @@ -517,7 +539,10 @@ export class PublishMethods extends EVMChainAdapterBase { if (this.contracts.contextGraphStorage) { try { contextGraphId = BigInt( - await this.contracts.contextGraphStorage.kaToContextGraph(params.kaId), + await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', this.contracts.contextGraphStorage, + (c) => c.kaToContextGraph(params.kaId), + ), ); } catch { /* use 0 */ } } @@ -525,7 +550,9 @@ export class PublishMethods extends EVMChainAdapterBase { let preUpdateMerkleRootCount = 0n; if (kas) { try { - const roots: unknown[] = await kas.getMerkleRoots(params.kaId); + const roots: unknown[] = await this.contractReadWithFailover( + 'kas.getMerkleRoots', kas, (c) => c.getMerkleRoots(params.kaId), + ); preUpdateMerkleRootCount = BigInt(roots.length); } catch { /* use 0 */ } } @@ -584,7 +611,10 @@ export class PublishMethods extends EVMChainAdapterBase { if (this.contracts.contextGraphStorage) { try { contextGraphId = BigInt( - await this.contracts.contextGraphStorage.kaToContextGraph(params.kaId), + await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', this.contracts.contextGraphStorage, + (c) => c.kaToContextGraph(params.kaId), + ), ); } catch { /* use 0 */ } } @@ -592,7 +622,9 @@ export class PublishMethods extends EVMChainAdapterBase { let preUpdateMerkleRootCount = 0n; if (kas) { try { - const roots: unknown[] = await kas.getMerkleRoots(params.kaId); + const roots: unknown[] = await this.contractReadWithFailover( + 'kas.getMerkleRoots', kas, (c) => c.getMerkleRoots(params.kaId), + ); preUpdateMerkleRootCount = BigInt(roots.length); } catch { /* use 0 */ } } @@ -627,7 +659,9 @@ export class PublishMethods extends EVMChainAdapterBase { const kas = this.contracts.knowledgeAssetStorage; if (kas) { try { - const onChainPublisher: string = await kas.getLatestMerkleRootPublisher(params.kaId); + const 
onChainPublisher: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootPublisher', kas, (c) => c.getLatestMerkleRootPublisher(params.kaId), + ); if (onChainPublisher && onChainPublisher !== ethers.ZeroAddress) { signer = this.signerPool.find( (s) => s.address.toLowerCase() === onChainPublisher.toLowerCase(), @@ -648,7 +682,7 @@ export class PublishMethods extends EVMChainAdapterBase { const ka = this.contracts.knowledgeAssetsLifecycle.connect(signer) as Contract; const kav10Address = await this.contracts.knowledgeAssetsLifecycle.getAddress(); - const evmChainId = (await this.provider.getNetwork()).chainId; + const evmChainId = (await this.readWithFailover('getNetwork (chainId)', (p) => p.getNetwork())).chainId; const identityId = params.publisherNodeIdentityId ?? await this.getIdentityId(); @@ -689,7 +723,9 @@ export class PublishMethods extends EVMChainAdapterBase { let contextGraphId = 0n; if (contextGraphStorage) { try { - contextGraphId = BigInt(await contextGraphStorage.kaToContextGraph(params.kaId)); + contextGraphId = BigInt(await this.contractReadWithFailover( + 'cgStorage.kaToContextGraph', contextGraphStorage, (c) => c.kaToContextGraph(params.kaId), + )); } catch { /* use 0 */ } } @@ -697,7 +733,9 @@ export class PublishMethods extends EVMChainAdapterBase { let preUpdateMerkleRootCount = 0n; if (kas) { try { - const roots: unknown[] = await kas.getMerkleRoots(params.kaId); + const roots: unknown[] = await this.contractReadWithFailover( + 'kas.getMerkleRoots', kas, (c) => c.getMerkleRoots(params.kaId), + ); preUpdateMerkleRootCount = BigInt(roots.length); } catch { /* use 0 */ } } diff --git a/packages/chain/src/evm-adapter-random-sampling.ts b/packages/chain/src/evm-adapter-random-sampling.ts index 36820dd72..8cd6814cf 100644 --- a/packages/chain/src/evm-adapter-random-sampling.ts +++ b/packages/chain/src/evm-adapter-random-sampling.ts @@ -75,7 +75,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { await this.init(); 
const identityStorage = await this.getIdentityStorage(); - const identityId: bigint = await identityStorage.getIdentityId(this.signer.address); + const identityId: bigint = await this.contractReadWithFailover( + 'identityStorage.getIdentityId', identityStorage, (c) => c.getIdentityId(this.signer.address), + ); return this.withHubStaleRetry(async () => { const { rs, rss } = await this.getRandomSampling(); @@ -126,7 +128,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { ); } - const challengeRaw = await rss.getNodeChallenge(identityId); + const challengeRaw = await this.contractReadWithFailover( + 'rss.getNodeChallenge', rss, (c) => c.getNodeChallenge(identityId), + ); const challenge = this.toNodeChallenge(challengeRaw); if (!challenge) { throw new Error( @@ -282,7 +286,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { async getNodeChallenge(identityId: bigint): Promise { await this.init(); const { rss } = await this.getRandomSampling(); - const raw = await rss.getNodeChallenge(identityId); + const raw = await this.contractReadWithFailover( + 'rss.getNodeChallenge', rss, (c) => c.getNodeChallenge(identityId), + ); return this.toNodeChallenge(raw); } @@ -293,7 +299,9 @@ export class RandomSamplingMethods extends EVMChainAdapterBase { ): Promise { await this.init(); const { rss } = await this.getRandomSampling(); - const score: bigint = await rss.getNodeEpochProofPeriodScore(identityId, epoch, periodStartBlock); + const score: bigint = await this.contractReadWithFailover( + 'rss.getNodeEpochProofPeriodScore', rss, (c) => c.getNodeEpochProofPeriodScore(identityId, epoch, periodStartBlock), + ); return BigInt(score); } } diff --git a/packages/chain/src/evm-adapter-rpc.ts b/packages/chain/src/evm-adapter-rpc.ts index ccabe3eff..3132f56b5 100644 --- a/packages/chain/src/evm-adapter-rpc.ts +++ b/packages/chain/src/evm-adapter-rpc.ts @@ -69,16 +69,34 @@ export function resolveRpcUrls(rpcUrl: string, rpcUrls?: string[]): string[] { } 
/** - * Build a `FetchRequest` whose retry loop gives up after - * `RPC_REQUEST_MAX_RETRIES` retries. A bare string URL would use ethers' - * unbounded default; we install a bounded `retryFunc` instead. The bound is - * evaluated from `attempt` (per-request), so every request — no matter how - * long the node has been running — gets the same fresh retry budget. + * Build a `FetchRequest` whose retry loop gives up after `maxRetries` retries. + * A bare string URL would use ethers' unbounded default; we install a bounded + * `retryFunc` instead. The bound is evaluated from `attempt` (per-request), so + * every request — no matter how long the node has been running — gets the same + * fresh retry budget. + * + * `maxRetries` is chosen by the adapter from the configured endpoint count + * (`evm-adapter-base.ts` constructor): + * - **Multi-RPC (≥2 endpoints): `0`** — the FIRST retryable failure (429/5xx/ + * network) propagates immediately so the adapter's explicit per-provider + * read/write failover loops advance to the NEXT endpoint at once, instead of + * burning ~7.5s of same-endpoint backoff on an endpoint we already know is + * failing. With ≥2 endpoints the failover loop IS the resilience. + * - **Single-RPC: `RPC_REQUEST_MAX_RETRIES` (5)** — unchanged. There is + * nowhere to fail over to, so the bounded same-endpoint retry is the only + * resilience and rides out a transient blip while still surfacing a + * perpetual error as a bounded `RPC_ENDPOINTS_EXHAUSTED`→503 (#894). + * + * `maxRetries = 0` makes `retryFunc` return `false` on attempt 0 with NO sleep, + * so the failure surfaces synchronously to the failover loop. 
*/ -export function boundedRetryFetchRequest(url: string): FetchRequest { +export function boundedRetryFetchRequest( + url: string, + maxRetries: number = RPC_REQUEST_MAX_RETRIES, +): FetchRequest { const req = new FetchRequest(url); req.retryFunc = async (_req, _response, attempt) => { - if (attempt >= RPC_REQUEST_MAX_RETRIES) return false; + if (attempt >= maxRetries) return false; await sleep(Math.min(500 * (attempt + 1), RPC_REQUEST_RETRY_BACKOFF_CAP_MS)); return true; }; diff --git a/packages/chain/src/evm-adapter-storage-reads.ts b/packages/chain/src/evm-adapter-storage-reads.ts index e8cd77fe0..d4135af37 100644 --- a/packages/chain/src/evm-adapter-storage-reads.ts +++ b/packages/chain/src/evm-adapter-storage-reads.ts @@ -31,42 +31,54 @@ export class StorageReadMethods extends EVMChainAdapterBase { async getLatestMerkleRoot(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const rootHex: string = await kas.getLatestMerkleRoot(kaId); + const rootHex: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRoot', kas, (c) => c.getLatestMerkleRoot(kaId), + ); return ethers.getBytes(rootHex); } async getMerkleLeafCount(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const count: bigint = BigInt(await kas.getMerkleLeafCount(kaId)); + const count: bigint = BigInt(await this.contractReadWithFailover( + 'kas.getMerkleLeafCount', kas, (c) => c.getMerkleLeafCount(kaId), + )); return Number(count); } async getCatalogRoot(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const rootHex: string = await kas.getCatalogRoot(kaId); + const rootHex: string = await this.contractReadWithFailover( + 'kas.getCatalogRoot', kas, (c) => c.getCatalogRoot(kaId), + ); return ethers.getBytes(rootHex); } async getCatalogLeafCount(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const count: bigint = BigInt(await kas.getCatalogLeafCount(kaId)); + 
const count: bigint = BigInt(await this.contractReadWithFailover( + 'kas.getCatalogLeafCount', kas, (c) => c.getCatalogLeafCount(kaId), + )); return Number(count); } async getLatestMerkleRootPublisher(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const publisher: string = await kas.getLatestMerkleRootPublisher(kaId); + const publisher: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootPublisher', kas, (c) => c.getLatestMerkleRootPublisher(kaId), + ); return publisher; } async getLatestMerkleRootAuthor(kaId: bigint): Promise { await this.init(); const kas = this.requireKCStorage(); - const author: string = await kas.getLatestMerkleRootAuthor(kaId); + const author: string = await this.contractReadWithFailover( + 'kas.getLatestMerkleRootAuthor', kas, (c) => c.getLatestMerkleRootAuthor(kaId), + ); return author; } } diff --git a/packages/chain/src/rpc-failover-log.ts b/packages/chain/src/rpc-failover-log.ts index f3ac854c5..9e4f0464c 100644 --- a/packages/chain/src/rpc-failover-log.ts +++ b/packages/chain/src/rpc-failover-log.ts @@ -1,9 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 /** - * Observability for multi-RPC WRITE failover. Records + logs the per-provider - * failovers in the EVM adapter's write loops (and the CLI stack in - * `cli-rpc.ts`), under three invariants the callers depend on: + * Observability for multi-RPC failover. Records + logs the per-provider + * failovers in BOTH the EVM adapter's write loops AND its read-failover loop + * (`readWithFailover` / `contractReadWithFailover` / the V10 populate loop), + * plus the CLI stack in `cli-rpc.ts`, under three invariants the callers + * depend on: * * 1. 
Dedup-gated logging — at most one line per `host|errorClass` per * `DEFAULT_DEDUP_WINDOW_MS` (5 min) with a suppressed-count rollup, so a @@ -17,8 +19,10 @@ * * Counters are a PROCESS-WIDE singleton: the daemon builds one adapter per * agent + per publisher wallet, and `/api/status` reads the aggregate via a - * direct getter import. Scope is WRITE failover only — read failover is - * internal to the ethers FallbackProvider. + * direct getter import. Scope covers BOTH read and write failover — reads route + * through the adapter's explicit `readWithFailover` loop (the ethers + * `FallbackProvider` was removed; see `evm-adapter-base.ts`), so both halves + * funnel their per-hop failovers and exhaustions through this module. */ import { errorCode, errorMessage, errorStatus } from './evm-adapter-errors.js'; diff --git a/packages/chain/test/bounded-retry-fetch.test.ts b/packages/chain/test/bounded-retry-fetch.test.ts new file mode 100644 index 000000000..b6205781c --- /dev/null +++ b/packages/chain/test/bounded-retry-fetch.test.ts @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * T2 (immediate-RPC-failover): the per-endpoint retry budget contract. + * + * `boundedRetryFetchRequest` installs the `FetchRequest.retryFunc` that decides + * whether ethers retries a 429/5xx on the SAME endpoint. The whole + * immediate-failover change hinges on making that count configurable: + * - multi-RPC => maxRetries = 0 => give up on the first failure so the + * adapter's own failover advances to the next endpoint AT ONCE; + * - single-RPC => maxRetries = RPC_REQUEST_MAX_RETRIES (5) => bounded retry + * preserved (there is nowhere to fail over to — the #894 resilience). + * + * The existing ~170 adapter tests inject bare provider mocks and never touch + * this retryFunc at all, so the retry-COUNT contract is otherwise unproven. 
+ * This drives the real `FetchRequest.retryFunc` directly — no network, no + * native deps — with fake timers so the backoff `sleep` is instant. + * + * The first block locks the single-arg (default-budget) behaviour. The second + * block pins the parametrised `(url, maxRetries)` API, which has landed: `0` + * gives up on the first failure (multi-RPC) and `5` keeps the #894 budget + * (single-RPC). Both blocks run live — neither is skipped. + */ +import { describe, it, expect, vi, afterEach } from 'vitest'; +import type { FetchResponse } from 'ethers'; +import { boundedRetryFetchRequest } from '../src/evm-adapter-rpc.js'; + +const URL = 'https://rpc.example/key'; +// retryFunc ignores req/response; a bare stub satisfies the signature. +const RESP = undefined as unknown as FetchResponse; + +async function decideWithFakeTimers( + fn: NonNullable['retryFunc']>, + req: ReturnType, + attempt: number, +): Promise { + // The retry DECISION resolves only after the internal backoff sleep; drive + // fake timers so a "retry=true" verdict returns without real wall time. + const pending = fn(req, RESP, attempt); + await vi.advanceTimersByTimeAsync(2_000); + return pending; +} + +describe('boundedRetryFetchRequest — current single-arg retry budget (#894)', () => { + afterEach(() => vi.useRealTimers()); + + it('retries through attempt 4 and gives up at attempt 5 (RPC_REQUEST_MAX_RETRIES)', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL); + const retryFunc = req.retryFunc!; + expect(typeof retryFunc).toBe('function'); + + // attempts 0..4 → retry (true), after a bounded backoff sleep. + expect(await decideWithFakeTimers(retryFunc, req, 0)).toBe(true); + expect(await decideWithFakeTimers(retryFunc, req, 4)).toBe(true); + + // attempt 5 → give up (false), and WITHOUT sleeping (no timer advance). + expect(await retryFunc(req, RESP, 5)).toBe(false); + // ...still false past the cap. 
+ expect(await retryFunc(req, RESP, 9)).toBe(false); + }); + + it('the give-up verdict at the cap is synchronous (no backoff sleep before failing over)', async () => { + // A single-RPC node must not add a stray sleep to its terminal give-up; + // the false at the cap resolves with no pending timer. + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL); + const verdict = req.retryFunc!(req, RESP, 5); + // No timer advance — if the impl slept here this would still be pending. + await expect(verdict).resolves.toBe(false); + }); +}); + +// The parametrised `(url, maxRetries)` contract — the core of immediate failover: +// maxRetries=0 fails over on the FIRST failure (multi-RPC), maxRetries=5 keeps +// the #894 budget (single-RPC). ChainEngineer landed the 2-arg signature +// (default = RPC_REQUEST_MAX_RETRIES, so single-arg callers are unchanged), so +// this runs live. +describe('boundedRetryFetchRequest(url, maxRetries) — parametrised failover budget', () => { + afterEach(() => vi.useRealTimers()); + + it('maxRetries=0 → gives up immediately at attempt 0 (multi-RPC: fail over at once, no backoff)', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL, 0); + // attempt 0 must be false AND synchronous — no per-endpoint backoff sleep. + // (If the impl slept here, this would stay pending with no timer advanced.) 
+ await expect(req.retryFunc!(req, RESP, 0)).resolves.toBe(false); + }); + + it('maxRetries=5 → preserves the #894 budget (true through 4, false at 5)', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL, 5); + expect(await decideWithFakeTimers(req.retryFunc!, req, 0)).toBe(true); + expect(await decideWithFakeTimers(req.retryFunc!, req, 4)).toBe(true); + expect(await req.retryFunc!(req, RESP, 5)).toBe(false); + }); + + it('default maxRetries (no 2nd arg) equals the single-RPC #894 budget of 5', async () => { + vi.useFakeTimers(); + const req = boundedRetryFetchRequest(URL); // default = RPC_REQUEST_MAX_RETRIES + expect(await decideWithFakeTimers(req.retryFunc!, req, 4)).toBe(true); + expect(await req.retryFunc!(req, RESP, 5)).toBe(false); + }); +}); diff --git a/packages/chain/test/connectable.ts b/packages/chain/test/connectable.ts new file mode 100644 index 000000000..ebed7dc9b --- /dev/null +++ b/packages/chain/test/connectable.ts @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Shared test helper (NOT a test file — no `.test.` suffix, so vitest skips it). + * + * The round-2 review split `withRunner` into `rebindContract` (contract.connect(p)) + * and `rebindSigner` (signer.connect(provider)) and removed the no-`.connect` + * test-double fallback, so production handles MUST be `.connect`-able (they are — + * real ethers `Contract`s). Unit tests, however, mock `this.contracts.*` as plain + * method objects. `connectable` makes such a mock satisfy that boundary with a + * single-provider NO-OP self-rebind (`connect` returns the mock itself), so the + * REAL `rebindContract`/`rebindSigner` runs against it — a genuine rebind + * regression is still caught. Idempotent: a mock that already carries a MEANINGFUL + * `.connect` (e.g. `token.connect(signer) → tokenWithSigner`) is left untouched. 
+ */ +export function connectable(mock: T): T { + const m = mock as { connect?: unknown }; + if (m && typeof m.connect !== 'function') m.connect = () => mock; + return mock; +} diff --git a/packages/chain/test/evm-adapter.unit.test.ts b/packages/chain/test/evm-adapter.unit.test.ts index 235b954c6..312efde10 100644 --- a/packages/chain/test/evm-adapter.unit.test.ts +++ b/packages/chain/test/evm-adapter.unit.test.ts @@ -27,6 +27,7 @@ import { } from '../src/chain-adapter.js'; import { _resetRpcFailoverStatsForTest } from '../src/rpc-failover-log.js'; import { isChainRpcTransportError } from '../src/chain-rpc-transport-error.js'; +import { connectable } from './connectable.js'; // Isolate the process-wide RPC failover stats + dedup window before EVERY test // so a failover/exhaustion warning emitted by one test can't suppress (via the @@ -36,6 +37,7 @@ beforeEach(() => { _resetRpcFailoverStatsForTest(); }); + const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; const OTHER_PK = '0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b63b91100'; const ADMIN_PK = '0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a'; @@ -204,7 +206,6 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig()); expect(a.getProvider()).toBeDefined(); expect(typeof a.getProvider().getBlockNumber).toBe('function'); - expect(a.getReadProvider()).toBeDefined(); }); it('issues un-batched JSON-RPC requests (batchMaxCount=1) so a rate-limited read rejects on its own awaited promise — issue #939', async () => { @@ -266,10 +267,10 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { publishAuthorityAccountId: 0n, })); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { + (a as any).contracts.contextGraphStorage = connectable({ getAccessPolicy, getContextGraph, - }; + }); await 
expect(a.getContextGraphAccessPolicy(6n)).resolves.toBe(1); expect(getAccessPolicy.calls.at(-1)).toEqual([6n]); @@ -279,7 +280,7 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { it('parses accessPolicy from tuple fallback results', async () => { const a = new EVMChainAdapter(minimalConfig()); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { + (a as any).contracts.contextGraphStorage = connectable({ getAccessPolicy: recorder(async () => { throw new Error('selector unavailable'); }), @@ -294,7 +295,7 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { ethers.ZeroAddress, 0n, ]), - }; + }); await expect(a.getContextGraphAccessPolicy(7n)).resolves.toBe(0); }); @@ -303,11 +304,11 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig()); const rpcError = new Error('rpc unavailable'); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { + (a as any).contracts.contextGraphStorage = connectable({ isContextGraphActive: recorder(async () => { throw rpcError; }), - }; + }); await expect(a.isContextGraphActiveOnChain(8n)).rejects.toThrow('rpc unavailable'); }); @@ -316,7 +317,7 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig()); const isContextGraphActive = recorder(async (_id: bigint) => false); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphStorage = { isContextGraphActive }; + (a as any).contracts.contextGraphStorage = connectable({ isContextGraphActive }); await expect(a.isContextGraphActiveOnChain(9n)).resolves.toBe(false); expect(isContextGraphActive.calls.at(-1)).toEqual([9n]); @@ -339,11 +340,11 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { if (name === 'Token') throw new Error('Hub.Token should not be resolved when tokenAddress is configured'); return contractAddress; }); - (a 
as any).contracts.hub = { + (a as any).contracts.hub = connectable({ getContractAddress, getAssetStorageAddress: recorder(async () => assetStorageAddress), on: recorder(async () => undefined), - }; + }); await (a as any).init(); @@ -1027,12 +1028,12 @@ describe('EVMChainAdapter constructor / getters (no init)', () => { const a = new EVMChainAdapter(minimalConfig({ additionalKeys: [OTHER_PK] })); const [firstAddress, secondAddress] = a.getSignerAddresses(); (a as any).init = async () => undefined; - (a as any).contracts.contextGraphs = { + (a as any).contracts.contextGraphs = connectable({ isAuthorizedPublisher: recorder(async () => { await Promise.resolve(); return true; }), - }; + }); const [firstReserved, secondReserved] = await Promise.all([ a.getAuthorizedPublisherAddress(1n), @@ -1625,9 +1626,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { it('getEvmChainId issues exactly one provider.getNetwork call across repeat reads', async () => { const a = new EVMChainAdapter(minimalConfig()); const getNetwork = recorder(async () => ({ chainId: 31337n })); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. 
+ (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; expect(await a.getEvmChainId()).toBe(31337n); expect(await a.getEvmChainId()).toBe(31337n); @@ -1653,9 +1658,9 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { const minimumRequiredSignatures = recorder(async () => 3n); (a as unknown as { init: () => Promise }).init = async () => undefined; (a as unknown as { contracts: { parametersStorage: { minimumRequiredSignatures: () => Promise } } }).contracts = { - parametersStorage: { + parametersStorage: connectable({ minimumRequiredSignatures: minimumRequiredSignatures as unknown as () => Promise, - }, + }), }; expect(await a.getMinimumRequiredSignatures()).toBe(3); @@ -1669,9 +1674,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { const a = new EVMChainAdapter(minimalConfig()); let returned = 31337n; const getNetwork = recorder(async () => ({ chainId: returned })); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. 
+ (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; expect(await a.getEvmChainId()).toBe(31337n); expect(getNetwork.calls).toHaveLength(1); @@ -1689,9 +1698,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { it('invalidatePublishPreflightCache forces a fresh read on next call', async () => { const a = new EVMChainAdapter(minimalConfig()); const getNetwork = recorder(async () => ({ chainId: 31337n })); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. + (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; await a.getEvmChainId(); await a.getEvmChainId(); @@ -1709,9 +1722,13 @@ describe('PR3 / RC11 — publish-preflight TTL cache', () => { if (attempts === 1) throw new Error('rate limited'); return { chainId: 31337n }; }); - (a as unknown as { provider: { getNetwork: () => Promise<{ chainId: bigint }> } }).provider = { + // R1: getEvmChainId now reads via readWithFailover over this.providers[] + // (was this.provider.getNetwork). Mock this.providers[0]; the TTL-cache / + // dedup / no-cache-on-failure behaviour is unchanged (the cache wraps + // readWithFailover), so the assertions below are preserved verbatim. 
+ (a as unknown as { providers: Array<{ getNetwork: () => Promise<{ chainId: bigint }> }> }).providers = [{ getNetwork: getNetwork as unknown as () => Promise<{ chainId: bigint }>, - }; + }]; await expect(a.getEvmChainId()).rejects.toThrow('rate limited'); // Second call should retry — failure was not memoised. @@ -2015,10 +2032,12 @@ const V10_KA_ADDRESS = '0x' + 'aa'.repeat(20); // `allowance(...)` and connects it to the signer. `approve` itself goes through // the (stubbed) `sendContractTransaction`, so the recorder just needs to exist. function makeStubToken(allowance: bigint) { - const tokenWithSigner = { + // tokenWithSigner is read via contractReadWithFailover after token→signer + // rebind, so it too must be .connect-able (self no-op rebind). + const tokenWithSigner = connectable({ allowance: recorder(async (..._a: unknown[]) => allowance), approve: recorder(() => undefined), - }; + }); const tokenRoot = { connect: recorder((..._a: unknown[]) => tokenWithSigner), }; @@ -2567,36 +2586,43 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign (a as any).provider.getBalance = recorder(async (addr: string) => nativeByAddr.get(String(addr).toLowerCase()) ?? ABUNDANT_WEI); - const tokenWithSigner = { + // R1: readTracBalance now reads via contractReadWithFailover → withRunner + // does `token.connect(p).balanceOf(addr)`, so the CONNECTED contract (what + // token.connect returns) must expose balanceOf — not just the top-level + // token. (Native getBalance still works: the helper mutates + // this.provider.getBalance and this.provider === this.providers[0], so the + // shared object is what readWithFailover reads.) + const balanceOf = recorder(async (addr: string) => + tracByAddr.get(String(addr).toLowerCase()) ?? ABUNDANT_WEI); + const tokenWithSigner = connectable({ allowance: recorder(async (owner: string, _spender: string) => { return allowanceByOwner.get(owner.toLowerCase()) ?? 
0n; }), approve: recorder(() => undefined), - }; + balanceOf, + }); (a as any).contracts.token = { connect: recorder(() => tokenWithSigner), - // Read path (`readTracBalance`) calls this.contracts.token.balanceOf. - balanceOf: recorder(async (addr: string) => - tracByAddr.get(String(addr).toLowerCase()) ?? ABUNDANT_WEI), + balanceOf, // kept for any direct (non-connected) top-level reader }; const populateSpy = recorder(async () => ({ to: PARITY_KA_ADDRESS, data: '0xdeadbeef', })); - const kavContract = { + const kavContract = connectable({ getAddress: recorder(async () => PARITY_KA_ADDRESS), publish: { populateTransaction: populateSpy }, update: { populateTransaction: populateSpy }, - }; + }); (a as any).contracts.knowledgeAssetsLifecycle = { connect: recorder(() => kavContract), getAddress: recorder(async () => PARITY_KA_ADDRESS), }; - (a as any).contracts.contextGraphs = { + (a as any).contracts.contextGraphs = connectable({ isAuthorizedPublisher: recorder(async () => true), - }; + }); const sendSpy = recorder(async (..._a: unknown[]) => ({} as unknown)); (a as any).sendContractTransaction = sendSpy; @@ -2684,7 +2710,11 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign expect(approveSender).toBe(walletB); expect(signSpy.calls).toHaveLength(1); - expect(signSpy.calls[0][0]).toBe(walletB); + // R1/OBS-1: populateAndSignAcrossProviders signs on the per-provider runner + // (signer.connect(providers[i])) — same key/ADDRESS as walletB, new object. + // Assert the signer ADDRESS, not object identity (#870 "publish signed by + // walletB, no mid-flight rotation" invariant is preserved). 
+ expect((signSpy.calls[0][0] as ethers.Wallet).address).toBe(walletB.address); }); it('publish path: when publisherAddress is omitted, round-robin signer is also the approve signer (no mid-flight rotation)', async () => { @@ -2710,7 +2740,8 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign expect(approveMethod).toBe('approve'); expect(approveArgs).toEqual([PARITY_KA_ADDRESS, 1n]); expect(approveSender).toBe(walletA); - expect(signSpy.calls[0][0]).toBe(walletA); + // R1/OBS-1: signer reconnected per-provider — assert ADDRESS not identity. + expect((signSpy.calls[0][0] as ethers.Wallet).address).toBe(walletA.address); }); it('update path: approve fires from the on-chain publisher wallet, NOT a round-robin pick from the pool', async () => { @@ -2731,21 +2762,23 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign // Injected DI seams the update path needs in addition to the publish ones. const kaId = 42n; - (a as any).contracts.knowledgeAssetStorage = { + (a as any).contracts.knowledgeAssetStorage = connectable({ getLatestMerkleRootPublisher: recorder(async () => walletB.address), getMerkleRoots: recorder(async () => []), - }; - (a as any).contracts.contextGraphStorage = { + }); + (a as any).contracts.contextGraphStorage = connectable({ kaToContextGraph: recorder(async () => 0n), - }; + }); (a as any).resolveCurrentTokenAmount = recorder(async () => 0n); (a as any).computeUpdateNewTokenAmount = recorder(async () => 0n); (a as any).getIdentityId = recorder(async () => 0n); - // `provider.getNetwork()` is called for chainId; stub it so the update - // path doesn't try to hit the placeholder RPC. - (a as any).provider = { - getNetwork: recorder(async () => ({ chainId: 31337n })), - }; + // `provider.getNetwork()` is called for chainId; stub it so the update path + // doesn't hit the placeholder RPC. 
R1: getEvmChainId reads via + // readWithFailover over this.providers[0] (=== this.provider), so MUTATE + // getNetwork on the shared object — REPLACING this.provider would orphan + // this.providers[0] (and the helper's getBalance mock) and the read would + // dial the dead RPC instead. + (a as any).provider.getNetwork = recorder(async () => ({ chainId: 31337n })); const updateParams: any = { kaId, @@ -2789,7 +2822,11 @@ describe('createKnowledgeAssets / updateKnowledgeCollectionV10 — approval sign expect(approveLabel).toBe('approve V10 update TRAC'); expect(signSpy.calls).toHaveLength(1); - expect(signSpy.calls[0][0]).toBe(walletB); + // R1/OBS-1: populateAndSignAcrossProviders signs on the per-provider runner + // (signer.connect(providers[i])) — same key/ADDRESS as walletB, new object. + // Assert the signer ADDRESS, not object identity (#870 "publish signed by + // walletB, no mid-flight rotation" invariant is preserved). + expect((signSpy.calls[0][0] as ethers.Wallet).address).toBe(walletB.address); }); // ----------------------------------------------------------------------------- @@ -2879,7 +2916,7 @@ describe('createKnowledgeAssets — funding-aware wallet selection', () => { it('still throws "no authorized publisher" when no wallet is authorized (unchanged)', async () => { const { a } = makeMultiWalletV10Adapter(makeAllowanceByOwner()); - (a as any).contracts.contextGraphs = { isAuthorizedPublisher: recorder(async () => false) }; + (a as any).contracts.contextGraphs = connectable({ isAuthorizedPublisher: recorder(async () => false) }); await expect((a as any).nextAuthorizedSigner(CG)).rejects.toThrow(/No authorized publisher wallet/); }); @@ -3218,10 +3255,10 @@ describe('isTooLowAllowanceError (#888)', () => { function makeV10AdapterWithAllowanceSequence(values: bigint[]) { const a = new EVMChainAdapter(minimalConfig()); let i = 0; - const tokenWithSigner = { + const tokenWithSigner = connectable({ allowance: recorder(async () => values[Math.min(i++, 
values.length - 1)]), approve: recorder(() => undefined), - }; + }); const tokenRoot = { connect: recorder(() => tokenWithSigner) }; (a as any).contracts.token = tokenRoot; const sendSpy = recorder(async (..._a: unknown[]) => ({} as unknown)); @@ -3308,7 +3345,7 @@ describe('ensureV10ApproveTrac — forced re-approve + visibility poll (#888)', try { const a = new EVMChainAdapter(minimalConfig()); // allowance() returns a promise that never settles — a hung RPC read. - const token = { allowance: recorder(() => new Promise(() => {})) }; + const token = connectable({ allowance: recorder(() => new Promise(() => {})) }); const done = recorder(() => undefined); const poll = (a as any) .confirmAllowanceVisible(token, '0xowner', V10_KA_ADDRESS, 1n) @@ -3373,7 +3410,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco const populate = recorder(async () => ( populateQueue.shift() ?? (async () => ({ to: V10_KA_ADDRESS, data: '0xabcd' })) )()); - const kaContract = { [method]: { populateTransaction: populate } }; + const kaContract = connectable({ [method]: { populateTransaction: populate } }); const result = await (a as any).populateAndSignV10WithAllowanceRecovery( signer, @@ -3405,7 +3442,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco const populate = recorder(async () => ( populateQueue.shift() ?? 
(async () => ({ to: V10_KA_ADDRESS, data: '0xabcd' })) )()); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); const result = await (a as any).populateAndSignV10WithAllowanceRecovery( signer, @@ -3427,7 +3464,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco it('propagates a SECOND consecutive TooLowAllowance (recovery is one-shot, no infinite loop)', async () => { const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); const populate = recorder(async () => { throw tooLowAllowanceRevert(); }); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); await expect( (a as any).populateAndSignV10WithAllowanceRecovery( @@ -3440,10 +3477,65 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco expect(signSpy.calls).toEqual([]); }); + it('C6 (G-OBS1b): forces EXACTLY ONE approve across a provider-failover × TooLowAllowance interleaving (shared OUTER latch, not per-provider)', async () => { + // The case a PER-PROVIDER latch would double-fire: the inner per-provider + // populate loop fails over on provider #1's RETRYABLE 429, then provider #2 + // reverts TooLowAllowance (non-retryable → propagates to the OUTER recovery), + // which fires ONE forced approve and re-runs the WHOLE inner loop (now + // succeeds). The forcedReapprove latch lives at the recovery OUTER scope, so + // it fires exactly once no matter how many endpoints the inner loop tried — + // immediate failover introduces ZERO extra approve txs (INV-1 + G-OBS1b). 
+ const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); + (a as any).providers = [{}, {}]; // two endpoints so the inner loop fails over + const r429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; }; + let call = 0; + const populate = recorder(async () => { + call += 1; + if (call === 1) throw r429(); // provider[0], pass 1 → retryable → fail over + if (call === 2) throw tooLowAllowanceRevert(); // provider[1], pass 1 → non-retryable → propagate + return { to: V10_KA_ADDRESS, data: '0xabcd' }; // provider[0], pass 2 (post-approve) → succeeds + }); + const kaContract = connectable({ publish: { populateTransaction: populate } }); + + const result = await (a as any).populateAndSignV10WithAllowanceRecovery( + signer, kaContract, 'publish', {}, V10_KA_ADDRESS, 0n, 'label', + ); + + expect(result).toEqual({ signedTx: '0xsigned', txHash: '0xhash' }); + expect(ensureSpy.calls).toHaveLength(1); // EXACTLY ONE forced approve across the failover + expect(ensureSpy.calls[0][4]).toBe(true); // force=true + expect(signSpy.calls).toHaveLength(1); // publish signed exactly once (INV-1) + expect(populate.calls).toHaveLength(3); // p0(429) → p1(TooLow) → [approve] → p0(ok) + }); + + it('OBS-1: a RETRYABLE populate failure fails over to the next provider and signs exactly once (no double-sign)', async () => { + // Plain OBS-1 populate failover (no allowance recovery): provider #1's + // populate is rate-limited, provider #2 populates fine → signed once on #2. 
+ const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); + (a as any).providers = [{}, {}]; + const r429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; }; + let call = 0; + const populate = recorder(async () => { + call += 1; + if (call === 1) throw r429(); // provider[0] → fail over + return { to: V10_KA_ADDRESS, data: '0xabcd' }; // provider[1] → populates + }); + const kaContract = connectable({ publish: { populateTransaction: populate } }); + + const result = await (a as any).populateAndSignV10WithAllowanceRecovery( + signer, kaContract, 'publish', {}, V10_KA_ADDRESS, 0n, 'label', + ); + + expect(result).toEqual({ signedTx: '0xsigned', txHash: '0xhash' }); + expect(populate.calls).toHaveLength(2); // p0(429) → p1(ok) + expect(signSpy.calls).toHaveLength(1); // signed once, on the healthy provider + expect(ensureSpy.calls).toEqual([]); // no TooLowAllowance → no forced approve + }); + it('enriches the SECOND raw TooLowAllowance before throwing the one-shot failure', async () => { const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); const populate = recorder(async () => { throw rawTooLowAllowanceRevert(); }); - const kaContract = { publish: { populateTransaction: populate } }; + const kaContract = connectable({ publish: { populateTransaction: populate } }); let thrown: any; try { @@ -3466,7 +3558,7 @@ describe('populateAndSignV10WithAllowanceRecovery — shared publish/update reco it('propagates an unrelated revert immediately without forcing a re-approve', async () => { const { a, ensureSpy, signSpy, signer } = makeRecoveryAdapter(); const populate = recorder(async () => { throw new Error('execution reverted: NotBatchPublisher()'); }); - const kaContract = { update: { populateTransaction: populate } }; + const kaContract = connectable({ update: { populateTransaction: populate } }); await expect( (a as any).populateAndSignV10WithAllowanceRecovery( diff --git 
a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts index d8b1d0392..64d465d74 100644 --- a/packages/chain/test/getmaxkanumberforauthor.unit.test.ts +++ b/packages/chain/test/getmaxkanumberforauthor.unit.test.ts @@ -20,6 +20,8 @@ import { describe, it, expect, vi } from 'vitest'; import { ethers } from 'ethers'; import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { RPC_READ_STALL_TIMEOUT_MS } from '../src/evm-adapter-constants.js'; +import { connectable } from './connectable.js'; function recorder(impl: (...args: A) => R) { const calls: A[] = []; @@ -219,7 +221,7 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# // set `initialized = false` but the cached binding still points at the OLD // contract. The getter must `await this.init()` to re-resolve before // reading, or it answers from the pre-rotation DKGKnowledgeAssets. - const mkStorage = (n: bigint) => ({ + const mkStorage = (n: bigint) => connectable({ getMaxKaNumberForAuthor: viewMock(async () => n), filters: { KnowledgeAssetCreated: recorder(() => 'F') }, queryFilter: recorder(() => undefined), @@ -248,6 +250,77 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# expect(stale.getMaxKaNumberForAuthor.staticCall.calls).toEqual([]); }); + // The view staticCall now routes THROUGH readWithFailover (base:2132) with a + // CUSTOM classifier (isRetryableRpcError minus the absent-view / bareRevert + // shapes), giving it the per-attempt stall timeout + endpoint failover the old + // bespoke loop lacked — while preserving the boundary: a TRANSIENT error fails + // over to the next endpoint, but a DETERMINISTIC absent-view + // (BAD_DATA/CALL_EXCEPTION) is non-retryable -> rethrown straight to the catch + // -> the pre-10.0.4 scan (never failed over / masked as RPC_ENDPOINTS_EXHAUSTED). 
+ it('getMaxKaNumber view (readWithFailover): a TRANSIENT 429 fails over to the next endpoint and answers from the view (no scan)', async () => { + const queryFilter = recorder(async () => []); + let attempt = 0; + const storage: any = { + getMaxKaNumberForAuthor: viewMock(async () => { + attempt += 1; + if (attempt === 1) { const e: any = new Error('429 too many requests'); e.status = 429; throw e; } + return 5n; + }), + filters: { KnowledgeAssetCreated: recorder(() => 'F') }, + queryFilter, + }; + const a = makeAdapter(storage, 100_000); + (a as any).providers = [{}, {}]; // two endpoints → the view read fails over + expect(await a.getMaxKaNumberForAuthor(AUTHOR)).toBe(5n); // served by endpoint #2 + expect(storage.getMaxKaNumberForAuthor.staticCall.calls).toHaveLength(2); // failed over once + expect(queryFilter.calls).toEqual([]); // the view answered → NEVER scans logs + }); + + it('getMaxKaNumber view (readWithFailover): an ABSENT-view (BAD_DATA) is deterministic across endpoints — no failover, straight to the scan', async () => { + const badData: any = new Error(EMPTY_VIEW_RESULT); + badData.code = 'BAD_DATA'; + const queryFilter = recorder(async () => []); + const storage: any = { + target: '0x5555555555555555555555555555555555555555', + getMaxKaNumberForAuthor: viewMock(async () => { throw badData; }), + filters: { KnowledgeAssetCreated: recorder(() => 'F') }, + queryFilter, + }; + const a = makeAdapter(storage, 3_000); + const backend = () => ({ getBlockNumber: recorder(async () => 3_000), getCode: recorder(async () => '0x6000') }); + (a as any).providers = [backend(), backend()]; + expect(await a.getMaxKaNumberForAuthor(AUTHOR)).toBe(-1n); // degraded to the (empty) scan + // Absent-view is non-retryable → rethrown after ONE attempt, endpoint #2 never consulted. 
+ expect(storage.getMaxKaNumberForAuthor.staticCall.calls).toHaveLength(1); + expect(queryFilter.calls.length).toBeGreaterThan(0); // the scan ran instead + }); + + it('getMaxKaNumber view (readWithFailover): a HUNG primary staticCall times out (per-attempt cap the bespoke loop lacked) and fails over to the backup', async () => { + vi.useFakeTimers(); + try { + let attempt = 0; + const storage: any = { + getMaxKaNumberForAuthor: viewMock(() => + (attempt += 1) === 1 + ? new Promise(() => {}) // primary hangs forever + : Promise.resolve(7n)), // backup answers + filters: { KnowledgeAssetCreated: recorder(() => 'F') }, + queryFilter: recorder(async () => []), + }; + const a = makeAdapter(storage, 100_000); + (a as any).providers = [{}, {}]; + const p = a.getMaxKaNumberForAuthor(AUTHOR); + // The new readWithFailover cap aborts the hung primary at the 4s multi-RPC + // default and fails over (the old bespoke loop had NO per-attempt timeout → + // a hung backend stalled the whole resolution). + await vi.advanceTimersByTimeAsync(RPC_READ_STALL_TIMEOUT_MS + 500); + expect(await p).toBe(7n); // answered by the backup's staticCall + expect(storage.getMaxKaNumberForAuthor.staticCall.calls).toHaveLength(2); + } finally { + vi.useRealTimers(); + } + }); + it('rethrows malformed BAD_DATA instead of treating every decode failure as an absent view', async () => { const err: any = new Error( 'could not decode result data (value="0x1234", info={ method: "getMaxKaNumberForAuthor", signature: "getMaxKaNumberForAuthor(address)" }, code=BAD_DATA, version=6.16.0)', @@ -752,9 +825,18 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# }; const a = makeAdapter(storage, head); const code = (block?: number) => (block === undefined || block >= deployBlock ? '0x6000' : '0x'); - // b0 is reachable (head ok) but HANGS on getCode; the deploy-block search - // must time out its attempts and fail over to b1 rather than stalling. 
- const b0 = { getBlockNumber: recorder(async () => head), getCode: recorder(() => new Promise(() => {})) }; + // b0 is reachable (head ok) and serves the STANDALONE bytecode probe + // (block===undefined → readWithFailover, R1) but HANGS on the deploy-block + // SEARCH reads (block!==undefined); the search must time out its 3×4s + // attempts and fail over to b1 rather than stalling. (Hanging the + // standalone probe too would add a 4s readWithFailover hop and push the + // total past the 13s advance — out of scope for this "search fails over" + // assertion.) + const b0 = { + getBlockNumber: recorder(async () => head), + getCode: recorder((_a: string, block?: number) => + block === undefined ? Promise.resolve('0x6000') : new Promise(() => {})), + }; const b1 = { getBlockNumber: recorder(async () => head), getCode: recorder(async (_a: string, block?: number) => code(block)) }; (a as any).providers = [b0, b1]; @@ -1008,8 +1090,15 @@ describe('EVMChainAdapter.getMaxKaNumberForAuthor — view + bounded fallback (# queryFilter, }; const a = makeAdapter(storage, 0); - // backend 0: completely down — getBlockNumber itself fails (a real, non-historical error) - const downBackend = { getBlockNumber: recorder(async () => { throw new Error('503 node is down'); }), getCode: recorder(() => undefined) }; + // backend 0: completely down — getBlockNumber AND getCode fail (a real, + // non-historical error). R1: the standalone bytecode probe now consults + // getCode via readWithFailover, so a down node must THROW (not return + // undefined, which would be read as a valid empty result and short-circuit + // before failover to the pruned-but-reachable backend). 
+ const downBackend = { + getBlockNumber: recorder(async () => { throw new Error('503 node is down'); }), + getCode: recorder(async () => { throw new Error('503 node is down'); }), + }; // backend 1: reachable but pruned — historical getCode unavailable const prunedBackend = { getBlockNumber: recorder(async () => head), diff --git a/packages/chain/test/loopback-rpc-harness.ts b/packages/chain/test/loopback-rpc-harness.ts new file mode 100644 index 000000000..cf0bf942e --- /dev/null +++ b/packages/chain/test/loopback-rpc-harness.ts @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Shared loopback JSON-RPC harness for the immediate-RPC-failover tests + * (T1 read failover, the real-provider WRITE failover gate, and the staged + * T3/T6). NOT a test file (no `.test.` suffix) so vitest does not run it. + * + * Spins real `node:http` JSON-RPC servers so tests drive REAL + * `ethers.JsonRpcProvider`s / `FetchRequest` / the adapter's failover loops — + * the thing the existing ~170 bare-object provider mocks bypass entirely. No + * Hardhat, no native deps; runs locally (`--ignore-scripts`) and in CI. + * + * Teardown discipline (REQUIRED by callers): under a perpetual 429 ethers keeps + * retrying on keep-alive sockets AFTER the awaited call rejects. Always + * `adapter.destroy()` then `rpc.close()` (which `closeAllConnections()` first) + * in afterEach, or the hook hangs past vitest's timeout — the known flaky-CI + * failure mode (see evm-adapter.unit.test.ts:1549). + */ +import { createServer, type Server } from 'node:http'; +import type { AddressInfo } from 'node:net'; + +/** chainId 31337 (matches the tests' `chainId: 'evm:31337'`). */ +export const CHAIN_ID_HEX = '0x7a69'; + +export interface LoopbackRpc { + url: string; + server: Server; + /** Per-JSON-RPC-method request counts, e.g. `hits('eth_chainId')`. */ + hits: (method: string) => number; + totalHits: () => number; + /** Force-close sockets then close the server (afterEach teardown). 
*/ + close: () => Promise; +} + +export interface LoopbackOptions { + /** JSON-RPC methods that respond HTTP 429 (rate-limited). */ + throttle?: Iterable; + /** Override canned results per method (return a hex string). */ + results?: Record; +} + +const DEFAULT_RESULTS: Record = { + eth_chainId: CHAIN_ID_HEX, + eth_blockNumber: '0x10', + eth_getCode: '0x1234', + eth_call: '0x' + '00'.repeat(32), + eth_sendRawTransaction: '0x' + '11'.repeat(32), + eth_getTransactionReceipt: '', // '' → null result (receipt not yet mined) +}; + +/** + * Start a loopback JSON-RPC server. Methods in `throttle` answer HTTP 429; + * everything else returns a canned OK result. `eth_chainId`/`eth_blockNumber` + * are always answerable (unless throttled) so a healthy endpoint satisfies + * ethers' network detection. + */ +export async function startLoopbackRpc(options: LoopbackOptions = {}): Promise { + const throttle = new Set(options.throttle ?? []); + const results = { ...DEFAULT_RESULTS, ...(options.results ?? {}) }; + const counts = new Map(); + + const server = createServer((req, res) => { + let raw = ''; + req.on('data', (c) => { raw += c; }); + req.on('end', () => { + let body: unknown; + try { body = JSON.parse(raw); } catch { body = {}; } + const reqs = (Array.isArray(body) ? body : [body]) as Array<{ id: number; method: string }>; + let throttled = false; + const out: unknown[] = []; + for (const r of reqs) { + counts.set(r.method, (counts.get(r.method) ?? 0) + 1); + if (throttle.has(r.method)) { throttled = true; continue; } + const result = r.method in results ? results[r.method] : '0x'; + out.push({ jsonrpc: '2.0', id: r.id, result: result === '' ? null : result }); + } + if (throttled) { + res.writeHead(429, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + jsonrpc: '2.0', + id: reqs[0]?.id ?? 
null, + error: { code: -32005, message: 'rate limited' }, + })); + return; + } + res.setHeader('content-type', 'application/json'); + res.end(JSON.stringify(Array.isArray(body) ? out : out[0])); + }); + }); + + await new Promise((resolve) => server.listen(0, '127.0.0.1', () => resolve())); + const addr = server.address() as AddressInfo; + return { + url: `http://127.0.0.1:${addr.port}`, + server, + hits: (method) => counts.get(method) ?? 0, + totalHits: () => [...counts.values()].reduce((a, b) => a + b, 0), + close: async () => { + server.closeAllConnections?.(); + await new Promise((resolve) => server.close(() => resolve())); + }, + }; +} diff --git a/packages/chain/test/mock-adapter-parity.test.ts b/packages/chain/test/mock-adapter-parity.test.ts index 1e261bc47..44b4890c7 100644 --- a/packages/chain/test/mock-adapter-parity.test.ts +++ b/packages/chain/test/mock-adapter-parity.test.ts @@ -76,7 +76,7 @@ const MOCK_EXEMPT_FROM_EVM = new Set([ 'getContract', // resolves a Contract from the Hub — not applicable off-chain 'getBlockNumber', // the mock exposes its own block counter differently (advanceBlock) 'getProvider', // returns a JsonRpcProvider; mock has none - 'getReadProvider', // returns the EVM fallback read provider; mock has no RPC provider + 'getReadProvider', // @deprecated bare-primary accessor; mock has no RPC provider 'getSignerAddress', // mock exposes `signerAddress` as a field 'getSignerAddresses', // pool not applicable to mock 'getAuthorizedPublisherAddress', // pool-specific signer selection; mock has one signerAddress @@ -116,6 +116,18 @@ const MOCK_EXEMPT_FROM_EVM = new Set([ 'getTransactionReceiptWithFailover', 'waitForReceiptWithFailover', 'signPopulatedTransaction', + // R1 immediate-RPC-failover read/populate plumbing: the per-provider read + // failover loop, its contract-view wrapper, the event-log scan wrapper, the + // contract/signer rebind helpers, and the populate+sign-across-providers loop. 
+ // Protected EVM-only helpers over `this.providers[]` (the mock has no RPC + // provider pool), not ChainAdapter contract methods — same category as the + // write-failover helpers above. + 'readWithFailover', + 'contractReadWithFailover', + 'queryFilterWithFailover', + 'rebindContract', + 'rebindSigner', + 'populateAndSignAcrossProviders', 'sendSignedTransactionAndWait', 'sendPopulatedTransaction', 'sendContractTransaction', diff --git a/packages/chain/test/multi-rpc-provider-shape.test.ts b/packages/chain/test/multi-rpc-provider-shape.test.ts index 9fb577a37..ef5faaf79 100644 --- a/packages/chain/test/multi-rpc-provider-shape.test.ts +++ b/packages/chain/test/multi-rpc-provider-shape.test.ts @@ -7,9 +7,14 @@ const PK = '0x' + '1'.repeat(64); const HUB = '0x0000000000000000000000000000000000000001'; // Constructing the adapter is offline (providers are lazy / never dialled), so -// these assertions need no live RPC — they exercise the backwards-compat split -// in the constructor: 1 endpoint => bare JsonRpcProvider (identical to the -// pre-multi-RPC path), >1 endpoint => FallbackProvider failover. +// these assertions need no live RPC — they exercise the constructor's provider +// topology: 1 endpoint => bare JsonRpcProvider (identical to the pre-multi-RPC +// path); >1 endpoint => N bare JsonRpcProviders in `this.providers[]` with the +// bare PRIMARY exposed as the read provider. R1 removed the ethers +// FallbackProvider — reads fail over EXPLICITLY via `readWithFailover` over +// `this.providers[]` (the immediate-failover behaviour itself is covered by +// multi-rpc-read-failover.test.ts, so that coverage is NOT dropped here, only +// the now-removed FallbackProvider topology assertion is updated). 
describe('multi-RPC provider shape (backwards compatibility)', () => { it('a single rpcUrl yields a bare JsonRpcProvider (no FallbackProvider)', () => { const a = new EVMChainAdapter({ @@ -18,13 +23,13 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { privateKey: PK, allowNoAdminSigner: true, }); - const read = a.getReadProvider(); + const read = a.getProvider(); expect(read).toBeInstanceOf(JsonRpcProvider); expect(read).not.toBeInstanceOf(FallbackProvider); expect(a.getRpcUrls()).toEqual(['http://127.0.0.1:1']); }); - it('multiple rpcUrls yield a FallbackProvider over all endpoints (primary first)', () => { + it('multiple rpcUrls build a bare-primary read provider + N providers for readWithFailover (no FallbackProvider)', () => { const a = new EVMChainAdapter({ rpcUrl: 'http://127.0.0.1:1', rpcUrls: ['http://127.0.0.1:2', 'http://127.0.0.1:3'], @@ -32,8 +37,21 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { privateKey: PK, allowNoAdminSigner: true, }); - expect(a.getReadProvider()).toBeInstanceOf(FallbackProvider); + // R1: getProvider() is the bare PRIMARY JsonRpcProvider — the + // FallbackProvider is gone; reads fail over explicitly via readWithFailover + // over this.providers[] (getReadProvider() was removed as obsolete: there is + // no single read provider anymore). + const read = a.getProvider(); + expect(read).toBeInstanceOf(JsonRpcProvider); + expect(read).not.toBeInstanceOf(FallbackProvider); + // All endpoints stay configured, primary first — the failover topology now + // lives in this.providers[] (one bare JsonRpcProvider per endpoint), which + // readWithFailover iterates. 
expect(a.getRpcUrls()).toEqual(['http://127.0.0.1:1', 'http://127.0.0.1:2', 'http://127.0.0.1:3']); + const providers = (a as unknown as { providers: unknown[] }).providers; + expect(providers).toHaveLength(3); + expect(providers.every((p) => p instanceof JsonRpcProvider)).toBe(true); + expect(providers.some((p) => p instanceof FallbackProvider)).toBe(false); }); it('dedupes a backup that repeats the primary (no redundant provider)', () => { @@ -45,7 +63,7 @@ describe('multi-RPC provider shape (backwards compatibility)', () => { allowNoAdminSigner: true, }); // Collapses to a single unique endpoint -> no FallbackProvider. - expect(a.getReadProvider()).not.toBeInstanceOf(FallbackProvider); + expect(a.getProvider()).not.toBeInstanceOf(FallbackProvider); expect(a.getRpcUrls()).toEqual(['http://127.0.0.1:1']); }); }); diff --git a/packages/chain/test/multi-rpc-read-failover.test.ts b/packages/chain/test/multi-rpc-read-failover.test.ts new file mode 100644 index 000000000..08e135493 --- /dev/null +++ b/packages/chain/test/multi-rpc-read-failover.test.ts @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * T1 (immediate-RPC-failover): REAL multi-RPC READ failover over loopback + * JSON-RPC servers — the regression gate the existing suite cannot provide. + * + * Why this file exists: the ~170 adapter failover tests inject bare-object + * provider mocks (`(a as any).providers = [...]`) that bypass JsonRpcProvider / + * FetchRequest / the read path entirely, AND they only cover WRITES. The READ + * failover path (today: ethers `FallbackProvider`; after R1: an explicit + * `readWithFailover` loop over the bare providers) has ZERO real coverage. + * Empirically the current FallbackProvider read path does NOT reliably fail over + * on a fast 429 (it advances only on a 4s STALL; even at the default retry + * budget it can surface the primary's error with a healthy backup) — so R1 is a + * correctness fix, not just a latency win, and it needs a REAL-provider test. 
+ * + * Harness: `startLoopbackRpc` (node:http, no Hardhat / native deps). Teardown + * MUST destroy() each adapter and close() each server (closeAllConnections) — a + * perpetual 429 keeps ethers retrying on keep-alive sockets after the call + * rejects, which otherwise hangs afterEach past vitest's hook timeout. + * + * The CONTROL tests (healthy endpoints) are GREEN today and prove the harness + + * the real-provider-over-loopback adapter works. The `describe.skip` TARGET + * blocks specify the immediate-failover behaviour R1 delivers. + * + * ── UN-SKIP PROTOCOL (HARD S1-ACCEPTANCE GATE, lead-mandated 2026-06-25) ── + * Trigger: the moment ChainEngineer signals reads (getEvmChainId / Hub + * getContractAddress / resolveContract / contract-views) route through + * `readWithFailover` over the bare `this.providers[]` with per-endpoint + * retries = 0 for multi-RPC, flip ALL `describe.skip` TARGET blocks below to + * live `describe`. They MUST then be GREEN — S1 is NOT complete until these are + * live-green (lead + TxSafetyReviewer require it at S1 sign-off; the skip is + * provably temporary, not optional). Kept skip ONLY while S1 is mid-flight so a + * transient red from an unrelated in-progress edit can't mask a real regression. + * VERIFIED 2026-06-25: with the targets un-skipped against ChainEngineer's S1 + * working tree these PASS (read 5/5, write 3/3) — the gate flips RED→GREEN + * exactly as designed (current code: getEvmChainId rejects SERVER_ERROR in ~64ms + * with a healthy backup = no failover). Do NOT weaken the assertions. 
+ */ +import { describe, it, expect, afterEach, beforeEach } from 'vitest'; +import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { startLoopbackRpc, type LoopbackRpc } from './loopback-rpc-harness.js'; +import { getRpcFailoverStats, _resetRpcFailoverStatsForTest } from '../src/rpc-failover-log.js'; + +const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; +const HUB = '0x0000000000000000000000000000000000000001'; + +function minimalConfig(overrides: Partial = {}): EVMAdapterConfig { + return { + rpcUrl: 'http://127.0.0.1:1', + privateKey: DEPLOYER_PK, + hubAddress: HUB, + chainId: 'evm:31337', + allowNoAdminSigner: true, + ...overrides, + }; +} + +describe('multi-RPC read failover (real loopback providers)', () => { + const adapters: EVMChainAdapter[] = []; + const servers: LoopbackRpc[] = []; + function track(a: EVMChainAdapter): EVMChainAdapter { adapters.push(a); return a; } + function trackServer(s: LoopbackRpc): LoopbackRpc { servers.push(s); return s; } + + afterEach(async () => { + // Stop ethers' background retry loop / idle sockets FIRST, then close the + // servers, so close() resolves promptly instead of hanging on an in-flight + // 429 retry (the flaky-CI failure mode). 
+ for (const a of adapters.splice(0)) { + try { a.destroy(); } catch { /* destroy() is idempotent */ } + } + for (const s of servers.splice(0)) await s.close(); + }); + + // ── CONTROL (GREEN today): the harness + real-provider adapter works ── + + it('control: a healthy 2-RPC adapter resolves chainId over real loopback providers', async () => { + const primary = trackServer(await startLoopbackRpc()); + const backup = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + await expect(a.getEvmChainId()).resolves.toBe(31337n); + expect(Date.now() - start).toBeLessThan(10_000); + // Two distinct endpoints really were wired (not deduped to one). + expect(a.getRpcUrls()).toEqual([primary.url, backup.url]); + }); + + it('control: a single-RPC adapter resolves chainId over a real loopback provider', async () => { + const only = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: only.url }))); + await expect(a.getEvmChainId()).resolves.toBe(31337n); + expect(only.hits('eth_chainId')).toBeGreaterThanOrEqual(1); + }); + + // ── TARGET (immediate failover) ────────────────────────────────────────── + // UN-SKIP when R1 routes reads through `readWithFailover` over the bare + // providers AND multi-RPC sets per-endpoint retries = 0. On the CURRENT code + // these are RED (the FallbackProvider read path does not immediately fail over + // a fast 429), which is exactly the regression this gate locks. Verified + // entrypoint: getEvmChainId() -> this.provider.getNetwork() (a "direct + // this.provider.*" read in the locked read surface). + // GATE-HAS-TEETH CHECK (QA, 2026-06-25): with this block un-skipped against the + // current code, getEvmChainId() REJECTS with SERVER_ERROR in ~64ms while the + // backup is healthy — i.e. no failover — so the first test fails exactly as a + // regression gate should. 
It must flip GREEN once R1 lands; do not weaken it. + describe('TARGET — immediate read failover (R1)', () => { + it('primary 429 on the read → served by the healthy backup, primary hit exactly once (no per-endpoint retry)', async () => { + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const backup = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + // The read is served by the backup despite the primary 429ing it. + await expect(a.getEvmChainId()).resolves.toBe(31337n); + const elapsed = Date.now() - start; + + // Immediate: the primary's eth_chainId is attempted exactly ONCE (no 5× + // same-endpoint backoff before failing over) and the whole read is fast. + expect(primary.hits('eth_chainId')).toBe(1); + expect(backup.hits('eth_chainId')).toBeGreaterThanOrEqual(1); + expect(elapsed).toBeLessThan(2_000); + }); + + it('all endpoints 429 → surfaces RPC_ENDPOINTS_EXHAUSTED (retryable), one attempt per endpoint', async () => { + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const backup = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + await expect(a.getEvmChainId()).rejects.toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED' }); + expect(Date.now() - start).toBeLessThan(3_000); + // Exactly one attempt per endpoint per pass (no 5× per-endpoint retry). + expect(primary.hits('eth_chainId')).toBe(1); + expect(backup.hits('eth_chainId')).toBe(1); + }); + }); + + // Direct unit of the new read-failover primitive. UN-SKIP when + // `readWithFailover` exists. Asserts the loop advances on a retryable error + // and serves from the next provider — the read-side mirror of the write loops. 
+ describe('TARGET — readWithFailover primitive (R1)', () => { + it('advances to the next provider on a retryable error and serves its result', async () => { + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_getCode'] })); + const backup = trackServer(await startLoopbackRpc()); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const readWithFailover = (a as unknown as { + readWithFailover: (label: string, fn: (p: { getCode: (addr: string) => Promise }) => Promise) => Promise; + }).readWithFailover; + + const code = await readWithFailover.call(a, 'getCode', (p) => p.getCode(HUB)); + expect(code).toBe('0x1234'); + expect(primary.hits('eth_getCode')).toBe(1); + expect(backup.hits('eth_getCode')).toBeGreaterThanOrEqual(1); + }); + }); + + // T6: read failover is OBSERVABLE and HOST-ONLY (no key leak) + bumps the + // process-wide /api/status counters. The rpc-failover-log unit test already + // proves rpcHost/noteRpcFailover are host-only in isolation; this proves the + // READ failover path actually routes through that logger end-to-end — the + // read-side W3 observability the old code lacked (logging was "WRITE failover + // only"). Part of the same hard S1-acceptance gate (un-skip with the others). + describe('TARGET — read failover observability is host-only (R1, T6)', () => { + beforeEach(() => { _resetRpcFailoverStatsForTest(); }); + afterEach(() => { _resetRpcFailoverStatsForTest(); }); + + it('a real read failover logs HOST-ONLY (no URL/key) and increments the failover counter', async () => { + // Primary URL carries a fake API-key path; the host-only logger must never + // emit it. The loopback server ignores the path and 429s the read. 
+ const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_chainId'] })); + const backup = trackServer(await startLoopbackRpc()); + const primaryWithKey = `${primary.url}/v1/SECRET-API-KEY-abc123`; + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primaryWithKey, rpcUrls: [backup.url] }))); + + const warnings: string[] = []; + const origWarn = console.warn; + console.warn = ((...args: unknown[]) => { warnings.push(String(args[0])); }) as typeof console.warn; + try { + await expect(a.getEvmChainId()).resolves.toBe(31337n); + } finally { + console.warn = origWarn; + } + + // A failover line was emitted, host-only: contains the loopback host but + // NEVER the scheme or the API key embedded in the configured URL. + const failoverLines = warnings.filter((w) => w.includes('RPC failover')); + expect(failoverLines.length).toBeGreaterThanOrEqual(1); + const joined = failoverLines.join('\n'); + expect(joined).toContain('127.0.0.1'); + expect(joined).not.toContain('SECRET-API-KEY'); + expect(joined).not.toContain('://'); + + // Process-wide /api/status counters recorded the read failover, host-only. + const stats = getRpcFailoverStats(); + expect(stats.failovers).toBeGreaterThanOrEqual(1); + const hosts = Object.keys(stats.byEndpointHost); + expect(hosts.some((h) => h.startsWith('127.0.0.1'))).toBe(true); + expect(hosts.every((h) => !h.includes('SECRET-API-KEY') && !h.includes('://'))).toBe(true); + }); + }); +}); diff --git a/packages/chain/test/multi-rpc-write-failover.test.ts b/packages/chain/test/multi-rpc-write-failover.test.ts new file mode 100644 index 000000000..1cf3f731e --- /dev/null +++ b/packages/chain/test/multi-rpc-write-failover.test.ts @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * INV-7 (writes): REAL multi-RPC WRITE failover over loopback JSON-RPC servers + * — the write-side mirror of T1, closing the same false-green. 
+ * + * The ~170 existing write-failover tests inject bare-object providers + * (`(a as any).providers = [primary, backup]`) whose `broadcastTransaction` + * throws synchronously, so they prove the failover LOOP advances but can NEVER + * prove "no per-endpoint backoff": they bypass `boundedRetryFetchRequest` / + * `FetchRequest` entirely, so they pass identically whether the multi-RPC + * per-endpoint retry budget is 0 or 5. This drives a REAL `JsonRpcProvider`'s + * `broadcastTransaction` against loopback servers, so the failed primary's + * `eth_sendRawTransaction` is counted and "exactly once" actually means + * immediate failover. + * + * Note: ethers' `broadcastTransaction` re-parses the signed tx and asserts the + * node's returned hash equals the tx hash — so the accepting server must return + * the REAL hash of the signed tx (computed below), not a dummy. + * + * CONTROL (healthy primary) is GREEN today. The TARGET block below is part of + * the HARD S1-acceptance gate (see multi-rpc-read-failover.test.ts header). + * It was staged as `describe.skip` pending multi-RPC provider construction with + * `boundedRetryFetchRequest(url, 0)`; it is un-skipped now and MUST stay GREEN + * (S1 is not complete until live-green). With a per-endpoint retry budget of 5 + * the failed primary's broadcast is retried ~6× over ~7.5s before failover, so + * the "hit exactly once / bounded" assertions go RED — exactly the + * immediate-failover regression this gate locks. VERIFIED 2026-06-25: + * un-skipped against S1 these PASS (3/3).
+ */ +import { describe, it, expect, afterEach } from 'vitest'; +import { ethers } from 'ethers'; +import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { startLoopbackRpc, type LoopbackRpc } from './loopback-rpc-harness.js'; + +const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; +const HUB = '0x0000000000000000000000000000000000000001'; + +function minimalConfig(overrides: Partial = {}): EVMAdapterConfig { + return { + rpcUrl: 'http://127.0.0.1:1', + privateKey: DEPLOYER_PK, + hubAddress: HUB, + chainId: 'evm:31337', + allowNoAdminSigner: true, + ...overrides, + }; +} + +/** A real, offline-signed legacy tx + its canonical hash (no RPC contact). */ +async function signTx(): Promise<{ signedTx: string; txHash: string }> { + const wallet = new ethers.Wallet(DEPLOYER_PK); + const signedTx = await wallet.signTransaction({ + to: HUB, + nonce: 0, + gasLimit: 21_000, + gasPrice: 1_000_000_000, + value: 0, + chainId: 31337, + }); + return { signedTx, txHash: ethers.Transaction.from(signedTx).hash! 
}; +} + +describe('multi-RPC write failover (real loopback providers)', () => { + const adapters: EVMChainAdapter[] = []; + const servers: LoopbackRpc[] = []; + function track(a: EVMChainAdapter): EVMChainAdapter { adapters.push(a); return a; } + function trackServer(s: LoopbackRpc): LoopbackRpc { servers.push(s); return s; } + + afterEach(async () => { + for (const a of adapters.splice(0)) { + try { a.destroy(); } catch { /* idempotent */ } + } + for (const s of servers.splice(0)) await s.close(); + }); + + // ── CONTROL (GREEN today): real broadcast over a healthy loopback provider ── + it('control: broadcasts a real signed tx to a healthy primary (no failover)', async () => { + const { signedTx, txHash } = await signTx(); + const primary = trackServer(await startLoopbackRpc({ results: { eth_sendRawTransaction: txHash } })); + const backup = trackServer(await startLoopbackRpc({ results: { eth_sendRawTransaction: txHash } })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + await expect( + (a as any).broadcastSignedTransactionWithFailover(signedTx, txHash, 'unit write'), + ).resolves.toBeUndefined(); + expect(primary.hits('eth_sendRawTransaction')).toBe(1); + expect(backup.hits('eth_sendRawTransaction')).toBe(0); // healthy primary → never reached + }); + + // ── TARGET (immediate failover): un-skip after retries=0 on write providers ── + // RED today (retry budget 5 → primary broadcast retried ~6× over ~7.5s before + // failover); GREEN once multi-RPC providers are built with + // boundedRetryFetchRequest(url, 0). The SAME signed tx is re-broadcast to the + // backup (idempotent, INV-4); the failed primary is hit exactly once (INV-7). 
+ describe('TARGET — immediate write failover (retries=0, R1)', () => { + it('primary 429 on broadcast → SAME tx accepted by backup, primary hit exactly once, bounded', async () => { + const { signedTx, txHash } = await signTx(); + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_sendRawTransaction'] })); + const backup = trackServer(await startLoopbackRpc({ results: { eth_sendRawTransaction: txHash } })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + const start = Date.now(); + await expect( + (a as any).broadcastSignedTransactionWithFailover(signedTx, txHash, 'unit write'), + ).resolves.toBeUndefined(); + const elapsed = Date.now() - start; + + // Immediate: the 429ing primary's eth_sendRawTransaction is attempted + // EXACTLY once (no 5× same-endpoint backoff) and the SAME raw tx is + // accepted by the backup. Bounded well under the 10s broadcast timeout. + expect(primary.hits('eth_sendRawTransaction')).toBe(1); + expect(backup.hits('eth_sendRawTransaction')).toBe(1); + expect(elapsed).toBeLessThan(2_000); + }); + + it('all endpoints 429 on broadcast → RPC_ENDPOINTS_EXHAUSTED, one attempt per endpoint', async () => { + const { signedTx, txHash } = await signTx(); + const primary = trackServer(await startLoopbackRpc({ throttle: ['eth_sendRawTransaction'] })); + const backup = trackServer(await startLoopbackRpc({ throttle: ['eth_sendRawTransaction'] })); + const a = track(new EVMChainAdapter(minimalConfig({ rpcUrl: primary.url, rpcUrls: [backup.url] }))); + + await expect( + (a as any).broadcastSignedTransactionWithFailover(signedTx, txHash, 'unit write'), + ).rejects.toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED' }); + expect(primary.hits('eth_sendRawTransaction')).toBe(1); + expect(backup.hits('eth_sendRawTransaction')).toBe(1); + }); + }); +}); diff --git a/packages/chain/test/readwithfailover-loop.unit.test.ts b/packages/chain/test/readwithfailover-loop.unit.test.ts new file 
mode 100644 index 000000000..0ccb061bf --- /dev/null +++ b/packages/chain/test/readwithfailover-loop.unit.test.ts @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Read-failover LOOP LOGIC for R1's `readWithFailover` — the read-side mirror of + * the write-loop unit tests in evm-adapter.unit.test.ts. Drives the loop with + * bare-object `(a as any).providers = [...]` mocks (the same style the write + * loops use): cheap, fast, no HTTP server, exercises advance-on-retryable / + * exhaustion / single-RPC / non-retryable-throws / host-only logging. + * + * SCOPE NOTE (deliberate): bare-object mocks prove the loop CONTROL FLOW but NOT + * the "immediate / no per-endpoint backoff" property — they reject synchronously, + * bypassing boundedRetryFetchRequest / FetchRequest, so they pass identically + * whether per-endpoint retries is 0 or 5. The retries=0 IMMEDIACY guarantee is + * proven separately with REAL providers in multi-rpc-read-failover.test.ts. This + * file is necessary-but-not-sufficient on its own; the two together are the net. 
+ */ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; +import { isChainRpcTransportError } from '../src/chain-rpc-transport-error.js'; +import { getRpcFailoverStats, _resetRpcFailoverStatsForTest } from '../src/rpc-failover-log.js'; +import { RPC_LOG_SCAN_TIMEOUT_MS, RPC_READ_STALL_TIMEOUT_MS } from '../src/evm-adapter-constants.js'; + +const PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; +const HUB = '0x0000000000000000000000000000000000000001'; + +function recorder(impl: (...args: A) => R) { + const calls: A[] = []; + const fn = (...args: A): R => { calls.push(args); return impl(...args); }; + return Object.assign(fn, { calls }); +} + +function minimalConfig(overrides: Partial = {}): EVMAdapterConfig { + return { + rpcUrl: 'https://primary.example', + privateKey: PK, + hubAddress: HUB, + chainId: 'evm:31337', + allowNoAdminSigner: true, + ...overrides, + }; +} + +// The constructor builds REAL JsonRpcProviders over the (fake) URLs + wires +// error-listener network detection; destroy() them immediately so no socket / +// detection promise lingers ("Vite server not exiting" → flaky CI exit 1), THEN +// override `this.providers` with the bare-object mocks below. +function freshAdapter(cfg: EVMAdapterConfig): EVMChainAdapter { + const a = new EVMChainAdapter(cfg); + try { a.destroy(); } catch { /* idempotent; never dialled */ } + return a; +} + +const retryable429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; }; +const callExceptionErr = () => { const e = new Error('execution reverted'); (e as any).code = 'CALL_EXCEPTION'; return e; }; + +// readWithFailover is protected; reach it via the same `as any` convention the +// rest of the chain suite uses. `fn` receives the bare provider; we give each +// mock provider a single `read()` method. 
+function readWithFailover(a: EVMChainAdapter, fn: (p: any) => Promise, label = 'unit read'): Promise { + return (a as any).readWithFailover(label, fn); +} + +describe('readWithFailover — read-failover loop logic (bare-mock, R1)', () => { + beforeEach(() => { _resetRpcFailoverStatsForTest(); }); + afterEach(() => { _resetRpcFailoverStatsForTest(); }); + + it('advances to the next provider on a retryable error and serves its result', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: recorder(async () => { throw retryable429(); }) }; + const backup = { read: recorder(async () => 'served-by-backup') }; + (a as any).providers = [primary, backup]; + + await expect(readWithFailover(a, (p) => p.read())).resolves.toBe('served-by-backup'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toHaveLength(1); + }); + + it('exhausts ALL endpoints → ChainRpcTransportError RPC_ENDPOINTS_EXHAUSTED, one attempt each', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: recorder(async () => { throw retryable429(); }) }; + const backup = { read: recorder(async () => { throw retryable429(); }) }; + (a as any).providers = [primary, backup]; + + let thrown: any; + try { await readWithFailover(a, (p) => p.read()); } catch (e) { thrown = e; } + expect(thrown).toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED', rpcUrls: ['https://primary.example', 'https://backup.example'] }); + expect(isChainRpcTransportError(thrown)).toBe(true); + // HOST-ONLY aggregate message: names hosts, never the full https:// URL. 
+ expect(thrown.message).toContain('primary.example'); + expect(thrown.message).not.toContain('https://'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toHaveLength(1); + }); + + it('single-RPC: a retryable failure still stamps RPC_ENDPOINTS_EXHAUSTED but keeps the original message verbatim', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); + const only = { read: recorder(async () => { throw new Error('connect ECONNREFUSED 127.0.0.1:8545'); }) }; + (a as any).providers = [only]; + + let thrown: any; + try { await readWithFailover(a, (p) => p.read()); } catch (e) { thrown = e; } + expect(thrown).toMatchObject({ code: 'RPC_ENDPOINTS_EXHAUSTED' }); + // No second endpoint → message stays byte-identical (no "all endpoints" aggregate). + expect(thrown.message).toBe('connect ECONNREFUSED 127.0.0.1:8545'); + expect(thrown.message).not.toContain('all configured RPC endpoints'); + expect(only.read.calls).toHaveLength(1); + }); + + it('does NOT fail over a deterministic non-retryable error (CALL_EXCEPTION) — throws it, backup untouched', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const err = callExceptionErr(); + const primary = { read: recorder(async () => { throw err; }) }; + const backup = { read: recorder(async () => 'should-not-be-reached') }; + (a as any).providers = [primary, backup]; + + await expect(readWithFailover(a, (p) => p.read())).rejects.toBe(err); + expect(backup.read.calls).toEqual([]); + }); + + it('logs each failover hop HOST-ONLY and bumps the process-wide counters', async () => { + const a = freshAdapter(minimalConfig({ + rpcUrl: 'https://primary.example/v1/SECRET-KEY', rpcUrls: ['https://backup.example'], + })); + const primary = { read: recorder(async () => { throw retryable429(); }) }; + const backup = { read: recorder(async () => 'ok') }; + (a as any).providers = [primary, backup]; + + const 
warnings: string[] = []; + const origWarn = console.warn; + console.warn = ((...args: unknown[]) => { warnings.push(String(args[0])); }) as typeof console.warn; + try { + await expect(readWithFailover(a, (p) => p.read())).resolves.toBe('ok'); + } finally { + console.warn = origWarn; + } + + const failoverLines = warnings.filter((w) => w.includes('RPC failover')); + expect(failoverLines.length).toBeGreaterThanOrEqual(1); + const joined = failoverLines.join('\n'); + expect(joined).toContain('primary.example'); + expect(joined).not.toContain('SECRET-KEY'); + expect(joined).not.toContain('://'); + + const stats = getRpcFailoverStats(); + expect(stats.failovers).toBeGreaterThanOrEqual(1); + expect(Object.keys(stats.byEndpointHost).every((h) => !h.includes('SECRET-KEY') && !h.includes('://'))).toBe(true); + }); +}); + +// Regression for the log-scan cap fix (adversarial-review find): the 4s +// point-read cap was aborting WIDE events.ts queryFilter/getLogs reads (9000-block +// poller ranges legitimately >4s) → threw RPC_ENDPOINTS_EXHAUSTED before the +// publisher poller advanced its cursor → permanent stall. Fix: +// `multiAttemptTimeoutMs` (caps MULTI-RPC attempts only) + RPC_LOG_SCAN_TIMEOUT_MS +// (30s) on the 9 events.ts reads; SINGLE-RPC stays uncapped (#894 — nothing to +// fail over to). Fake timers throughout — no real 5s/30s sleeps. +describe('readWithFailover — per-attempt cap (log-scan stall fix)', () => { + // A read whose promise resolves after `ms` of (fake) time. 
+ const delayedRead = (ms: number, value: string) => + recorder(() => new Promise((resolve) => { setTimeout(() => resolve(value), ms); })); + + afterEach(() => { vi.useRealTimers(); }); + + it('MULTI-RPC: a wide read >4s but <30s COMPLETES on the primary with multiAttemptTimeoutMs (not aborted, no failover)', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: delayedRead(5_000, 'PRIMARY') }; + const backup = { read: recorder(async () => 'BACKUP') }; + (a as any).providers = [primary, backup]; + + const p = (a as any).readWithFailover('log scan', (pr: any) => pr.read(), { multiAttemptTimeoutMs: RPC_LOG_SCAN_TIMEOUT_MS }); + await vi.advanceTimersByTimeAsync(6_000); // past the 5s read, well under the 30s cap + expect(await p).toBe('PRIMARY'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toEqual([]); // completed on the primary → backup never consulted + }); + + it('MULTI-RPC: the SAME wide read under the DEFAULT point-read cap aborts at ~4s and fails over (proves the cap matters)', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const primary = { read: delayedRead(5_000, 'PRIMARY') }; + const backup = { read: recorder(async () => 'BACKUP') }; + (a as any).providers = [primary, backup]; + + const p = (a as any).readWithFailover('point read', (pr: any) => pr.read()); // no opt → RPC_READ_STALL_TIMEOUT_MS (4s) + await vi.advanceTimersByTimeAsync(RPC_READ_STALL_TIMEOUT_MS + 1_500); // primary times out at 4s → fail over + expect(await p).toBe('BACKUP'); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toHaveLength(1); + }); + + it('SINGLE-RPC: multiAttemptTimeoutMs NEVER caps — a >30s healthy read still completes, no abort (#894)', async () => { + vi.useFakeTimers(); + const a = 
freshAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); // single endpoint + const only = { read: delayedRead(35_000, 'ONLY') }; + (a as any).providers = [only]; + + const p = (a as any).readWithFailover('log scan', (pr: any) => pr.read(), { multiAttemptTimeoutMs: RPC_LOG_SCAN_TIMEOUT_MS }); + await vi.advanceTimersByTimeAsync(36_000); // would exceed the 30s cap IF it applied to single-RPC + expect(await p).toBe('ONLY'); // uncapped → completes + expect(only.read.calls).toHaveLength(1); + }); + + it('precedence: attemptTimeoutMs caps EVEN single-RPC (fail-open funding reads) — an over-budget read aborts → exhausted', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); + const only = { read: delayedRead(5_000, 'ONLY') }; + (a as any).providers = [only]; + + const settled = (a as any).readWithFailover('funding', (pr: any) => pr.read(), { attemptTimeoutMs: 1_000 }) + .then((r: unknown) => r, (e: unknown) => e); + await vi.advanceTimersByTimeAsync(1_500); // exceeds the 1s hard cap → aborts; single → exhausted + const outcome: any = await settled; + expect(outcome.code).toBe('RPC_ENDPOINTS_EXHAUSTED'); + expect(only.read.calls).toHaveLength(1); + }); +}); + +// #2 review fix: contractReadWithFailover DEFAULTS its failover classifier to +// isContractViewRetryable = isRetryableRpcError MINUS BAD_DATA. A contract VIEW's +// BAD_DATA ("could not decode result data") is a DETERMINISTIC client-side decode, +// not an RPC outage — failing over would re-hit the same decode on every endpoint +// and mask it as RPC_ENDPOINTS_EXHAUSTED (the pre-PR FallbackProvider never failed +// over on a post-decode error). So BAD_DATA must surface DIRECTLY (no failover), +// while a real transient (429) still fails over; opts.isRetryable overrides it. 
+describe('contractReadWithFailover — per-provider rebinding + view classifier (B-2, #2/#3)', () => { + const badDataError = () => { + const e: any = new Error('could not decode result data (value="0x", code=BAD_DATA)'); + e.code = 'BAD_DATA'; + return e; + }; + // A contract whose `.connect(provider)` returns a PROVIDER-SPECIFIC view double + // (round-2 rebindContract = contract.connect(p), no fallback). This PROVES the + // failover loop rebinds to the BACKUP provider's contract — a regression that + // re-ran the primary-bound contract would call primaryView twice, never backupView. + const perProviderContract = (byProvider: (p: unknown) => { view: ReturnType }) => + ({ connect: (p: unknown) => byProvider(p) }) as any; + + it('a BAD_DATA view decode is NON-retryable → surfaces DIRECTLY, NO failover (backup-connected view never called)', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const p0 = {}; const p1 = {}; + (a as any).providers = [p0, p1]; + const bad = badDataError(); + const primaryView = recorder(async () => { throw bad; }); + const backupView = recorder(async () => 'BACKUP-RESULT'); + const contract = perProviderContract((p) => (p === p0 ? 
{ view: primaryView } : { view: backupView })); + + await expect((a as any).contractReadWithFailover('someView', contract, (c: any) => c.view())) + .rejects.toBe(bad); // the ORIGINAL BAD_DATA, NOT a ChainRpcTransportError/RPC_ENDPOINTS_EXHAUSTED + expect(primaryView.calls).toHaveLength(1); // primary-connected view called once + expect(backupView.calls).toEqual([]); // deterministic → backup-connected view NEVER consulted + }); + + it('a real transient (429) IS retryable → REBINDS to and is served by the BACKUP provider\'s view', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const p0 = {}; const p1 = {}; + (a as any).providers = [p0, p1]; + const primaryView = recorder(async () => { const e: any = new Error('429 too many requests'); e.status = 429; throw e; }); + const backupView = recorder(async () => 'BACKUP-RESULT'); + const contract = perProviderContract((p) => (p === p0 ? { view: primaryView } : { view: backupView })); + + await expect((a as any).contractReadWithFailover('someView', contract, (c: any) => c.view())).resolves.toBe('BACKUP-RESULT'); + // B-2: the loop rebound to the BACKUP provider's contract — proven by the + // result coming from backupView, and primaryView hit exactly once (not twice). + expect(primaryView.calls).toHaveLength(1); + expect(backupView.calls).toHaveLength(1); + }); + + it('an explicit opts.isRetryable OVERRIDES the isContractViewRetryable default (BAD_DATA → rebinds to the BACKUP view)', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const p0 = {}; const p1 = {}; + (a as any).providers = [p0, p1]; + const primaryView = recorder(async () => { throw badDataError(); }); + const backupView = recorder(async () => 'BACKUP-RESULT'); + const contract = perProviderContract((p) => (p === p0 ? 
{ view: primaryView } : { view: backupView })); + + await expect( + (a as any).contractReadWithFailover('someView', contract, (c: any) => c.view(), { isRetryable: () => true }), + ).resolves.toBe('BACKUP-RESULT'); + expect(primaryView.calls).toHaveLength(1); + expect(backupView.calls).toHaveLength(1); // default overridden → BAD_DATA failed over to the backup view + }); + + it('readWithFailover honours a custom opts.isRetryable that makes a normally-retryable 429 NON-retryable (surfaces it, no failover)', async () => { + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + const err429 = (() => { const e: any = new Error('429 too many requests'); e.status = 429; return e; })(); + const primary = { read: recorder(async () => { throw err429; }) }; + const backup = { read: recorder(async () => 'BACKUP') }; + (a as any).providers = [primary, backup]; + + // Custom classifier: nothing is retryable → the 429 surfaces directly, no failover. + await expect((a as any).readWithFailover('t', (p: any) => p.read(), { isRetryable: () => false })) + .rejects.toBe(err429); + expect(primary.read.calls).toHaveLength(1); + expect(backup.read.calls).toEqual([]); + }); +}); + +// B-5 (#894 guard): the CONSTRUCTOR wires the per-endpoint FetchRequest retry +// budget from the endpoint count — SINGLE-RPC keeps the bounded retry (its only +// resilience; nothing to fail over to), MULTI-RPC uses 0 (the explicit adapter +// failover advances on the first error). A regression to perEndpointRetries=0 +// for a single endpoint would surface RPC_ENDPOINTS_EXHAUSTED on the first 429. +// We inspect the constructed provider's real retryFunc directly (deterministic + +// fast) rather than driving ~7.5s of real loopback backoff — the behavioural +// real-429 path is already covered by evm-adapter.unit.test.ts's perpetual-429 +// block; this pins the wiring robustly. 
+describe('constructor RPC-retry budget wiring (#894 single-RPC vs multi-RPC, B-5)', () => { + afterEach(() => { vi.useRealTimers(); }); + const retryFuncOf = (a: EVMChainAdapter) => + (a.getProvider() as unknown as { + _getConnection: () => { retryFunc?: (r: unknown, x: unknown, n: number) => Promise }; + })._getConnection().retryFunc!; + + it('SINGLE-RPC: the one provider keeps the bounded retry budget (retries, NOT 0)', async () => { + const a = new EVMChainAdapter(minimalConfig({ rpcUrl: 'https://only.example' })); + try { + const retry = retryFuncOf(a); + expect(typeof retry).toBe('function'); + vi.useFakeTimers(); + const p0 = retry({}, {}, 0); await vi.advanceTimersByTimeAsync(2_000); expect(await p0).toBe(true); // retries + const p4 = retry({}, {}, 4); await vi.advanceTimersByTimeAsync(2_000); expect(await p4).toBe(true); // ...through the budget + expect(await retry({}, {}, 5)).toBe(false); // bounded → eventually RPC_ENDPOINTS_EXHAUSTED (#894) + } finally { + a.destroy(); + } + }); + + it('MULTI-RPC: each provider gives up at attempt 0 (retries=0) so the explicit failover advances at once', async () => { + const a = new EVMChainAdapter(minimalConfig({ rpcUrl: 'https://a.example', rpcUrls: ['https://b.example'] })); + try { + expect(await retryFuncOf(a)({}, {}, 0)).toBe(false); + } finally { + a.destroy(); + } + }); +}); + +// B-6: listenForEvents' wide eth_getLogs reads go through queryFilterWithFailover, +// which bakes in LOG_SCAN_OPTS (multiAttemptTimeoutMs = RPC_LOG_SCAN_TIMEOUT_MS, +// 30s) so a slow-but-healthy getLogs (a 9000-block poller range) isn't aborted by +// the 4s point-read cap. Exercises the REAL events.ts path (not readWithFailover +// directly) so dropping LOG_SCAN_OPTS from a branch would re-introduce the stall. 
+describe('listenForEvents — wide getLogs honours the 30s LOG_SCAN cap (B-6)', () => { + afterEach(() => { vi.useRealTimers(); }); + + it('a queryFilter that resolves at >4s but <30s COMPLETES (not aborted at the 4s point-read cap)', async () => { + vi.useFakeTimers(); + const a = freshAdapter(minimalConfig({ rpcUrl: 'https://primary.example', rpcUrls: ['https://backup.example'] })); + (a as any).initialized = true; // listenForEvents awaits init() first + (a as any).providers = [{}, {}]; // MULTI-RPC → the per-attempt cap applies + const log = { + topics: ['0x' + '00'.repeat(32)], data: '0x', blockNumber: 1, + transactionHash: '0x' + '11'.repeat(32), transactionIndex: 0, + }; + const parsed = { args: { batchId: 1n, publisher: '0x' + '22'.repeat(20), merkleRoot: '0x' + '33'.repeat(32), startKAId: 1n, endKAId: 1n } }; + // A wide getLogs that takes 5s (>4s point-read cap, <30s LOG_SCAN cap). + const queryFilter = recorder(() => new Promise((resolve) => { setTimeout(() => resolve([log]), 5_000); })); + const storage: any = { + connect: () => storage, // rebindContract(storage, p) = storage.connect(p) + filters: { KnowledgeBatchCreated: () => 'F' }, + interface: { parseLog: () => parsed }, + queryFilter, + }; + (a as any).contracts.knowledgeAssetsStorage = storage; + + const collected: Array<{ type: string }> = []; + const done = (async () => { + for await (const ev of a.listenForEvents({ eventTypes: ['KnowledgeBatchCreated'], fromBlock: 0 } as any)) { + collected.push(ev as { type: string }); + } + })(); + await vi.advanceTimersByTimeAsync(6_000); // past the 5s getLogs, under the 30s LOG_SCAN cap + await done; + + expect(queryFilter.calls).toHaveLength(1); // completed on the first endpoint, NOT aborted+failed-over + expect(collected).toHaveLength(1); + expect(collected[0].type).toBe('KnowledgeBatchCreated'); + }); +}); diff --git a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts index 
cf7d13969..c1b5cd9a6 100644 --- a/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts +++ b/packages/chain/test/v10-update-ack-digest-parity.unit.test.ts @@ -16,6 +16,7 @@ import { describe, it, expect } from 'vitest'; import { ethers } from 'ethers'; import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js'; import { computeUpdateACKDigest } from '@origintrail-official/dkg-core'; +import { connectable } from './connectable.js'; const DEPLOYER_PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; const ADMIN_PK = '0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a'; @@ -46,16 +47,21 @@ function makeStubbedAdapter(opts: { }) { const a = new EVMChainAdapter(minimalConfig()); (a as any).initialized = true; - (a as any).provider = { - getNetwork: async () => ({ chainId: TEST_CHAIN_ID }), - }; - (a as any).contracts.knowledgeAssetsLifecycle = { + // R1: getEvmChainId reads chainId via readWithFailover over this.providers[0] + // (=== this.provider in prod). Set the mock on BOTH so the digest's chainId + // read resolves to the stub instead of dialling the placeholder RPC. + const provider = { getNetwork: async () => ({ chainId: TEST_CHAIN_ID }) }; + (a as any).provider = provider; + (a as any).providers = [provider]; + // The contract view reads go through contractReadWithFailover → rebindContract + // (contract.connect(p)), so the contract stubs must be .connect-able. 
+ (a as any).contracts.knowledgeAssetsLifecycle = connectable({ getAddress: async () => KAV10_ADDRESS, - }; - (a as any).contracts.contextGraphStorage = { + }); + (a as any).contracts.contextGraphStorage = connectable({ kaToContextGraph: async () => opts.contextGraphId, - }; - (a as any).contracts.knowledgeAssetStorage = { + }); + (a as any).contracts.knowledgeAssetStorage = connectable({ getMerkleRoots: async () => new Array(Number(opts.preUpdateMerkleRootCount)).fill('0x00'), getTokenAmount: async () => opts.currentTokenAmount, // (preUpdateMerkleRootCount, minted, byteSize, endEpoch, tokenAmount, isImmutable, preUpdateMerkleLeafCount) @@ -68,7 +74,7 @@ function makeStubbedAdapter(opts: { false, 0n, ], - }; + }); return a; } diff --git a/packages/chain/test/write-tx-safety-invariants.unit.test.ts b/packages/chain/test/write-tx-safety-invariants.unit.test.ts new file mode 100644 index 000000000..7fe693937 --- /dev/null +++ b/packages/chain/test/write-tx-safety-invariants.unit.test.ts @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Write-path TX-SAFETY invariants (TxSafetyReviewer's contract for the + * immediate-failover change). The existing ~170 adapter failover tests inject + * bare-object providers that bypass the real broadcast/receipt path — a FALSE + * GREEN for tx-safety. These drive the REAL + * `dispatchSerializedV10Write → sendSignedTransactionAndWait → + * broadcastSignedTransactionWithFailover / waitForReceiptWithFailover` path + * (the seam where the S2 set-retry will live), stubbing ONLY the providers' + * `broadcastTransaction`/`getTransactionReceipt`, so the sign / WAL call-counts + * are observable. This is the evm-adapter-nonce-serialization seam MINUS its + * `sendSignedTransactionAndWait` mock, so the failover (and future set-retry) is + * actually exercised. 
+ *
+ * NOW (current write path): INV-1 single-sign across a broadcast-failover sweep,
+ * INV-3 WAL-once + ordering, INV-4 broadcast idempotency, INV-5
+ * reverted-receipt-no-resubmit + non-retryable-no-failover.
+ *
+ * S2 set-retry (landed inside sendSignedTransactionAndWait, so that suite runs
+ * live, not skipped): INV-1/2/3 across the set-retry MULTI-PASS, INV-5/C2, INV-6
+ * nonce/lock under set-retry. STAGED (describe.skip) for the S3 breaker: INV-8.
+ * The set-retry-multi assertions (buildSignedTx==1 / onBroadcast==1 regardless of
+ * pass count) are the headline regressions.
+ */
+import { describe, it, expect, vi } from 'vitest';
+import { ethers } from 'ethers';
+import { EVMChainAdapter, type EVMAdapterConfig } from '../src/evm-adapter.js';
+import { RPC_ENDPOINT_SET_RETRIES, RPC_ENDPOINT_SET_RETRY_BACKOFF_MS } from '../src/evm-adapter-constants.js';
+
+const PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80';
+const HUB = '0x0000000000000000000000000000000000000001';
+
+/**
+ * Wraps `impl` in a call recorder: every invocation appends its argument tuple
+ * to `.calls` and then delegates to `impl`, returning whatever it returns.
+ *
+ * `A` is the argument tuple and `R` the return type, both inferred from `impl`,
+ * so `.calls` is typed as `A[]`.
+ */
+function recorder<A extends unknown[], R>(impl: (...args: A) => R) {
+  const calls: A[] = [];
+  const fn = (...args: A): R => { calls.push(args); return impl(...args); };
+  return Object.assign(fn, { calls });
+}
+
+/**
+ * Baseline adapter config with a primary and one backup RPC URL.
+ * `overrides` is spread last, so any field (e.g. `rpcUrls: []`) can be replaced.
+ */
+function minimalConfig(overrides: Partial<EVMAdapterConfig> = {}): EVMAdapterConfig {
+  return {
+    rpcUrl: 'https://primary.example',
+    rpcUrls: ['https://backup.example'],
+    privateKey: PK,
+    hubAddress: HUB,
+    chainId: 'evm:31337',
+    allowNoAdminSigner: true,
+    ...overrides,
+  };
+}
+
+// The constructor builds REAL JsonRpcProviders over the (fake) URLs and wires
+// error-listener network detection on them. We immediately destroy() those so
+// no keep-alive socket / detection promise lingers ("Vite server not exiting" →
+// intermittent CI exit 1), THEN the test overrides `this.providers` with stubs.
+function freshAdapter(cfg: EVMAdapterConfig): EVMChainAdapter {
+  const a = new EVMChainAdapter(cfg);
+  try { a.destroy(); } catch { /* idempotent; never dialled */ }
+  return a;
+}
+
+// Shared fixtures: one fixed "signed" payload and hash, so a test can assert
+// that every endpoint received the identical raw transaction.
+const SIGNED = '0xSIGNEDTX';
+const TXHASH = '0x' + '11'.repeat(32);
+// Receipt stub carrying only the fields set here; `status` 1 = success, 0 = reverted.
+const fakeReceipt = (status: number) =>
+  ({ hash: TXHASH, blockNumber: 1, index: 0, status, logs: [] }) as unknown as ethers.TransactionReceipt;
+// Error shaped like an HTTP 429 (message + numeric `status`).
+const retryable429 = () => { const e = new Error('429 too many requests'); (e as any).status = 429; return e; };
+// Passed as the last dispatch argument; it always throws, so reaching it fails the test.
+const neverNull = (pre: string): never => { throw new Error(`unexpected null receipt for ${pre}`); };
+
+// Drive the REAL dispatch → send → broadcast/receipt path. `buildSignedTx` and
+// `onBroadcast` are spied; only the providers are stubbed.
+// When `buildSignedTx` is omitted it defaults to a stub returning
+// { SIGNED, TXHASH }. The write is always dispatched as 'publish' with a wallet
+// built from PK, and resolves/rejects with whatever the adapter's dispatch does.
+async function runDispatch(a: EVMChainAdapter, opts: {
+  onBroadcast?: (info: { txHash: string }) => Promise<void> | void;
+  buildSignedTx?: () => Promise<{ signedTx: string; txHash: string }>;
+}) {
+  const signer = new ethers.Wallet(PK);
+  const buildSignedTx = opts.buildSignedTx ?? (async () => ({ signedTx: SIGNED, txHash: TXHASH }));
+  return (a as any).dispatchSerializedV10Write(signer, 'publish', opts.onBroadcast, buildSignedTx, neverNull);
+}
+
+describe('write-path tx-safety invariants (real dispatch/broadcast/receipt path)', () => {
+  it('INV-1: signs EXACTLY once across a broadcast-failover sweep (#1 429 → #2 accepts)', async () => {
+    const a = freshAdapter(minimalConfig());
+    const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH }));
+    const onBroadcast = recorder(async () => undefined);
+    const primary = {
+      broadcastTransaction: recorder(async () => { throw retryable429(); }),
+      getTransactionReceipt: recorder(async () => null),
+    };
+    const backup = {
+      broadcastTransaction: recorder(async () => ({ hash: TXHASH })),
+      getTransactionReceipt: recorder(async () => fakeReceipt(1)),
+    };
+    (a as any).providers = [primary, backup];
+
+    const receipt = await runDispatch(a, { onBroadcast, buildSignedTx });
+    expect(receipt.status).toBe(1);
+    // The signed tx is built ONCE even though broadcast failed over to #2.
+    expect(buildSignedTx.calls).toHaveLength(1);
+    expect(onBroadcast.calls).toHaveLength(1);
+    // The SAME signed raw tx was handed to both endpoints (no re-sign).
+    expect(primary.broadcastTransaction.calls).toEqual([[SIGNED]]);
+    expect(backup.broadcastTransaction.calls).toEqual([[SIGNED]]);
+  });
+
+  it('INV-3: WAL onBroadcast fires exactly once and STRICTLY before any broadcast', async () => {
+    const a = freshAdapter(minimalConfig({ rpcUrls: [] })); // single endpoint, happy path
+    const timeline: string[] = [];
+    const onBroadcast = recorder(async () => { timeline.push('wal'); });
+    const only = {
+      broadcastTransaction: recorder(async () => { timeline.push('broadcast'); return { hash: TXHASH }; }),
+      getTransactionReceipt: recorder(async () => fakeReceipt(1)),
+    };
+    (a as any).providers = [only];
+
+    await runDispatch(a, { onBroadcast });
+    expect(onBroadcast.calls).toHaveLength(1);
+    expect(timeline[0]).toBe('wal');
+    expect(timeline.indexOf('wal')).toBeLessThan(timeline.indexOf('broadcast'));
+  });
+
+  it('INV-4: broadcast idempotency — #1 transient error → #2 "already known" → success, single signed tx', async () => {
+    const a = freshAdapter(minimalConfig());
+    const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH }));
+    const primary = {
+      broadcastTransaction: recorder(async () => { throw retryable429(); }),
+      getTransactionReceipt: recorder(async () => fakeReceipt(1)),
+    };
+    const backup = {
+      broadcastTransaction: recorder(async () => { throw new Error('already known'); }),
+      getTransactionReceipt: recorder(async () => fakeReceipt(1)),
+    };
+    (a as any).providers = [primary, backup];
+
+    const receipt = await runDispatch(a, { buildSignedTx });
+    expect(receipt.status).toBe(1);
+    // "already known" on #2 is treated as accepted — no error surfaced.
+    // Exactly one signed tx; the SAME raw tx hit both endpoints (no 2nd distinct tx).
+    expect(buildSignedTx.calls).toHaveLength(1);
+    expect(primary.broadcastTransaction.calls).toEqual([[SIGNED]]);
+    expect(backup.broadcastTransaction.calls).toEqual([[SIGNED]]);
+  });
+
+  it('INV-5a: a mined REVERTED receipt (status=0) throws CALL_EXCEPTION with NO resubmit', async () => {
+    const a = freshAdapter(minimalConfig({ rpcUrls: [] }));
+    const only = {
+      broadcastTransaction: recorder(async () => ({ hash: TXHASH })),
+      getTransactionReceipt: recorder(async () => fakeReceipt(0)), // mined, reverted
+    };
+    (a as any).providers = [only];
+
+    await expect(runDispatch(a, {})).rejects.toMatchObject({ code: 'CALL_EXCEPTION' });
+    // Broadcast happened once; the revert must NOT trigger a resubmit.
+    expect(only.broadcastTransaction.calls).toHaveLength(1);
+  });
+
+  it('INV-5b: a non-retryable build/sign error throws BEFORE any broadcast or WAL checkpoint', async () => {
+    const a = freshAdapter(minimalConfig());
+    const onBroadcast = recorder(async () => undefined);
+    const buildSignedTx = recorder(async () => {
+      const e = new Error('insufficient funds for gas'); (e as any).code = 'INSUFFICIENT_FUNDS'; throw e;
+    });
+    const primary = {
+      broadcastTransaction: recorder(async () => ({ hash: TXHASH })),
+      getTransactionReceipt: recorder(async () => fakeReceipt(1)),
+    };
+    (a as any).providers = [primary];
+
+    await expect(runDispatch(a, { onBroadcast, buildSignedTx })).rejects.toThrow(/insufficient funds/i);
+    // Fails closed: no WAL checkpoint, no broadcast.
+    expect(onBroadcast.calls).toEqual([]);
+    expect(primary.broadcastTransaction.calls).toEqual([]);
+  });
+});
+
+// S2 set-retry lives INSIDE sendSignedTransactionAndWait (base:1073), wrapping
+// broadcastSignedTransactionWithFailover ONLY, up to RPC_ENDPOINT_SET_RETRIES
+// extra full passes with a fixed RPC_ENDPOINT_SET_RETRY_BACKOFF_MS sleep between
+// (a non-injectable const → fake timers). It re-broadcasts the SAME
+// {signedTx,txHash}; the seam is signer-free so re-sign is structurally
+// impossible (INV-2). These are the headline tx-safety regressions.
+describe('write-path tx-safety invariants — set-retry MULTI-PASS (S2)', () => {
+  // Spy broadcastSignedTransactionWithFailover (the PASS-count seam, per TxSafety:
+  // raw provider.broadcastTransaction is called providers.length× PER pass) while
+  // still running the real per-provider broadcast loop underneath.
+  function spyBroadcastPasses(a: EVMChainAdapter) {
+    const orig = (a as any).broadcastSignedTransactionWithFailover.bind(a);
+    const spy = recorder((...args: unknown[]) => orig(...args));
+    (a as any).broadcastSignedTransactionWithFailover = spy;
+    return spy;
+  }
+
+  it('INV-1/2/3 + C3: all-exhaust set-retry — signs once / WAL once / re-broadcasts the SAME tx for setRetries+1 passes', async () => {
+    vi.useFakeTimers();
+    try {
+      const a = freshAdapter(minimalConfig());
+      const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH }));
+      const onBroadcast = recorder(async () => undefined);
+      const throttled = () => ({
+        broadcastTransaction: recorder(async (_raw: string) => { throw retryable429(); }),
+        getTransactionReceipt: recorder(async () => null),
+      });
+      const p0 = throttled(); const p1 = throttled();
+      (a as any).providers = [p0, p1];
+      const broadcastPasses = spyBroadcastPasses(a);
+
+      // Convert the outcome to a value up front so a rejection that lands while
+      // the fake clock is being advanced is already handled.
+      const settled = runDispatch(a, { onBroadcast, buildSignedTx }).then(() => 'ok', (e) => e);
+      await vi.advanceTimersByTimeAsync((RPC_ENDPOINT_SET_RETRIES + 1) * RPC_ENDPOINT_SET_RETRY_BACKOFF_MS + 50);
+      const outcome: any = await settled;
+
+      expect(outcome.code).toBe('RPC_ENDPOINTS_EXHAUSTED');
+      // PASS count = setRetries + 1 (one initial pass + the bounded retries).
+      expect(broadcastPasses.calls).toHaveLength(RPC_ENDPOINT_SET_RETRIES + 1);
+      expect(buildSignedTx.calls).toHaveLength(1); // INV-1/2: signed once across all passes
+      expect(onBroadcast.calls).toHaveLength(1); // INV-3: WAL fired once, never re-fired
+      // C3 (idempotency root): every broadcast got the byte-identical SIGNED tx.
+      const allRaw = [...p0.broadcastTransaction.calls, ...p1.broadcastTransaction.calls];
+      expect(allRaw.length).toBeGreaterThan(1);
+      expect(allRaw.every((c) => c[0] === SIGNED)).toBe(true);
+    } finally {
+      vi.useRealTimers();
+    }
+  });
+
+  it('INV-1/2: a later set-retry pass that succeeds yields the receipt with buildSignedTx still == 1', async () => {
+    vi.useFakeTimers();
+    try {
+      const a = freshAdapter(minimalConfig());
+      const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH }));
+      const onBroadcast = recorder(async () => undefined);
+      let attempt = 0; // shared across both providers: pass 0 = first 2 attempts (429), pass 1 accepts
+      const provider = () => ({
+        broadcastTransaction: recorder(async (_raw: string) => { attempt += 1; if (attempt <= 2) throw retryable429(); return { hash: TXHASH }; }),
+        getTransactionReceipt: recorder(async () => fakeReceipt(1)),
+      });
+      (a as any).providers = [provider(), provider()];
+      const broadcastPasses = spyBroadcastPasses(a);
+
+      const settled = runDispatch(a, { onBroadcast, buildSignedTx }).then((r) => r, (e) => e);
+      await vi.advanceTimersByTimeAsync((RPC_ENDPOINT_SET_RETRIES + 1) * RPC_ENDPOINT_SET_RETRY_BACKOFF_MS + 50);
+      const receipt: any = await settled;
+
+      expect(receipt.status).toBe(1);
+      expect(broadcastPasses.calls).toHaveLength(2); // pass 0 (exhaust) + pass 1 (accept)
+      expect(buildSignedTx.calls).toHaveLength(1);
+      expect(onBroadcast.calls).toHaveLength(1);
+    } finally {
+      vi.useRealTimers();
+    }
+  });
+
+  it('INV-5/C2: a mined REVERTED receipt does NOT trigger a set-retry re-broadcast (broadcast pass == 1)', async () => {
+    const a = freshAdapter(minimalConfig({ rpcUrls: [] }));
+    const buildSignedTx = recorder(async () => ({ signedTx: SIGNED, txHash: TXHASH }));
+    const only = {
+      broadcastTransaction: recorder(async () => ({ hash: TXHASH })), // broadcast SUCCEEDS
+      getTransactionReceipt: recorder(async () => fakeReceipt(0)), // but mined REVERTED
+    };
+    (a as any).providers = [only];
+    const broadcastPasses = spyBroadcastPasses(a);
+
+    await expect(runDispatch(a, { buildSignedTx })).rejects.toMatchObject({ code: 'CALL_EXCEPTION' });
+    // The set-retry wraps the BROADCAST phase only; the reverted receipt comes
+    // from waitForReceiptWithFailover OUTSIDE the loop → broadcast fired ONCE,
+    // no resubmit of a tx that already executed-and-reverted on chain.
+    expect(broadcastPasses.calls).toHaveLength(1);
+    expect(only.broadcastTransaction.calls).toHaveLength(1);
+    expect(buildSignedTx.calls).toHaveLength(1);
+  });
+
+  it('INV-6: the per-wallet lock is HELD across set-retry passes — a concurrent same-wallet write cannot sign until the first resolves', async () => {
+    vi.useFakeTimers();
+    try {
+      const a = freshAdapter(minimalConfig({ rpcUrls: [] }));
+      const signer = new ethers.Wallet(PK);
+      const events: string[] = [];
+      let attempt = 0;
+      const provider = {
+        // Write-1: 429 on its first broadcast (forces a set-retry backoff), accepts next.
+        broadcastTransaction: recorder(async () => { attempt += 1; if (attempt === 1) throw retryable429(); return { hash: TXHASH }; }),
+        getTransactionReceipt: recorder(async () => fakeReceipt(1)),
+      };
+      (a as any).providers = [provider];
+      const w1build = recorder(async () => { events.push('w1:build'); return { signedTx: SIGNED, txHash: TXHASH }; });
+      const w2build = recorder(async () => { events.push('w2:build'); return { signedTx: '0xW2', txHash: '0x' + '22'.repeat(32) }; });
+
+      // Both writes use the SAME signer and are started back-to-back without awaiting.
+      const w1 = (a as any).dispatchSerializedV10Write(signer, 'publish', undefined, w1build, neverNull);
+      const w2 = (a as any).dispatchSerializedV10Write(signer, 'publish', undefined, w2build, neverNull);
+
+      // Park write-1 mid-backoff: it has built + 429'd + is sleeping. Write-2 must
+      // NOT have started building (the per-wallet lock is held across the set-retry).
+      await vi.advanceTimersByTimeAsync(RPC_ENDPOINT_SET_RETRY_BACKOFF_MS / 2);
+      expect(w1build.calls).toHaveLength(1);
+      expect(w2build.calls).toHaveLength(0);
+
+      await vi.advanceTimersByTimeAsync(RPC_ENDPOINT_SET_RETRY_BACKOFF_MS + 50);
+      await Promise.all([w1, w2]);
+      // Strict end-to-end serialization survives the set-retry.
+      expect(events).toEqual(['w1:build', 'w2:build']);
+    } finally {
+      vi.useRealTimers();
+    }
+  });
+});
+
+describe.skip('write-path tx-safety invariants — circuit breaker (S3, un-skip when breaker lands)', () => {
+  // TODO(S3): mark every host cooling-down in the process-wide breaker, then a
+  // write must still attempt ≥1 broadcast (eligible-set never empties to a
+  // zero-attempt RPC_ENDPOINTS_EXHAUSTED).
+  // Registered as a todo rather than a body asserting `true`, so un-skipping
+  // this suite before the test is written cannot report a vacuous pass.
+  it.todo('INV-8: with all hosts cooling-down, a WRITE still broadcasts to ≥1 endpoint (no zero-attempt exhaustion)');
+});