Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion packages/agent/src/dkg-agent-cg-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,21 @@ export class ContextGraphRegistryMethods extends DKGAgentBase {
* unavailable, contract not deployed, transient errors) are logged
* and the field is left undefined.
*/
async getContextGraphOnChainPolicy(this: DKGAgent, contextGraphId: string): Promise<{
async getContextGraphOnChainPolicy(this: DKGAgent, contextGraphId: string, options?: {
/**
* Force a fresh chain RPC for `publishPolicy`, ignoring the (≤60s-TTL)
* cache. `publishPolicy` is mutable on-chain (`PublishPolicyUpdated`) and the
* agent has no event watcher, so the cache can be stale-PERMISSIVE for up to
* the TTL after an owner downgrades open→curated publish. Most callers (e.g.
* the import-artifact owner guard) tolerate that window, but a SECURITY-
* POSITIVE admission decision (host-mode self-signed plaintext ingest, see
* isConfirmedPublicForHostMode) must NOT — a stale `publishPolicy=1` there
* would admit a non-authorized write. With this set, the publishPolicy cache
* is treated as always-stale so the chain RPC re-verifies; an RPC
* failure/timeout leaves it undefined → the caller fails closed.
*/
forcePublishPolicyChainRead?: boolean;
}): Promise<{
accessPolicy?: number;
publishPolicy?: number;
}> {
Expand All @@ -490,6 +504,9 @@ export class ContextGraphRegistryMethods extends DKGAgentBase {
// escalate privilege (gossip decrypt is gated by sender-key
// issuance) and stale-restrictive only causes a transient deny.
const isPublishPolicyCacheFresh = (key: string): boolean => {
// A security-positive caller can force a fresh chain re-verify (the cache
// may be stale-permissive for up to the TTL after an open→curated downgrade).
if (options?.forcePublishPolicyChainRead) return false;
const fetchedAt = this.onChainPublishPolicyCacheUpdatedAt.get(key);
if (fetchedAt === undefined) return false;
return Date.now() - fetchedAt <= ON_CHAIN_PUBLISH_POLICY_CACHE_TTL_MS;
Expand Down
8 changes: 8 additions & 0 deletions packages/agent/src/dkg-agent-crypto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,14 @@ export class WorkspaceCryptoMethods extends DKGAgentBase {
let fallback: (AgentKeyRecord & { privateKey: string }) | null = null;
for (const record of this.localAgents.values()) {
if (!record.privateKey) continue;
// GH #787 — a node-level key record can carry a privateKey but no (or an
// invalid) agentAddress (an operational identity, not an agent). Such a
// record is NOT a usable gossip signer: encodeWorkspaceGossipMessage emits
// `agentAddress` into the envelope and the downstream host-mode authority
// check rejects a missing/invalid one. Skip it entirely — that both avoids
// the original `toLowerCase()`-of-undefined crash (HTTP 500 on SWM write)
// AND prevents it becoming a fallback that emits an unverifiable envelope.
if (!record.agentAddress || !ethers.isAddress(record.agentAddress)) continue;
const signingRecord = { ...record, privateKey: record.privateKey };
if (defaultAddress && record.agentAddress.toLowerCase() === defaultAddress) {
return signingRecord;
Expand Down
91 changes: 66 additions & 25 deletions packages/agent/src/dkg-agent-swm-host.ts
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,46 @@ export class SwmHostModeMethods extends DKGAgentBase {
}
}

/**
* GH #1124 — DEFINITIVE "fully-open CG" check gating the self-signed public
* host-mode ingest path. "Open" requires BOTH axes, because this codebase
* separates READ visibility from WRITE authority:
* - accessPolicy === 0 → publicly READABLE (SWM is plaintext), AND
* - publishPolicy === 1 → OPEN PUBLISH (anyone may write).
* A public-readable but curated-publish CG (accessPolicy 0, publishPolicy 0 /
* PCA) still restricts WHO may publish, so the self-signed path must NOT apply
* — otherwise any key could store plaintext SWM on host-mode cores and bypass
* the on-chain publisher authorization (otReviewAgent #1239-r3). Curated OR
* unknown on EITHER axis → false: the conservative ciphertext + allowlist gates
* stay in force and a chain-event race heals via member catchup, so a curated
* (or restricted-publish) CG is never misclassified as self-publishable.
*/
async isConfirmedPublicForHostMode(this: DKGAgent, contextGraphId: string): Promise<boolean> {
// Resolve via the SHARED on-chain policy resolver rather than a direct
// cleartext `subscribedContextGraphs` lookup. `getContextGraphOnChainPolicy`
// re-keys cleartext↔on-chain-id, consults the cache + local `_meta`, AND
// falls back to a direct chain RPC — so it resolves BOTH policies even for a
// host-only core keyed by the wire HASH with no local `_meta` (the #1124
// sharded topology). Both must positively resolve to their open value; any
// undefined (unknown) → false (safe).
try {
// `forcePublishPolicyChainRead`: publishPolicy is mutable on-chain and the
// cache is only ≤60s-TTL'd, so it could be stale-PERMISSIVE for up to the
// TTL after an owner downgrades open→curated publish. This is a
// security-positive gate (it admits a self-signed plaintext write that host
// catchup later applies under trustedReplay), so it must re-verify the
// publishPolicy on-chain rather than trust the cached value. An RPC
// failure/timeout leaves publishPolicy undefined → we fail CLOSED (drop;
// the share heals via retry/catchup once the policy re-resolves).
const { accessPolicy, publishPolicy } = await this.getContextGraphOnChainPolicy(
contextGraphId, { forcePublishPolicyChainRead: true },
);
return accessPolicy === 0 && publishPolicy === 1;
} catch {
return false;
}
}

/**
* Register the host-mode gossip handler for `contextGraphId` and
* track its reference so {@link unwireSwmHostModeHandler} can
Expand Down Expand Up @@ -1009,33 +1049,35 @@ export class SwmHostModeMethods extends DKGAgentBase {
isCiphertext = skm.type === SWM_SENDER_KEY_MESSAGE_TYPE;
} catch { /* fall through */ }
}
if (!isCiphertext) return;

// Authority check: verify the envelope signature against the
// curated CG's agent allowlist. Without this, a topic-reachable
// peer can fill per-CG storage with valid-looking ciphertext
// and evict legitimate history.
// GH #1124 — a curated CG MUST carry ciphertext, so a non-ciphertext
// envelope there is garbage → drop early. A CONFIRMED-public (open) CG
// legitimately gossips PLAINTEXT SWM. Resolve the public flag ONCE
// (key-independent — see isConfirmedPublicForHostMode) and reuse it for both
// the plaintext gate and the authority check. UNKNOWN CGs stay on the drop
// path (safe; member catchup heals once the policy resolves).
const confirmedPublic = await this.isConfirmedPublicForHostMode(storageCgId);
if (!isCiphertext && !confirmedPublic) return;

// Authority check. Curated traffic verifies the envelope signature against
// the CG's agent allowlist. A confirmed-public CG has no allowlist, so pass
// `allowSelfSignedForPublicCg`: the SHARED verifier then validates signature
// + timestamp-freshness (the replay/eviction guard) AND binds the inner
// request to THIS CG — same envelope validation as curated, only the
// allowlist decision diverges (see SharedMemoryHandler.verifyHostModeEnvelopeAuthority).
//
// Use `storageCgId` (cleartext from the envelope) so the
// member-side meta-graph + chain-fallback resolvers in
// `verifyHostModeEnvelopeAuthority` work on the canonical id
// shape. The hash subscription key is internal bookkeeping;
// never crosses an external authorization boundary.
// Use `storageCgId` (cleartext from the envelope) so the meta-graph +
// chain-fallback resolvers work on the canonical id shape.
const handler = this.getOrCreateSharedMemoryHandler();
const verdict = await handler.verifyHostModeEnvelopeAuthority(data, storageCgId, fromPeerId);
const verdict = await handler.verifyHostModeEnvelopeAuthority(
data, storageCgId, fromPeerId, { allowSelfSignedForPublicCg: confirmedPublic },
);
if (!verdict.accepted) {
// "no agent allowlist" is the expected outcome during the brief
// chain-event race window (cores see the beacon, auto-engage
// host-mode, then receive ciphertext BEFORE the
// `ContextGraphCreated` event lands AND before the curator
// beacon arrived). The beaconCuratorOracle fallback closes
// most of that window; the remaining race (envelope arrives
// before the beacon is received & verified) is recoverable
// via member catchup and should not spam WARN logs in steady-
// state operation. Other rejection reasons (sig mismatch, peer
// not in allowlist, decode failure) remain WARN — those are
// real authority failures that operators need to see.
const isTransientRace = verdict.reason === 'no agent allowlist on context graph';
// 'no agent allowlist' on a NON-public CG is the expected brief chain-event
// race (curated allowlist not loaded yet) — recoverable via member catchup,
// so log at debug. Every other rejection (decode / unsigned / signature-or-
// freshness / peer-not-allowed / CG-mismatch) is a real authority failure
// operators should see.
const isTransientRace = verdict.reasonCode === 'NO_AGENT_ALLOWLIST';
if (isTransientRace) {
this.log.debug(
ctx,
Expand Down Expand Up @@ -1112,7 +1154,6 @@ export class SwmHostModeMethods extends DKGAgentBase {
}
}
}

const seqno = await this.swmHostModeStore.append(storageCgId, data);
Comment thread
Bojan131 marked this conversation as resolved.
this.log.debug(
ctx,
Expand Down
34 changes: 34 additions & 0 deletions packages/agent/test/dkg-agent-on-chain-policy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,40 @@ describe('DKGAgent.getContextGraphOnChainPolicy', () => {
expect(Date.now() - newTs).toBeLessThan(1_000);
});

// Branimir review on #1239 — the host-mode self-signed admission gate
// (`isConfirmedPublicForHostMode`) is the first SECURITY-POSITIVE consumer of
// the cached `publishPolicy`, so it must NOT tolerate the ≤60s stale-permissive
// window: a host with a fresh `publishPolicy=1` entry would otherwise admit a
// self-signed plaintext write for up to the TTL after an owner downgrades
// open→curated. `forcePublishPolicyChainRead` treats the cache as always-stale
// so the chain RPC re-verifies. Same state, two answers: cached `1` without the
// flag, fresh-from-chain `0` with it.
it('forcePublishPolicyChainRead bypasses a FRESH publishPolicy cache and re-verifies on-chain (Branimir #1239)', async () => {
const getContextGraphPublishPolicy = recorder(async () => ({
publishPolicy: 0, // the owner just flipped open → curated
publishAuthority: '0x0000000000000000000000000000000000000000',
}));
const stub = makeStub({
subscribedContextGraphs: new Map([['cg-force', { onChainId: '88' }]]),
// FRESH cache says "1" (open) — seeded just now, well within the TTL.
onChainPublishPolicyCache: new Map([['cg-force', 1]]),
onChainPublishPolicyCacheUpdatedAt: new Map([['cg-force', Date.now()]]),
onChainAccessPolicyCache: new Map([['cg-force', 0]]),
isContextGraphRegistered: recorder(async () => true),
chain: { getContextGraphPublishPolicy },
});
// Default (no flag): the fresh cache hit returns the stale-permissive "1".
expect(await callPolicy(stub, 'cg-force')).toEqual({ accessPolicy: 0, publishPolicy: 1 });
expect(getContextGraphPublishPolicy.calls).toEqual([]); // no RPC — cache hit
// Forced: the cache is ignored, the chain RPC re-verifies → "0" (curated),
// so the admission gate sees the real (downgraded) policy and fails closed.
const forced = await (DKGAgent.prototype as any).getContextGraphOnChainPolicy.call(
stub, 'cg-force', { forcePublishPolicyChainRead: true },
);
expect(forced).toEqual({ accessPolicy: 0, publishPolicy: 0 });
expect(getContextGraphPublishPolicy.calls.at(-1)).toEqual([88n]);
});

// Round 3 — degenerate state: registered locally but the on-chain
// id resolution fails (e.g. corrupted ontology graph). We can't
// RPC without a numeric id, so we return whatever local triples
Expand Down
77 changes: 77 additions & 0 deletions packages/agent/test/gossip-signer-selection-787.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/**
* GH #787 (regression) — `getWorkspaceGossipSigningAgent` must skip a local key
* record that has a privateKey but NO valid `agentAddress` (a node-level
* operational identity, not an agent). Such a record can't be a usable gossip
* signer: `encodeWorkspaceGossipMessage` emits `agentAddress` into the envelope
* and the host-mode authority check rejects a missing one.
*
* The #306/#787 daemon test exercises only the HTTP quad-shape validation, which
* now short-circuits at the route boundary BEFORE the signer is selected — so it
* would NOT catch a revert of this guard. This test drives the signer selection
* directly: a keyless-agent record placed AHEAD of a valid signer must be
* skipped (no `toLowerCase()`-of-undefined crash, and not chosen as fallback).
*/
import { afterEach, describe, expect, it } from 'vitest';
import { ethers } from 'ethers';
import { MockChainAdapter } from '@origintrail-official/dkg-chain';
import { DKGAgent, agentFromPrivateKey, type AgentKeyRecord } from '../src/index.js';

interface Internals {
localAgents: Map<string, AgentKeyRecord>;
defaultAgentAddress?: string;
getWorkspaceGossipSigningAgent(): (AgentKeyRecord & { privateKey: string }) | null;
encodeWorkspaceGossipMessage(cg: string, msg: Uint8Array): Promise<Uint8Array>;
}

function keylessAgentRecord(label: string): AgentKeyRecord {
const rec = agentFromPrivateKey(ethers.Wallet.createRandom().privateKey, label);
// A node-level operational key: has a privateKey but no agent identity.
delete (rec as { agentAddress?: string }).agentAddress;
return rec;
}

describe('GH #787 — gossip signer selection skips keyless-agent records', () => {
let agent: DKGAgent | null = null;
afterEach(async () => { if (agent) { await agent.stop().catch(() => {}); agent = null; } });

it('keyless record placed FIRST + default match present → returns the valid signer (no throw)', async () => {
agent = await DKGAgent.create({ name: 'Signer787A', chainAdapter: new MockChainAdapter() });
const g = agent as unknown as Internals;
g.localAgents.clear();
const keyless = keylessAgentRecord('node-op');
const valid = agentFromPrivateKey(ethers.Wallet.createRandom().privateKey, 'agent');
g.localAgents.set('node-op-key', keyless); // FIRST — pre-fix this crashed on `.toLowerCase()` of undefined
g.localAgents.set(valid.agentAddress, valid);
g.defaultAgentAddress = valid.agentAddress;

const signer = g.getWorkspaceGossipSigningAgent();
expect(signer).not.toBeNull();
expect(signer!.agentAddress).toBe(valid.agentAddress);
// And signing actually works end to end (a real signed envelope, not a crash
// or the raw-payload passthrough that happens with no usable signer).
const env = await g.encodeWorkspaceGossipMessage('cg-787', new TextEncoder().encode('payload'));
expect(env.length).toBeGreaterThan(64);
});

it('keyless record FIRST + NO default match → falls back to the valid signer (skips the keyless one)', async () => {
agent = await DKGAgent.create({ name: 'Signer787B', chainAdapter: new MockChainAdapter() });
const g = agent as unknown as Internals;
g.localAgents.clear();
g.localAgents.set('node-op-key', keylessAgentRecord('node-op'));
const valid = agentFromPrivateKey(ethers.Wallet.createRandom().privateKey, 'agent');
g.localAgents.set(valid.agentAddress, valid);
g.defaultAgentAddress = undefined; // no default → exercise fallback selection

const signer = g.getWorkspaceGossipSigningAgent();
expect(signer?.agentAddress).toBe(valid.agentAddress);
});

it('ONLY keyless-agent records → no usable signer (null, no throw)', async () => {
agent = await DKGAgent.create({ name: 'Signer787C', chainAdapter: new MockChainAdapter() });
const g = agent as unknown as Internals;
g.localAgents.clear();
g.localAgents.set('k1', keylessAgentRecord('k1'));
g.defaultAgentAddress = undefined;
expect(g.getWorkspaceGossipSigningAgent()).toBeNull();
});
});
Loading
Loading