Skip to content
12 changes: 9 additions & 3 deletions packages/agent/src/dkg-agent-publish.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ import {
resolveWorkspaceAgentRecipients,
computeTripleHashV10 as computeTripleHash, computeFlatKCRootV10 as computeFlatKCRoot, skolemizeByEntity, isReservedSubject,
canonicalPublishPayload,
preparePublicWriteQuads,
generatedPrivateCatalogTripleKeys,
resolveLiftWorkspaceSlice,
validateLiftPublishPayload,
Expand Down Expand Up @@ -426,6 +427,10 @@ function rejectOversizedRdfLiterals(quads: Quad[] | undefined, label: string): v
assertQuadLiteralsMutf8Safe(quads, { label });
}

/**
 * Runs public quads through `preparePublicWriteQuads` and returns only the
 * resulting quad list.
 *
 * @param quads - Public quads supplied by the caller.
 * @param label - Caller-chosen label forwarded to `preparePublicWriteQuads`
 *   in its options object (call sites pass strings such as
 *   `'agent.publish.quads'`).
 * @returns The `quads` property of the value returned by
 *   `preparePublicWriteQuads`.
 */
function normalizePublicRdfLiterals(quads: Quad[], label: string): Quad[] {
  const prepared = preparePublicWriteQuads(quads, { label });
  return prepared.quads;
}

export class PublishMethods extends DKGAgentBase {
async publishWorkspaceGossip(this: DKGAgent,
contextGraphId: string,
Expand Down Expand Up @@ -995,7 +1000,7 @@ export class PublishMethods extends DKGAgentBase {
if (publicQuads.length === 0 && privateQuads.length === 0) {
throw new InvalidContentError('Content must include at least one public or private payload');
}
rejectOversizedRdfLiterals(publicQuads, 'publishAsync.publicQuads');
publicQuads = normalizePublicRdfLiterals(publicQuads, 'publishAsync.publicQuads');
rejectOversizedRdfLiterals(privateQuads, 'publishAsync.privateQuads');

const partitioned = partitionPublishAsyncQuads(publicQuads, privateQuads);
Expand Down Expand Up @@ -1292,8 +1297,8 @@ export class PublishMethods extends DKGAgentBase {
): Promise<PublishResult> {
const ctx = opts?.operationCtx ?? createOperationContext('publish');
const onPhase = opts?.onPhase;
quads = normalizePublicRdfLiterals(quads, 'agent.publish.quads');
this.log.info(ctx, `Starting publish to context graph "${contextGraphId}" with ${quads.length} triples`);
rejectOversizedRdfLiterals(quads, 'agent.publish.quads');
rejectOversizedRdfLiterals(privateQuads, 'agent.publish.privateQuads');

const isSystem = contextGraphId === SYSTEM_CONTEXT_GRAPHS.AGENTS || contextGraphId === SYSTEM_CONTEXT_GRAPHS.ONTOLOGY;
Expand Down Expand Up @@ -1528,8 +1533,8 @@ export class PublishMethods extends DKGAgentBase {
): Promise<PublishResult> {
const ctx = opts?.operationCtx ?? createOperationContext('update');
const onPhase = opts?.onPhase;
quads = normalizePublicRdfLiterals(quads, 'agent.update.quads');
this.log.info(ctx, `Starting update of kaId=${kaId} in context graph "${contextGraphId}" with ${quads.length} triples`);
rejectOversizedRdfLiterals(quads, 'agent.update.quads');
rejectOversizedRdfLiterals(privateQuads, 'agent.update.privateQuads');
// GH #842: thread the on-chain cgId so the publisher can promote the update
// payload into the per-cgId partition the RS prover reads. Without it,
Expand Down Expand Up @@ -2640,6 +2645,7 @@ export class PublishMethods extends DKGAgentBase {
privateQuads?: Quad[];
},
): Promise<PublishOptions['precomputedAttestation']> {
quads = normalizePublicRdfLiterals(quads, '_buildPrecomputedAttestationForSelection.quads');
Comment thread
Jurij89 marked this conversation as resolved.
if (
opts?.authorAgentAddress != null &&
opts?.preSignedAuthorAttestation != null
Expand Down
161 changes: 157 additions & 4 deletions packages/agent/test/publish-literal-size.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { describe, expect, it, vi } from 'vitest';
import {
DKG_CHUNK_VALUE,
DKG_HAS_TEXT_BODY,
} from '@origintrail-official/dkg-core';
import { canonicalPublishPayload, preparePublicWriteQuads } from '@origintrail-official/dkg-publisher';
import { PublishMethods } from '../src/dkg-agent-publish.js';
import type { Quad } from '@origintrail-official/dkg-storage';
import { OxigraphStore, type Quad } from '@origintrail-official/dkg-storage';

const OVERSIZED_TEXT_QUAD: Quad = {
subject: 'http://example.org/root',
Expand All @@ -9,6 +14,26 @@ const OVERSIZED_TEXT_QUAD: Quad = {
graph: 'http://example.org/graph',
};

// Copy of OVERSIZED_TEXT_QUAD with only the predicate swapped to schema:name.
// The direct-publish rejection test passes this quad and expects an
// OVERSIZED_RDF_LITERAL error reported against 'http://schema.org/name'.
const OVERSIZED_NAME_QUAD: Quad = {
...OVERSIZED_TEXT_QUAD,
predicate: 'http://schema.org/name',
};

// Two-quad fixture in a single graph:
//   1. the root IRI points at the blank node `_:body` via schema:hasPart;
//   2. `_:body` carries a schema:text literal of 60,000 'x' characters
//      (60,002 characters including the surrounding double quotes).
const LINKED_BLANK_OVERSIZED_TEXT_QUADS: Quad[] = [
{
subject: 'http://example.org/root',
predicate: 'http://schema.org/hasPart',
object: '_:body',
graph: 'http://example.org/graph',
},
{
subject: '_:body',
predicate: 'http://schema.org/text',
object: `"${'x'.repeat(60_000)}"`,
graph: 'http://example.org/graph',
},
];

describe('agent publish literal size validation', () => {
it('rejects publishAsync private quads before workspace staging', async () => {
const agentStub = {
Expand All @@ -32,7 +57,48 @@ describe('agent publish literal size validation', () => {
});
});

it('rejects direct publish quads before chain or publisher work', async () => {
// Calls publishAsync with OVERSIZED_TEXT_QUAD as the only public quad and
// inspects what was handed to the mocked writeToWorkspace: the original
// subject must no longer carry schema:text, must carry DKG_HAS_TEXT_BODY,
// and at least one DKG_CHUNK_VALUE quad must be present.
it('chunks publishAsync public schema:text before workspace staging', async () => {
// Mocked workspace write; its recorded call arguments are read back below.
const writeToWorkspace = vi.fn(async () => ({
shareOperationId: 'swm-test',
message: new Uint8Array([1, 2, 3]),
}));
// Minimal stand-in for the agent; it is passed as `this` (cast `as never`)
// when the prototype method is invoked.
const agentStub = {
contextGraphExists: vi.fn(async () => true),
publisher: {
writeToWorkspace,
},
peerId: 'peer-test',
store: new OxigraphStore(),
log: {
warn: vi.fn(),
},
buildAsyncLiftSeal: vi.fn(async () => undefined),
};

const result = await PublishMethods.prototype.publishAsync.call(
agentStub as never,
'computer-history',
{
publicQuads: [OVERSIZED_TEXT_QUAD],
privateQuads: [],
},
{ localOnly: true },
);

// The call must complete and return a string captureID.
expect(result.captureID).toEqual(expect.any(String));
// Second argument of the first writeToWorkspace call, treated as the staged quads.
const stagedQuads = writeToWorkspace.mock.calls[0]?.[1] as Quad[];
// No staged quad keeps schema:text on the original subject.
expect(stagedQuads.some((quad) =>
quad.subject === OVERSIZED_TEXT_QUAD.subject &&
quad.predicate === 'http://schema.org/text'
)).toBe(false);
// The original subject instead has a DKG_HAS_TEXT_BODY quad.
expect(stagedQuads.some((quad) =>
quad.subject === OVERSIZED_TEXT_QUAD.subject &&
quad.predicate === DKG_HAS_TEXT_BODY
)).toBe(true);
// At least one chunk-value quad was staged (subject not constrained here).
expect(stagedQuads.some((quad) => quad.predicate === DKG_CHUNK_VALUE)).toBe(true);
});

it('rejects direct publish non-text quads before chain or publisher work', async () => {
const agentStub = {
log: { info: vi.fn() },
};
Expand All @@ -41,13 +107,100 @@ describe('agent publish literal size validation', () => {
PublishMethods.prototype._publish.call(
agentStub as never,
'computer-history',
[OVERSIZED_TEXT_QUAD],
[OVERSIZED_NAME_QUAD],
),
).rejects.toMatchObject({
code: 'OVERSIZED_RDF_LITERAL',
actualBytes: 60_002,
maxBytes: 60_000,
predicate: 'http://schema.org/text',
predicate: 'http://schema.org/name',
});
});

// Calls _buildPrecomputedAttestationForSelection with OVERSIZED_TEXT_QUAD and
// checks that the returned expectedMerkleRoot matches the root computed over
// the output of preparePublicWriteQuads, and differs from the root computed
// over the raw input quad.
it('builds selection precomputed attestations over chunked public text quads', async () => {
const authorAddress = '0x000000000000000000000000000000000000dEaD';
// Stubbed agent passed as `this` (cast `as never`): chain lookups,
// context-graph lookups, publisher signing helpers and the KA number
// allocator are all vi.fn mocks returning fixed values.
const agentStub = {
chain: {
getEvmChainId: vi.fn(async () => 31337n),
getKnowledgeAssetsLifecycleAddress: vi.fn(async () => '0x000000000000000000000000000000000000c0de'),
},
getContextGraphOnChainId: vi.fn(async () => 42n),
isPrivateContextGraph: vi.fn(async () => false),
publisher: {
publisherFallbackAuthorAddress: vi.fn(async () => authorAddress),
signAuthorAttestationAsPublisher: vi.fn(async () => ({
r: new Uint8Array(32).fill(1),
vs: new Uint8Array(32).fill(2),
})),
},
kaNumberAllocator: {
reconcile: vi.fn(),
markReconciled: vi.fn(),
allocate: vi.fn(() => ({ number: 7n })),
},
reconciledKaAuthors: new Set<string>(),
};

const attestation =
await PublishMethods.prototype._buildPrecomputedAttestationForSelection.call(
agentStub as never,
'computer-history',
[OVERSIZED_TEXT_QUAD],
);

// Build the expectation independently: prepare the same input, then take
// kcMerkleRoot from canonicalPublishPayload (second argument left empty).
const chunkedQuads = preparePublicWriteQuads(
[OVERSIZED_TEXT_QUAD],
{ label: 'test.expected' },
).quads;
const expectedChunkedRoot = canonicalPublishPayload(chunkedQuads, []).kcMerkleRoot;
// Root over the untouched input quad, used only as a negative control.
const unchunkedRoot = canonicalPublishPayload([OVERSIZED_TEXT_QUAD], []).kcMerkleRoot;

// Roots are copied with Array.from so they are compared element by element;
// a missing attestation falls back to an empty array and fails the first check.
expect(Array.from(attestation?.expectedMerkleRoot ?? [])).toEqual(Array.from(expectedChunkedRoot));
expect(Array.from(attestation?.expectedMerkleRoot ?? [])).not.toEqual(Array.from(unchunkedRoot));
});

// Calls _buildPrecomputedAttestationForSelection with the linked blank-node
// fixture and checks that the returned expectedMerkleRoot matches the root
// computed over preparePublicWriteQuads' output. Also pins the shape of that
// prepared output: the text body hangs off an IRI under the root subject
// instead of off `_:body` with schema:text.
it('builds selection precomputed attestations over skolemized linked blank-node text chunks', async () => {
const authorAddress = '0x000000000000000000000000000000000000dEaD';
// Stubbed agent passed as `this` (cast `as never`); every collaborator is a
// vi.fn mock returning a fixed value.
const agentStub = {
chain: {
getEvmChainId: vi.fn(async () => 31337n),
getKnowledgeAssetsLifecycleAddress: vi.fn(async () => '0x000000000000000000000000000000000000c0de'),
},
getContextGraphOnChainId: vi.fn(async () => 42n),
isPrivateContextGraph: vi.fn(async () => false),
publisher: {
publisherFallbackAuthorAddress: vi.fn(async () => authorAddress),
signAuthorAttestationAsPublisher: vi.fn(async () => ({
r: new Uint8Array(32).fill(1),
vs: new Uint8Array(32).fill(2),
})),
},
kaNumberAllocator: {
reconcile: vi.fn(),
markReconciled: vi.fn(),
allocate: vi.fn(() => ({ number: 7n })),
},
reconciledKaAuthors: new Set<string>(),
};

const attestation =
await PublishMethods.prototype._buildPrecomputedAttestationForSelection.call(
agentStub as never,
'computer-history',
LINKED_BLANK_OVERSIZED_TEXT_QUADS,
);

// Prepare the same fixture independently to derive the expected quads and root.
const prepared = preparePublicWriteQuads(LINKED_BLANK_OVERSIZED_TEXT_QUADS, { label: 'test.expected' }).quads;
// IRI the test expects to stand in for the `_:body` blank node in the prepared quads.
const child = 'http://example.org/root/.well-known/genid/body';
// The replacement subject carries a DKG_HAS_TEXT_BODY quad ...
expect(prepared.some((quad) =>
quad.subject === child &&
quad.predicate === DKG_HAS_TEXT_BODY
)).toBe(true);
// ... and no longer carries the raw schema:text literal.
expect(prepared.some((quad) =>
quad.subject === child &&
quad.predicate === 'http://schema.org/text'
)).toBe(false);
// The attestation's root must equal the root over the prepared quads; a
// missing attestation falls back to an empty array and fails the check.
const expectedChunkedRoot = canonicalPublishPayload(prepared, []).kcMerkleRoot;
expect(Array.from(attestation?.expectedMerkleRoot ?? [])).toEqual(Array.from(expectedChunkedRoot));
});
});
Loading
Loading