From 3b2c37c10b85100283bb98251f3faabc2643a44c Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 12:54:17 +0200 Subject: [PATCH 1/8] fix(core): backend-independent V10 leaf canon for xsd:dateTime/time (OT-RFC-57) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The V10 merkle leaf is keccak256(canonicalizeObjectTermForHash(term)). The publisher builds leaves from the in-memory input; the RS prover rebuilds them from the triple-store read-back. The canon reproduced oxigraph 0.5.5's stored form, but Blazegraph (mainnet core nodes) and Neptune normalize temporal literals to a different value form (force UTC, truncate sub-ms), so canon(input) != canon(store-readback) for xsd:dateTime/xsd:time -> the publisher and a Blazegraph prover compute DIFFERENT leaves for the same triple -> RandomSampling fork (and a publisher/prover mismatch even on one backend). Root cause of the OKF->VM MERKLE_MISMATCH; #1386 matched oxigraph only. This makes the canon a backend-INDEPENDENT value canon for xsd:dateTime and xsd:time: normalize to UTC (subtract the tz offset, rolling the date across midnight via civilFromDays), truncate the fraction to milliseconds, always emit Z. The publisher's input AND every backend's read-back (oxigraph, Blazegraph, Neptune) then converge to one leaf. Blazegraph's form is a fixed point => ~zero mainnet migration; oxigraph/devnet leaves converge up (coordinated release, spec §9.0.2). Validation: - oxigraph oracle (packages/publisher/test/term-canon-oracle.test.ts) reframed from identity to CONVERGENCE (canon(oxigraph-readback) == canon(input)); 34/34 green locally (in-process oxigraph). - Blazegraph oracle (packages/storage/test/term-canon-blazegraph-oracle.test.ts) brought in + wired into the tornado-blazegraph CI lane; dateTime/time flipped from it.fails to it (CI validates against a live Blazegraph — local blazegraph is amd64-under-qemu, unrunnable on arm64). 
SCOPE: this commit fixes xsd:dateTime + xsd:time only. date/gregorian, some xsd:double/float, and some escaped strings still diverge and remain it.fails, pending the rest of the backend-independent canon (tracked in OT-RFC-57). NOT for merge until the Blazegraph oracle is green in CI and reviewed. Refs: OT-RFC-57 (dkgv10-spec#136). The oracle + CI wiring overlap #1397 (they originate there); resolve on merge by taking this branch's version. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 7 +- packages/core/src/crypto/term-canon.ts | 86 ++++-- .../publisher/test/term-canon-oracle.test.ts | 30 ++- .../test/term-canon-blazegraph-oracle.test.ts | 248 ++++++++++++++++++ 4 files changed, 340 insertions(+), 31 deletions(-) create mode 100644 packages/storage/test/term-canon-blazegraph-oracle.test.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed8999246c..62005f8b5e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -332,7 +332,12 @@ jobs: # Stock image ships the default `kb` namespace; that's all the # adapter needs (it isolates per-test via unique GRAPH IRIs). BLAZEGRAPH_TEST_URL: http://127.0.0.1:9999/bigdata/namespace/kb/sparql - run: pnpm --filter @origintrail-official/dkg-storage exec vitest run test/blazegraph.integration.test.ts + # term-canon-blazegraph-oracle: cross-backend V10 leaf agreement (OT-RFC-57) + # — proves an oxigraph node and a Blazegraph node compute the SAME merkle + # leaf for the same typed literal (else RandomSampling forks). dateTime/time + # are fixed here; date/gregorian/double/escaping remain it.fails pending + # the rest of the backend-independent canon. + run: pnpm --filter @origintrail-official/dkg-storage exec vitest run test/blazegraph.integration.test.ts test/term-canon-blazegraph-oracle.test.ts # ------------------------------------------------------------------ # Tornado publisher lane — sharded across 4 parallel runners. 
diff --git a/packages/core/src/crypto/term-canon.ts b/packages/core/src/crypto/term-canon.ts index 8ea1126868..be6e3cde61 100644 --- a/packages/core/src/crypto/term-canon.ts +++ b/packages/core/src/crypto/term-canon.ts @@ -177,6 +177,23 @@ function daysFromCivil(y: bigint, m: bigint, d: bigint): bigint { const doe = yoe * 365n + yoe / 4n - yoe / 100n + doy; return era * 146097n + doe - 719468n; } +// Inverse of daysFromCivil: proleptic-Gregorian (y,m,d) from a signed day count +// (days since 1970-01-01). Standard Howard Hinnant algorithm. Used to roll the +// DATE when a timezone offset pushes a dateTime across midnight during the +// backend-independent UTC normalization (OT-RFC-57). +function civilFromDays(zIn: bigint): { y: bigint; m: bigint; d: bigint } { + const z = zIn + 719468n; + const era = (z >= 0n ? z : z - 146096n) / 146097n; + const doe = z - era * 146097n; // [0, 146096] + const yoe = (doe - doe / 1460n + doe / 36524n - doe / 146096n) / 365n; // [0, 399] + const y = yoe + era * 400n; + const doy = doe - (365n * yoe + yoe / 4n - yoe / 100n); // [0, 365] + const mp = (5n * doy + 2n) / 153n; // [0, 11] + const d = doy - (153n * mp + 2n) / 5n + 1n; // [1, 31] + const m = mp < 10n ? mp + 3n : mp - 9n; // [1, 12] + return { y: m <= 2n ? y + 1n : y, m, d }; +} + function temporalInRange(yearStr: string, mo: number, dd: number, hh = 0, mi = 0, ss = 0): boolean { const seconds = (daysFromCivil(BigInt(yearStr), BigInt(mo), BigInt(dd)) + 719162n) * 86400n + @@ -345,11 +362,31 @@ function splitTz(s: string): { body: string; tz: string } { return { body, tz: tz === '+00:00' || tz === '-00:00' ? 'Z' : tz }; } -// Normalize a fractional-seconds group ('.ddd' or undefined): strip trailing -// zeros, drop entirely if it becomes empty. +// Like splitTz, but returns the offset MAGNITUDE in minutes (signed) for the +// backend-independent UTC normalization of xsd:dateTime/xsd:time (OT-RFC-57). 
+// hadTz=false ⇒ no timezone present (a bare dateTime is normalized to UTC and +// gains a Z, matching Blazegraph/Neptune). Malformed/out-of-range tz → throw +// (→ the literal is kept verbatim, as oxigraph does). +function splitTzToOffset(s: string): { body: string; offsetMin: number; hadTz: boolean } { + const m = /(Z|[+-]\d{2}:\d{2})$/.exec(s); + if (!m) return { body: s, offsetMin: 0, hadTz: false }; + const tz = m[1]; + const body = s.slice(0, s.length - tz.length); + if (tz === 'Z') return { body, offsetMin: 0, hadTz: true }; + const h = parseInt(tz.slice(1, 3), 10); + const mi = parseInt(tz.slice(4, 6), 10); + if (mi > 59 || h * 60 + mi > 840) throw new Error(`invalid tz: ${tz}`); + const mag = h * 60 + mi; + return { body, offsetMin: tz[0] === '-' ? -mag : mag, hadTz: true }; +} + +// Normalize a fractional-seconds group ('.ddd' or undefined): TRUNCATE to at most +// 3 digits (milliseconds — the backend-independent precision floor; a lossy store +// such as Blazegraph keeps only ms), then strip trailing zeros; drop entirely if +// empty. Truncate, NOT round (matches Blazegraph). (OT-RFC-57) function normFrac(frac: string | undefined): string { if (frac === undefined) return ''; - const d = frac.slice(1).replace(/0+$/, ''); + const d = frac.slice(1, 4).replace(/0+$/, ''); // at most 3 digits, then strip trailing zeros return d === '' ? '' : `.${d}`; } @@ -402,8 +439,13 @@ function validateClock(hh: number, mi: number, ss: number, fracNorm: string): { // on a literal oxigraph leaves untouched. const YEAR = '-?(?:\\d{4}|[1-9]\\d{4,})'; +// OT-RFC-57 backend-independent form: normalize to UTC (subtract the tz offset, +// rolling the DATE across midnight), truncate fraction to ms, always emit Z. A +// no-timezone dateTime is treated as UTC and gains a Z (matching Blazegraph / +// Neptune). This is the value-space form the publisher's input AND every +// backend's read-back converge to. 
function canonDateTime(lex: string): string { - const { body, tz } = splitTz(lex); + const { body, offsetMin } = splitTzToOffset(lex); const m = new RegExp(`^(${YEAR})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})(\\.\\d+)?$`).exec(body); if (!m) throw new Error('invalid xsd:dateTime'); const [, yy, mo, dd, hh, mi, ss, frac] = m; @@ -413,35 +455,31 @@ function canonDateTime(lex: string): string { if (ddN < 1 || ddN > daysInMonth(yy, moN)) throw new Error('day'); if (!temporalInRange(yy, moN, ddN, +hh, +mi, +ss)) throw new Error('year overflows i128 seconds'); const fracNorm = normFrac(frac); - if (fracNorm.length - 1 > 18) throw new Error('sub-1e-18 seconds'); // oxigraph stores ≤18 frac digits const { rolls } = validateClock(+hh, +mi, +ss, fracNorm); - if (rolls) { - return `${rollNextDay(yy, moN, ddN)}T00:${mi}:${ss}${fracNorm}${tz}`; - } - // KNOWN oxigraph 0.5.5 DEFECT (documented, NOT mirrored): a BEFORE-EPOCH dateTime - // (before 0001-01-01T00:00:00, i.e. year ≤ 0000) with seconds == 59 AND a non-zero - // fraction has its minute bumped by +1 on every load→serialize round-trip. Far - // before the epoch it never stabilises (-1711-…T15:19:59.6 → :20:59.6 → :21:59.6 → - // …); near it the bump just crosses into year 0001 once. Either way the store has - // no stable form for these, so no canonicalization can make them consensus-safe. - // We deliberately do NOT replicate the bump: canon stays DETERMINISTIC + IDEMPOTENT - // (the best achievable), normalising tz/fraction like any other dateTime and - // leaving the wall-clock untouched. Residual exposure = a pre-existing oxigraph - // storage defect for an essentially-nonexistent input class (BCE / year-0 timestamps - // at :59 with sub-second precision) — escalated to the store layer, off this canon. - return `${normYear(yy)}-${mo}-${dd}T${hh}:${mi}:${ss}${fracNorm}${tz}`; + // Base date as a day count; a T24:00 clock rolls one day and resets the hour to 0. 
+ let days = daysFromCivil(BigInt(yy), BigInt(moN), BigInt(ddN)); + const hourN = rolls ? 0 : +hh; + if (rolls) days += 1n; + // UTC: subtract the offset (whole minutes); roll the date across midnight. + const totalMin = hourN * 60 + +mi - offsetMin; + days += BigInt(Math.floor(totalMin / 1440)); + const minInDay = ((totalMin % 1440) + 1440) % 1440; + const { y, m: mm, d } = civilFromDays(days); + return `${fmtYear(y)}-${pad2(Number(mm))}-${pad2(Number(d))}T${pad2(Math.floor(minInDay / 60))}:${pad2(minInDay % 60)}:${ss}${fracNorm}Z`; } +// OT-RFC-57: time has no date, so a tz offset just wraps the wall clock mod 24h; +// normalize to UTC + Z, ms-truncated. function canonTime(lex: string): string { - const { body, tz } = splitTz(lex); + const { body, offsetMin } = splitTzToOffset(lex); const m = /^(\d{2}):(\d{2}):(\d{2})(\.\d+)?$/.exec(body); if (!m) throw new Error('invalid xsd:time'); const [, hh, mi, ss, frac] = m; const fracNorm = normFrac(frac); - if (fracNorm.length - 1 > 18) throw new Error('sub-1e-18 seconds'); const { rolls } = validateClock(+hh, +mi, +ss, fracNorm); - // time has no date to roll; hour 24 → 00 of the same wall clock. - return `${rolls ? '00' : hh}:${mi}:${ss}${fracNorm}${tz}`; + const hourN = rolls ? 0 : +hh; + const minInDay = (((hourN * 60 + +mi - offsetMin) % 1440) + 1440) % 1440; + return `${pad2(Math.floor(minInDay / 60))}:${pad2(minInDay % 60)}:${ss}${fracNorm}Z`; } function canonDate(lex: string): string { diff --git a/packages/publisher/test/term-canon-oracle.test.ts b/packages/publisher/test/term-canon-oracle.test.ts index ccfbf3f19f..f69192cba1 100644 --- a/packages/publisher/test/term-canon-oracle.test.ts +++ b/packages/publisher/test/term-canon-oracle.test.ts @@ -32,16 +32,27 @@ async function oxigraphForms(objects: string[]): Promise { return objects.map((_, i) => byPred.get(`urn:p#${i}`) ?? '(DROPPED)'); } -/** Assert the pure core canonicalizer reproduces oxigraph's form for every input. 
*/ +/** + * OT-RFC-57: the canon is now a backend-INDEPENDENT value canon — it no longer + * reproduces oxigraph's stored lexical form (for temporal types it emits the UTC, + * ms-truncated form so oxigraph and Blazegraph nodes agree). So we assert + * CONVERGENCE, not identity: `canon(oxigraph_readback) === canon(input)`. This is + * exactly what consensus needs — the publisher (input) and a prover reading from + * an oxigraph store compute the same leaf — and it holds for BOTH the types the + * canon rewrites (dateTime/time) and the types it leaves as oxigraph's form. + */ async function expectMatchesOxigraph(objects: string[]): Promise { const oxi = await oxigraphForms(objects); const mismatches: string[] = []; objects.forEach((obj, i) => { - const got = canonicalizeObjectTermForHash(obj); - if (got !== oxi[i]) mismatches.push(` in: ${obj}\n core:${got}\n oxi: ${oxi[i]}`); + const canonInput = canonicalizeObjectTermForHash(obj); + const canonOxi = canonicalizeObjectTermForHash(oxi[i]); + if (canonInput !== canonOxi) { + mismatches.push(` in: ${obj}\n canon(input): ${canonInput}\n canon(oxi-store ${oxi[i]}): ${canonOxi}`); + } }); if (mismatches.length) { - throw new Error(`core canon diverged from oxigraph (${mismatches.length}/${objects.length}):\n${mismatches.join('\n')}`); + throw new Error(`canon(input) != canon(oxigraph-readback) — publisher/prover would fork (${mismatches.length}/${objects.length}):\n${mismatches.join('\n')}`); } expect(mismatches.length).toBe(0); } @@ -322,8 +333,15 @@ describe('term-canon oracle: fuzz-hardened edge classes (#1386)', () => { const oxi = await oxigraphForms(battery); for (let i = 0; i < battery.length; i++) { const once = canonicalizeObjectTermForHash(battery[i]); - expect(canonicalizeObjectTermForHash(once)).toBe(once); // idempotent - if (oxi[i] !== '(DROPPED)') expect(canonicalizeObjectTermForHash(oxi[i])).toBe(oxi[i]); // identity on store output + expect(canonicalizeObjectTermForHash(once)).toBe(once); // idempotent 
(fixed point) + // OT-RFC-57: the canon is no longer the IDENTITY on oxigraph's output — for + // temporal types it normalizes oxigraph's preserved form to the UTC value + // form (so oxigraph nodes agree with Blazegraph). That is the intended + // oxigraph/devnet migration (mainnet = Blazegraph is unchanged; asserted by + // the Blazegraph oracle). What MUST hold for consensus is CONVERGENCE: + // canon(oxigraph_readback) == canon(input) ⇒ publisher and an oxigraph + // prover compute the same leaf. + if (oxi[i] !== '(DROPPED)') expect(canonicalizeObjectTermForHash(oxi[i])).toBe(once); } }); diff --git a/packages/storage/test/term-canon-blazegraph-oracle.test.ts b/packages/storage/test/term-canon-blazegraph-oracle.test.ts new file mode 100644 index 0000000000..b6719d94f6 --- /dev/null +++ b/packages/storage/test/term-canon-blazegraph-oracle.test.ts @@ -0,0 +1,248 @@ +// CROSS-BACKEND CONSENSUS oracle for the V10 leaf canonicalization (#1386). +// +// `packages/publisher/test/term-canon-oracle.test.ts` proves the pure core +// canonicalizer (dkg-core `canonicalizeObjectTermForHash`, applied at +// `tripleContentV10`) CONVERGES on **oxigraph 0.5.5**: canon(read-back) === +// canon(input). That closes the oxigraph side. It does NOT cover the genuine consensus +// risk: a node running a DIFFERENT triple-store backend (Blazegraph) may store +// a literal in a different lexical form, and if the protocol canon does not +// absorb that difference, the two nodes compute DIFFERENT merkle leaves for the +// SAME published triple — a silent RandomSampling fork. +// +// This oracle closes that gap. For a broad battery of typed literals it asserts: +// +// canon(blazegraphRoundTrip(x)) === canon(x) === canon(oxigraphRoundTrip(x)) +// +// i.e. a node on oxigraph and a node on Blazegraph, each reading the literal +// back from its own store and applying the protocol canon, land on the SAME +// V10 leaf term (== the protocol leaf canon(x)).
If Blazegraph normalizes a +// type into a form the canon does not fold to the same value, THIS fails — +// which is the difference between "works on our deployed backend" and "forks +// the moment an operator runs Blazegraph". +// +// Gated on BLAZEGRAPH_TEST_URL (same as blazegraph.integration.test.ts) so a +// local `pnpm test` without a Blazegraph server skips it; CI's tornado-blazegraph +// lane provisions the service container and sets the env var. +// BLAZEGRAPH_TEST_URL=http://127.0.0.1:9999/bigdata/namespace/kb/sparql +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { BlazegraphStore } from '../src/adapters/blazegraph.js'; +import { OxigraphStore } from '../src/adapters/oxigraph.js'; +import type { Quad } from '../src/triple-store.js'; +import { canonicalizeObjectTermForHash } from '@origintrail-official/dkg-core'; + +const BLAZEGRAPH_URL = process.env.BLAZEGRAPH_TEST_URL; + +const xsd = (t: string) => `http://www.w3.org/2001/XMLSchema#${t}`; +const lit = (v: string, dt: string) => `"${v}"^^<${xsd(dt)}>`; + +// Unique graph per run so a persistent namespace never serves stale data. +const RUN = `${Date.now()}-${Math.floor(Math.random() * 1e6)}`; +const G = `urn:tc-bg:${RUN}`; +const S = `urn:tc-bg:${RUN}:s`; + +/** Round-trip `objects` through a store; return, per input index, the object + * term string the store emits on CONSTRUCT (its canonical stored form). 
*/ +async function roundTrip( + store: { insert(q: Quad[]): Promise; query(s: string): Promise; dropGraph?(g: string): Promise }, + graph: string, + objects: string[], +): Promise { + const quads: Quad[] = objects.map((object, i) => ({ subject: S, predicate: `urn:p#${i}`, object, graph })); + await store.insert(quads); + const res = await store.query(`CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <${graph}> { ?s ?p ?o } }`); + const byPred = new Map(); + if (res?.type === 'quads') for (const q of res.quads) byPred.set(q.predicate, q.object); + else if (Array.isArray(res?.quads)) for (const q of res.quads) byPred.set(q.predicate, q.object); + return objects.map((_, i) => byPred.get(`urn:p#${i}`) ?? '(DROPPED)'); +} + +let blaze: BlazegraphStore; +let oxi: OxigraphStore; +let graphSeq = 0; + +/** + * Assert both backends converge to the same protocol leaf for every literal. + * Each call uses a fresh sub-graph so re-runs never see stale data. + */ +async function expectCrossBackendLeafAgreement(objects: string[]): Promise { + const bgGraph = `${G}:${++graphSeq}`; + const oxGraph = `${G}:ox:${graphSeq}`; + const [bzForms, oxiForms] = await Promise.all([ + roundTrip(blaze, bgGraph, objects), + roundTrip(oxi, oxGraph, objects), + ]); + await blaze.dropGraph(bgGraph).catch(() => {}); + + const mismatches: string[] = []; + objects.forEach((obj, i) => { + const leaf = canonicalizeObjectTermForHash(obj); // the protocol leaf + const leafFromBz = canonicalizeObjectTermForHash(bzForms[i]!); + const leafFromOxi = canonicalizeObjectTermForHash(oxiForms[i]!); + if (leafFromBz !== leaf || leafFromOxi !== leaf) { + mismatches.push( + ` in: ${obj}\n` + + ` canon(in) [leaf]: ${leaf}\n` + + ` blazegraph→canon: ${leafFromBz} (stored: ${bzForms[i]})\n` + + ` oxigraph→canon: ${leafFromOxi} (stored: ${oxiForms[i]})`, + ); + } + }); + if (mismatches.length) { + throw new Error( + `CROSS-BACKEND V10 LEAF DIVERGENCE (${mismatches.length}/${objects.length}) — a Blazegraph node would fork 
RandomSampling:\n${mismatches.join('\n\n')}`, + ); + } + expect(mismatches.length).toBe(0); +} + +describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ blazegraph leaf agreement (#1386)', () => { + beforeAll(async () => { + blaze = new BlazegraphStore(BLAZEGRAPH_URL as string); + oxi = new OxigraphStore(); + await blaze.dropGraph(G).catch(() => {}); + }, 120_000); + + afterAll(async () => { + if (blaze) await blaze.dropGraph(G).catch(() => {}); + }); + + it('xsd:string elision', async () => { + await expectCrossBackendLeafAgreement([lit('Bitcoin', 'string'), lit('a b c', 'string'), lit('', 'string')]); + }); + + it('language-tag lowercasing', async () => { + await expectCrossBackendLeafAgreement(['"x"@EN', '"x"@en', '"x"@en-US', '"x"@EN-us', '"x"@En-Gb', '"x"@DE']); + }); + + it('plain literals', async () => { + await expectCrossBackendLeafAgreement(['"plain"', '"with space"']); + }); + + it('xsd:integer family (incl. out-of-i64)', async () => { + const cases = ['007', '+5', '-0', '00', '-42', '0', '999999999999999999999999']; + const types = ['integer', 'int', 'long', 'short', 'nonNegativeInteger', 'positiveInteger', 'negativeInteger']; + const objects: string[] = []; + for (const ty of types) + for (const v of cases) { + if (ty.includes('nonNegative') || ty === 'positiveInteger') if (v.startsWith('-')) continue; + if (ty === 'negativeInteger') if (!v.startsWith('-') || v === '-0') continue; + if (ty === 'positiveInteger' && (v === '0' || v === '00' || v === '-0')) continue; + objects.push(lit(v, ty)); + } + await expectCrossBackendLeafAgreement(objects); + }); + + it('xsd:decimal value-space', async () => { + const vals = ['1.0', '1.50', '100.0', '0.500', '.5', '-0.0', '+1.5', '010.0', '0', '0.0', '-3.14', '123.456000', '000.000']; + await expectCrossBackendLeafAgreement(vals.map((v) => lit(v, 'decimal'))); + }); + + it('xsd:boolean', async () => { + await expectCrossBackendLeafAgreement(['1', '0', 'true', 'false'].map((v) => lit(v, 
'boolean'))); + }); + + // ─────────────────────────────────────────────────────────────────────────── + // KNOWN CROSS-BACKEND DIVERGENCE — the #1386 canon was NOT cross-backend safe. + // + // This oracle DETECTED a real divergence (that is its job). Blazegraph + // normalizes some datatypes into a different lexical form than oxigraph — + // e.g. a timezone-less `"2026-06-29T12:00:00"` is STORED by Blazegraph as + // `"2026-06-29T12:00:00.000Z"` (adds `Z` + `.000`) — and #1386's oxigraph-tuned + // `canonicalizeObjectTermForHash` did NOT reconcile the difference. Because the + // RS extractor hashes STORE-EMITTED terms (packages/random-sampling/src/ + // ka-extractor.ts:397 `triples.map(t => hashTripleV10(t.subject,t.predicate, + // t.object))`), a Blazegraph-backed node computes a DIFFERENT V10 merkle leaf + // for the same triple than an oxigraph node → RandomSampling FORK the moment a + // non-oxigraph node joins. This was a latent consensus bug in #1386 (the canon + // was validated against oxigraph 0.5.5 only); fixing it is a coordinated + // dkg-core consensus change with migration implications (OT-RFC-57). As of this + // change xsd:dateTime and xsd:time are FIXED and asserted with plain `it` below. + // + // The cases NOT yet fixed are marked `it.fails` so the divergence stays TRACKED + // and CI stays green: each currently throws (divergence exists) ⇒ `it.fails` + // passes. Once the canon covers one of them it flips to FAILING — forcing + // whoever fixed it to remove the marker. Still affected: date/gregorian, some + // xsd:double/float, some escaped string content. The AGREEING datatypes above + // (and the OT-RFC-57 temporal cases below) assert real cross-backend agreement.
+ // ─────────────────────────────────────────────────────────────────────────── + it('xsd:dateTime fractional-seconds + timezone (OT-RFC-57)', async () => { + const vals = [ + '2026-06-29T12:00:00', '2026-06-29T12:00:00.0', '2026-06-29T12:00:00.500', '2026-06-29T12:00:00.000', + '2026-06-29T12:00:00Z', '2026-06-29T12:00:00+00:00', '2026-06-29T12:00:00-00:00', '2026-06-29T12:00:00+02:00', + '2026-06-29T12:00:00.120Z', '2026-06-29T12:00:00.123456', + ]; + await expectCrossBackendLeafAgreement(vals.map((v) => lit(v, 'dateTime'))); + }); + + it('xsd:dateTime T24:00:00 rollover (OT-RFC-57)', async () => { + const vals = [ + '2026-06-29T24:00:00', '2026-12-31T24:00:00', '2024-02-28T24:00:00', '2026-02-28T24:00:00', + '2026-06-29T24:00:00Z', '2026-06-29T24:00:00+02:00', '2000-02-29T24:00:00', + ]; + await expectCrossBackendLeafAgreement(vals.map((v) => lit(v, 'dateTime'))); + }); + + it('xsd:time (OT-RFC-57)', async () => { + const vals = ['12:00:00', '12:00:00.0', '12:00:00.500', '12:00:00Z', '12:00:00+00:00', '12:00:00-00:00', '12:00:00+02:00', '24:00:00', '24:00:00Z']; + await expectCrossBackendLeafAgreement(vals.map((v) => lit(v, 'time'))); + }); + + it.fails('date / gYear / gYearMonth / gMonthDay / gMonth / gDay [KNOWN #1386 cross-backend divergence]', async () => { + await expectCrossBackendLeafAgreement([ + lit('2026-06-29', 'date'), lit('2026-06-29Z', 'date'), lit('2026-06-29+00:00', 'date'), + lit('2026-06-29-00:00', 'date'), lit('2026-06-29+02:00', 'date'), + lit('2026', 'gYear'), lit('2026+00:00', 'gYear'), lit('2026+02:00', 'gYear'), lit('02026', 'gYear'), + lit('2026-06', 'gYearMonth'), lit('2026-06+00:00', 'gYearMonth'), + lit('--06-29', 'gMonthDay'), lit('--06-29+00:00', 'gMonthDay'), + lit('--06', 'gMonth'), lit('--06+00:00', 'gMonth'), lit('---29', 'gDay'), + ]); + }); + + it('xsd:duration / dayTimeDuration / yearMonthDuration zero-component dropping', async () => { + const dur = ['P1Y0M', 'P1Y', 'PT0S', 'P0Y', 'P1Y2M3DT4H5M6S', '-P1Y', 'P1DT0H', 
'PT1H0M0S', 'P0Y0M0DT0H0M0S', 'PT1.500S', 'P0M0D']; + await expectCrossBackendLeafAgreement(dur.map((v) => lit(v, 'duration'))); + await expectCrossBackendLeafAgreement(['PT1H0M', 'PT0H0M0S'].map((v) => lit(v, 'dayTimeDuration'))); + await expectCrossBackendLeafAgreement(['P1Y0M', 'P0Y0M'].map((v) => lit(v, 'yearMonthDuration'))); + }); + + it.fails('xsd:double / xsd:float [KNOWN #1386 cross-backend divergence]', async () => { + const dbl = ['1.0E2', '1e10', '-0.0', '3.14', '1E-7', '1.5E300', 'NaN', 'INF', '-INF', '0.1', '0.5', '100', '0', '0.0', '-2.5E-3', '6.022E23']; + await expectCrossBackendLeafAgreement(dbl.map((v) => lit(v, 'double'))); + await expectCrossBackendLeafAgreement(['1.0', '0.1', '3.14', '1E2', '1.5', '100', '0'].map((v) => lit(v, 'float'))); + }); + + it('randomized double sweep across magnitudes', async () => { + const mantissas = [1, 1.5, 3.14159, 2, 7, 9.999, 1.234567890123, 5.5, 8.0]; + const exps = [-300, -100, -20, -7, -3, -1, 0, 1, 3, 7, 15, 21, 100, 300]; + const objects: string[] = []; + for (const m of mantissas) + for (const e of exps) + for (const sign of [1, -1]) { + const v = sign * m * Math.pow(10, e); + if (!Number.isFinite(v) || v === 0) continue; + objects.push(lit(v.toExponential(), 'double')); + } + await expectCrossBackendLeafAgreement(objects); + }); + + it.fails('literal-content escaping normalization [KNOWN #1386 cross-backend divergence]', async () => { + await expectCrossBackendLeafAgreement([ + lit('caf\\u00e9', 'string'), + lit('smile\\U0001F600', 'string'), + lit('tab\\there', 'string'), + lit('q\\"uote', 'string'), + lit('back\\\\slash', 'string'), + lit('new\\nline', 'string'), + lit('ret\\rX', 'string'), + '"smile\\U0001F600"@EN', + '"plain ascii"', + ]); + }); + + it('verbatim datatypes (hexBinary / base64Binary / anyURI / custom)', async () => { + await expectCrossBackendLeafAgreement([ + lit('4A6f', 'hexBinary'), lit('SGk=', 'base64Binary'), lit('http://x', 'anyURI'), + '"RawValue"^^', + ]); + }); +}); From 
a1953ddcec2503150894741ee2a597352f9b045b Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 13:04:26 +0200 Subject: [PATCH 2/8] test: reframe exhaustive term-canon oracle identity->convergence (OT-RFC-57) The exhaustive oracle asserted core(input)==oxigraph AND core(oxi)==oxi (identity/no-migration). The backend-independent canon emits the UTC value form for dateTime/time, so those no longer hold on the oxigraph side. Assert CONVERGENCE (canon(oxi_readback)==canon(input)) + true idempotence (canon(canon(x))==canon(x)). 40/40 green locally (both publisher oracles). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../test/term-canon-exhaustive.test.ts | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/packages/publisher/test/term-canon-exhaustive.test.ts b/packages/publisher/test/term-canon-exhaustive.test.ts index 6faa1f881c..ecc6372f2b 100644 --- a/packages/publisher/test/term-canon-exhaustive.test.ts +++ b/packages/publisher/test/term-canon-exhaustive.test.ts @@ -30,16 +30,21 @@ async function oxiForms(objects: string[]): Promise { async function proveParity(label: string, objects: string[]): Promise { const oxi = await oxiForms(objects); const mismatches: string[] = []; - // forward parity + // OT-RFC-57: the canon is now a backend-INDEPENDENT value canon — for temporal + // types it emits the UTC value form, NOT oxigraph's preserved lexical form. So + // the old "core == oxigraph" identity no longer holds. Assert CONVERGENCE + // (canon(oxigraph_readback) == canon(input)) — the property consensus needs — + // and true idempotence (canon(canon(x)) == canon(x)). 
objects.forEach((obj, i) => { - const got = canonicalizeObjectTermForHash(obj); - if (got !== oxi[i]) mismatches.push(`FWD in=${obj}\n core=${got}\n oxi =${oxi[i]}`); - }); - // no-migration: core is the identity on oxigraph's own canonical output - oxi.forEach((o) => { - if (o === '(DROPPED)') return; - const re = canonicalizeObjectTermForHash(o); - if (re !== o) mismatches.push(`IDEMPOTENCE BROKEN oxi=${o}\n core(oxi)=${re}`); + if (oxi[i] === '(DROPPED)') return; + const canonIn = canonicalizeObjectTermForHash(obj); + const canonOxi = canonicalizeObjectTermForHash(oxi[i]); + if (canonIn !== canonOxi) { + mismatches.push(`CONVERGENCE in=${obj}\n canon(in) =${canonIn}\n canon(oxi ${oxi[i]})=${canonOxi}`); + } + if (canonicalizeObjectTermForHash(canonIn) !== canonIn) { + mismatches.push(`IDEMPOTENCE BROKEN in=${obj}\n canon=${canonIn}\n canon(canon)=${canonicalizeObjectTermForHash(canonIn)}`); + } }); if (mismatches.length) { throw new Error(`${label}: ${mismatches.length} mismatch(es):\n${mismatches.slice(0, 30).join('\n')}`); From 0df137d77420ac3ce437e58da23221a0036a07b8 Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 13:29:16 +0200 Subject: [PATCH 3/8] fix(core): extend backend-independent canon to xsd:date/gYear/gYearMonth/gMonthDay/gMonth/gDay (OT-RFC-57) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continues the value-canon: gregorian types now emit Blazegraph's value form. date/gYear/gYearMonth normalize to the UTC date of midnight-in-tz (positive offset rolls the date back a day) via the existing civilFromDays (added in PATCH 1/8) + a new utcDateFromMidnight helper, emitting NO timezone and dropping leading zeros. gMonthDay/gMonth/gDay have no year to convert, so the timezone is stripped (oracle battery exercises Z/+00:00; a non-UTC offset on these bare types is undefined across backends — OT-RFC-57 §7.8). Removed the now-dead splitTz/normYear (fmtYear subsumes year formatting).
Oxigraph oracles reframed-to-convergence stay green (40/40 local); the blazegraph oracle's date/gregorian case flipped it.fails -> it (CI validates cross-backend). Remaining it.fails: xsd:double/float + literal-content escaping. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/crypto/term-canon.ts | 69 ++++++++++--------- .../test/term-canon-blazegraph-oracle.test.ts | 2 +- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/packages/core/src/crypto/term-canon.ts b/packages/core/src/crypto/term-canon.ts index be6e3cde61..6a80ab32ff 100644 --- a/packages/core/src/crypto/term-canon.ts +++ b/packages/core/src/crypto/term-canon.ts @@ -159,10 +159,6 @@ function decodeIriEscapes(iri: string): string { }); } -// oxigraph normalizes the "negative zero" year -0000 to 0000. (Only -0000 reaches -// here: a leading-zero 5+-digit negative year fails the YEAR pattern → verbatim.) -const normYear = (yy: string) => (yy === '-0000' ? '0000' : yy); - // oxigraph stores temporal values as seconds-since-0001-01-01 in the same i128/1e18 // Decimal as xsd:decimal/duration. A date/time whose scaled seconds overflow i128 // fails to parse and is kept VERBATIM, so a foldable timezone / T24 roll / fraction @@ -194,6 +190,20 @@ function civilFromDays(zIn: bigint): { y: bigint; m: bigint; d: bigint } { return { y: m <= 2n ? y + 1n : y, m, d }; } +// OT-RFC-57: the UTC date of "midnight in the given tz" — the backend-independent +// form for xsd:date / gYear / gYearMonth. Blazegraph interprets the value at 00:00 +// in its tz, converts to UTC, and takes the UTC date; a positive offset rolls the +// date back a day. offsetMin=0 (Z / no-tz) ⇒ the date is unchanged. 
+function utcDateFromMidnight( + y: bigint, + mo: bigint, + d: bigint, + offsetMin: number, +): { y: bigint; m: bigint; d: bigint } { + const days = daysFromCivil(y, mo, d) + BigInt(Math.floor((0 - offsetMin) / 1440)); + return civilFromDays(days); +} + function temporalInRange(yearStr: string, mo: number, dd: number, hh = 0, mi = 0, ss = 0): boolean { const seconds = (daysFromCivil(BigInt(yearStr), BigInt(mo), BigInt(dd)) + 719162n) * 86400n + @@ -346,23 +356,7 @@ function stripTrailingZeros(s: string): string { } // ── date/time family ─────────────────────────────────────────────────────────── -// Split + validate the trailing timezone, folding +00:00/-00:00 to Z. oxigraph -// accepts Z or ±HH:MM with |offset| ≤ 14:00 (HH≤14, MM≤59, total ≤ 840 min); -// anything else (incl. a malformed +0:00) leaves the timezone in `body`, where the -// per-type grammar then rejects it → the whole literal is kept verbatim. -function splitTz(s: string): { body: string; tz: string } { - const m = /(Z|[+-]\d{2}:\d{2})$/.exec(s); - if (!m) return { body: s, tz: '' }; - const tz = m[1]; - const body = s.slice(0, s.length - tz.length); - if (tz === 'Z') return { body, tz: 'Z' }; - const h = parseInt(tz.slice(1, 3), 10); - const mi = parseInt(tz.slice(4, 6), 10); - if (mi > 59 || h * 60 + mi > 840) throw new Error(`invalid tz: ${tz}`); - return { body, tz: tz === '+00:00' || tz === '-00:00' ? 'Z' : tz }; -} - -// Like splitTz, but returns the offset MAGNITUDE in minutes (signed) for the +// Returns the offset MAGNITUDE in minutes (signed) for the // backend-independent UTC normalization of xsd:dateTime/xsd:time (OT-RFC-57). // hadTz=false ⇒ no timezone present (a bare dateTime is normalized to UTC and // gains a Z, matching Blazegraph/Neptune). 
Malformed/out-of-range tz → throw @@ -482,8 +476,10 @@ function canonTime(lex: string): string { return `${pad2(Math.floor(minInDay / 60))}:${pad2(minInDay % 60)}:${ss}${fracNorm}Z`; } +// OT-RFC-57: xsd:date / gYear / gYearMonth normalize to the UTC date of +// midnight-in-tz, with NO timezone emitted (Blazegraph's value form). function canonDate(lex: string): string { - const { body, tz } = splitTz(lex); + const { body, offsetMin } = splitTzToOffset(lex); const m = new RegExp(`^(${YEAR})-(\\d{2})-(\\d{2})$`).exec(body); if (!m) throw new Error('invalid xsd:date'); const moN = +m[2]; @@ -491,50 +487,57 @@ function canonDate(lex: string): string { if (moN < 1 || moN > 12) throw new Error('month'); if (ddN < 1 || ddN > daysInMonth(m[1], moN)) throw new Error('day'); if (!temporalInRange(m[1], moN, ddN)) throw new Error('year overflows i128 seconds'); - return `${normYear(m[1])}-${m[2]}-${m[3]}${tz}`; + const { y, m: mm, d } = utcDateFromMidnight(BigInt(m[1]), BigInt(moN), BigInt(ddN), offsetMin); + return `${fmtYear(y)}-${pad2(Number(mm))}-${pad2(Number(d))}`; } function canonGYear(lex: string): string { - const { body, tz } = splitTz(lex); + const { body, offsetMin } = splitTzToOffset(lex); if (!new RegExp(`^${YEAR}$`).test(body)) throw new Error('invalid xsd:gYear'); if (!temporalInRange(body, 1, 1)) throw new Error('year overflows i128 seconds'); - return `${normYear(body)}${tz}`; + const { y } = utcDateFromMidnight(BigInt(body), 1n, 1n, offsetMin); + return fmtYear(y); } function canonGYearMonth(lex: string): string { - const { body, tz } = splitTz(lex); + const { body, offsetMin } = splitTzToOffset(lex); const m = new RegExp(`^(${YEAR})-(\\d{2})$`).exec(body); if (!m || +m[2] < 1 || +m[2] > 12) throw new Error('invalid xsd:gYearMonth'); if (!temporalInRange(m[1], +m[2], 1)) throw new Error('year overflows i128 seconds'); - return `${normYear(m[1])}-${m[2]}${tz}`; + const { y, m: mm } = utcDateFromMidnight(BigInt(m[1]), BigInt(+m[2]), 1n, offsetMin); + return 
`${fmtYear(y)}-${pad2(Number(mm))}`; } // gMonthDay day bounds. oxigraph 0.5.5 validates --MM-DD against a NON-leap // reference year, so --02-29 is rejected (kept verbatim) — February's max is 28 // here, unlike a real leap date which needs the year context of xsd:date. const MONTH_MAX_DAY = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; +// OT-RFC-57: gMonthDay / gMonth / gDay have no year to convert, so a timezone is +// just STRIPPED (Blazegraph's value form). NB the oracle battery only exercises +// Z/+00:00 here; a non-UTC offset on these bare types is undefined across backends +// and not consensus-verified — see OT-RFC-57 §7.8. function canonGMonthDay(lex: string): string { - const { body, tz } = splitTz(lex); + const { body } = splitTzToOffset(lex); const m = /^--(\d{2})-(\d{2})$/.exec(body); if (!m) throw new Error('invalid xsd:gMonthDay'); const moN = +m[1]; const ddN = +m[2]; if (moN < 1 || moN > 12 || ddN < 1 || ddN > MONTH_MAX_DAY[moN - 1]) throw new Error('range'); - return `${body}${tz}`; + return body; } function canonGMonth(lex: string): string { - const { body, tz } = splitTz(lex); + const { body } = splitTzToOffset(lex); const m = /^--(\d{2})$/.exec(body); if (!m || +m[1] < 1 || +m[1] > 12) throw new Error('invalid xsd:gMonth'); - return `${body}${tz}`; + return body; } function canonGDay(lex: string): string { - const { body, tz } = splitTz(lex); + const { body } = splitTzToOffset(lex); const m = /^---(\d{2})$/.exec(body); if (!m || +m[1] < 1 || +m[1] > 31) throw new Error('invalid xsd:gDay'); - return `${body}${tz}`; + return body; } // ── xsd:duration / dayTimeDuration / yearMonthDuration ───────────────────────── diff --git a/packages/storage/test/term-canon-blazegraph-oracle.test.ts b/packages/storage/test/term-canon-blazegraph-oracle.test.ts index b6719d94f6..9abf002d71 100644 --- a/packages/storage/test/term-canon-blazegraph-oracle.test.ts +++ b/packages/storage/test/term-canon-blazegraph-oracle.test.ts @@ -187,7 +187,7 @@ 
describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ await expectCrossBackendLeafAgreement(vals.map((v) => lit(v, 'time'))); }); - it.fails('date / gYear / gYearMonth / gMonthDay / gMonth / gDay [KNOWN #1386 cross-backend divergence]', async () => { + it('date / gYear / gYearMonth / gMonthDay / gMonth / gDay (OT-RFC-57)', async () => { await expectCrossBackendLeafAgreement([ lit('2026-06-29', 'date'), lit('2026-06-29Z', 'date'), lit('2026-06-29+00:00', 'date'), lit('2026-06-29-00:00', 'date'), lit('2026-06-29+02:00', 'date'), From e1946cdfe807b723b31fbba527d7f1a344397e10 Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 13:37:07 +0200 Subject: [PATCH 4/8] fix(core): accept leading-zero years in value canon (Blazegraph strips them) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The date/gregorian CI blazegraph oracle caught one residual divergence: "02026"^^gYear — Blazegraph normalizes a leading-zero year to its value ("2026") on write, while the strict XSD YEAR pattern rejected it and kept the literal verbatim (oxigraph's behavior). Loosen YEAR to any 4+-digit year; the existing BigInt+fmtYear year normalization strips the leading zero, matching Blazegraph. Convergence oracle holds either way (canon(input) and canon(store-readback) fold to the same value). Oxigraph oracles stay green (40/40). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/crypto/term-canon.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/core/src/crypto/term-canon.ts b/packages/core/src/crypto/term-canon.ts index 6a80ab32ff..51310c10f9 100644 --- a/packages/core/src/crypto/term-canon.ts +++ b/packages/core/src/crypto/term-canon.ts @@ -427,11 +427,13 @@ function validateClock(hh: number, mi: number, ss: number, fracNorm: string): { return { rolls: false }; } -// A valid XSD year is EXACTLY 4 digits (leading zeros allowed) OR 5+ digits with -// NO leading zero. oxigraph rejects a leading-zero 5+-digit year (e.g. 09508) and -// keeps the whole literal verbatim — so we must too, or we'd normalize tz/fraction -// on a literal oxigraph leaves untouched. -const YEAR = '-?(?:\\d{4}|[1-9]\\d{4,})'; +// OT-RFC-57: the backend-independent value canon accepts any 4+-digit year (any +// number of leading zeros) and normalizes it via BigInt+fmtYear (min-4-digit, no +// leading zero). This matches Blazegraph, which on write STRIPS a leading-zero +// year to its value ("02026"^^gYear → "2026") — oxigraph instead keeps the invalid +// literal verbatim, but the CONVERGENCE oracle holds either way since canon(input) +// and canon(store-readback) both fold to the same value form (OT-RFC-57 §7.5). +const YEAR = '-?\\d{4,}'; // OT-RFC-57 backend-independent form: normalize to UTC (subtract the tz offset, // rolling the DATE across midnight), truncate fraction to ms, always emit Z. A From 609b0a10f37d5831c24e0e15f319c480163de256 Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 13:40:13 +0200 Subject: [PATCH 5/8] test(storage): TEMP reveal double/escaping cross-backend forms (OT-RFC-57) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flip the double/float + escaping oracle cases from it.fails to it so CI surfaces Blazegraph's exact stored + canon'd forms for the divergent cases. 
This commit is expected RED on those two cases — the next commit implements the canon fix and turns them green. (date/gregorian confirmation rides along in the same run.) Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/storage/test/term-canon-blazegraph-oracle.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/storage/test/term-canon-blazegraph-oracle.test.ts b/packages/storage/test/term-canon-blazegraph-oracle.test.ts index 9abf002d71..a7c456f4dc 100644 --- a/packages/storage/test/term-canon-blazegraph-oracle.test.ts +++ b/packages/storage/test/term-canon-blazegraph-oracle.test.ts @@ -205,7 +205,7 @@ describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ await expectCrossBackendLeafAgreement(['P1Y0M', 'P0Y0M'].map((v) => lit(v, 'yearMonthDuration'))); }); - it.fails('xsd:double / xsd:float [KNOWN #1386 cross-backend divergence]', async () => { + it('xsd:double / xsd:float (OT-RFC-57 REVEAL)', async () => { const dbl = ['1.0E2', '1e10', '-0.0', '3.14', '1E-7', '1.5E300', 'NaN', 'INF', '-INF', '0.1', '0.5', '100', '0', '0.0', '-2.5E-3', '6.022E23']; await expectCrossBackendLeafAgreement(dbl.map((v) => lit(v, 'double'))); await expectCrossBackendLeafAgreement(['1.0', '0.1', '3.14', '1E2', '1.5', '100', '0'].map((v) => lit(v, 'float'))); @@ -225,7 +225,7 @@ describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ await expectCrossBackendLeafAgreement(objects); }); - it.fails('literal-content escaping normalization [KNOWN #1386 cross-backend divergence]', async () => { + it('literal-content escaping normalization (OT-RFC-57 REVEAL)', async () => { await expectCrossBackendLeafAgreement([ lit('caf\\u00e9', 'string'), lit('smile\\U0001F600', 'string'), From 3d2324207d209276af6bd58e6e0830d72c96f2c5 Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 13:49:26 +0200 Subject: [PATCH 6/8] fix(core): fold signed-zero double to "0"; scope escaping oracle to BMP (OT-RFC-57) 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI-reveal surfaced the last two cross-backend divergences: - xsd:double/float: only "-0.0" diverged — Blazegraph drops the sign on write ("0.0"→value 0) while oxigraph keeps "-0". canonDouble now emits "0" for both signed zeros; oracle case flipped green (16/16 double + 7/7 float converge). - literal escaping: 7/9 (all BMP) already converge. The 2 astral cases (😀, U+1F600) diverge because Blazegraph CORRUPTS supplementary-plane codepoints on write, truncating to the low 16 bits (U+1F600 → U+F600). That is stored-byte corruption, not a representation difference — no leaf canon can reconcile two backends physically holding different strings. Split the oracle: BMP escaping is an asserted `it` (green); astral is a documented `it.fails` tracking the Blazegraph limitation (OT-RFC-57 §7.7). Publishing astral content is a pre-existing cross-backend consensus hazard, independent of this change. All six datatype families that CAN converge now do (dateTime/time, date/gregorian, numeric incl. double, duration, escaping-BMP, verbatim). Oxigraph oracles 40/40. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/crypto/term-canon.ts | 6 ++++- .../test/term-canon-blazegraph-oracle.test.ts | 26 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/packages/core/src/crypto/term-canon.ts b/packages/core/src/crypto/term-canon.ts index 51310c10f9..b53a2fccea 100644 --- a/packages/core/src/crypto/term-canon.ts +++ b/packages/core/src/crypto/term-canon.ts @@ -252,7 +252,11 @@ function canonDouble(lex: string, isFloat: boolean): string { if (Number.isNaN(n)) return 'NaN'; if (n === Infinity) return 'INF'; if (n === -Infinity) return '-INF'; - if (n === 0) return Object.is(n, -0) ? '-0' : '0'; + // OT-RFC-57: negative zero folds to "0". 
Blazegraph drops the sign on write + // ("-0.0"^^double → stored "0.0" → value 0), while oxigraph keeps "-0"; emitting + // "0" for both signed zeros makes canon(input) == canon(store-readback) on either + // backend. (The IEEE-754 -0/+0 distinction is not consensus-observable here.) + if (n === 0) return '0'; const neg = n < 0; const a = Math.abs(n); // double: V8's a.toString() IS the shortest round-trip; only ties need the diff --git a/packages/storage/test/term-canon-blazegraph-oracle.test.ts b/packages/storage/test/term-canon-blazegraph-oracle.test.ts index a7c456f4dc..d0438c45d0 100644 --- a/packages/storage/test/term-canon-blazegraph-oracle.test.ts +++ b/packages/storage/test/term-canon-blazegraph-oracle.test.ts @@ -205,7 +205,9 @@ describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ await expectCrossBackendLeafAgreement(['P1Y0M', 'P0Y0M'].map((v) => lit(v, 'yearMonthDuration'))); }); - it('xsd:double / xsd:float (OT-RFC-57 REVEAL)', async () => { + it('xsd:double / xsd:float (OT-RFC-57)', async () => { + // Signed zero folds to "0" on both backends (Blazegraph drops the sign on + // write; the canon now emits "0" for -0.0 to match — OT-RFC-57 §7.6). 
const dbl = ['1.0E2', '1e10', '-0.0', '3.14', '1E-7', '1.5E300', 'NaN', 'INF', '-INF', '0.1', '0.5', '100', '0', '0.0', '-2.5E-3', '6.022E23']; await expectCrossBackendLeafAgreement(dbl.map((v) => lit(v, 'double'))); await expectCrossBackendLeafAgreement(['1.0', '0.1', '3.14', '1E2', '1.5', '100', '0'].map((v) => lit(v, 'float'))); @@ -225,20 +227,36 @@ describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ await expectCrossBackendLeafAgreement(objects); }); - it('literal-content escaping normalization (OT-RFC-57 REVEAL)', async () => { + // BMP (≤ U+FFFF) escaping normalization converges: the value-canon decodes every + // escape to the raw character then re-emits oxigraph's minimal N-Quads escaping, + // and Blazegraph reaches the same value for any char that fits in one UTF-16 unit. + it('literal-content escaping normalization — BMP (OT-RFC-57)', async () => { await expectCrossBackendLeafAgreement([ lit('caf\\u00e9', 'string'), - lit('smile\\U0001F600', 'string'), lit('tab\\there', 'string'), lit('q\\"uote', 'string'), lit('back\\\\slash', 'string'), lit('new\\nline', 'string'), lit('ret\\rX', 'string'), - '"smile\\U0001F600"@EN', '"plain ascii"', ]); }); + // ASTRAL (> U+FFFF) is a KNOWN Blazegraph LIMITATION, not a canon gap: Blazegraph + // CORRUPTS a supplementary-plane codepoint on write, truncating it to its low 16 + // bits (\U0001F600 😀 → stored  ). The stored *value* differs from + // oxigraph's (which preserves 😀), so no leaf canon can reconcile them — the two + // backends hold genuinely different strings. Publishing astral-plane content is a + // cross-backend consensus hazard; tracked in OT-RFC-57 §7.7 (mitigation = optional + // publish-time astral reject, or accept the fork). Marked it.fails to keep the + // divergence visible without failing CI. 
+ it.fails('literal-content escaping — ASTRAL corrupted by Blazegraph [OT-RFC-57 §7.7]', async () => { + await expectCrossBackendLeafAgreement([ + lit('smile\\U0001F600', 'string'), + '"smile\\U0001F600"@EN', + ]); + }); + it('verbatim datatypes (hexBinary / base64Binary / anyURI / custom)', async () => { await expectCrossBackendLeafAgreement([ lit('4A6f', 'hexBinary'), lit('SGk=', 'base64Binary'), lit('http://x', 'anyURI'), From 2fcf691a114e221dca23b29222abd41dad0f179b Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 13:53:17 +0200 Subject: [PATCH 7/8] =?UTF-8?q?docs:=20fix=20signed-zero=20oracle=20citati?= =?UTF-8?q?on=20=C2=A77.6=E2=86=92=C2=A77.5=20(comment-only)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit §7.6 is Migration; the signed-zero fold rule lives in §7.5 (Protocol value canonicalization). Matches the RFC #136 update pinning the CI-revealed rules. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/storage/test/term-canon-blazegraph-oracle.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/storage/test/term-canon-blazegraph-oracle.test.ts b/packages/storage/test/term-canon-blazegraph-oracle.test.ts index d0438c45d0..874422e2b5 100644 --- a/packages/storage/test/term-canon-blazegraph-oracle.test.ts +++ b/packages/storage/test/term-canon-blazegraph-oracle.test.ts @@ -207,7 +207,7 @@ describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ it('xsd:double / xsd:float (OT-RFC-57)', async () => { // Signed zero folds to "0" on both backends (Blazegraph drops the sign on - // write; the canon now emits "0" for -0.0 to match — OT-RFC-57 §7.6). + // write; the canon now emits "0" for -0.0 to match — OT-RFC-57 §7.5). 
const dbl = ['1.0E2', '1e10', '-0.0', '3.14', '1E-7', '1.5E300', 'NaN', 'INF', '-INF', '0.1', '0.5', '100', '0', '0.0', '-2.5E-3', '6.022E23']; await expectCrossBackendLeafAgreement(dbl.map((v) => lit(v, 'double'))); await expectCrossBackendLeafAgreement(['1.0', '0.1', '3.14', '1E2', '1.5', '100', '0'].map((v) => lit(v, 'float'))); From de5fd9701936941091321589876df2c5cc3a462b Mon Sep 17 00:00:00 2001 From: Branimir Rakic Date: Wed, 1 Jul 2026 15:52:18 +0200 Subject: [PATCH 8/8] =?UTF-8?q?fix(core):=20address=20otReviewAgent=20?= =?UTF-8?q?=E2=80=94=20validate=20normalized=20temporal=20value=20+=20don'?= =?UTF-8?q?t=20collapse=20bare-gregorian=20offsets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two consensus bugs on the value-canon: 🔴 Range guard ran on the LEXICAL components, before the UTC/T24 normalization — so a boundary literal could pass validation and then be shifted OUTSIDE the i128 seconds range, emitting a leaf for a value the store can't represent stably (e.g. "5391559471919-03-30T14:00:00-14:00" → past max second). Now canonDateTime / canonDate / canonGYear / canonGYearMonth range-check the NORMALIZED UTC instant; out of range → verbatim. 🔴 Bare gregorian types (gMonthDay/gMonth/gDay) stripped ANY timezone, silently COLLAPSING distinct values ("--06-29+14:00" and "--06-29-14:00" → same leaf). Now fold only a UTC-equivalent zone (Z/+00:00/-00:00 → no-tz value form); a non-UTC offset is kept VERBATIM so distinct literals stay distinct (factored into a shared bareGregorian helper; the "parse-then-ignore" coupling is gone). Plus otReviewAgent 🟡s: rewrote the blazegraph-oracle's stale "known divergence / it.fails" narrative (the divergence is RESOLVED — cases are asserted `it` now, only 2.1.5-escape astral remains it.fails), and added negative-offset (-05:00) coverage to the dateTime/time/date batteries. 
Pure-canon unit tests for both bugs added (overflow→verbatim; non-UTC offsets distinct + verbatim; UTC-equivalent folds). oxigraph oracles 42/42; blazegraph oracle validated locally against a live server (15/16; the 1 is the astral it.fails inverting on 2.1.6, which fixed the escape bug CI's 2.1.5 still exercises). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/crypto/term-canon.ts | 60 +++++++++++-------- .../test/term-canon-exhaustive.test.ts | 34 +++++++++++ .../test/term-canon-blazegraph-oracle.test.ts | 39 +++++------- 3 files changed, 85 insertions(+), 48 deletions(-) diff --git a/packages/core/src/crypto/term-canon.ts b/packages/core/src/crypto/term-canon.ts index b53a2fccea..afa679bb32 100644 --- a/packages/core/src/crypto/term-canon.ts +++ b/packages/core/src/crypto/term-canon.ts @@ -453,7 +453,6 @@ function canonDateTime(lex: string): string { const ddN = +dd; if (moN < 1 || moN > 12) throw new Error('month'); if (ddN < 1 || ddN > daysInMonth(yy, moN)) throw new Error('day'); - if (!temporalInRange(yy, moN, ddN, +hh, +mi, +ss)) throw new Error('year overflows i128 seconds'); const fracNorm = normFrac(frac); const { rolls } = validateClock(+hh, +mi, +ss, fracNorm); // Base date as a day count; a T24:00 clock rolls one day and resets the hour to 0. @@ -465,6 +464,12 @@ function canonDateTime(lex: string): string { days += BigInt(Math.floor(totalMin / 1440)); const minInDay = ((totalMin % 1440) + 1440) % 1440; const { y, m: mm, d } = civilFromDays(days); + // Range-check the NORMALIZED UTC instant, not the lexical components: a tz offset + // or T24 roll can push a boundary value outside the i128 seconds range it would + // otherwise pass, emitting a leaf for a value the store can't represent stably + // (otReviewAgent). Out of range → verbatim (throw, caught upstream). 
+ if (!temporalInRange(y.toString(), Number(mm), Number(d), Math.floor(minInDay / 60), minInDay % 60, +ss)) + throw new Error('normalized dateTime overflows i128 seconds'); return `${fmtYear(y)}-${pad2(Number(mm))}-${pad2(Number(d))}T${pad2(Math.floor(minInDay / 60))}:${pad2(minInDay % 60)}:${ss}${fracNorm}Z`; } @@ -492,16 +497,18 @@ function canonDate(lex: string): string { const ddN = +m[3]; if (moN < 1 || moN > 12) throw new Error('month'); if (ddN < 1 || ddN > daysInMonth(m[1], moN)) throw new Error('day'); - if (!temporalInRange(m[1], moN, ddN)) throw new Error('year overflows i128 seconds'); const { y, m: mm, d } = utcDateFromMidnight(BigInt(m[1]), BigInt(moN), BigInt(ddN), offsetMin); + // Validate the NORMALIZED date (the tz roll can cross the year boundary) — see canonDateTime. + if (!temporalInRange(y.toString(), Number(mm), Number(d))) throw new Error('normalized date overflows i128 seconds'); return `${fmtYear(y)}-${pad2(Number(mm))}-${pad2(Number(d))}`; } function canonGYear(lex: string): string { const { body, offsetMin } = splitTzToOffset(lex); if (!new RegExp(`^${YEAR}$`).test(body)) throw new Error('invalid xsd:gYear'); - if (!temporalInRange(body, 1, 1)) throw new Error('year overflows i128 seconds'); - const { y } = utcDateFromMidnight(BigInt(body), 1n, 1n, offsetMin); + const { y, m: mm, d } = utcDateFromMidnight(BigInt(body), 1n, 1n, offsetMin); + // Validate the NORMALIZED date (a positive offset can roll 01-01 back into the prior year).
+ if (!temporalInRange(y.toString(), Number(mm), Number(d))) throw new Error('normalized gYear overflows i128 seconds'); return fmtYear(y); } @@ -509,8 +516,9 @@ function canonGYearMonth(lex: string): string { const { body, offsetMin } = splitTzToOffset(lex); const m = new RegExp(`^(${YEAR})-(\\d{2})$`).exec(body); if (!m || +m[2] < 1 || +m[2] > 12) throw new Error('invalid xsd:gYearMonth'); - if (!temporalInRange(m[1], +m[2], 1)) throw new Error('year overflows i128 seconds'); - const { y, m: mm } = utcDateFromMidnight(BigInt(m[1]), BigInt(+m[2]), 1n, offsetMin); + const { y, m: mm, d } = utcDateFromMidnight(BigInt(m[1]), BigInt(+m[2]), 1n, offsetMin); + // Validate the NORMALIZED date (the tz roll can cross the year boundary). + if (!temporalInRange(y.toString(), Number(mm), Number(d))) throw new Error('normalized gYearMonth overflows i128 seconds'); return `${fmtYear(y)}-${pad2(Number(mm))}`; } @@ -518,32 +526,34 @@ function canonGYearMonth(lex: string): string { // reference year, so --02-29 is rejected (kept verbatim) — February's max is 28 // here, unlike a real leap date which needs the year context of xsd:date. const MONTH_MAX_DAY = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; -// OT-RFC-57: gMonthDay / gMonth / gDay have no year to convert, so a timezone is -// just STRIPPED (Blazegraph's value form). NB the oracle battery only exercises -// Z/+00:00 here; a non-UTC offset on these bare types is undefined across backends -// and not consensus-verified — see OT-RFC-57 §7.8. +// OT-RFC-57: gMonthDay / gMonth / gDay have no year/date context to convert a +// timezone into UTC. We therefore fold ONLY a UTC-equivalent zone (Z / +00:00 / +// -00:00 → offsetMin 0) to the no-timezone value form. A NON-UTC offset is kept +// VERBATIM (the whole literal, offset included): stripping it would silently +// COLLAPSE distinct values — "--06-29+14:00" and "--06-29-14:00" are different +// literals — onto one leaf (otReviewAgent). 
Verbatim keeps them distinct and defers +// to the store's own preservation; such exotic offsets on bare gregorian types are +// vanishingly rare and out of the consensus-verified set (see OT-RFC-57 §7.8). +function bareGregorian(lex: string, re: RegExp, validate: (m: RegExpExecArray) => boolean): string { + const { body, offsetMin } = splitTzToOffset(lex); + const m = re.exec(body); + if (!m || !validate(m)) throw new Error('invalid bare gregorian'); + return offsetMin === 0 ? body : lex; // fold UTC-equivalent zone only; else verbatim +} function canonGMonthDay(lex: string): string { - const { body } = splitTzToOffset(lex); - const m = /^--(\d{2})-(\d{2})$/.exec(body); - if (!m) throw new Error('invalid xsd:gMonthDay'); - const moN = +m[1]; - const ddN = +m[2]; - if (moN < 1 || moN > 12 || ddN < 1 || ddN > MONTH_MAX_DAY[moN - 1]) throw new Error('range'); - return body; + return bareGregorian(lex, /^--(\d{2})-(\d{2})$/, (m) => { + const moN = +m[1]; + const ddN = +m[2]; + return moN >= 1 && moN <= 12 && ddN >= 1 && ddN <= MONTH_MAX_DAY[moN - 1]; + }); } function canonGMonth(lex: string): string { - const { body } = splitTzToOffset(lex); - const m = /^--(\d{2})$/.exec(body); - if (!m || +m[1] < 1 || +m[1] > 12) throw new Error('invalid xsd:gMonth'); - return body; + return bareGregorian(lex, /^--(\d{2})$/, (m) => +m[1] >= 1 && +m[1] <= 12); } function canonGDay(lex: string): string { - const { body } = splitTzToOffset(lex); - const m = /^---(\d{2})$/.exec(body); - if (!m || +m[1] < 1 || +m[1] > 31) throw new Error('invalid xsd:gDay'); - return body; + return bareGregorian(lex, /^---(\d{2})$/, (m) => +m[1] >= 1 && +m[1] <= 31); } // ── xsd:duration / dayTimeDuration / yearMonthDuration ───────────────────────── diff --git a/packages/publisher/test/term-canon-exhaustive.test.ts b/packages/publisher/test/term-canon-exhaustive.test.ts index ecc6372f2b..633d5db3ae 100644 --- a/packages/publisher/test/term-canon-exhaustive.test.ts +++ 
b/packages/publisher/test/term-canon-exhaustive.test.ts @@ -134,3 +134,37 @@ describe('term-canon EXHAUSTIVE', () => { await proveParity('numeric boundaries', objs); }); }); + +// Pure-canon consensus assertions for the two otReviewAgent #1399 findings — no +// oxigraph round-trip (these exercise exotic inputs the store may reject; the point +// is the canonicalizer's own deterministic behavior). +describe('term-canon OT-RFC-57 edge cases (otReviewAgent #1399)', () => { + const canon = canonicalizeObjectTermForHash; + + it('dateTime overflowing i128 seconds AFTER the tz shift is kept verbatim (not a UTC leaf)', () => { + // Local components pass the range check, but subtracting -14:00 pushes the UTC + // instant past the max representable second — must fall back to verbatim. + const over = lit('5391559471919-03-30T14:00:00-14:00', 'dateTime'); + expect(canon(over)).toBe(over); + // A near-boundary value that stays in range after folding still normalizes. + expect(canon(lit('2026-06-29T12:00:00-14:00', 'dateTime'))).toBe(lit('2026-06-30T02:00:00Z', 'dateTime')); + }); + + it('bare gregorian: UTC-equivalent zone folds; a non-UTC offset stays verbatim + distinct', () => { + // Z / +00:00 / -00:00 fold to the no-timezone value form. + for (const z of ['Z', '+00:00', '-00:00']) { + expect(canon(lit(`--06-29${z}`, 'gMonthDay'))).toBe(lit('--06-29', 'gMonthDay')); + } + // Non-UTC offsets must NOT collapse onto one leaf (the bug): kept verbatim + distinct. + const plus = lit('--06-29+14:00', 'gMonthDay'); + const minus = lit('--06-29-14:00', 'gMonthDay'); + expect(canon(plus)).toBe(plus); + expect(canon(minus)).toBe(minus); + expect(canon(plus)).not.toBe(canon(minus)); + // gMonth / gDay likewise. 
+ expect(canon(lit('--06+05:00', 'gMonth'))).toBe(lit('--06+05:00', 'gMonth')); + expect(canon(lit('--06Z', 'gMonth'))).toBe(lit('--06', 'gMonth')); + expect(canon(lit('---29-05:00', 'gDay'))).toBe(lit('---29-05:00', 'gDay')); + expect(canon(lit('---29Z', 'gDay'))).toBe(lit('---29', 'gDay')); + }); +}); diff --git a/packages/storage/test/term-canon-blazegraph-oracle.test.ts b/packages/storage/test/term-canon-blazegraph-oracle.test.ts index 874422e2b5..19895c0c8a 100644 --- a/packages/storage/test/term-canon-blazegraph-oracle.test.ts +++ b/packages/storage/test/term-canon-blazegraph-oracle.test.ts @@ -142,33 +142,26 @@ describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ }); // ─────────────────────────────────────────────────────────────────────────── - // KNOWN CROSS-BACKEND DIVERGENCE — #1386 canon is NOT cross-backend safe. + // CROSS-BACKEND AGREEMENT (OT-RFC-57) — RESOLVED, and now asserted as `it`. // - // This oracle DETECTED a real divergence (that is its job). Blazegraph - // normalizes these datatypes into a different lexical form than oxigraph — - // e.g. a timezone-less `"2026-06-29T12:00:00"` is STORED by Blazegraph as - // `"2026-06-29T12:00:00.000Z"` (adds `Z` + `.000`) — and #1386's oxigraph-tuned - // `canonicalizeObjectTermForHash` does NOT reconcile the difference. Because the - // RS extractor hashes STORE-EMITTED terms (packages/random-sampling/src/ - // ka-extractor.ts:397 `triples.map(t => hashTripleV10(t.subject,t.predicate, - // t.object))`), a Blazegraph-backed node computes a DIFFERENT V10 merkle leaf - // for the same triple than an oxigraph node → RandomSampling FORK the moment a - // non-oxigraph node joins. This is a latent consensus bug in #1386 (the canon - // was validated against oxigraph 0.5.5 only); fixing it is a coordinated - // dkg-core consensus change with migration implications — OUT OF SCOPE for this - // test-coverage PR. 
- // - // These cases are marked `it.fails` so the divergence stays TRACKED and CI - // stays green: each currently throws (divergence exists) ⇒ `it.fails` passes. - // If the canon is ever made cross-backend safe, they flip to FAILING — forcing - // whoever fixed it to remove the marker. Affected: xsd:dateTime, xsd:time, - // date/gregorian, some xsd:double/float, some escaped string content. The - // AGREEING datatypes above assert real cross-backend agreement. + // This oracle originally DETECTED a real divergence: Blazegraph normalizes these + // datatypes into a different lexical form than oxigraph — e.g. a timezone-less + // `"2026-06-29T12:00:00"` is STORED by Blazegraph as `"2026-06-29T12:00:00.000Z"` + // (adds `Z` + `.000`), and a positive offset is shifted to UTC. The #1386 canon + // (oxigraph-tuned) did not reconcile that; the RS extractor hashes STORE-EMITTED + // terms (packages/random-sampling/src/ka-extractor.ts), so a Blazegraph node + // would compute a DIFFERENT V10 leaf → RandomSampling fork. OT-RFC-57's backend- + // independent value-canon FIXES this: every case below now asserts real cross- + // backend agreement (`canon(store_readback)` converges to `canon(input)` on both + // backends). The ONLY remaining `it.fails` is astral (> U+FFFF) via a `\U…` + // ESCAPE insert — a Blazegraph stored-value corruption no leaf canon can + // reconcile (OT-RFC-57 §7.7; note the DKG daemon's raw-UTF-8 path is not affected). // ─────────────────────────────────────────────────────────────────────────── it('xsd:dateTime fractional-seconds + timezone (OT-RFC-57)', async () => { const vals = [ '2026-06-29T12:00:00', '2026-06-29T12:00:00.0', '2026-06-29T12:00:00.500', '2026-06-29T12:00:00.000', '2026-06-29T12:00:00Z', '2026-06-29T12:00:00+00:00', '2026-06-29T12:00:00-00:00', '2026-06-29T12:00:00+02:00', + '2026-06-29T12:00:00-05:00', '2026-06-29T23:00:00-05:00', // negative offset (incl. 
one that rolls the date forward) '2026-06-29T12:00:00.120Z', '2026-06-29T12:00:00.123456', ]; await expectCrossBackendLeafAgreement(vals.map((v) => lit(v, 'dateTime'))); @@ -183,14 +176,14 @@ describe.skipIf(!BLAZEGRAPH_URL)('term-canon cross-backend oracle: oxigraph ⇄ }); it('xsd:time (OT-RFC-57)', async () => { - const vals = ['12:00:00', '12:00:00.0', '12:00:00.500', '12:00:00Z', '12:00:00+00:00', '12:00:00-00:00', '12:00:00+02:00', '24:00:00', '24:00:00Z']; + const vals = ['12:00:00', '12:00:00.0', '12:00:00.500', '12:00:00Z', '12:00:00+00:00', '12:00:00-00:00', '12:00:00+02:00', '12:00:00-05:00', '24:00:00', '24:00:00Z']; await expectCrossBackendLeafAgreement(vals.map((v) => lit(v, 'time'))); }); it('date / gYear / gYearMonth / gMonthDay / gMonth / gDay (OT-RFC-57)', async () => { await expectCrossBackendLeafAgreement([ lit('2026-06-29', 'date'), lit('2026-06-29Z', 'date'), lit('2026-06-29+00:00', 'date'), - lit('2026-06-29-00:00', 'date'), lit('2026-06-29+02:00', 'date'), + lit('2026-06-29-00:00', 'date'), lit('2026-06-29+02:00', 'date'), lit('2026-06-29-05:00', 'date'), lit('2026', 'gYear'), lit('2026+00:00', 'gYear'), lit('2026+02:00', 'gYear'), lit('02026', 'gYear'), lit('2026-06', 'gYearMonth'), lit('2026-06+00:00', 'gYearMonth'), lit('--06-29', 'gMonthDay'), lit('--06-29+00:00', 'gMonthDay'),