OriginTrail · branarakic · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026
@@ -332,7 +332,12 @@ jobs:
           # Stock image ships the default `kb` namespace; that's all the
           # adapter needs (it isolates per-test via unique GRAPH IRIs).
           BLAZEGRAPH_TEST_URL: http://127.0.0.1:9999/bigdata/namespace/kb/sparql
-        run: pnpm --filter @origintrail-official/dkg-storage exec vitest run test/blazegraph.integration.test.ts
+        # term-canon-blazegraph-oracle: cross-backend V10 leaf agreement (OT-RFC-57)
+        # — proves an oxigraph node and a Blazegraph node compute the SAME merkle
+        # leaf for the same typed literal (else RandomSampling forks). dateTime/time
+        # are fixed here; date/gregorian/double/escaping remain it.fails pending
+        # the rest of the backend-independent canon.
+        run: pnpm --filter @origintrail-official/dkg-storage exec vitest run test/blazegraph.integration.test.ts test/term-canon-blazegraph-oracle.test.ts
 
   # ------------------------------------------------------------------
   # Tornado publisher lane — sharded across 4 parallel runners.

diff --git a/packages/core/src/crypto/term-canon.ts b/packages/core/src/crypto/term-canon.ts
@@ -159,10 +159,6 @@ function decodeIriEscapes(iri: string): string {
   });
 }
 
-// oxigraph normalizes the "negative zero" year -0000 to 0000. (Only -0000 reaches
-// here: a leading-zero 5+-digit negative year fails the YEAR pattern → verbatim.)
-const normYear = (yy: string) => (yy === '-0000' ? '0000' : yy);
-
 // oxigraph stores temporal values as seconds-since-0001-01-01 in the same i128/1e18
 // Decimal as xsd:decimal/duration. A date/time whose scaled seconds overflow i128
 // fails to parse and is kept VERBATIM, so a foldable timezone / T24 roll / fraction
@@ -177,6 +173,37 @@ function daysFromCivil(y: bigint, m: bigint, d: bigint): bigint {
   const doe = yoe * 365n + yoe / 4n - yoe / 100n + doy;
   return era * 146097n + doe - 719468n;
 }
+// Inverse of daysFromCivil: proleptic-Gregorian (y,m,d) from a signed day count
+// (days since 1970-01-01). Standard Howard Hinnant algorithm. Used to roll the
+// DATE when a timezone offset pushes a dateTime across midnight during the
+// backend-independent UTC normalization (OT-RFC-57).
+function civilFromDays(zIn: bigint): { y: bigint; m: bigint; d: bigint } {
+  const z = zIn + 719468n;
+  const era = (z >= 0n ? z : z - 146096n) / 146097n;
+  const doe = z - era * 146097n; // [0, 146096]
+  const yoe = (doe - doe / 1460n + doe / 36524n - doe / 146096n) / 365n; // [0, 399]
+  const y = yoe + era * 400n;
+  const doy = doe - (365n * yoe + yoe / 4n - yoe / 100n); // [0, 365]
+  const mp = (5n * doy + 2n) / 153n; // [0, 11]
+  const d = doy - (153n * mp + 2n) / 5n + 1n; // [1, 31]
+  const m = mp < 10n ? mp + 3n : mp - 9n; // [1, 12]
+  return { y: m <= 2n ? y + 1n : y, m, d };
+}
+
+// OT-RFC-57: the UTC date of "midnight in the given tz" — the backend-independent
+// form for xsd:date / gYear / gYearMonth. Blazegraph interprets the value at 00:00
+// in its tz, converts to UTC, and takes the UTC date; a positive offset rolls the
+// date back a day. offsetMin≤0 (Z / no-tz / negative offset) ⇒ the date is unchanged.
+function utcDateFromMidnight(
+  y: bigint,
+  mo: bigint,
+  d: bigint,
+  offsetMin: number,
+): { y: bigint; m: bigint; d: bigint } {
+  const days = daysFromCivil(y, mo, d) + BigInt(Math.floor((0 - offsetMin) / 1440));
+  return civilFromDays(days);
+}
+
 function temporalInRange(yearStr: string, mo: number, dd: number, hh = 0, mi = 0, ss = 0): boolean {
   const seconds =
     (daysFromCivil(BigInt(yearStr), BigInt(mo), BigInt(dd)) + 719162n) * 86400n +
@@ -225,7 +252,11 @@ function canonDouble(lex: string, isFloat: boolean): string {
   if (Number.isNaN(n)) return 'NaN';
   if (n === Infinity) return 'INF';
   if (n === -Infinity) return '-INF';
-  if (n === 0) return Object.is(n, -0) ? '-0' : '0';
+  // OT-RFC-57: negative zero folds to "0". Blazegraph drops the sign on write
+  // ("-0.0"^^double → stored "0.0" → value 0), while oxigraph keeps "-0"; emitting
+  // "0" for both signed zeros makes canon(input) == canon(store-readback) on either
+  // backend. (The IEEE-754 -0/+0 distinction is not consensus-observable here.)
+  if (n === 0) return '0';
   const neg = n < 0;
   const a = Math.abs(n);
   // double: V8's a.toString() IS the shortest round-trip; only ties need the
@@ -329,27 +360,31 @@ function stripTrailingZeros(s: string): string {
 }
 
 // ── date/time family ───────────────────────────────────────────────────────────
-// Split + validate the trailing timezone, folding +00:00/-00:00 to Z. oxigraph
-// accepts Z or ±HH:MM with |offset| ≤ 14:00 (HH≤14, MM≤59, total ≤ 840 min);
-// anything else (incl. a malformed +0:00) leaves the timezone in `body`, where the
-// per-type grammar then rejects it → the whole literal is kept verbatim.
-function splitTz(s: string): { body: string; tz: string } {
+// Splits off a trailing timezone and returns it as a SIGNED offset in minutes
+// ('+' ⇒ positive) for the backend-independent UTC normalization (OT-RFC-57).
+// hadTz=false ⇒ no timezone present (a bare dateTime is normalized to UTC and
+// gains a Z, matching Blazegraph/Neptune). Out-of-range tz → throw; a malformed
+// tz stays in `body` for the per-type grammar to reject (→ verbatim, as oxigraph does).
+function splitTzToOffset(s: string): { body: string; offsetMin: number; hadTz: boolean } {
   const m = /(Z|[+-]\d{2}:\d{2})$/.exec(s);
-  if (!m) return { body: s, tz: '' };
+  if (!m) return { body: s, offsetMin: 0, hadTz: false };
   const tz = m[1];
   const body = s.slice(0, s.length - tz.length);
-  if (tz === 'Z') return { body, tz: 'Z' };
+  if (tz === 'Z') return { body, offsetMin: 0, hadTz: true };
   const h = parseInt(tz.slice(1, 3), 10);
   const mi = parseInt(tz.slice(4, 6), 10);
   if (mi > 59 || h * 60 + mi > 840) throw new Error(`invalid tz: ${tz}`);
-  return { body, tz: tz === '+00:00' || tz === '-00:00' ? 'Z' : tz };
+  const mag = h * 60 + mi;
+  return { body, offsetMin: tz[0] === '-' ? -mag : mag, hadTz: true };
 }
 
-// Normalize a fractional-seconds group ('.ddd' or undefined): strip trailing
-// zeros, drop entirely if it becomes empty.
+// Normalize a fractional-seconds group ('.ddd' or undefined): TRUNCATE to at most
+// 3 digits (milliseconds — the backend-independent precision floor; a lossy store
+// such as Blazegraph keeps only ms), then strip trailing zeros; drop entirely if
+// empty. Truncate, NOT round (matches Blazegraph). (OT-RFC-57)
 function normFrac(frac: string | undefined): string {
   if (frac === undefined) return '';
-  const d = frac.slice(1).replace(/0+$/, '');
+  const d = frac.slice(1, 4).replace(/0+$/, ''); // at most 3 digits, then strip trailing zeros
   return d === '' ? '' : `.${d}`;
 }
 
@@ -396,14 +431,21 @@ function validateClock(hh: number, mi: number, ss: number, fracNorm: string): {
   return { rolls: false };
 }
 
-// A valid XSD year is EXACTLY 4 digits (leading zeros allowed) OR 5+ digits with
-// NO leading zero. oxigraph rejects a leading-zero 5+-digit year (e.g. 09508) and
-// keeps the whole literal verbatim — so we must too, or we'd normalize tz/fraction
-// on a literal oxigraph leaves untouched.
-const YEAR = '-?(?:\\d{4}|[1-9]\\d{4,})';
-
+// OT-RFC-57: the backend-independent value canon accepts any 4+-digit year (any
+// number of leading zeros) and normalizes it via BigInt+fmtYear (≥4 digits, no
+// surplus leading zeros). This matches Blazegraph, which on write STRIPS a leading-zero
+// year to its value ("02026"^^gYear → "2026") — oxigraph instead keeps the invalid
+// literal verbatim, but the CONVERGENCE oracle holds either way since canon(input)
+// and canon(store-readback) both fold to the same value form (OT-RFC-57 §7.5).
+const YEAR = '-?\\d{4,}';
+
+// OT-RFC-57 backend-independent form: normalize to UTC (subtract the tz offset,
+// rolling the DATE across midnight), truncate fraction to ms, always emit Z. A
+// no-timezone dateTime is treated as UTC and gains a Z (matching Blazegraph /
+// Neptune). This is the value-space form the publisher's input AND every
+// backend's read-back converge to.
 function canonDateTime(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body, offsetMin } = splitTzToOffset(lex);
   const m = new RegExp(`^(${YEAR})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})(\\.\\d+)?$`).exec(body);
   if (!m) throw new Error('invalid xsd:dateTime');
   const [, yy, mo, dd, hh, mi, ss, frac] = m;
@@ -413,90 +455,95 @@ function canonDateTime(lex: string): string {
   if (ddN < 1 || ddN > daysInMonth(yy, moN)) throw new Error('day');
   if (!temporalInRange(yy, moN, ddN, +hh, +mi, +ss)) throw new Error('year overflows i128 seconds');
   const fracNorm = normFrac(frac);
-  if (fracNorm.length - 1 > 18) throw new Error('sub-1e-18 seconds'); // oxigraph stores ≤18 frac digits
   const { rolls } = validateClock(+hh, +mi, +ss, fracNorm);
-  if (rolls) {
-    return `${rollNextDay(yy, moN, ddN)}T00:${mi}:${ss}${fracNorm}${tz}`;
-  }
-  // KNOWN oxigraph 0.5.5 DEFECT (documented, NOT mirrored): a BEFORE-EPOCH dateTime
-  // (before 0001-01-01T00:00:00, i.e. year ≤ 0000) with seconds == 59 AND a non-zero
-  // fraction has its minute bumped by +1 on every load→serialize round-trip. Far
-  // before the epoch it never stabilises (-1711-…T15:19:59.6 → :20:59.6 → :21:59.6 →
-  // …); near it the bump just crosses into year 0001 once. Either way the store has
-  // no stable form for these, so no canonicalization can make them consensus-safe.
-  // We deliberately do NOT replicate the bump: canon stays DETERMINISTIC + IDEMPOTENT
-  // (the best achievable), normalising tz/fraction like any other dateTime and
-  // leaving the wall-clock untouched. Residual exposure = a pre-existing oxigraph
-  // storage defect for an essentially-nonexistent input class (BCE / year-0 timestamps
-  // at :59 with sub-second precision) — escalated to the store layer, off this canon.
-  return `${normYear(yy)}-${mo}-${dd}T${hh}:${mi}:${ss}${fracNorm}${tz}`;
+  // Base date as a day count; a T24:00 clock rolls one day and resets the hour to 0.
+  let days = daysFromCivil(BigInt(yy), BigInt(moN), BigInt(ddN));
+  const hourN = rolls ? 0 : +hh;
+  if (rolls) days += 1n;
+  // UTC: subtract the offset (whole minutes); roll the date across midnight.
+  const totalMin = hourN * 60 + +mi - offsetMin;
+  days += BigInt(Math.floor(totalMin / 1440));
+  const minInDay = ((totalMin % 1440) + 1440) % 1440;
+  const { y, m: mm, d } = civilFromDays(days);
+  return `${fmtYear(y)}-${pad2(Number(mm))}-${pad2(Number(d))}T${pad2(Math.floor(minInDay / 60))}:${pad2(minInDay % 60)}:${ss}${fracNorm}Z`;
 }
 
+// OT-RFC-57: time has no date, so a tz offset just wraps the wall clock mod 24h;
+// normalize to UTC + Z, ms-truncated.
 function canonTime(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body, offsetMin } = splitTzToOffset(lex);
   const m = /^(\d{2}):(\d{2}):(\d{2})(\.\d+)?$/.exec(body);
   if (!m) throw new Error('invalid xsd:time');
   const [, hh, mi, ss, frac] = m;
   const fracNorm = normFrac(frac);
-  if (fracNorm.length - 1 > 18) throw new Error('sub-1e-18 seconds');
   const { rolls } = validateClock(+hh, +mi, +ss, fracNorm);
-  // time has no date to roll; hour 24 → 00 of the same wall clock.
-  return `${rolls ? '00' : hh}:${mi}:${ss}${fracNorm}${tz}`;
+  const hourN = rolls ? 0 : +hh;
+  const minInDay = (((hourN * 60 + +mi - offsetMin) % 1440) + 1440) % 1440;
+  return `${pad2(Math.floor(minInDay / 60))}:${pad2(minInDay % 60)}:${ss}${fracNorm}Z`;
 }
 
+// OT-RFC-57: xsd:date / gYear / gYearMonth normalize to the UTC date of
+// midnight-in-tz, with NO timezone emitted (Blazegraph's value form).
 function canonDate(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body, offsetMin } = splitTzToOffset(lex);
   const m = new RegExp(`^(${YEAR})-(\\d{2})-(\\d{2})$`).exec(body);
   if (!m) throw new Error('invalid xsd:date');
   const moN = +m[2];
   const ddN = +m[3];
   if (moN < 1 || moN > 12) throw new Error('month');
   if (ddN < 1 || ddN > daysInMonth(m[1], moN)) throw new Error('day');
   if (!temporalInRange(m[1], moN, ddN)) throw new Error('year overflows i128 seconds');
-  return `${normYear(m[1])}-${m[2]}-${m[3]}${tz}`;
+  const { y, m: mm, d } = utcDateFromMidnight(BigInt(m[1]), BigInt(moN), BigInt(ddN), offsetMin);
+  return `${fmtYear(y)}-${pad2(Number(mm))}-${pad2(Number(d))}`;
 }
 
 function canonGYear(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body, offsetMin } = splitTzToOffset(lex);
   if (!new RegExp(`^${YEAR}$`).test(body)) throw new Error('invalid xsd:gYear');
   if (!temporalInRange(body, 1, 1)) throw new Error('year overflows i128 seconds');
-  return `${normYear(body)}${tz}`;
+  const { y } = utcDateFromMidnight(BigInt(body), 1n, 1n, offsetMin);
+  return fmtYear(y);
 }
 
 function canonGYearMonth(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body, offsetMin } = splitTzToOffset(lex);
   const m = new RegExp(`^(${YEAR})-(\\d{2})$`).exec(body);
   if (!m || +m[2] < 1 || +m[2] > 12) throw new Error('invalid xsd:gYearMonth');
   if (!temporalInRange(m[1], +m[2], 1)) throw new Error('year overflows i128 seconds');
-  return `${normYear(m[1])}-${m[2]}${tz}`;
+  const { y, m: mm } = utcDateFromMidnight(BigInt(m[1]), BigInt(+m[2]), 1n, offsetMin);
+  return `${fmtYear(y)}-${pad2(Number(mm))}`;
 }
 
 // gMonthDay day bounds. oxigraph 0.5.5 validates --MM-DD against a NON-leap
 // reference year, so --02-29 is rejected (kept verbatim) — February's max is 28
 // here, unlike a real leap date which needs the year context of xsd:date.
 const MONTH_MAX_DAY = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
+// OT-RFC-57: gMonthDay / gMonth / gDay have no year to convert, so a timezone is
+// just STRIPPED (Blazegraph's value form). NB the oracle battery only exercises
+// Z/+00:00 here; a non-UTC offset on these bare types is undefined across backends
+// and not consensus-verified — see OT-RFC-57 §7.8.
 function canonGMonthDay(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body } = splitTzToOffset(lex);
   const m = /^--(\d{2})-(\d{2})$/.exec(body);
   if (!m) throw new Error('invalid xsd:gMonthDay');
   const moN = +m[1];
   const ddN = +m[2];
   if (moN < 1 || moN > 12 || ddN < 1 || ddN > MONTH_MAX_DAY[moN - 1]) throw new Error('range');
-  return `${body}${tz}`;
+  return body;
 }
 
 function canonGMonth(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body } = splitTzToOffset(lex);
   const m = /^--(\d{2})$/.exec(body);
   if (!m || +m[1] < 1 || +m[1] > 12) throw new Error('invalid xsd:gMonth');
-  return `${body}${tz}`;
+  return body;
 }
 
 function canonGDay(lex: string): string {
-  const { body, tz } = splitTz(lex);
+  const { body } = splitTzToOffset(lex);
   const m = /^---(\d{2})$/.exec(body);
   if (!m || +m[1] < 1 || +m[1] > 31) throw new Error('invalid xsd:gDay');
-  return `${body}${tz}`;
+  return body;
 }
 
 // ── xsd:duration / dayTimeDuration / yearMonthDuration ─────────────────────────

diff --git a/packages/publisher/test/term-canon-exhaustive.test.ts b/packages/publisher/test/term-canon-exhaustive.test.ts
@@ -30,16 +30,21 @@ async function oxiForms(objects: string[]): Promise<string[]> {
 async function proveParity(label: string, objects: string[]): Promise<void> {
   const oxi = await oxiForms(objects);
   const mismatches: string[] = [];
-  // forward parity
+  // OT-RFC-57: the canon is now a backend-INDEPENDENT value canon — for temporal
+  // types it emits the UTC value form, NOT oxigraph's preserved lexical form. So
+  // the old "core == oxigraph" identity no longer holds. Assert CONVERGENCE
+  // (canon(oxigraph_readback) == canon(input)) — the property consensus needs —
+  // and true idempotence (canon(canon(x)) == canon(x)).
   objects.forEach((obj, i) => {
-    const got = canonicalizeObjectTermForHash(obj);
-    if (got !== oxi[i]) mismatches.push(`FWD in=${obj}\n   core=${got}\n   oxi =${oxi[i]}`);
-  });
-  // no-migration: core is the identity on oxigraph's own canonical output
-  oxi.forEach((o) => {
-    if (o === '(DROPPED)') return;
-    const re = canonicalizeObjectTermForHash(o);
-    if (re !== o) mismatches.push(`IDEMPOTENCE BROKEN oxi=${o}\n   core(oxi)=${re}`);
+    if (oxi[i] === '(DROPPED)') return;
+    const canonIn = canonicalizeObjectTermForHash(obj);
+    const canonOxi = canonicalizeObjectTermForHash(oxi[i]);
+    if (canonIn !== canonOxi) {
+      mismatches.push(`CONVERGENCE in=${obj}\n   canon(in) =${canonIn}\n   canon(oxi ${oxi[i]})=${canonOxi}`);
+    }
+    if (canonicalizeObjectTermForHash(canonIn) !== canonIn) {
+      mismatches.push(`IDEMPOTENCE BROKEN in=${obj}\n   canon=${canonIn}\n   canon(canon)=${canonicalizeObjectTermForHash(canonIn)}`);
+    }
   });
   if (mismatches.length) {
     throw new Error(`${label}: ${mismatches.length} mismatch(es):\n${mismatches.slice(0, 30).join('\n')}`);

diff --git a/packages/publisher/test/term-canon-oracle.test.ts b/packages/publisher/test/term-canon-oracle.test.ts
@@ -32,16 +32,27 @@ async function oxigraphForms(objects: string[]): Promise<string[]> {
   return objects.map((_, i) => byPred.get(`urn:p#${i}`) ?? '(DROPPED)');
 }
 
-/** Assert the pure core canonicalizer reproduces oxigraph's form for every input. */
+/**
+ * OT-RFC-57: the canon is now a backend-INDEPENDENT value canon — it no longer
+ * reproduces oxigraph's stored lexical form (for temporal types it emits the UTC,
+ * ms-truncated form so oxigraph and Blazegraph nodes agree). So we assert
+ * CONVERGENCE, not identity: `canon(oxigraph_readback) === canon(input)`. This is
+ * exactly what consensus needs — the publisher (input) and a prover reading from
+ * an oxigraph store compute the same leaf — and it holds for BOTH the types the
+ * canon rewrites (e.g. dateTime/time/date) and those it leaves as oxigraph's form.
+ */
 async function expectMatchesOxigraph(objects: string[]): Promise<void> {
   const oxi = await oxigraphForms(objects);
   const mismatches: string[] = [];
   objects.forEach((obj, i) => {
-    const got = canonicalizeObjectTermForHash(obj);
-    if (got !== oxi[i]) mismatches.push(`  in:  ${obj}\n  core:${got}\n  oxi: ${oxi[i]}`);
+    const canonInput = canonicalizeObjectTermForHash(obj);
+    const canonOxi = canonicalizeObjectTermForHash(oxi[i]);
+    if (canonInput !== canonOxi) {
+      mismatches.push(`  in:            ${obj}\n  canon(input):  ${canonInput}\n  canon(oxi-store ${oxi[i]}): ${canonOxi}`);
+    }
   });
   if (mismatches.length) {
-    throw new Error(`core canon diverged from oxigraph (${mismatches.length}/${objects.length}):\n${mismatches.join('\n')}`);
+    throw new Error(`canon(input) != canon(oxigraph-readback) — publisher/prover would fork (${mismatches.length}/${objects.length}):\n${mismatches.join('\n')}`);
   }
   expect(mismatches.length).toBe(0);
 }
@@ -322,8 +333,15 @@ describe('term-canon oracle: fuzz-hardened edge classes (#1386)', () => {
     const oxi = await oxigraphForms(battery);
     for (let i = 0; i < battery.length; i++) {
       const once = canonicalizeObjectTermForHash(battery[i]);
-      expect(canonicalizeObjectTermForHash(once)).toBe(once); // idempotent
-      if (oxi[i] !== '(DROPPED)') expect(canonicalizeObjectTermForHash(oxi[i])).toBe(oxi[i]); // identity on store output
+      expect(canonicalizeObjectTermForHash(once)).toBe(once); // idempotent (fixed point)
+      // OT-RFC-57: the canon is no longer the IDENTITY on oxigraph's output — for
+      // temporal types it normalizes oxigraph's preserved form to the UTC value
+      // form (so oxigraph nodes agree with Blazegraph). That is the intended
+      // oxigraph/devnet migration (mainnet = Blazegraph is unchanged; asserted by
+      // the Blazegraph oracle). What MUST hold for consensus is CONVERGENCE:
+      // canon(oxigraph_readback) == canon(input) ⇒ publisher and an oxigraph
+      // prover compute the same leaf.
+      if (oxi[i] !== '(DROPPED)') expect(canonicalizeObjectTermForHash(oxi[i])).toBe(once);
     }
   });