diff --git a/spine-cli/src/export.rs b/spine-cli/src/export.rs index e6d0531..34093de 100644 --- a/spine-cli/src/export.rs +++ b/spine-cli/src/export.rs @@ -575,15 +575,24 @@ fn syslog_token(s: &str) -> String { } } -/// Escape `"` and `\` for values that land inside the quoted MSG -/// portion. RFC 3164 / 5424 do not specify escaping; this matches -/// the convention most SIEMs (Splunk, ELK, `QRadar`) accept. +/// Escape `"`, `\`, and C0 control characters (including CR/LF) for +/// values that land inside the quoted MSG portion. RFC 3164 / 5424 do not +/// specify escaping; this matches the convention most SIEMs (Splunk, ELK, +/// `QRadar`) accept. Escaping the control characters is load-bearing: a +/// newline in a producer-controlled field (e.g. `event_type`) would +/// otherwise split the line and forge an extra syslog record. fn syslog_escape(s: &str) -> String { let mut out = String::with_capacity(s.len()); for c in s.chars() { match c { '"' => out.push_str("\\\""), '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + // Any other C0 control character could split or corrupt the + // line; render it as a visible escape rather than emit it raw. + c if (c as u32) < 0x20 => out.push_str(&format!("\\x{:02x}", c as u32)), c => out.push(c), } } diff --git a/spine-cli/src/inspect.rs b/spine-cli/src/inspect.rs index edfdc2c..d18f63d 100644 --- a/spine-cli/src/inspect.rs +++ b/spine-cli/src/inspect.rs @@ -212,7 +212,9 @@ fn compute_stats(wal_path: &Path) -> Result { stats.integrity.prev_hash_links_ok = false; } if let Some(prev_seq) = prev_sequence { - if entry.sequence != prev_seq + 1 { + // saturating_add: a record may carry sequence = u64::MAX, and a + // bare `prev_seq + 1` would overflow on the following record. + if entry.sequence != prev_seq.saturating_add(1) { stats.integrity.sequence_contiguous = false; } } else if entry.sequence != 1 { diff --git a/spine-cli/tests/cli_smoke.rs b/spine-cli/tests/cli_smoke.rs index b4b0c1f..7c7dfc6 100644 --- a/spine-cli/tests/cli_smoke.rs +++ b/spine-cli/tests/cli_smoke.rs @@ -533,3 +533,75 @@ fn verify_lenient_on_strict_wal_emits_profile_hint() { "lenient failure on a strict WAL must hint at --strict" ); } + +#[test] +fn inspect_stats_on_max_sequence_does_not_panic() { + // Regression: a record carrying sequence = u64::MAX overflowed the + // `prev_seq + 1` contiguity check, panicking the binary (exit 101). + // It must now report a non-contiguous chain and exit cleanly. + let dir = tempfile::tempdir().expect("tempdir"); + let line1 = json!({ + "sequence": u64::MAX, + "timestamp_ns": 1, + "prev_hash": GENESIS_PREV_HASH, + "payload_hash": "ab".repeat(32), + }); + let line2 = json!({ + "sequence": 5, + "timestamp_ns": 2, + "prev_hash": "cd".repeat(32), + "payload_hash": "ef".repeat(32), + }); + let body = format!("{line1}\n{line2}\n"); + std::fs::write(dir.path().join("00000001.jsonl"), body).expect("write wal"); + + let out = run(&[ + "inspect", + "--wal", + path_str(dir.path()), + "--stats", + "--format", + "json", + ]); + assert_eq!(code(&out), 0, "stats on a u64::MAX sequence must not panic"); + let stats: Value = serde_json::from_str(&stdout(&out)).expect("stats JSON"); + assert_eq!(stats["integrity"]["sequence_contiguous"], false); +} + +#[test] +fn export_syslog_escapes_control_chars_in_event_type() { + // A producer-controlled newline in event_type must not split the + // syslog output into an extra forged record. + let dir = tempfile::tempdir().expect("tempdir"); + let line = json!({ + "sequence": 1, + "timestamp_ns": 1_700_000_000_000_000_000i64, + "prev_hash": GENESIS_PREV_HASH, + "payload_hash": "ab".repeat(32), + "event_type": "login\n<13>1 forged", + "source": "auth", + }); + std::fs::write(dir.path().join("00000001.jsonl"), format!("{line}\n")).expect("write wal"); + + let out = run(&[ + "export", + "--wal", + path_str(dir.path()), + "--export-format", + "syslog", + "--format", + "quiet", + ]); + assert_eq!(code(&out), 0); + let body = stdout(&out); + let lines: Vec<&str> = body.lines().filter(|l| !l.trim().is_empty()).collect(); + assert_eq!( + lines.len(), + 1, + "a newline in event_type must not add a syslog line" + ); + assert!( + body.contains("\\n"), + "the newline must be escaped, not emitted raw" + ); +} diff --git a/spine-core/src/canonical.rs b/spine-core/src/canonical.rs index cc7337f..316df01 100644 --- a/spine-core/src/canonical.rs +++ b/spine-core/src/canonical.rs @@ -15,9 +15,10 @@ //! //! - **Strings**: escaped per RFC 8259 (matches `JSON.stringify`); content is //! normalized to **Unicode NFC** before serialization. -//! - **Integers** in `i64` range, plus `u64` values above `i64::MAX`. -//! Output: decimal digits, no leading sign for non-negative, no decimal -//! point. Matches `String(integer)` in JavaScript. +//! - **Integers** with magnitude up to `2^53 - 1` (JavaScript's +//! `Number.MAX_SAFE_INTEGER`). Output: decimal digits, no leading sign +//! for non-negative, no decimal point. Matches `String(integer)` in +//! JavaScript. Larger integers are refused (see below). //! - **Booleans**: `true` / `false`. //! - **Null**: `null`. //! - **Arrays**: `[item1,item2,…]`, no whitespace. @@ -25,18 +26,25 @@ //! order** (matches `Array.prototype.sort()` in JS), serialized as //! `{"k1":v1,"k2":v2,…}`, no whitespace. //! -//! Numbers must be integer-valued AND representable as an `i64`. A JSON -//! float with a whole value such as `2.0` or `-0` is accepted and -//! serialized without a decimal point (matching `Number.isInteger`), but -//! only when it maps to an `i64` exactly. A finite non-integer is rejected -//! with [`CanonicalError::NonIntegerNumber`]; an integer-valued float -//! outside `i64` range is rejected with [`CanonicalError::NumberOutOfRange`] -//! (saturating it would let two distinct payloads canonicalize to the same -//! bytes, so the canonical form would no longer be injective). NaN / Infinity -//! cannot occur because `serde_json::Value::Number` rejects them at parse -//! time. This is intentional: the demo WAL encodes monetary amounts as strings (e.g. -//! `"amount": "100.00"`), sidestepping ECMA-262 `NumberToString` quirks -//! entirely. +//! Numbers must be integer-valued AND within the JS-safe integer range +//! (magnitude up to `2^53 - 1`). A JSON float with a whole value such as +//! `2.0` or `-0` is accepted and serialized without a decimal point +//! (matching `Number.isInteger`), but only when it maps exactly to a +//! safe-range integer. A finite non-integer is rejected with +//! [`CanonicalError::NonIntegerNumber`]; an integer (or integer-valued +//! float) outside the safe range is rejected with +//! [`CanonicalError::NumberOutOfRange`]. Refusing out-of-range values +//! keeps the canonical form injective AND reproducible by a JS verifier +//! that parses the WAL with `JSON.parse` (which would round anything past +//! `2^53`). NaN / Infinity cannot occur because `serde_json::Value::Number` +//! rejects them at parse time. This is intentional: the demo WAL encodes +//! monetary amounts as strings (e.g. `"amount": "100.00"`), sidestepping +//! ECMA-262 `NumberToString` quirks entirely. +//! +//! Object keys are NFC-normalized before sorting. If two byte-distinct +//! input keys collapse to the same key after normalization, the value is +//! rejected with [`CanonicalError::DuplicateKeyAfterNfc`] rather than +//! emitting non-injective JSON with duplicate keys. //! //! ## Subtlety: UTF-16 vs UTF-8 key ordering //! @@ -60,10 +68,26 @@ pub enum CanonicalError { #[error("non-integer number not supported in canonical JSON: {0}")] NonIntegerNumber(String), - #[error("integer outside i64 range not supported in canonical JSON: {0}")] + #[error( + "integer outside the safe range supported by canonical JSON (max magnitude 2^53-1): {0}" + )] NumberOutOfRange(String), + + #[error("two object keys collide after NFC normalization: {0:?}")] + DuplicateKeyAfterNfc(String), } +/// Largest integer magnitude that round-trips through an IEEE-754 double +/// without loss, i.e. JavaScript's `Number.MAX_SAFE_INTEGER` (2^53 - 1). +/// +/// Canonical JSON refuses integers beyond this. A larger integer is exact +/// in Rust (serde keeps the full i64/u64), but a JS reimplementation that +/// parsed the same WAL with `JSON.parse` already rounded it, so the two +/// canonical forms would diverge and the cross-language hash contract +/// would silently break. Producers encode larger values as strings (the +/// demo WAL does this for monetary amounts). +const MAX_SAFE_INTEGER: i64 = 9_007_199_254_740_991; + /// Canonicalize a parsed JSON value to a UTF-8 byte sequence. /// /// The output is suitable for hashing with BLAKE3 to produce `payload_hash`. @@ -114,6 +138,17 @@ fn write_value(value: &Value, out: &mut Vec) -> Result<(), CanonicalError> { .collect(); entries.sort_by(|a, b| a.1.cmp(&b.1)); + // Two byte-distinct input keys can collapse to the same key + // after NFC normalization (e.g. precomposed vs decomposed + // "café"). Emitting both would produce non-injective, invalid + // JSON (duplicate object keys) that a JS verifier would not + // reproduce, so reject. Duplicates are adjacent after the sort. + for adj in entries.windows(2) { + if adj[0].1 == adj[1].1 { + return Err(CanonicalError::DuplicateKeyAfterNfc(adj[0].0.clone())); + } + } + out.push(b'{'); for (i, (key, _, val)) in entries.iter().enumerate() { if i > 0 { @@ -131,15 +166,21 @@ fn write_value(value: &Value, out: &mut Vec) -> Result<(), CanonicalError> { fn write_number(n: &serde_json::Number, out: &mut Vec) -> Result<(), CanonicalError> { if let Some(i) = n.as_i64() { + if !(-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER).contains(&i) { + // Exact in Rust, but a JS verifier that parsed this with + // JSON.parse already lost precision, so the two canonical + // forms would diverge. Refuse rather than emit bytes no JS + // reimplementation can reproduce. See MAX_SAFE_INTEGER. + return Err(CanonicalError::NumberOutOfRange(n.to_string())); + } out.extend_from_slice(i.to_string().as_bytes()); return Ok(()); } - if let Some(u) = n.as_u64() { - // Only reachable for u64 > i64::MAX, which is outside JS safe range - // and we'd refuse to roundtrip anyway. Match JavaScript's String(u) - // for completeness. - out.extend_from_slice(u.to_string().as_bytes()); - return Ok(()); + if n.as_u64().is_some() { + // The only u64 values that are not also i64 are those above + // i64::MAX, which are far beyond MAX_SAFE_INTEGER. They cannot + // round-trip through a JS verifier, so refuse them. + return Err(CanonicalError::NumberOutOfRange(n.to_string())); } // Last resort: serde_json parses `-0` and any integer-valued literal that // overflows i64/u64 as `f64`. Match `Number.isInteger(value)` semantics: @@ -161,7 +202,10 @@ fn write_number(n: &serde_json::Number, out: &mut Vec) -> Result<(), Canonic // happened) before we trust `as_int`. #[allow(clippy::cast_precision_loss, clippy::float_cmp)] let round_trips = as_int as f64 == f; - if round_trips { + // Also bound to the JS-safe range: a float at or beyond 2^53 + // cannot distinguish adjacent integers, so accepting it would + // diverge from a JS verifier just as the integer path would. + if round_trips && (-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER).contains(&as_int) { out.extend_from_slice(as_int.to_string().as_bytes()); return Ok(()); } @@ -205,8 +249,47 @@ mod tests { assert_eq!(canon(json!(0)), "0"); assert_eq!(canon(json!(-1)), "-1"); assert_eq!(canon(json!(42)), "42"); - assert_eq!(canon(json!(i64::MAX)), i64::MAX.to_string()); - assert_eq!(canon(json!(i64::MIN)), i64::MIN.to_string()); + // The largest magnitudes that still round-trip through a JS double. + assert_eq!(canon(json!(9_007_199_254_740_991i64)), "9007199254740991"); + assert_eq!(canon(json!(-9_007_199_254_740_991i64)), "-9007199254740991"); + } + + #[test] + fn rejects_integers_beyond_js_safe_range() { + // At or beyond 2^53 a value cannot round-trip through a JS double, + // so a JS reimplementation that parsed the WAL with JSON.parse + // would disagree on the bytes. Refuse rather than diverge silently. + assert!(matches!( + canonical_json(&json!(9_007_199_254_740_992i64)), // 2^53 + Err(CanonicalError::NumberOutOfRange(_)) + )); + assert!(matches!( + canonical_json(&json!(i64::MAX)), + Err(CanonicalError::NumberOutOfRange(_)) + )); + assert!(matches!( + canonical_json(&json!(i64::MIN)), + Err(CanonicalError::NumberOutOfRange(_)) + )); + // A u64 above i64::MAX is likewise refused. + let big: Value = serde_json::from_str(r#"{"n":9223372036854775813}"#).unwrap(); + assert!(matches!( + canonical_json(&big), + Err(CanonicalError::NumberOutOfRange(_)) + )); + } + + #[test] + fn rejects_keys_colliding_after_nfc() { + // "cafe\u{0301}" (decomposed) and "caf\u{00e9}" (precomposed) are + // byte-distinct map keys that normalize to the same NFC key. + // Emitting both would be duplicate, non-injective JSON. + let input = "{\"cafe\u{0301}\":1,\"caf\u{00e9}\":2}"; + let result = canonical_json_from_bytes(input.as_bytes()); + assert!(matches!( + result, + Err(CanonicalError::DuplicateKeyAfterNfc(_)) + )); } #[test] diff --git a/spine-core/src/verify.rs b/spine-core/src/verify.rs index 1a1bfc4..90a7753 100644 --- a/spine-core/src/verify.rs +++ b/spine-core/src/verify.rs @@ -258,13 +258,18 @@ fn verify_internal(bytes: &[u8], opts: &LenientOptions) -> VerificationResult { } if let Some(prev_seq) = prev_sequence { - if entry.sequence != prev_seq + 1 { + // saturating_add: a hostile record can carry sequence = u64::MAX + // (it fails the genesis check, but in accumulate-all mode the + // chain still advances and records it as prev_sequence), and a + // bare `prev_seq + 1` would then overflow on the next record. + // spine-core promises never to panic, so saturate instead. + let expected = prev_seq.saturating_add(1); + if entry.sequence != expected { let err = VerificationError { sequence: Some(entry.sequence), error_type: "sequence_gap".to_string(), details: format!( - "expected sequence {} after {prev_seq}, found {}", - prev_seq + 1, + "expected sequence {expected} after {prev_seq}, found {}", entry.sequence ), }; @@ -896,6 +901,22 @@ mod tests { assert!(r.errors.iter().any(|e| e.error_type == "parse_error")); } + #[test] + fn max_sequence_first_record_does_not_panic() { + // Regression: a first record carrying sequence = u64::MAX used to + // overflow `prev_seq + 1` on the following record (panic in debug, + // silent wrap in release). It must now degrade to a normal failure + // rather than abort the verifier. + let a = make_entry(u64::MAX, 1, GENESIS_PREV_HASH, "p1"); + let h = compute_entry_hash(&a); + let b = make_entry(5, 2, &h, "p2"); + let bytes = to_jsonl(&[a, b]); + let r = verify_wal_bytes(&bytes); + assert!(!r.valid); // genesis sequence is not 1 + assert_eq!(r.events_verified, 2); + assert!(r.errors.iter().any(|e| e.error_type == "sequence_gap")); + } + #[test] fn receipts_without_keystore_emit_warning() { use crate::receipt::Receipt;