Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions spine-cli/src/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,15 +575,24 @@ fn syslog_token(s: &str) -> String {
}
}

/// Escape `"` and `\` for values that land inside the quoted MSG
/// portion. RFC 3164 / 5424 do not specify escaping; this matches
/// the convention most SIEMs (Splunk, ELK, `QRadar`) accept.
/// Escape `"`, `\`, and C0 control characters (including CR/LF) for
/// values that land inside the quoted MSG portion. RFC 3164 / 5424 do not
/// specify escaping; this matches the convention most SIEMs (Splunk, ELK,
/// `QRadar`) accept. Escaping the control characters is load-bearing: a
/// newline in a producer-controlled field (e.g. `event_type`) would
/// otherwise split the line and forge an extra syslog record.
fn syslog_escape(s: &str) -> String {
let mut out = String::with_capacity(s.len());
for c in s.chars() {
match c {
'"' => out.push_str("\\\""),
'\\' => out.push_str("\\\\"),
'\n' => out.push_str("\\n"),
'\r' => out.push_str("\\r"),
'\t' => out.push_str("\\t"),
// Any other C0 control character could split or corrupt the
// line; render it as a visible escape rather than emit it raw.
c if (c as u32) < 0x20 => out.push_str(&format!("\\x{:02x}", c as u32)),
c => out.push(c),
}
}
Expand Down
4 changes: 3 additions & 1 deletion spine-cli/src/inspect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,9 @@ fn compute_stats(wal_path: &Path) -> Result<WalStats, InspectCmdError> {
stats.integrity.prev_hash_links_ok = false;
}
if let Some(prev_seq) = prev_sequence {
if entry.sequence != prev_seq + 1 {
// saturating_add: a record may carry sequence = u64::MAX, and a
// bare `prev_seq + 1` would overflow on the following record.
if entry.sequence != prev_seq.saturating_add(1) {
stats.integrity.sequence_contiguous = false;
}
} else if entry.sequence != 1 {
Expand Down
72 changes: 72 additions & 0 deletions spine-cli/tests/cli_smoke.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,3 +533,75 @@ fn verify_lenient_on_strict_wal_emits_profile_hint() {
"lenient failure on a strict WAL must hint at --strict"
);
}

#[test]
fn inspect_stats_on_max_sequence_does_not_panic() {
// Regression: a record carrying sequence = u64::MAX overflowed the
// `prev_seq + 1` contiguity check, panicking the binary (exit 101).
// It must now report a non-contiguous chain and exit cleanly.
let dir = tempfile::tempdir().expect("tempdir");
let line1 = json!({
"sequence": u64::MAX,
"timestamp_ns": 1,
"prev_hash": GENESIS_PREV_HASH,
"payload_hash": "ab".repeat(32),
});
let line2 = json!({
"sequence": 5,
"timestamp_ns": 2,
"prev_hash": "cd".repeat(32),
"payload_hash": "ef".repeat(32),
});
let body = format!("{line1}\n{line2}\n");
std::fs::write(dir.path().join("00000001.jsonl"), body).expect("write wal");

let out = run(&[
"inspect",
"--wal",
path_str(dir.path()),
"--stats",
"--format",
"json",
]);
assert_eq!(code(&out), 0, "stats on a u64::MAX sequence must not panic");
let stats: Value = serde_json::from_str(&stdout(&out)).expect("stats JSON");
assert_eq!(stats["integrity"]["sequence_contiguous"], false);
}

#[test]
fn export_syslog_escapes_control_chars_in_event_type() {
// A producer-controlled newline in event_type must not split the
// syslog output into an extra forged record.
let dir = tempfile::tempdir().expect("tempdir");
let line = json!({
"sequence": 1,
"timestamp_ns": 1_700_000_000_000_000_000i64,
"prev_hash": GENESIS_PREV_HASH,
"payload_hash": "ab".repeat(32),
"event_type": "login\n<13>1 forged",
"source": "auth",
});
std::fs::write(dir.path().join("00000001.jsonl"), format!("{line}\n")).expect("write wal");

let out = run(&[
"export",
"--wal",
path_str(dir.path()),
"--export-format",
"syslog",
"--format",
"quiet",
]);
assert_eq!(code(&out), 0);
let body = stdout(&out);
let lines: Vec<&str> = body.lines().filter(|l| !l.trim().is_empty()).collect();
assert_eq!(
lines.len(),
1,
"a newline in event_type must not add a syslog line"
);
assert!(
body.contains("\\n"),
"the newline must be escaped, not emitted raw"
);
}
133 changes: 108 additions & 25 deletions spine-core/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,36 @@
//!
//! - **Strings**: escaped per RFC 8259 (matches `JSON.stringify`); content is
//! normalized to **Unicode NFC** before serialization.
//! - **Integers** in `i64` range, plus `u64` values above `i64::MAX`.
//! Output: decimal digits, no leading sign for non-negative, no decimal
//! point. Matches `String(integer)` in JavaScript.
//! - **Integers** with magnitude up to `2^53 - 1` (JavaScript's
//! `Number.MAX_SAFE_INTEGER`). Output: decimal digits, no leading sign
//! for non-negative, no decimal point. Matches `String(integer)` in
//! JavaScript. Larger integers are refused (see below).
//! - **Booleans**: `true` / `false`.
//! - **Null**: `null`.
//! - **Arrays**: `[item1,item2,…]`, no whitespace.
//! - **Objects**: keys NFC-normalized, then sorted by **UTF-16 code unit
//! order** (matches `Array.prototype.sort()` in JS), serialized as
//! `{"k1":v1,"k2":v2,…}`, no whitespace.
//!
//! Numbers must be integer-valued AND representable as an `i64`. A JSON
//! float with a whole value such as `2.0` or `-0` is accepted and
//! serialized without a decimal point (matching `Number.isInteger`), but
//! only when it maps to an `i64` exactly. A finite non-integer is rejected
//! with [`CanonicalError::NonIntegerNumber`]; an integer-valued float
//! outside `i64` range is rejected with [`CanonicalError::NumberOutOfRange`]
//! (saturating it would let two distinct payloads canonicalize to the same
//! bytes, so the canonical form would no longer be injective). NaN / Infinity
//! cannot occur because `serde_json::Value::Number` rejects them at parse
//! time. This is intentional: the demo WAL encodes monetary amounts as strings (e.g.
//! `"amount": "100.00"`), sidestepping ECMA-262 `NumberToString` quirks
//! entirely.
//! Numbers must be integer-valued AND within the JS-safe integer range
//! (magnitude up to `2^53 - 1`). A JSON float with a whole value such as
//! `2.0` or `-0` is accepted and serialized without a decimal point
//! (matching `Number.isInteger`), but only when it maps exactly to a
//! safe-range integer. A finite non-integer is rejected with
//! [`CanonicalError::NonIntegerNumber`]; an integer (or integer-valued
//! float) outside the safe range is rejected with
//! [`CanonicalError::NumberOutOfRange`]. Refusing out-of-range values
//! keeps the canonical form injective AND reproducible by a JS verifier
//! that parses the WAL with `JSON.parse` (which would round anything past
//! `2^53`). NaN / Infinity cannot occur because `serde_json::Value::Number`
//! rejects them at parse time. This is intentional: the demo WAL encodes
//! monetary amounts as strings (e.g. `"amount": "100.00"`), sidestepping
//! ECMA-262 `NumberToString` quirks entirely.
//!
//! Object keys are NFC-normalized before sorting. If two byte-distinct
//! input keys collapse to the same key after normalization, the value is
//! rejected with [`CanonicalError::DuplicateKeyAfterNfc`] rather than
//! emitting non-injective JSON with duplicate keys.
//!
//! ## Subtlety: UTF-16 vs UTF-8 key ordering
//!
Expand All @@ -60,10 +68,26 @@ pub enum CanonicalError {
#[error("non-integer number not supported in canonical JSON: {0}")]
NonIntegerNumber(String),

#[error("integer outside i64 range not supported in canonical JSON: {0}")]
#[error(
"integer outside the safe range supported by canonical JSON (max magnitude 2^53-1): {0}"
)]
NumberOutOfRange(String),

#[error("two object keys collide after NFC normalization: {0:?}")]
DuplicateKeyAfterNfc(String),
}

/// Largest integer magnitude that round-trips through an IEEE-754 double
/// without loss, i.e. JavaScript's `Number.MAX_SAFE_INTEGER` (2^53 - 1).
///
/// Canonical JSON refuses integers beyond this. A larger integer is exact
/// in Rust (serde keeps the full i64/u64), but a JS reimplementation that
/// parsed the same WAL with `JSON.parse` already rounded it, so the two
/// canonical forms would diverge and the cross-language hash contract
/// would silently break. Producers encode larger values as strings (the
/// demo WAL does this for monetary amounts).
const MAX_SAFE_INTEGER: i64 = 9_007_199_254_740_991;

/// Canonicalize a parsed JSON value to a UTF-8 byte sequence.
///
/// The output is suitable for hashing with BLAKE3 to produce `payload_hash`.
Expand Down Expand Up @@ -114,6 +138,17 @@ fn write_value(value: &Value, out: &mut Vec<u8>) -> Result<(), CanonicalError> {
.collect();
entries.sort_by(|a, b| a.1.cmp(&b.1));

// Two byte-distinct input keys can collapse to the same key
// after NFC normalization (e.g. precomposed vs decomposed
// "café"). Emitting both would produce non-injective, invalid
// JSON (duplicate object keys) that a JS verifier would not
// reproduce, so reject. Duplicates are adjacent after the sort.
for adj in entries.windows(2) {
if adj[0].1 == adj[1].1 {
return Err(CanonicalError::DuplicateKeyAfterNfc(adj[0].0.clone()));
}
}

out.push(b'{');
for (i, (key, _, val)) in entries.iter().enumerate() {
if i > 0 {
Expand All @@ -131,15 +166,21 @@ fn write_value(value: &Value, out: &mut Vec<u8>) -> Result<(), CanonicalError> {

fn write_number(n: &serde_json::Number, out: &mut Vec<u8>) -> Result<(), CanonicalError> {
if let Some(i) = n.as_i64() {
if !(-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER).contains(&i) {
// Exact in Rust, but a JS verifier that parsed this with
// JSON.parse already lost precision, so the two canonical
// forms would diverge. Refuse rather than emit bytes no JS
// reimplementation can reproduce. See MAX_SAFE_INTEGER.
return Err(CanonicalError::NumberOutOfRange(n.to_string()));
}
out.extend_from_slice(i.to_string().as_bytes());
return Ok(());
}
if let Some(u) = n.as_u64() {
// Only reachable for u64 > i64::MAX, which is outside JS safe range
// and we'd refuse to roundtrip anyway. Match JavaScript's String(u)
// for completeness.
out.extend_from_slice(u.to_string().as_bytes());
return Ok(());
if n.as_u64().is_some() {
// The only u64 values that are not also i64 are those above
// i64::MAX, which are far beyond MAX_SAFE_INTEGER. They cannot
// round-trip through a JS verifier, so refuse them.
return Err(CanonicalError::NumberOutOfRange(n.to_string()));
}
// Last resort: serde_json parses `-0` and any integer-valued literal that
// overflows i64/u64 as `f64`. Match `Number.isInteger(value)` semantics:
Expand All @@ -161,7 +202,10 @@ fn write_number(n: &serde_json::Number, out: &mut Vec<u8>) -> Result<(), Canonic
// happened) before we trust `as_int`.
#[allow(clippy::cast_precision_loss, clippy::float_cmp)]
let round_trips = as_int as f64 == f;
if round_trips {
// Also bound to the JS-safe range: a float at or beyond 2^53
// cannot distinguish adjacent integers, so accepting it would
// diverge from a JS verifier just as the integer path would.
if round_trips && (-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER).contains(&as_int) {
out.extend_from_slice(as_int.to_string().as_bytes());
return Ok(());
}
Expand Down Expand Up @@ -205,8 +249,47 @@ mod tests {
assert_eq!(canon(json!(0)), "0");
assert_eq!(canon(json!(-1)), "-1");
assert_eq!(canon(json!(42)), "42");
assert_eq!(canon(json!(i64::MAX)), i64::MAX.to_string());
assert_eq!(canon(json!(i64::MIN)), i64::MIN.to_string());
// The largest magnitudes that still round-trip through a JS double.
assert_eq!(canon(json!(9_007_199_254_740_991i64)), "9007199254740991");
assert_eq!(canon(json!(-9_007_199_254_740_991i64)), "-9007199254740991");
}

#[test]
fn rejects_integers_beyond_js_safe_range() {
// At or beyond 2^53 a value cannot round-trip through a JS double,
// so a JS reimplementation that parsed the WAL with JSON.parse
// would disagree on the bytes. Refuse rather than diverge silently.
assert!(matches!(
canonical_json(&json!(9_007_199_254_740_992i64)), // 2^53
Err(CanonicalError::NumberOutOfRange(_))
));
assert!(matches!(
canonical_json(&json!(i64::MAX)),
Err(CanonicalError::NumberOutOfRange(_))
));
assert!(matches!(
canonical_json(&json!(i64::MIN)),
Err(CanonicalError::NumberOutOfRange(_))
));
// A u64 above i64::MAX is likewise refused.
let big: Value = serde_json::from_str(r#"{"n":9223372036854775813}"#).unwrap();
assert!(matches!(
canonical_json(&big),
Err(CanonicalError::NumberOutOfRange(_))
));
}

#[test]
fn rejects_keys_colliding_after_nfc() {
// "cafe\u{0301}" (decomposed) and "caf\u{00e9}" (precomposed) are
// byte-distinct map keys that normalize to the same NFC key.
// Emitting both would be duplicate, non-injective JSON.
let input = "{\"cafe\u{0301}\":1,\"caf\u{00e9}\":2}";
let result = canonical_json_from_bytes(input.as_bytes());
assert!(matches!(
result,
Err(CanonicalError::DuplicateKeyAfterNfc(_))
));
}

#[test]
Expand Down
27 changes: 24 additions & 3 deletions spine-core/src/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,18 @@ fn verify_internal(bytes: &[u8], opts: &LenientOptions) -> VerificationResult {
}

if let Some(prev_seq) = prev_sequence {
if entry.sequence != prev_seq + 1 {
// saturating_add: a hostile record can carry sequence = u64::MAX
// (it fails the genesis check, but in accumulate-all mode the
// chain still advances and records it as prev_sequence), and a
// bare `prev_seq + 1` would then overflow on the next record.
// spine-core promises never to panic, so saturate instead.
let expected = prev_seq.saturating_add(1);
if entry.sequence != expected {
let err = VerificationError {
sequence: Some(entry.sequence),
error_type: "sequence_gap".to_string(),
details: format!(
"expected sequence {} after {prev_seq}, found {}",
prev_seq + 1,
"expected sequence {expected} after {prev_seq}, found {}",
entry.sequence
),
};
Expand Down Expand Up @@ -896,6 +901,22 @@ mod tests {
assert!(r.errors.iter().any(|e| e.error_type == "parse_error"));
}

#[test]
fn max_sequence_first_record_does_not_panic() {
// Regression: a first record carrying sequence = u64::MAX used to
// overflow `prev_seq + 1` on the following record (panic in debug,
// silent wrap in release). It must now degrade to a normal failure
// rather than abort the verifier.
let a = make_entry(u64::MAX, 1, GENESIS_PREV_HASH, "p1");
let h = compute_entry_hash(&a);
let b = make_entry(5, 2, &h, "p2");
let bytes = to_jsonl(&[a, b]);
let r = verify_wal_bytes(&bytes);
assert!(!r.valid); // genesis sequence is not 1
assert_eq!(r.events_verified, 2);
assert!(r.errors.iter().any(|e| e.error_type == "sequence_gap"));
}

#[test]
fn receipts_without_keystore_emit_warning() {
use crate::receipt::Receipt;
Expand Down