Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions .githooks/pre-push
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env bash
# Pre-push hook: run README corpus fidelity validation before pushing.
# Install: git config core.hooksPath .githooks
#
# Skip with: git push --no-verify

set -euo pipefail

# Only run corpus validation when source/test/config files changed.
#
# Git invokes this hook as `pre-push <remote-name> <remote-url>` and feeds one
# line per ref being pushed on stdin:
#   <local ref> <local oid> <remote ref> <remote oid>
# The two positional arguments are not used below; they are bound with empty
# defaults so that `set -u` does not abort a manual run without arguments.
# shellcheck disable=SC2034
REMOTE="${1:-}"
# shellcheck disable=SC2034
URL="${2:-}"

# True when the given object name is the all-zeroes placeholder git uses for
# "no object" (40 hex digits for SHA-1 repositories, 64 for SHA-256).
is_zero_oid() {
  [[ "$1" =~ ^0+$ ]]
}

# Becomes 1 as soon as any pushed ref touches runtime-affecting files, or when
# that cannot be determined for a ref.
NEEDS_VALIDATION=0

while read -r LOCAL_REF LOCAL_OID REMOTE_REF REMOTE_OID; do
  # The decision is already made; keep reading only to drain stdin.
  if [[ "$NEEDS_VALIDATION" -eq 1 ]]; then
    continue
  fi

  if is_zero_oid "$LOCAL_OID"; then
    # Branch deleted, nothing to validate
    continue
  fi

  # Determine diff base
  if is_zero_oid "$REMOTE_OID"; then
    # New branch — diff against main
    DIFF_BASE="origin/main"
  else
    DIFF_BASE="$REMOTE_OID"
  fi

  # Check if any runtime-affecting files changed.
  # A failing diff (e.g. origin/main is missing, or the remote object has not
  # been fetched locally) must not be mistaken for "nothing changed": in that
  # case validation runs, which is the safe default.
  if ! CHANGED=$(git diff --name-only "$DIFF_BASE" "$LOCAL_OID" -- \
    'crates/**' \
    'runtime-assets/**' \
    'web/**' \
    'index.html' \
    'Cargo.toml' \
    'Cargo.lock' \
    2>/dev/null); then
    echo "[pre-push] WARNING: cannot diff $DIFF_BASE against $LOCAL_OID ($LOCAL_REF); running corpus validation to be safe." >&2
    NEEDS_VALIDATION=1
    continue
  fi

  if [[ -n "$CHANGED" ]]; then
    NEEDS_VALIDATION=1
  fi
done

# Skip only after every pushed ref has been inspected; exiting from inside the
# loop would let a later ref with runtime changes through unvalidated.
if [[ "$NEEDS_VALIDATION" -eq 0 ]]; then
  echo "[pre-push] No runtime files changed, skipping corpus validation."
  exit 0
fi

# Announce the start of the validation run.
banner_rule="========================================"
printf '%s\n' \
  "$banner_rule" \
  "[pre-push] Running README corpus validation" \
  "$banner_rule"

# Prerequisites check: stop at the first required tool that is not on PATH.
for required_tool in node npm cargo pdftoppm pdftotext; do
  if command -v "$required_tool" >/dev/null 2>&1; then
    continue
  fi
  echo "[pre-push] ERROR: $required_tool is required but not found."
  exit 1
done

# Ensure dependencies are installed.
# Installation stays best-effort (a failure here does not block the push on
# its own), but it is reported on stderr instead of being silently discarded,
# so a later validation failure can be traced back to it.
for npm_prefix in crates/marknest/playwright-runtime validation; do
  if ! npm ci --prefix "$npm_prefix" --silent 2>/dev/null; then
    echo "[pre-push] WARNING: 'npm ci --prefix $npm_prefix' failed; continuing anyway." >&2
  fi
done

# Resolve browser path.
# An explicitly provided MARKNEST_BROWSER_PATH always wins. Otherwise ask the
# Playwright package installed under the runtime directory for its Chromium
# executable, and export it only when that path exists as a regular file.
if [[ -z "${MARKNEST_BROWSER_PATH:-}" ]]; then
  # Resolution failure is tolerated on purpose: BROWSER_PATH is simply left
  # empty and MARKNEST_BROWSER_PATH stays unset.
  BROWSER_PATH=$(node -e '
    try {
      const { chromium } = require("./crates/marknest/playwright-runtime/node_modules/playwright");
      process.stdout.write(chromium.executablePath());
    } catch { process.exit(1); }
  ' 2>/dev/null || true)
  if [[ -n "$BROWSER_PATH" && -f "$BROWSER_PATH" ]]; then
    export MARKNEST_BROWSER_PATH="$BROWSER_PATH"
  fi
fi

# Run corpus validation. The tier defaults to "smoke" for speed; the
# CORPUS_TIER environment variable overrides it (e.g. CORPUS_TIER=all git push).
TIER="${CORPUS_TIER:-smoke}"
printf '%s\n' "[pre-push] Tier: $TIER"

# With `set -e` active, a non-zero exit from the validator aborts the hook
# here, before the success banner is printed.
corpus_cmd=(node validation/readme_corpus.mjs run --tier "$TIER" --force)
"${corpus_cmd[@]}"

success_rule="========================================"
printf '%s\n' \
  "$success_rule" \
  "[pre-push] Corpus validation passed" \
  "$success_rule"
71 changes: 15 additions & 56 deletions .github/workflows/readme-corpus.yml
Original file line number Diff line number Diff line change
@@ -1,70 +1,29 @@
# NOTE(review): this span looks like a flattened diff of the workflow file
# rather than a single valid YAML document: indentation is gone and several
# keys appear twice (two `name:` entries, two job ids, two `runs-on:` and
# `timeout-minutes:` values). Presumably the removed and the added lines are
# interleaved without +/- markers — confirm against the real file before
# relying on any individual line below.
name: README Corpus
name: CI

# Triggers listed here: pull requests (with a paths-ignore list) and manual
# dispatch.
on:
pull_request:
paths-ignore:
- "*.md"
- "docs/**"
- "LICENSE"
workflow_dispatch:

# Two job ids follow (`validate-readme-corpus` and `check`), each with its own
# runner and timeout; presumably one is the old job and one its replacement —
# TODO confirm which.
jobs:
validate-readme-corpus:
runs-on: ubuntu-latest
timeout-minutes: 90
check:
runs-on: self-hosted
timeout-minutes: 30

# The step list mixes README-corpus steps (Node.js, Playwright, LFS baselines,
# corpus run, artifact upload) with Rust checks (cargo fmt, cargo test, WASM
# target check); see the NOTE(review) at the top of this span.
steps:
- name: Check out repository
uses: actions/checkout@v4
with:
lfs: true

- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Check formatting
run: cargo fmt --all -- --check

- name: Install Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
cache: "npm"
cache-dependency-path: |
crates/marknest/playwright-runtime/package-lock.json
validation/package-lock.json
- name: Run workspace tests
run: cargo test

- name: Cache Rust build outputs
uses: Swatinem/rust-cache@v2

- name: Install PDF rasterization tools
run: sudo apt-get update && sudo apt-get install -y poppler-utils

- name: Pull Git LFS baselines
run: git lfs pull

- name: Install native renderer runtime
run: npm ci --prefix crates/marknest/playwright-runtime

- name: Install validation dependencies
run: npm ci --prefix validation

- name: Install Playwright Chromium for CI
run: npx --prefix crates/marknest/playwright-runtime playwright install --with-deps chromium

- name: Resolve Playwright browser path
id: playwright-browser
- name: Check WASM target
run: |
path=$(node -e 'const { chromium } = require("./crates/marknest/playwright-runtime/node_modules/playwright"); process.stdout.write(chromium.executablePath());')
echo "path=$path" >> "$GITHUB_OUTPUT"

- name: Run Playwright print timeout guard
run: node --test crates/marknest/src/playwright_print.test.mjs
env:
MARKNEST_BROWSER_PATH: ${{ steps.playwright-browser.outputs.path }}

- name: Run full README corpus validation
run: node validation/readme_corpus.mjs run --tier all --force
env:
MARKNEST_BROWSER_PATH: ${{ steps.playwright-browser.outputs.path }}

- name: Upload corpus artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: readme-corpus-artifacts
path: validation/.runs
retention-days: 14
rustup target add wasm32-unknown-unknown 2>/dev/null || true
cargo check -p marknest-wasm --target wasm32-unknown-unknown