diff --git a/.githooks/pre-push b/.githooks/pre-push
new file mode 100755
--- /dev/null
+++ b/.githooks/pre-push
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+# Pre-push hook: run README corpus fidelity validation before pushing.
+# Install: git config core.hooksPath .githooks
+#
+# Git runs this hook with the remote name and URL as arguments and feeds one
+# line per pushed ref on stdin: local-ref local-oid remote-ref remote-oid
+#
+# Validation runs when ANY pushed ref touches runtime-affecting files. It is
+# skipped only when every pushed ref is a deletion or leaves those files alone.
+#
+# Skip with: git push --no-verify
+
+set -euo pipefail
+
+# Succeeds when $1 is git's all-zero "no object" id. Matching on the pattern
+# instead of a fixed 40-character literal also covers SHA-256 repositories.
+is_zero_oid() {
+  [[ "$1" =~ ^0+$ ]]
+}
+
+# Only run corpus validation when source/test/config files changed
+NEEDS_VALIDATION=0
+
+while read -r _ LOCAL_OID _ REMOTE_OID; do
+  if is_zero_oid "$LOCAL_OID"; then
+    # Branch deleted, nothing to validate
+    continue
+  fi
+
+  # Determine diff base
+  if is_zero_oid "$REMOTE_OID"; then
+    # New branch — diff against main
+    DIFF_BASE="origin/main"
+  else
+    DIFF_BASE="$REMOTE_OID"
+  fi
+
+  # Check if any runtime-affecting files changed. A failing diff (for example
+  # when the base commit is not available locally) must not be mistaken for
+  # "nothing changed", so validate in that case instead of skipping.
+  if ! CHANGED=$(git diff --name-only "$DIFF_BASE" "$LOCAL_OID" -- \
+    'crates/**' \
+    'runtime-assets/**' \
+    'web/**' \
+    'index.html' \
+    'Cargo.toml' \
+    'Cargo.lock' \
+    2>/dev/null); then
+    echo "[pre-push] WARNING: cannot diff against $DIFF_BASE, validating to be safe." >&2
+    NEEDS_VALIDATION=1
+  elif [ -n "$CHANGED" ]; then
+    NEEDS_VALIDATION=1
+  fi
+done
+
+# Decide only after every pushed ref was inspected; exiting inside the loop
+# would let the first unaffected ref skip validation for all the others.
+if [ "$NEEDS_VALIDATION" -eq 0 ]; then
+  echo "[pre-push] No runtime files changed, skipping corpus validation."
+  exit 0
+fi
+
+echo "========================================"
+echo "[pre-push] Running README corpus validation"
+echo "========================================"
+
+# Prerequisites check
+for cmd in node npm cargo pdftoppm pdftotext; do
+  if ! command -v "$cmd" &>/dev/null; then
+    echo "[pre-push] ERROR: $cmd is required but not found." >&2
+    exit 1
+  fi
+done
+
+# Ensure dependencies are installed. Best effort: a failed install does not
+# abort the push by itself, but it is reported instead of being swallowed.
+for dir in crates/marknest/playwright-runtime validation; do
+  if ! npm ci --prefix "$dir" --silent 2>/dev/null; then
+    echo "[pre-push] WARNING: npm ci failed in $dir, continuing anyway." >&2
+  fi
+done
+
+# Resolve browser path
+if [ -z "${MARKNEST_BROWSER_PATH:-}" ]; then
+  BROWSER_PATH=$(node -e '
+    try {
+      const { chromium } = require("./crates/marknest/playwright-runtime/node_modules/playwright");
+      process.stdout.write(chromium.executablePath());
+    } catch { process.exit(1); }
+  ' 2>/dev/null || true)
+  if [ -n "$BROWSER_PATH" ] && [ -f "$BROWSER_PATH" ]; then
+    export MARKNEST_BROWSER_PATH="$BROWSER_PATH"
+  fi
+fi
+
+# Run corpus validation (smoke tier for speed, full tier via: CORPUS_TIER=all git push)
+TIER="${CORPUS_TIER:-smoke}"
+echo "[pre-push] Tier: $TIER"
+
+node validation/readme_corpus.mjs run --tier "$TIER" --force
+
+echo "========================================"
+echo "[pre-push] Corpus validation passed"
+echo "========================================"
diff --git a/.github/workflows/readme-corpus.yml b/.github/workflows/readme-corpus.yml
index 8b7df78..facbd16 100644
--- a/.github/workflows/readme-corpus.yml
+++ b/.github/workflows/readme-corpus.yml
@@ -1,70 +1,29 @@
-name: README Corpus
+name: CI
 
 on:
   pull_request:
+    paths-ignore:
+      - "*.md"
+      - "docs/**"
+      - "LICENSE"
   workflow_dispatch:
 
 jobs:
-  validate-readme-corpus:
-    runs-on: ubuntu-latest
-    timeout-minutes: 90
+  check:
+    runs-on: self-hosted
+    timeout-minutes: 30
 
     steps:
       - name: Check out repository
         uses: actions/checkout@v4
-        with:
-          lfs: true
 
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
+      - name: Check formatting
+        run: cargo fmt --all -- --check
 
-      - name: Install Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: "22"
-          cache: "npm"
-          cache-dependency-path: |
-            crates/marknest/playwright-runtime/package-lock.json
-            validation/package-lock.json
+      - name: Run workspace tests
+        run: cargo test
 
-      - name: Cache Rust build outputs
-        uses: Swatinem/rust-cache@v2
-
-      - name: Install PDF rasterization tools
-        run: sudo apt-get update && sudo apt-get install -y poppler-utils
-
-      - name: Pull Git LFS baselines
-        run: git lfs pull
-
-      - name: Install native renderer runtime
-        run: npm ci --prefix crates/marknest/playwright-runtime
-
-      - name: Install validation dependencies
-        run: npm ci --prefix validation
-
-      - name: Install Playwright Chromium for CI
-        run: npx --prefix crates/marknest/playwright-runtime playwright install --with-deps chromium
-
-      - name: Resolve Playwright browser path
-        id: playwright-browser
+      - name: Check WASM target
         run: |
-          path=$(node -e 'const { chromium } = require("./crates/marknest/playwright-runtime/node_modules/playwright"); process.stdout.write(chromium.executablePath());')
-          echo "path=$path" >> "$GITHUB_OUTPUT"
-
-      - name: Run Playwright print timeout guard
-        run: node --test crates/marknest/src/playwright_print.test.mjs
-        env:
-          MARKNEST_BROWSER_PATH: ${{ steps.playwright-browser.outputs.path }}
-
-      - name: Run full README corpus validation
-        run: node validation/readme_corpus.mjs run --tier all --force
-        env:
-          MARKNEST_BROWSER_PATH: ${{ steps.playwright-browser.outputs.path }}
-
-      - name: Upload corpus artifacts
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: readme-corpus-artifacts
-          path: validation/.runs
-          retention-days: 14
+          rustup target add wasm32-unknown-unknown 2>/dev/null || true
+          cargo check -p marknest-wasm --target wasm32-unknown-unknown