From 97f32c1fc1a1f49fc154a83a55ffebbe86c91c1b Mon Sep 17 00:00:00 2001
From: Yonghye Kwon
Date: Mon, 9 Mar 2026 11:09:56 +0900
Subject: [PATCH 1/2] ci: switch to self-hosted AWS runner and skip docs-only PRs

Replace ubuntu-latest with self-hosted runner on AWS where Rust, Node 22,
and poppler-utils are pre-installed. Remove redundant setup steps.

Add paths-ignore so docs-only changes skip the corpus validation.

Signed-off-by: Yonghye Kwon
Co-Authored-By: Claude Opus 4.6
Signed-off-by: Yonghye Kwon
---
Review notes (text in this section is not part of the commit):
* Restored the one-record-per-line layout of the patch; headers and hunks
  cannot be parsed when they share a physical line.
* paths-ignore uses "**.md" instead of "*.md". In GitHub Actions filter
  patterns "*" does not match "/", so "*.md" only covers Markdown files at
  the repository root; "**.md" covers Markdown files in any directory.
* The blob ids on the index line predate this edit.

 .github/workflows/readme-corpus.yml | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/readme-corpus.yml b/.github/workflows/readme-corpus.yml
index 8b7df78..5013f55 100644
--- a/.github/workflows/readme-corpus.yml
+++ b/.github/workflows/readme-corpus.yml
@@ -2,11 +2,15 @@ name: README Corpus
 
 on:
   pull_request:
+    paths-ignore:
+      - "**.md"
+      - "docs/**"
+      - "LICENSE"
   workflow_dispatch:
 
 jobs:
   validate-readme-corpus:
-    runs-on: ubuntu-latest
+    runs-on: self-hosted
     timeout-minutes: 90
 
     steps:
@@ -15,24 +19,6 @@ jobs:
         with:
           lfs: true
 
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-
-      - name: Install Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: "22"
-          cache: "npm"
-          cache-dependency-path: |
-            crates/marknest/playwright-runtime/package-lock.json
-            validation/package-lock.json
-
-      - name: Cache Rust build outputs
-        uses: Swatinem/rust-cache@v2
-
-      - name: Install PDF rasterization tools
-        run: sudo apt-get update && sudo apt-get install -y poppler-utils
-
       - name: Pull Git LFS baselines
         run: git lfs pull
 

From 2ec15ed01fe46d9118f2ed09a20270024dc39d6b Mon Sep 17 00:00:00 2001
From: Yonghye Kwon
Date: Mon, 9 Mar 2026 11:26:48 +0900
Subject: [PATCH 2/2] ci: lightweight CI on AWS, corpus validation as pre-push hook

Replace heavy 50-repo corpus validation in CI with lightweight checks
(cargo fmt, cargo test, WASM check) suitable for the AWS micro runner.
Move corpus validation to .githooks/pre-push so it runs locally before
push. Defaults to smoke tier for speed; use CORPUS_TIER=all for full
validation. Only triggers when runtime files changed.

Setup: git config core.hooksPath .githooks

Signed-off-by: Yonghye Kwon
Co-Authored-By: Claude Opus 4.6
Signed-off-by: Yonghye Kwon
---
Review notes (text in this section is not part of the commit):
* Restored the one-record-per-line layout of the patch; headers and hunks
  cannot be parsed when they share a physical line.
* .githooks/pre-push is now created with mode 100755. Git ignores hook
  files that are not executable, so with 100644 the hook would never run.
* The hook no longer exits from inside the ref loop. Previously the first
  pushed ref without runtime changes ended the hook with status 0 even
  when a later ref in the same push did touch runtime files.
* A failing "git diff" (for example a base commit that is not available
  locally) now forces validation instead of being read as "no changes".
* "npm ci" stays best effort but a failure is reported on stderr.
* Hunk header and diffstat reflect the new hook length; the blob ids on
  the index lines predate these edits.

 .githooks/pre-push                  | 105 ++++++++++++++++++++++++++++
 .github/workflows/readme-corpus.yml |  47 +++----------
 2 files changed, 115 insertions(+), 37 deletions(-)
 create mode 100755 .githooks/pre-push

diff --git a/.githooks/pre-push b/.githooks/pre-push
new file mode 100755
index 0000000..f40042d
--- /dev/null
+++ b/.githooks/pre-push
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# Pre-push hook: run README corpus fidelity validation before pushing.
+# Install: git config core.hooksPath .githooks
+#
+# Skip with: git push --no-verify
+
+set -euo pipefail
+
+# Only run corpus validation when source/test/config files changed
+REMOTE="$1"
+URL="$2"
+
+# All-zero object id that marks a ref as absent on one side of the push.
+ZERO_OID="0000000000000000000000000000000000000000"
+
+# Becomes 1 once any pushed ref touches runtime files. Every ref line on
+# stdin is inspected: a clean first ref must not hide changes in a later one.
+NEEDS_VALIDATION=0
+
+while read -r LOCAL_REF LOCAL_OID REMOTE_REF REMOTE_OID; do
+  if [ "$LOCAL_OID" = "$ZERO_OID" ]; then
+    # Branch deleted, nothing to validate
+    continue
+  fi
+
+  if [ "$NEEDS_VALIDATION" -eq 1 ]; then
+    # Decision already made; keep reading refs without running more diffs
+    continue
+  fi
+
+  # Determine diff base
+  if [ "$REMOTE_OID" = "$ZERO_OID" ]; then
+    # New branch — diff against main
+    DIFF_BASE="origin/main"
+  else
+    DIFF_BASE="$REMOTE_OID"
+  fi
+
+  # Check if any runtime-affecting files changed. A failing diff (for
+  # example the base commit is not available locally) must not be read as
+  # "nothing changed", so it forces validation instead of skipping it.
+  if ! CHANGED=$(git diff --name-only "$DIFF_BASE" "$LOCAL_OID" -- \
+    'crates/**' \
+    'runtime-assets/**' \
+    'web/**' \
+    'index.html' \
+    'Cargo.toml' \
+    'Cargo.lock' \
+    2>/dev/null); then
+    echo "[pre-push] WARNING: cannot diff $DIFF_BASE against $LOCAL_OID; validating to be safe." >&2
+    NEEDS_VALIDATION=1
+    continue
+  fi
+
+  if [ -n "$CHANGED" ]; then
+    NEEDS_VALIDATION=1
+  fi
+done
+
+if [ "$NEEDS_VALIDATION" -eq 0 ]; then
+  echo "[pre-push] No runtime files changed, skipping corpus validation."
+  exit 0
+fi
+
+echo "========================================"
+echo "[pre-push] Running README corpus validation"
+echo "========================================"
+
+# Prerequisites check
+for cmd in node npm cargo pdftoppm pdftotext; do
+  if ! command -v "$cmd" &>/dev/null; then
+    echo "[pre-push] ERROR: $cmd is required but not found."
+    exit 1
+  fi
+done
+
+# Ensure dependencies are installed. Best effort: a failed install does not
+# abort the hook here, but it is reported instead of being silently ignored.
+for dir in crates/marknest/playwright-runtime validation; do
+  npm ci --prefix "$dir" --silent 2>/dev/null ||
+    echo "[pre-push] WARNING: npm ci failed in $dir; validation may fail." >&2
+done
+
+# Resolve browser path
+if [ -z "${MARKNEST_BROWSER_PATH:-}" ]; then
+  BROWSER_PATH=$(node -e '
+    try {
+      const { chromium } = require("./crates/marknest/playwright-runtime/node_modules/playwright");
+      process.stdout.write(chromium.executablePath());
+    } catch { process.exit(1); }
+  ' 2>/dev/null || true)
+  if [ -n "$BROWSER_PATH" ] && [ -f "$BROWSER_PATH" ]; then
+    export MARKNEST_BROWSER_PATH="$BROWSER_PATH"
+  fi
+fi
+
+# Run corpus validation (smoke tier for speed, full tier via: CORPUS_TIER=all git push)
+TIER="${CORPUS_TIER:-smoke}"
+echo "[pre-push] Tier: $TIER"
+
+node validation/readme_corpus.mjs run --tier "$TIER" --force
+
+echo "========================================"
+echo "[pre-push] Corpus validation passed"
+echo "========================================"
diff --git a/.github/workflows/readme-corpus.yml b/.github/workflows/readme-corpus.yml
index 5013f55..facbd16 100644
--- a/.github/workflows/readme-corpus.yml
+++ b/.github/workflows/readme-corpus.yml
@@ -1,4 +1,4 @@
-name: README Corpus
+name: CI
 
 on:
   pull_request:
@@ -9,48 +9,21 @@ on:
   workflow_dispatch:
 
 jobs:
-  validate-readme-corpus:
+  check:
     runs-on: self-hosted
-    timeout-minutes: 90
+    timeout-minutes: 30
 
     steps:
       - name: Check out repository
         uses: actions/checkout@v4
-        with:
-          lfs: true
 
-      - name: Pull Git LFS baselines
-        run: git lfs pull
+      - name: Check formatting
+        run: cargo fmt --all -- --check
 
-      - name: Install native renderer runtime
-        run: npm ci --prefix crates/marknest/playwright-runtime
+      - name: Run workspace tests
+        run: cargo test
 
-      - name: Install validation dependencies
-        run: npm ci --prefix validation
-
-      - name: Install Playwright Chromium for CI
-        run: npx --prefix crates/marknest/playwright-runtime playwright install --with-deps chromium
-
-      - name: Resolve Playwright browser path
-        id: playwright-browser
+      - name: Check WASM target
         run: |
-          path=$(node -e 'const { chromium } = require("./crates/marknest/playwright-runtime/node_modules/playwright"); process.stdout.write(chromium.executablePath());')
-          echo "path=$path" >> "$GITHUB_OUTPUT"
-
-      - name: Run Playwright print timeout guard
-        run: node --test crates/marknest/src/playwright_print.test.mjs
-        env:
-          MARKNEST_BROWSER_PATH: ${{ steps.playwright-browser.outputs.path }}
-
-      - name: Run full README corpus validation
-        run: node validation/readme_corpus.mjs run --tier all --force
-        env:
-          MARKNEST_BROWSER_PATH: ${{ steps.playwright-browser.outputs.path }}
-
-      - name: Upload corpus artifacts
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: readme-corpus-artifacts
-          path: validation/.runs
-          retention-days: 14
+          rustup target add wasm32-unknown-unknown 2>/dev/null || true
+          cargo check -p marknest-wasm --target wasm32-unknown-unknown