diff --git a/.github/ai-review/README.md b/.github/ai-review/README.md index 07d997eca2..871196d768 100644 --- a/.github/ai-review/README.md +++ b/.github/ai-review/README.md @@ -81,21 +81,32 @@ will re-evaluate the change. ## Fork PR handling -Repository secrets (`OPENAI_API_KEY`, `AI_REVIEW_APP_PRIVATE_KEY`) are not -exposed to `pull_request` events from forks, and the default token is read- -only, so the Codex steps cannot run on a fork auto-trigger. - -The persona jobs do still run on fork PRs — they fail-fast in the very first -"Fork PR advisory" step with a clear error message directing maintainers to -invoke the workflow manually. This is intentional: a skipped required check -is treated by GitHub Branch Protection as satisfied, which would silently -bypass the security gate for exactly the contributor class that needs it most -(fork PRs from untrusted authors). Failing the check instead keeps the gate -red until a maintainer explicitly clears it. - -**To AI-review a fork PR:** a nucleus member dispatches the workflow with -the PR number. `workflow_dispatch` runs in base context with secrets -available, performs the real review, and the required checks turn green. +Fork PRs are AI-reviewed automatically via the `pull_request_target` trigger. +Same-repo PRs use `pull_request`; the `decide` job's `if:` condition routes +each PR through exactly one trigger so they never run twice. + +**Why this is safe:** + +- Under `pull_request_target`, GitHub reads the workflow file from the BASE + branch, not the fork's head — so a hostile fork can't modify the workflow + to bypass review. +- Persona prompts and helper scripts (`prefetch.sh`, `post_review.py`, the + output schema) are extracted from `origin/` into + `/tmp/ai-review-trusted/` and run from there — PR-side copies are ignored. +- The Skeptic runs in `sandbox: read-only` and never executes fork code. +- The Auditor runs in `sandbox: workspace-write` but with `drop-sudo`, no + `GH_TOKEN` / `OPENAI_API_KEY` in env, and the credentialed push step uses + a fresh clean clone so PR-controlled `.git/config` / `.gitattributes` can + never touch the token. + +**Hard guard: fork PRs that touch `.github/` are blocked at `decide`.** The +job emits a clear error message naming the offending paths and exits 1, so +the required checks go red. Land any `.github/` changes through a separate +same-repo PR first, then re-trigger AI review on the fork PR. + +**Manual override (rare):** if a fork PR needs to be reviewed despite some +limitation of the auto-trigger, a maintainer can dispatch the workflow with +the PR number — that path still runs through `decide` and its guards. ```bash gh workflow run ai-review.yml --repo opentensor/subtensor -f pr_number= diff --git a/.github/ai-review/prefetch.sh b/.github/ai-review/prefetch.sh index 3a19a3a87b..7fdd2dbaba 100755 --- a/.github/ai-review/prefetch.sh +++ b/.github/ai-review/prefetch.sh @@ -79,8 +79,38 @@ jq -r '.body // ""' "$OUTPUT_DIR/pr.json" > "$OUTPUT_DIR/pr-body.md" # Files changed (paths + per-file additions/deletions; full content lives in the diff) gh_retry gh pr view "$PR_NUMBER" --repo "$REPO" --json files > "$OUTPUT_DIR/pr-files.json" -# Full unified diff -gh_retry gh pr diff "$PR_NUMBER" --repo "$REPO" > "$OUTPUT_DIR/pr-diff.patch" +# Full unified diff. Use local `git diff` rather than `gh pr diff` because +# the GitHub REST diff endpoint hard-caps at 20,000 lines (HTTP 406 +# `PullRequest.diff too_large`). The workflow already checked out the PR +# head with `fetch-depth: 0`, so all branches are available locally. +# +# Hardening against PR-controlled diff suppression: +# --text force textual diff so a `.gitattributes` `binary` mark +# cannot hide hunks in sensitive paths. +# --no-textconv ignore textconv filters (which can mangle/suppress output +# and execute external programs). +# --no-ext-diff ignore external diff drivers configured via `.gitattributes`. +# We also pin the comparison to the immutable `baseRefOid` SHA from pr.json +# rather than the moving `origin/` tip — so an advance of the base +# branch between fetch and diff cannot change what gets reviewed. +BASE_SHA_FOR_DIFF=$(jq -r '.baseRefOid' "$OUTPUT_DIR/pr.json") +HEAD_SHA_FOR_DIFF=$(jq -r '.headRefOid' "$OUTPUT_DIR/pr.json") +SAFE_DIFF_OPTS=(--no-ext-diff --no-textconv --text) +if git cat-file -e "${BASE_SHA_FOR_DIFF}^{commit}" 2>/dev/null; then + git diff "${SAFE_DIFF_OPTS[@]}" "${BASE_SHA_FOR_DIFF}...${HEAD_SHA_FOR_DIFF}" \ + > "$OUTPUT_DIR/pr-diff.patch" +else + # Base commit not local (e.g. shallow checkout missing the merge base). + # Fall back to the REST endpoint; bail loudly if it 406s on a huge PR. + echo "::warning::base commit ${BASE_SHA_FOR_DIFF} not local; falling back to gh pr diff (may fail for >20k-line PRs)" + gh_retry gh pr diff "$PR_NUMBER" --repo "$REPO" > "$OUTPUT_DIR/pr-diff.patch" +fi +DIFF_BYTES=$(wc -c < "$OUTPUT_DIR/pr-diff.patch") +DIFF_LINES=$(wc -l < "$OUTPUT_DIR/pr-diff.patch") +echo "PR diff: ${DIFF_LINES} lines, ${DIFF_BYTES} bytes" +if (( DIFF_BYTES > 2 * 1024 * 1024 )); then + echo "::warning::PR diff is large (${DIFF_BYTES} bytes); the persona may need to focus on pr-files.json to triage." +fi # All PR comments (issue-style). `--paginate` alone writes one JSON array per # page; `--slurp` wraps them as [[page1], [page2], ...]; we then flatten with diff --git a/.github/workflows/ai-review.yml b/.github/workflows/ai-review.yml index 2c68114327..92182b0828 100644 --- a/.github/workflows/ai-review.yml +++ b/.github/workflows/ai-review.yml @@ -1,8 +1,16 @@ name: ai-review on: + # Same-repo PRs route through pull_request (uses the PR's workflow file — + # handy when iterating on this workflow). Fork PRs route through + # pull_request_target so they receive secrets and can be reviewed; that + # trigger reads the workflow file from the BASE branch, so a hostile fork + # cannot modify the review pipeline. The `decide` job's `if:` below ensures + # each PR fires exactly one of these triggers — no double-runs. pull_request: types: [opened, reopened, synchronize, ready_for_review] + pull_request_target: + types: [opened, reopened, synchronize, ready_for_review] workflow_dispatch: inputs: pr_number: @@ -22,6 +30,13 @@ jobs: decide: name: decide runs-on: ubuntu-latest + # Route each PR through exactly one trigger so it never runs twice: + # same-repo via pull_request, fork via pull_request_target. dispatch + # always runs. + if: | + github.event_name == 'workflow_dispatch' || + (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) || + (github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository) permissions: contents: read pull-requests: read @@ -78,13 +93,12 @@ jobs: RUN_SKEPTIC=true; RUN_AUDITOR=true fi - # Fork auto-trigger: persona jobs still RUN (so the required checks - # exist as failures rather than as `skipped`-satisfied bypasses). - # The persona jobs fail-fast in their "Fork PR advisory" step with a - # clear maintainer-facing message. A nucleus member then has to - # invoke workflow_dispatch on the PR (which runs in base context - # with secrets and produces real green checks) to clear the gate. - # See README.md → "Fork PR handling" for the rationale. + # Fork PRs run via pull_request_target (see workflow `on:` block) + # so they receive secrets and can be reviewed. The workflow file + # itself comes from the base branch under pull_request_target, and + # persona prompts + helper scripts are extracted from base into + # /tmp/ai-review-trusted/, so a hostile fork cannot tamper with the + # review pipeline. { echo "pr_number=$PR" @@ -98,6 +112,30 @@ jobs: echo "run_auditor=$RUN_AUDITOR" } >> "$GITHUB_OUTPUT" + # Fork PRs MUST NOT modify `.github/` — workflow files, persona prompts, + # required-check definitions, etc. live there. Even though our trusted + # extraction renders most of this unweaponizable, a fork PR that touches + # `.github/` requires human-eyes review on the diff itself before AI + # review can safely run on the rest of the change. Block with a clear + # error; the right fix is to land any `.github/` changes via a separate + # same-repo PR first, then re-trigger AI review on the fork PR (which + # will then have nothing to touch under `.github/`). + - name: Fork PR — block .github/ changes + if: steps.compute.outputs.is_fork == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + PR: ${{ steps.compute.outputs.pr_number }} + run: | + set -euo pipefail + touched=$(gh pr view "$PR" --repo "$REPO" --json files \ + --jq '[.files[] | select(.path | startswith(".github/")) | .path] | join(", ")') + if [[ -n "$touched" ]]; then + echo "::error::Fork PR modifies .github/ paths: $touched" + echo "::error::AI review will not run automatically. Land the .github/ changes in a separate same-repo PR first, then re-run this workflow." + exit 1 + fi + skeptic: name: skeptic needs: decide @@ -111,21 +149,6 @@ jobs: posted_url: ${{ steps.post.outputs.posted_url }} verdict: ${{ steps.post.outputs.verdict }} steps: - # Fail-fast on fork auto-trigger. `pull_request` from a fork has no - # secrets and a read-only token, so the Codex steps cannot run. Rather - # than skip the job (which would `skipped`-satisfy a required check - # and silently bypass the security gate), fail loudly with a clear - # maintainer-facing message. A nucleus member then dispatches the - # workflow manually for this PR; workflow_dispatch runs in base - # context with secrets and produces real green checks. - - name: Fork PR advisory (fail-fast) - if: needs.decide.outputs.is_fork == 'true' && github.event_name == 'pull_request' - env: - PR: ${{ needs.decide.outputs.pr_number }} - run: | - echo "::error::Fork PR detected. AI review cannot auto-run on fork pull_request events (repository secrets are not exposed). A maintainer must invoke this workflow via workflow_dispatch with PR #${PR} to perform the security review." - exit 1 - - name: Checkout PR head uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: @@ -337,15 +360,6 @@ jobs: posted_url: ${{ steps.post.outputs.posted_url }} verdict: ${{ steps.post.outputs.verdict }} steps: - # See note in the skeptic job — same fork-PR fail-fast rationale. - - name: Fork PR advisory (fail-fast) - if: needs.decide.outputs.is_fork == 'true' && github.event_name == 'pull_request' - env: - PR: ${{ needs.decide.outputs.pr_number }} - run: | - echo "::error::Fork PR detected. AI review cannot auto-run on fork pull_request events (repository secrets are not exposed). A maintainer must invoke this workflow via workflow_dispatch with PR #${PR} to perform the security review." - exit 1 - - name: Checkout PR head uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: