From e8c1d6835751e8b2cbf20d1574dc4d3ace716c83 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Thu, 11 Jun 2026 10:19:39 +0400 Subject: [PATCH 1/6] fix(ci): ignore stale rebased codex inline comments --- .github/scripts/sync_codex_ok_labels.py | 62 +++++++- tests/unit/test_sync_codex_ok_labels.py | 183 ++++++++++++++++++++++++ 2 files changed, 240 insertions(+), 5 deletions(-) diff --git a/.github/scripts/sync_codex_ok_labels.py b/.github/scripts/sync_codex_ok_labels.py index c7bd17c1e..dc89cc275 100755 --- a/.github/scripts/sync_codex_ok_labels.py +++ b/.github/scripts/sync_codex_ok_labels.py @@ -148,6 +148,12 @@ } body url + commit { + oid + } + originalCommit { + oid + } } } } @@ -373,6 +379,14 @@ def timeline_head_oid(node: dict[str, Any]) -> str | None: return None +def timeline_node_timestamp(node: dict[str, Any]) -> str | None: + for key in ("createdAt", "submittedAt", "committedDate"): + value = node.get(key) + if isinstance(value, str) and value: + return value + return None + + def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[dict[str, Any]]: comments = paged_api(f"/repos/{repo}/pulls/{number}/comments") nodes: list[dict[str, Any]] = [] @@ -383,8 +397,11 @@ def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[ commit_id = comment.get("commit_id") original_commit_id = comment.get("original_commit_id") review_id = comment.get("pull_request_review_id") - if commit_id != head_sha and original_commit_id != head_sha: + original_matches_head = original_commit_id == head_sha + if not original_matches_head and not body_mentions_head(body, head_sha): continue + effective_commit_id = original_commit_id if original_matches_head else commit_id + effective_review_id = review_id if original_matches_head else None user = comment.get("user") login = user.get("login") if isinstance(user, dict) else None nodes.append( @@ -394,8 +411,8 @@ def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[ "bodyText": body, "createdAt": comment.get("created_at"), "url": comment.get("html_url") or comment.get("url"), - "commit": {"oid": commit_id} if isinstance(commit_id, str) else None, - "pullRequestReviewDatabaseId": review_id if isinstance(review_id, int) else None, + "commit": {"oid": effective_commit_id} if isinstance(effective_commit_id, str) else None, + "pullRequestReviewDatabaseId": effective_review_id if isinstance(effective_review_id, int) else None, } ) return nodes @@ -433,7 +450,18 @@ def merge_review_comment_nodes( for node in comment_nodes: if id(node) not in placed_ids and node not in unplaced: unplaced.append(node) - merged.extend(unplaced) + for node in unplaced: + timestamp = timeline_node_timestamp(node) + if timestamp is None: + merged.append(node) + continue + insert_at = len(merged) + for index, candidate in enumerate(merged): + candidate_timestamp = timeline_node_timestamp(candidate) + if candidate_timestamp is not None and candidate_timestamp > timestamp: + insert_at = index + break + merged.insert(insert_at, node) return merged @@ -679,6 +707,7 @@ def unresolved_codex_finding_thread_urls( repo: str, number: int, *, + head_sha: str, allowed_authors: set[str], ) -> tuple[str, ...]: owner, name = repo.split("/", 1) @@ -719,6 +748,24 @@ def unresolved_codex_finding_thread_urls( continue if not is_needs_work_codex_body(comment.get("body")): continue + body = comment.get("body") + original_commit = comment.get("originalCommit") + original_oid = original_commit.get("oid") if isinstance(original_commit, dict) else None + commit = comment.get("commit") + commit_oid = commit.get("oid") if isinstance(commit, dict) else None + if ( + isinstance(original_oid, str) + and original_oid != head_sha + and not body_mentions_head(body, head_sha) + ): + continue + if ( + original_oid is None + and isinstance(commit_oid, str) + and commit_oid != head_sha + and not body_mentions_head(body, head_sha) + ): + continue url = comment.get("url") urls.append(str(url) if isinstance(url, str) else "unresolved Codex review thread") @@ -840,7 +887,12 @@ def decide_pr( head_sha=head_sha, allowed_authors=allowed_authors, ) - unresolved_finding_urls = unresolved_codex_finding_thread_urls(repo, number, allowed_authors=allowed_authors) + unresolved_finding_urls = unresolved_codex_finding_thread_urls( + repo, + number, + head_sha=head_sha, + allowed_authors=allowed_authors, + ) has_codex_news = has_codex_news_after_current_head( timeline_nodes, head_sha=head_sha, diff --git a/tests/unit/test_sync_codex_ok_labels.py b/tests/unit/test_sync_codex_ok_labels.py index db5c68c78..b5fae1712 100644 --- a/tests/unit/test_sync_codex_ok_labels.py +++ b/tests/unit/test_sync_codex_ok_labels.py @@ -211,3 +211,186 @@ def fail_apply(*_args: Any, **_kwargs: Any) -> tuple[str, ...]: captured = capsys.readouterr() assert result == 1 assert "Soju06/codex-lb#714: gh: HTTP 500 while writing labels" in captured.err + + +def test_pull_review_comment_nodes_uses_original_commit_or_head_reference(monkeypatch: pytest.MonkeyPatch) -> None: + module = load_sync_module() + head_sha = "a" * 40 + old_sha = "b" * 40 + comment_data = [ + { + "body": "this review is on old code only", + "commit_id": head_sha, + "original_commit_id": old_sha, + "pull_request_review_id": 1, + "created_at": "2026-06-11T00:00:00Z", + "html_url": "https://github.com/Soju06/codex-lb/pull/714#discussion_r1", + "user": {"login": "openai-codex"}, + }, + { + "body": f"stale but mentions head commit {head_sha[:12]}", + "commit_id": head_sha, + "original_commit_id": old_sha, + "pull_request_review_id": 2, + "created_at": "2026-06-11T00:00:00Z", + "html_url": "https://github.com/Soju06/codex-lb/pull/714#discussion_r2", + "user": {"login": "openai-codex"}, + }, + { + "body": "actual current-head inline review", + "commit_id": old_sha, + "original_commit_id": head_sha, + "pull_request_review_id": 3, + "created_at": "2026-06-11T00:00:00Z", + "html_url": "https://github.com/Soju06/codex-lb/pull/714#discussion_r3", + "user": {"login": "openai-codex"}, + }, + { + "body": "older unrelated comment", + "commit_id": old_sha, + "original_commit_id": old_sha, + "pull_request_review_id": 4, + "created_at": "2026-06-11T00:00:00Z", + "html_url": "https://github.com/Soju06/codex-lb/pull/714#discussion_r4", + "user": {"login": "openai-codex"}, + }, + ] + + monkeypatch.setattr(module, "paged_api", lambda _path: comment_data) + + nodes = module.pull_review_comment_nodes("Soju06/codex-lb", 714, head_sha=head_sha) + + assert [node.get("commit", {}).get("oid") for node in nodes] == [head_sha, head_sha] + assert [node.get("pullRequestReviewDatabaseId") for node in nodes] == [None, 3] + + +def test_head_mentioned_fallback_comment_keeps_timeline_chronology() -> None: + module = load_sync_module() + head_sha = "a" * 40 + review_id = 2 + timeline_nodes = [ + { + "__typename": "PullRequestCommit", + "commit": {"oid": head_sha}, + "committedDate": "2026-06-11T06:30:00Z", + }, + { + "__typename": "PullRequestReview", + "databaseId": review_id, + "author": {"login": "openai-codex"}, + "bodyText": "Reviewed older commit.", + "submittedAt": "2026-06-11T06:32:00Z", + "commit": {"oid": "b" * 40}, + }, + { + "__typename": "IssueComment", + "author": {"login": "openai-codex"}, + "bodyText": "Codex Review: Didn't find any major issues.", + "createdAt": "2026-06-11T06:40:00Z", + }, + ] + comment_nodes = [ + { + "__typename": "PullRequestReviewComment", + "author": {"login": "openai-codex"}, + "bodyText": f"**[P2]** stale finding mentioning {head_sha[:12]}", + "createdAt": "2026-06-11T06:34:00Z", + "commit": {"oid": head_sha}, + "pullRequestReviewDatabaseId": None, + } + ] + + merged = module.merge_review_comment_nodes(timeline_nodes, comment_nodes) + assert [node["__typename"] for node in merged] == [ + "PullRequestCommit", + "PullRequestReview", + "PullRequestReviewComment", + "IssueComment", + ] + + state, node = module.find_current_head_codex_review_state( + merged, + head_sha=head_sha, + allowed_authors={"openai-codex"}, + ) + + assert state == "clean" + assert node is timeline_nodes[-1] + + +def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.MonkeyPatch) -> None: + module = load_sync_module() + head_sha = "a" * 40 + old_sha = "b" * 40 + + pages = [ + { + "data": { + "repository": { + "pullRequest": { + "reviewThreads": { + "pageInfo": {"hasNextPage": False, "endCursor": None}, + "nodes": [ + { + "isResolved": False, + "isOutdated": False, + "comments": { + "nodes": [ + { + "author": {"login": "openai-codex"}, + "body": "**[P1]** old finding", + "url": "https://example.invalid/stale", + "commit": {"oid": head_sha}, + "originalCommit": {"oid": old_sha}, + } + ] + }, + }, + { + "isResolved": False, + "isOutdated": False, + "comments": { + "nodes": [ + { + "author": {"login": "openai-codex"}, + "body": "**[P1]** current finding", + "url": "https://example.invalid/current", + "commit": {"oid": head_sha}, + "originalCommit": {"oid": head_sha}, + } + ] + }, + }, + { + "isResolved": False, + "isOutdated": False, + "comments": { + "nodes": [ + { + "author": {"login": "openai-codex"}, + "body": f"**[P2]** stale fallback for {head_sha[:12]}", + "url": "https://example.invalid/fallback", + "commit": {"oid": head_sha}, + "originalCommit": {"oid": old_sha}, + } + ] + }, + }, + ], + } + } + } + } + } + ] + + monkeypatch.setattr(module, "graphql", lambda *_args, **_kwargs: pages[0]) + + urls = module.unresolved_codex_finding_thread_urls( + "Soju06/codex-lb", + 714, + head_sha=head_sha, + allowed_authors={"openai-codex"}, + ) + + assert urls == ("https://example.invalid/current", "https://example.invalid/fallback") From c91b5130b38023eaa1b81926278379059882c576 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Thu, 11 Jun 2026 11:49:34 +0400 Subject: [PATCH 2/6] fix(ci): ignore superseded duplicate check runs --- .github/scripts/sync_codex_ok_labels.py | 48 ++++++++--- tests/unit/test_sync_codex_ok_labels.py | 106 ++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 12 deletions(-) diff --git a/.github/scripts/sync_codex_ok_labels.py b/.github/scripts/sync_codex_ok_labels.py index dc89cc275..baba87854 100755 --- a/.github/scripts/sync_codex_ok_labels.py +++ b/.github/scripts/sync_codex_ok_labels.py @@ -639,8 +639,19 @@ def classify_check_state( ) -> str: states: list[str] = [] seen_check_names: set[str] = set() + named_check_runs: dict[str, dict[str, Any]] = {} + unnamed_check_runs: list[dict[str, Any]] = [] for item in check_runs: + name = item.get("name") + if isinstance(name, str): + previous = named_check_runs.get(name) + if previous is None or check_run_recency_key(item) >= check_run_recency_key(previous): + named_check_runs[name] = item + else: + unnamed_check_runs.append(item) + + for item in [*named_check_runs.values(), *unnamed_check_runs]: name = item.get("name") if isinstance(name, str): seen_check_names.add(name) @@ -665,6 +676,21 @@ def classify_check_state( return "unknown" +def check_run_recency_key(item: dict[str, Any]) -> tuple[str, str]: + return ( + str( + item.get("started_at") + or item.get("startedAt") + or item.get("created_at") + or item.get("createdAt") + or item.get("completed_at") + or item.get("completedAt") + or "" + ), + str(item.get("completed_at") or item.get("completedAt") or ""), + ) + + def commit_checks_state(repo: str, head_sha: str) -> str: check_runs = paged_api(f"/repos/{repo}/commits/{head_sha}/check-runs") combined_status = gh_api(f"/repos/{repo}/commits/{head_sha}/status") @@ -753,18 +779,16 @@ def unresolved_codex_finding_thread_urls( original_oid = original_commit.get("oid") if isinstance(original_commit, dict) else None commit = comment.get("commit") commit_oid = commit.get("oid") if isinstance(commit, dict) else None - if ( - isinstance(original_oid, str) - and original_oid != head_sha - and not body_mentions_head(body, head_sha) - ): - continue - if ( - original_oid is None - and isinstance(commit_oid, str) - and commit_oid != head_sha - and not body_mentions_head(body, head_sha) - ): + body_mentions_current_head = body_mentions_head(body, head_sha) + if body_mentions_current_head: + pass + elif isinstance(original_oid, str): + if original_oid != head_sha: + continue + elif isinstance(commit_oid, str): + if commit_oid != head_sha: + continue + else: continue url = comment.get("url") urls.append(str(url) if isinstance(url, str) else "unresolved Codex review thread") diff --git a/tests/unit/test_sync_codex_ok_labels.py b/tests/unit/test_sync_codex_ok_labels.py index b5fae1712..8047ad77d 100644 --- a/tests/unit/test_sync_codex_ok_labels.py +++ b/tests/unit/test_sync_codex_ok_labels.py @@ -44,6 +44,97 @@ def decision(module: ModuleType, **overrides: Any) -> Any: return module.SyncDecision(**values) +def test_classify_check_state_uses_latest_run_for_duplicate_check_names() -> None: + module = load_sync_module() + + check_runs = [ + { + "name": "CI Required", + "status": "completed", + "conclusion": "failure", + "completed_at": "2026-06-11T07:40:59Z", + }, + { + "name": "CI Required", + "status": "completed", + "conclusion": "success", + "completed_at": "2026-06-11T07:45:35Z", + }, + { + "name": "Type check (ty)", + "status": "completed", + "conclusion": "success", + "completed_at": "2026-06-11T07:41:20Z", + }, + ] + + assert ( + module.classify_check_state( + check_runs, + {"statuses": []}, + required_check_names=frozenset({"CI Required", "Type check (ty)"}), + ) + == "success" + ) + + +def test_classify_check_state_keeps_latest_pending_duplicate_pending() -> None: + module = load_sync_module() + + check_runs = [ + { + "name": "CI Required", + "status": "completed", + "conclusion": "success", + "completed_at": "2026-06-11T07:40:59Z", + }, + { + "name": "CI Required", + "status": "in_progress", + "conclusion": None, + "started_at": "2026-06-11T07:45:35Z", + }, + ] + + assert ( + module.classify_check_state( + check_runs, + {"statuses": []}, + required_check_names=frozenset({"CI Required"}), + ) + == "pending" + ) + + +def test_classify_check_state_ignores_stale_duplicate_that_finishes_late() -> None: + module = load_sync_module() + + check_runs = [ + { + "name": "CI Required", + "status": "completed", + "conclusion": "failure", + "started_at": "2026-06-11T07:40:59Z", + "completed_at": "2026-06-11T07:50:00Z", + }, + { + "name": "CI Required", + "status": "in_progress", + "conclusion": None, + "started_at": "2026-06-11T07:45:35Z", + }, + ] + + assert ( + module.classify_check_state( + check_runs, + {"statuses": []}, + required_check_names=frozenset({"CI Required"}), + ) + == "pending" + ) + + def test_apply_decision_tolerates_github_app_write_denial(monkeypatch: pytest.MonkeyPatch) -> None: module = load_sync_module() @@ -376,6 +467,21 @@ def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.Mon ] }, }, + { + "isResolved": False, + "isOutdated": False, + "comments": { + "nodes": [ + { + "author": {"login": "openai-codex"}, + "body": "**[P1]** unresolved stale thread without commit metadata", + "url": "https://example.invalid/no-commit-metadata", + "commit": None, + "originalCommit": None, + } + ] + }, + }, ], } } From 681bfcacf2bc1ba3c6e7e54a1ca86ffd55e91bd1 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Thu, 11 Jun 2026 13:52:42 +0400 Subject: [PATCH 3/6] docs(openspec): document codex label evidence --- .github/scripts/sync_codex_ok_labels.py | 10 +--- .../proposal.md | 27 +++++++++++ .../specs/github-automation/spec.md | 48 +++++++++++++++++++ .../tasks.md | 17 +++++++ tests/unit/test_sync_codex_ok_labels.py | 26 ++++++++-- 5 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 openspec/changes/fix-codex-label-current-head-evidence/proposal.md create mode 100644 openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md create mode 100644 openspec/changes/fix-codex-label-current-head-evidence/tasks.md diff --git a/.github/scripts/sync_codex_ok_labels.py b/.github/scripts/sync_codex_ok_labels.py index baba87854..949966f71 100755 --- a/.github/scripts/sync_codex_ok_labels.py +++ b/.github/scripts/sync_codex_ok_labels.py @@ -777,17 +777,11 @@ def unresolved_codex_finding_thread_urls( body = comment.get("body") original_commit = comment.get("originalCommit") original_oid = original_commit.get("oid") if isinstance(original_commit, dict) else None - commit = comment.get("commit") - commit_oid = commit.get("oid") if isinstance(commit, dict) else None body_mentions_current_head = body_mentions_head(body, head_sha) if body_mentions_current_head: pass - elif isinstance(original_oid, str): - if original_oid != head_sha: - continue - elif isinstance(commit_oid, str): - if commit_oid != head_sha: - continue + elif original_oid == head_sha: + pass else: continue url = comment.get("url") diff --git a/openspec/changes/fix-codex-label-current-head-evidence/proposal.md b/openspec/changes/fix-codex-label-current-head-evidence/proposal.md new file mode 100644 index 000000000..05421c541 --- /dev/null +++ b/openspec/changes/fix-codex-label-current-head-evidence/proposal.md @@ -0,0 +1,27 @@ +# Fix Codex label current-head evidence + +## Why + +The Codex label synchronizer can leave `needs work` on rebased pull requests +when a stale inline review thread remains unresolved but no longer belongs to +the current head. It can also misclassify duplicate check runs when an older +run completes after a newer rerun starts. + +Both cases make PR readiness depend on stale GitHub evidence instead of the +current head. + +## What Changes + +- Filter unresolved Codex inline review threads with the same current-head and + original-commit evidence rules used for REST review comments. +- Treat unresolved threads without current-head evidence as stale rather than + blocking. +- Deduplicate check runs by run start/creation recency instead of completion + time so late-finishing superseded runs cannot override newer reruns. + +## Impact + +- **Spec**: `github-automation` +- **Code**: `.github/scripts/sync_codex_ok_labels.py` +- **Tests**: `tests/unit/test_sync_codex_ok_labels.py` + diff --git a/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md b/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md new file mode 100644 index 000000000..b1668bc99 --- /dev/null +++ b/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md @@ -0,0 +1,48 @@ +## MODIFIED Requirements + +### Requirement: Codex review label sync uses current-head evidence + +The Codex label synchronization script MUST grant `🤖 codex: ok` only when the +current pull-request head has green required checks, a clean Codex review for +that head, and no unresolved current-head Codex finding threads. It MUST treat +stale unresolved Codex inline threads as non-blocking when neither their +original commit nor their body text ties them to the current head. + +#### Scenario: stale rebased inline thread remains unresolved + +- **GIVEN** a pull request was rebased after a Codex inline finding +- **AND** the unresolved GraphQL review thread still reports `isOutdated=false` +- **AND** the thread's original commit is not the current head +- **AND** the thread body does not mention the current head +- **WHEN** the label synchronizer evaluates the pull request +- **THEN** that thread does not force `🤖 codex: needs work` + +#### Scenario: unresolved inline thread belongs to the current head + +- **GIVEN** an unresolved Codex inline finding thread +- **AND** the thread's original commit is the pull request head +- **WHEN** the label synchronizer evaluates the pull request +- **THEN** that thread blocks `🤖 codex: ok` +- **AND** the synchronizer records a needs-work reason that links to the thread + +#### Scenario: unresolved inline thread mentions the current head explicitly + +- **GIVEN** an unresolved Codex inline finding thread +- **AND** the thread body mentions the current pull request head +- **WHEN** the label synchronizer evaluates the pull request +- **THEN** that thread blocks `🤖 codex: ok` +- **AND** the synchronizer records a needs-work reason that links to the thread + +### Requirement: Codex label sync MUST use check-run recency evidence + +When multiple check runs have the same context name on a pull-request head, the label synchronizer MUST classify the current context from the newest run by +start or creation time. Completion time MUST NOT let an older superseded run +override a newer rerun that has already started. + +#### Scenario: older duplicate run completes after a newer rerun starts + +- **GIVEN** two check runs share the same name +- **AND** the older run started first but completes after the newer run starts +- **WHEN** the label synchronizer deduplicates check runs +- **THEN** it keeps the newer run +- **AND** a pending newer run keeps the pull request check state pending instead of failed diff --git a/openspec/changes/fix-codex-label-current-head-evidence/tasks.md b/openspec/changes/fix-codex-label-current-head-evidence/tasks.md new file mode 100644 index 000000000..4791f4a23 --- /dev/null +++ b/openspec/changes/fix-codex-label-current-head-evidence/tasks.md @@ -0,0 +1,17 @@ +## 1. Codex Review Evidence + +- [x] 1.1 Filter unresolved Codex inline review threads by current-head evidence. +- [x] 1.2 Preserve body-head fallback only when the review comment body mentions the current head. +- [x] 1.3 Treat threads with neither current-head commit metadata nor current-head body evidence as stale. + +## 2. Check Run Evidence + +- [x] 2.1 Deduplicate same-named check runs by start/creation recency. +- [x] 2.2 Keep completion time as a fallback when start/creation metadata is absent. +- [x] 2.3 Cover late-finishing superseded runs with regression tests. + +## 3. Verification + +- [x] 3.1 Run focused unit tests for the synchronizer. +- [x] 3.2 Run ruff format/check and ty on the synchronizer and tests. + diff --git a/tests/unit/test_sync_codex_ok_labels.py b/tests/unit/test_sync_codex_ok_labels.py index 8047ad77d..dfb5563d0 100644 --- a/tests/unit/test_sync_codex_ok_labels.py +++ b/tests/unit/test_sync_codex_ok_labels.py @@ -429,8 +429,8 @@ def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.Mon "nodes": [ { "author": {"login": "openai-codex"}, - "body": "**[P1]** old finding", - "url": "https://example.invalid/stale", + "body": "**[P1]** rebased stale finding", + "url": "https://example.invalid/reanchored-stale", "commit": {"oid": head_sha}, "originalCommit": {"oid": old_sha}, } @@ -461,7 +461,22 @@ def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.Mon "author": {"login": "openai-codex"}, "body": f"**[P2]** stale fallback for {head_sha[:12]}", "url": "https://example.invalid/fallback", - "commit": {"oid": head_sha}, + "commit": {"oid": old_sha}, + "originalCommit": {"oid": old_sha}, + } + ] + }, + }, + { + "isResolved": False, + "isOutdated": False, + "comments": { + "nodes": [ + { + "author": {"login": "openai-codex"}, + "body": "**[P2]** stale old commit finding", + "url": "https://example.invalid/stale", + "commit": {"oid": old_sha}, "originalCommit": {"oid": old_sha}, } ] @@ -499,4 +514,7 @@ def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.Mon allowed_authors={"openai-codex"}, ) - assert urls == ("https://example.invalid/current", "https://example.invalid/fallback") + assert urls == ( + "https://example.invalid/current", + "https://example.invalid/fallback", + ) From d71cb6480b3249617ca4fdcbe91f4492c2123853 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Sun, 14 Jun 2026 22:59:11 +0400 Subject: [PATCH 4/6] fix(ci): ignore resolved Codex inline findings --- .github/scripts/sync_codex_ok_labels.py | 54 +++++++++++++++++- .../proposal.md | 1 - .../tasks.md | 1 - .../proposal.md | 19 +++++++ .../specs/github-automation/spec.md | 23 ++++++++ .../tasks.md | 6 ++ tests/unit/test_sync_codex_ok_labels.py | 55 +++++++++++++++++++ 7 files changed, 156 insertions(+), 3 deletions(-) create mode 100644 openspec/changes/ignore-resolved-codex-inline-findings/proposal.md create mode 100644 openspec/changes/ignore-resolved-codex-inline-findings/specs/github-automation/spec.md create mode 100644 openspec/changes/ignore-resolved-codex-inline-findings/tasks.md diff --git a/.github/scripts/sync_codex_ok_labels.py b/.github/scripts/sync_codex_ok_labels.py index 949966f71..512a6f06e 100755 --- a/.github/scripts/sync_codex_ok_labels.py +++ b/.github/scripts/sync_codex_ok_labels.py @@ -387,8 +387,57 @@ def timeline_node_timestamp(node: dict[str, Any]) -> str | None: return None +def unresolved_review_comment_urls(repo: str, number: int) -> set[str]: + owner, name = repo.split("/", 1) + after: str | None = None + urls: set[str] = set() + + while True: + fields: dict[str, object] = {"owner": owner, "name": name, "number": number} + if after is not None: + fields["after"] = after + payload = graphql(PR_REVIEW_THREADS_QUERY, **fields) + pr = payload.get("data", {}).get("repository", {}).get("pullRequest") + if not isinstance(pr, dict): + raise GhError(f"{repo}#{number}: GraphQL did not return a pull request") + + threads = pr.get("reviewThreads") + if not isinstance(threads, dict): + raise GhError(f"{repo}#{number}: GraphQL did not return review threads") + nodes = threads.get("nodes", []) + if not isinstance(nodes, list): + raise GhError(f"{repo}#{number}: GraphQL did not return review thread nodes") + + for thread in nodes: + if not isinstance(thread, dict): + continue + if thread.get("isResolved") or thread.get("isOutdated"): + continue + comments = thread.get("comments") + comment_nodes = comments.get("nodes") if isinstance(comments, dict) else [] + if not isinstance(comment_nodes, list): + continue + for comment in comment_nodes: + if not isinstance(comment, dict): + continue + url = comment.get("url") + if isinstance(url, str): + urls.add(url) + + page_info = threads.get("pageInfo") + if not isinstance(page_info, dict) or not page_info.get("hasNextPage"): + break + end_cursor = page_info.get("endCursor") + if not isinstance(end_cursor, str) or not end_cursor: + break + after = end_cursor + + return urls + + def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[dict[str, Any]]: comments = paged_api(f"/repos/{repo}/pulls/{number}/comments") + unresolved_urls = unresolved_review_comment_urls(repo, number) nodes: list[dict[str, Any]] = [] for comment in comments: body = comment.get("body") @@ -402,6 +451,9 @@ def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[ continue effective_commit_id = original_commit_id if original_matches_head else commit_id effective_review_id = review_id if original_matches_head else None + html_url = comment.get("html_url") or comment.get("url") + if is_needs_work_codex_body(body) and html_url not in unresolved_urls: + continue user = comment.get("user") login = user.get("login") if isinstance(user, dict) else None nodes.append( @@ -410,7 +462,7 @@ def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[ "author": {"login": login} if isinstance(login, str) else None, "bodyText": body, "createdAt": comment.get("created_at"), - "url": comment.get("html_url") or comment.get("url"), + "url": html_url, "commit": {"oid": effective_commit_id} if isinstance(effective_commit_id, str) else None, "pullRequestReviewDatabaseId": effective_review_id if isinstance(effective_review_id, int) else None, } diff --git a/openspec/changes/fix-codex-label-current-head-evidence/proposal.md b/openspec/changes/fix-codex-label-current-head-evidence/proposal.md index 05421c541..c5895f85b 100644 --- a/openspec/changes/fix-codex-label-current-head-evidence/proposal.md +++ b/openspec/changes/fix-codex-label-current-head-evidence/proposal.md @@ -24,4 +24,3 @@ current head. - **Spec**: `github-automation` - **Code**: `.github/scripts/sync_codex_ok_labels.py` - **Tests**: `tests/unit/test_sync_codex_ok_labels.py` - diff --git a/openspec/changes/fix-codex-label-current-head-evidence/tasks.md b/openspec/changes/fix-codex-label-current-head-evidence/tasks.md index 4791f4a23..3ab05f0d0 100644 --- a/openspec/changes/fix-codex-label-current-head-evidence/tasks.md +++ b/openspec/changes/fix-codex-label-current-head-evidence/tasks.md @@ -14,4 +14,3 @@ - [x] 3.1 Run focused unit tests for the synchronizer. - [x] 3.2 Run ruff format/check and ty on the synchronizer and tests. - diff --git a/openspec/changes/ignore-resolved-codex-inline-findings/proposal.md b/openspec/changes/ignore-resolved-codex-inline-findings/proposal.md new file mode 100644 index 000000000..643d60e4a --- /dev/null +++ b/openspec/changes/ignore-resolved-codex-inline-findings/proposal.md @@ -0,0 +1,19 @@ +## Why + +The Codex review label synchronizer can keep `🤖 codex: needs work` on a PR +after a maintainer resolves an inline Codex finding. The unresolved-thread check +already knows resolved review threads are no longer blockers, but the timeline +classifier still merges the old inline review comment and treats it as current +needs-work evidence. + +## What Changes + +- Ignore inline Codex finding comments from resolved or outdated review threads + when computing current-head review state. +- Keep unresolved inline Codex finding comments as needs-work evidence. +- Add regression coverage for both resolved and unresolved inline findings. + +## Impact + +- GitHub automation only. +- No application runtime behavior changes. diff --git a/openspec/changes/ignore-resolved-codex-inline-findings/specs/github-automation/spec.md b/openspec/changes/ignore-resolved-codex-inline-findings/specs/github-automation/spec.md new file mode 100644 index 000000000..446ecca48 --- /dev/null +++ b/openspec/changes/ignore-resolved-codex-inline-findings/specs/github-automation/spec.md @@ -0,0 +1,23 @@ +## ADDED Requirements + +### Requirement: Codex review label sync review-thread state + +The Codex label synchronization script MUST treat unresolved, non-outdated +Codex inline review findings on the current head as needs-work evidence. It +MUST NOT treat inline Codex findings from resolved or outdated review threads as +active needs-work evidence. + +#### Scenario: Resolved inline finding no longer blocks the ok label + +- **WHEN** a current-head inline Codex finding comment belongs to a resolved + review thread +- **AND** a clean current-head Codex review exists +- **THEN** the script does not classify that inline finding as active + needs-work evidence + +#### Scenario: Unresolved inline finding still blocks the ok label + +- **WHEN** a current-head inline Codex finding comment belongs to an unresolved, + non-outdated review thread +- **THEN** the script classifies that inline finding as active needs-work + evidence diff --git a/openspec/changes/ignore-resolved-codex-inline-findings/tasks.md b/openspec/changes/ignore-resolved-codex-inline-findings/tasks.md new file mode 100644 index 000000000..7681377be --- /dev/null +++ b/openspec/changes/ignore-resolved-codex-inline-findings/tasks.md @@ -0,0 +1,6 @@ +## Implementation + +- [x] Filter current-head inline Codex finding comments by unresolved review-thread URLs. +- [x] Preserve unresolved inline finding comments as needs-work evidence. +- [x] Add unit coverage for resolved and unresolved inline findings. +- [x] Validate the focused test and OpenSpec gates. diff --git a/tests/unit/test_sync_codex_ok_labels.py b/tests/unit/test_sync_codex_ok_labels.py index dfb5563d0..8a09336de 100644 --- a/tests/unit/test_sync_codex_ok_labels.py +++ b/tests/unit/test_sync_codex_ok_labels.py @@ -348,6 +348,7 @@ def test_pull_review_comment_nodes_uses_original_commit_or_head_reference(monkey ] monkeypatch.setattr(module, "paged_api", lambda _path: comment_data) + monkeypatch.setattr(module, "unresolved_review_comment_urls", lambda *_args: set()) nodes = module.pull_review_comment_nodes("Soju06/codex-lb", 714, head_sha=head_sha) @@ -518,3 +519,57 @@ def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.Mon "https://example.invalid/current", "https://example.invalid/fallback", ) + + +def test_resolved_inline_codex_finding_does_not_count_as_review_news( + monkeypatch: pytest.MonkeyPatch, +) -> None: + module = load_sync_module() + + monkeypatch.setattr( + module, + "paged_api", + lambda _path: [ + { + "body": "**P1 Badge** resolved finding", + "commit_id": "a" * 40, + "original_commit_id": "a" * 40, + "pull_request_review_id": 123, + "html_url": "https://github.test/review/resolved", + "created_at": "2026-06-14T00:00:00Z", + "user": {"login": "chatgpt-codex-connector"}, + } + ], + ) + monkeypatch.setattr(module, "unresolved_review_comment_urls", lambda *_args: set()) + + assert module.pull_review_comment_nodes("Soju06/codex-lb", 714, head_sha="a" * 40) == [] + + +def test_unresolved_inline_codex_finding_counts_as_review_news( + monkeypatch: pytest.MonkeyPatch, +) -> None: + module = load_sync_module() + url = "https://github.test/review/unresolved" + + monkeypatch.setattr( + module, + "paged_api", + lambda _path: [ + { + "body": "**P1 Badge** unresolved finding", + "commit_id": "a" * 40, + "original_commit_id": "a" * 40, + "pull_request_review_id": 123, + "html_url": url, + "created_at": "2026-06-14T00:00:00Z", + "user": {"login": "chatgpt-codex-connector"}, + } + ], + ) + monkeypatch.setattr(module, "unresolved_review_comment_urls", lambda *_args: {url}) + + nodes = module.pull_review_comment_nodes("Soju06/codex-lb", 714, head_sha="a" * 40) + + assert len(nodes) == 1 + assert nodes[0]["url"] == url From 706eff12fc9d8a270b0fe4b1653561423881429f Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Tue, 16 Jun 2026 03:33:45 +0400 Subject: [PATCH 5/6] fix(ci): keep current commit Codex evidence --- .github/scripts/sync_codex_ok_labels.py | 8 +++++++- .../specs/github-automation/spec.md | 12 +++++++++++- tests/unit/test_sync_codex_ok_labels.py | 11 ++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/.github/scripts/sync_codex_ok_labels.py b/.github/scripts/sync_codex_ok_labels.py index 512a6f06e..c54329481 100755 --- a/.github/scripts/sync_codex_ok_labels.py +++ b/.github/scripts/sync_codex_ok_labels.py @@ -446,8 +446,10 @@ def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[ commit_id = comment.get("commit_id") original_commit_id = comment.get("original_commit_id") review_id = comment.get("pull_request_review_id") + commit_matches_head = commit_id == head_sha original_matches_head = original_commit_id == head_sha - if not original_matches_head and not body_mentions_head(body, head_sha): + body_mentions_current_head = body_mentions_head(body, head_sha) + if not commit_matches_head and not original_matches_head and not body_mentions_current_head: continue effective_commit_id = original_commit_id if original_matches_head else commit_id effective_review_id = review_id if original_matches_head else None @@ -827,11 +829,15 @@ def unresolved_codex_finding_thread_urls( if not is_needs_work_codex_body(comment.get("body")): continue body = comment.get("body") + commit = comment.get("commit") + commit_oid = commit.get("oid") if isinstance(commit, dict) else None original_commit = comment.get("originalCommit") original_oid = original_commit.get("oid") if isinstance(original_commit, dict) else None body_mentions_current_head = body_mentions_head(body, head_sha) if body_mentions_current_head: pass + elif commit_oid == head_sha: + pass elif original_oid == head_sha: pass else: diff --git a/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md b/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md index b1668bc99..2deb09b43 100644 --- a/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md +++ b/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md @@ -6,17 +6,27 @@ The Codex label synchronization script MUST grant `🤖 codex: ok` only when the current pull-request head has green required checks, a clean Codex review for that head, and no unresolved current-head Codex finding threads. It MUST treat stale unresolved Codex inline threads as non-blocking when neither their -original commit nor their body text ties them to the current head. +current commit, original commit, nor body text ties them to the current head. #### Scenario: stale rebased inline thread remains unresolved - **GIVEN** a pull request was rebased after a Codex inline finding - **AND** the unresolved GraphQL review thread still reports `isOutdated=false` +- **AND** the thread's current commit is not the current head - **AND** the thread's original commit is not the current head - **AND** the thread body does not mention the current head - **WHEN** the label synchronizer evaluates the pull request - **THEN** that thread does not force `🤖 codex: needs work` +#### Scenario: reanchored unresolved inline thread belongs to the current head + +- **GIVEN** an unresolved Codex inline finding thread +- **AND** the thread's current commit is the pull request head +- **AND** the thread's original commit is older than the pull request head +- **WHEN** the label synchronizer evaluates the pull request +- **THEN** that thread blocks `🤖 codex: ok` +- **AND** the synchronizer records a needs-work reason that links to the thread + #### Scenario: unresolved inline thread belongs to the current head - **GIVEN** an unresolved Codex inline finding thread diff --git a/tests/unit/test_sync_codex_ok_labels.py b/tests/unit/test_sync_codex_ok_labels.py index 8a09336de..9321f6232 100644 --- a/tests/unit/test_sync_codex_ok_labels.py +++ b/tests/unit/test_sync_codex_ok_labels.py @@ -310,7 +310,7 @@ def test_pull_review_comment_nodes_uses_original_commit_or_head_reference(monkey old_sha = "b" * 40 comment_data = [ { - "body": "this review is on old code only", + "body": "reanchored current-head inline review", "commit_id": head_sha, "original_commit_id": old_sha, "pull_request_review_id": 1, @@ -352,8 +352,8 @@ def test_pull_review_comment_nodes_uses_original_commit_or_head_reference(monkey nodes = module.pull_review_comment_nodes("Soju06/codex-lb", 714, head_sha=head_sha) - assert [node.get("commit", {}).get("oid") for node in nodes] == [head_sha, head_sha] - assert [node.get("pullRequestReviewDatabaseId") for node in nodes] == [None, 3] + assert [node.get("commit", {}).get("oid") for node in nodes] == [head_sha, head_sha, head_sha] + assert [node.get("pullRequestReviewDatabaseId") for node in nodes] == [None, None, 3] def test_head_mentioned_fallback_comment_keeps_timeline_chronology() -> None: @@ -430,8 +430,8 @@ def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.Mon "nodes": [ { "author": {"login": "openai-codex"}, - "body": "**[P1]** rebased stale finding", - "url": "https://example.invalid/reanchored-stale", + "body": "**[P1]** reanchored current-head finding", + "url": "https://example.invalid/reanchored-current", "commit": {"oid": head_sha}, "originalCommit": {"oid": old_sha}, } @@ -516,6 +516,7 @@ def test_unresolved_codex_threads_filter_to_current_head(monkeypatch: pytest.Mon ) assert urls == ( + "https://example.invalid/reanchored-current", "https://example.invalid/current", "https://example.invalid/fallback", ) From 6e24755d021e6cdf0662875f0a6fec7b8f8a86b9 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Tue, 16 Jun 2026 03:54:08 +0400 Subject: [PATCH 6/6] ci: resync codex labels after thread resolution --- .github/workflows/codex-review-labels.yml | 12 ++++++++++-- .../specs/github-automation/spec.md | 7 +++++++ tests/unit/test_sync_codex_ok_labels.py | 4 ++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codex-review-labels.yml b/.github/workflows/codex-review-labels.yml index c36958715..eb4a17d3d 100644 --- a/.github/workflows/codex-review-labels.yml +++ b/.github/workflows/codex-review-labels.yml @@ -10,6 +10,10 @@ on: types: [created, edited] pull_request_review: types: [submitted, edited, dismissed] + pull_request_review_thread: + types: [resolved, unresolved] + schedule: + - cron: "*/15 * * * *" permissions: actions: write @@ -31,6 +35,7 @@ jobs: ${{ github.event_name == 'pull_request_target' || github.event_name == 'pull_request_review' || + github.event_name == 'pull_request_review_thread' || (github.event_name == 'issue_comment' && github.event.issue.pull_request) }} @@ -58,8 +63,11 @@ jobs: runs-on: ubuntu-24.04 if: >- ${{ - github.event_name == 'workflow_run' && - github.event.workflow_run.event == 'pull_request' + github.event_name == 'schedule' || + ( + github.event_name == 'workflow_run' && + github.event.workflow_run.event == 'pull_request' + ) }} steps: diff --git a/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md b/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md index 2deb09b43..3c4ea9790 100644 --- a/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md +++ b/openspec/changes/fix-codex-label-current-head-evidence/specs/github-automation/spec.md @@ -43,6 +43,13 @@ current commit, original commit, nor body text ties them to the current head. - **THEN** that thread blocks `🤖 codex: ok` - **AND** the synchronizer records a needs-work reason that links to the thread +#### Scenario: resolved inline thread triggers label resynchronization + +- **GIVEN** a pull request has a `🤖 codex: needs work` label from an unresolved Codex inline finding +- **WHEN** that review thread is resolved +- **THEN** the Codex label synchronization workflow runs for that pull request +- **AND** a scheduled fallback also resynchronizes open pull requests when no review-thread event is delivered + ### Requirement: Codex label sync MUST use check-run recency evidence When multiple check runs have the same context name on a pull-request head, the label synchronizer MUST classify the current context from the newest run by diff --git a/tests/unit/test_sync_codex_ok_labels.py b/tests/unit/test_sync_codex_ok_labels.py index 9321f6232..30e2a4301 100644 --- a/tests/unit/test_sync_codex_ok_labels.py +++ b/tests/unit/test_sync_codex_ok_labels.py @@ -219,6 +219,10 @@ def test_workflow_prefers_privileged_token_and_enables_tolerant_apply() -> None: workflow = Path(".github/workflows/codex-review-labels.yml").read_text(encoding="utf-8") assert "secrets.CODEX_LABEL_SYNC_TOKEN || secrets.RELEASE_PLEASE_TOKEN || github.token" in workflow + assert "pull_request_review_thread:" in workflow + assert "types: [resolved, unresolved]" in workflow + assert "github.event_name == 'pull_request_review_thread'" in workflow + assert 'cron: "*/15 * * * *"' in workflow assert workflow.count("--tolerate-write-permission-errors") == 2 assert workflow.count("--tolerate-read-errors") == 1