Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 134 additions & 6 deletions .github/scripts/sync_codex_ok_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,12 @@
}
body
url
commit {
oid
}
originalCommit {
oid
}
}
}
}
Expand Down Expand Up @@ -373,8 +379,65 @@ def timeline_head_oid(node: dict[str, Any]) -> str | None:
return None


def timeline_node_timestamp(node: dict[str, Any]) -> str | None:
for key in ("createdAt", "submittedAt", "committedDate"):
value = node.get(key)
if isinstance(value, str) and value:
return value
return None


def unresolved_review_comment_urls(repo: str, number: int) -> set[str]:
owner, name = repo.split("/", 1)
after: str | None = None
urls: set[str] = set()

while True:
fields: dict[str, object] = {"owner": owner, "name": name, "number": number}
if after is not None:
fields["after"] = after
payload = graphql(PR_REVIEW_THREADS_QUERY, **fields)
pr = payload.get("data", {}).get("repository", {}).get("pullRequest")
if not isinstance(pr, dict):
raise GhError(f"{repo}#{number}: GraphQL did not return a pull request")

threads = pr.get("reviewThreads")
if not isinstance(threads, dict):
raise GhError(f"{repo}#{number}: GraphQL did not return review threads")
nodes = threads.get("nodes", [])
if not isinstance(nodes, list):
raise GhError(f"{repo}#{number}: GraphQL did not return review thread nodes")

for thread in nodes:
if not isinstance(thread, dict):
continue
if thread.get("isResolved") or thread.get("isOutdated"):
continue
comments = thread.get("comments")
comment_nodes = comments.get("nodes") if isinstance(comments, dict) else []
if not isinstance(comment_nodes, list):
continue
for comment in comment_nodes:
if not isinstance(comment, dict):
continue
url = comment.get("url")
if isinstance(url, str):
urls.add(url)

page_info = threads.get("pageInfo")
if not isinstance(page_info, dict) or not page_info.get("hasNextPage"):
break
end_cursor = page_info.get("endCursor")
if not isinstance(end_cursor, str) or not end_cursor:
break
after = end_cursor

return urls


def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[dict[str, Any]]:
comments = paged_api(f"/repos/{repo}/pulls/{number}/comments")
unresolved_urls = unresolved_review_comment_urls(repo, number)
nodes: list[dict[str, Any]] = []
for comment in comments:
body = comment.get("body")
Expand All @@ -383,7 +446,15 @@ def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[
commit_id = comment.get("commit_id")
original_commit_id = comment.get("original_commit_id")
review_id = comment.get("pull_request_review_id")
if commit_id != head_sha and original_commit_id != head_sha:
commit_matches_head = commit_id == head_sha
original_matches_head = original_commit_id == head_sha
body_mentions_current_head = body_mentions_head(body, head_sha)
if not commit_matches_head and not original_matches_head and not body_mentions_current_head:
continue
effective_commit_id = original_commit_id if original_matches_head else commit_id
effective_review_id = review_id if original_matches_head else None
Comment thread
Komzpa marked this conversation as resolved.
html_url = comment.get("html_url") or comment.get("url")
if is_needs_work_codex_body(body) and html_url not in unresolved_urls:
Comment thread
Komzpa marked this conversation as resolved.
continue
user = comment.get("user")
login = user.get("login") if isinstance(user, dict) else None
Expand All @@ -393,9 +464,9 @@ def pull_review_comment_nodes(repo: str, number: int, *, head_sha: str) -> list[
"author": {"login": login} if isinstance(login, str) else None,
"bodyText": body,
"createdAt": comment.get("created_at"),
"url": comment.get("html_url") or comment.get("url"),
"commit": {"oid": commit_id} if isinstance(commit_id, str) else None,
"pullRequestReviewDatabaseId": review_id if isinstance(review_id, int) else None,
"url": html_url,
"commit": {"oid": effective_commit_id} if isinstance(effective_commit_id, str) else None,
"pullRequestReviewDatabaseId": effective_review_id if isinstance(effective_review_id, int) else None,
}
)
return nodes
Expand Down Expand Up @@ -433,7 +504,18 @@ def merge_review_comment_nodes(
for node in comment_nodes:
if id(node) not in placed_ids and node not in unplaced:
unplaced.append(node)
merged.extend(unplaced)
for node in unplaced:
timestamp = timeline_node_timestamp(node)
if timestamp is None:
merged.append(node)
continue
insert_at = len(merged)
for index, candidate in enumerate(merged):
candidate_timestamp = timeline_node_timestamp(candidate)
if candidate_timestamp is not None and candidate_timestamp > timestamp:
insert_at = index
break
merged.insert(insert_at, node)
return merged


Expand Down Expand Up @@ -611,8 +693,19 @@ def classify_check_state(
) -> str:
states: list[str] = []
seen_check_names: set[str] = set()
named_check_runs: dict[str, dict[str, Any]] = {}
unnamed_check_runs: list[dict[str, Any]] = []

for item in check_runs:
name = item.get("name")
if isinstance(name, str):
previous = named_check_runs.get(name)
if previous is None or check_run_recency_key(item) >= check_run_recency_key(previous):
named_check_runs[name] = item
else:
unnamed_check_runs.append(item)

for item in [*named_check_runs.values(), *unnamed_check_runs]:
name = item.get("name")
if isinstance(name, str):
seen_check_names.add(name)
Expand All @@ -637,6 +730,21 @@ def classify_check_state(
return "unknown"


def check_run_recency_key(item: dict[str, Any]) -> tuple[str, str]:
return (
str(
item.get("started_at")
or item.get("startedAt")
or item.get("created_at")
or item.get("createdAt")
or item.get("completed_at")
or item.get("completedAt")
or ""
),
str(item.get("completed_at") or item.get("completedAt") or ""),
)


def commit_checks_state(repo: str, head_sha: str) -> str:
check_runs = paged_api(f"/repos/{repo}/commits/{head_sha}/check-runs")
combined_status = gh_api(f"/repos/{repo}/commits/{head_sha}/status")
Expand Down Expand Up @@ -679,6 +787,7 @@ def unresolved_codex_finding_thread_urls(
repo: str,
number: int,
*,
head_sha: str,
allowed_authors: set[str],
) -> tuple[str, ...]:
owner, name = repo.split("/", 1)
Expand Down Expand Up @@ -719,6 +828,20 @@ def unresolved_codex_finding_thread_urls(
continue
if not is_needs_work_codex_body(comment.get("body")):
continue
body = comment.get("body")
commit = comment.get("commit")
commit_oid = commit.get("oid") if isinstance(commit, dict) else None
original_commit = comment.get("originalCommit")
original_oid = original_commit.get("oid") if isinstance(original_commit, dict) else None
body_mentions_current_head = body_mentions_head(body, head_sha)
if body_mentions_current_head:
pass
elif commit_oid == head_sha:
pass
elif original_oid == head_sha:
pass
else:
continue
url = comment.get("url")
urls.append(str(url) if isinstance(url, str) else "unresolved Codex review thread")

Expand Down Expand Up @@ -840,7 +963,12 @@ def decide_pr(
head_sha=head_sha,
allowed_authors=allowed_authors,
)
unresolved_finding_urls = unresolved_codex_finding_thread_urls(repo, number, allowed_authors=allowed_authors)
unresolved_finding_urls = unresolved_codex_finding_thread_urls(
repo,
number,
head_sha=head_sha,
allowed_authors=allowed_authors,
)
has_codex_news = has_codex_news_after_current_head(
timeline_nodes,
head_sha=head_sha,
Expand Down
26 changes: 26 additions & 0 deletions openspec/changes/fix-codex-label-current-head-evidence/proposal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Fix Codex label current-head evidence

## Why

The Codex label synchronizer can leave `needs work` on rebased pull requests
when a stale inline review thread remains unresolved but no longer belongs to
the current head. It can also misclassify duplicate check runs when an older
run completes after a newer rerun starts.

Both cases make PR readiness depend on stale GitHub evidence instead of the
current head.

## What Changes

- Filter unresolved Codex inline review threads with the same current-head and
original-commit evidence rules used for REST review comments.
- Treat unresolved threads without current-head evidence as stale rather than
blocking.
- Deduplicate check runs by run start/creation recency instead of completion
time so late-finishing superseded runs cannot override newer reruns.

## Impact

- **Spec**: `github-automation`
- **Code**: `.github/scripts/sync_codex_ok_labels.py`
- **Tests**: `tests/unit/test_sync_codex_ok_labels.py`
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
## MODIFIED Requirements

### Requirement: Codex review label sync uses current-head evidence

The Codex label synchronization script MUST grant `🤖 codex: ok` only when the
current pull-request head has green required checks, a clean Codex review for
that head, and no unresolved current-head Codex finding threads. It MUST treat
stale unresolved Codex inline threads as non-blocking when neither their
current commit, original commit, nor body text ties them to the current head.

#### Scenario: stale rebased inline thread remains unresolved

- **GIVEN** a pull request was rebased after a Codex inline finding
- **AND** the unresolved GraphQL review thread still reports `isOutdated=false`
- **AND** the thread's current commit is not the current head
- **AND** the thread's original commit is not the current head
- **AND** the thread body does not mention the current head
- **WHEN** the label synchronizer evaluates the pull request
- **THEN** that thread does not force `🤖 codex: needs work`

#### Scenario: reanchored unresolved inline thread belongs to the current head

- **GIVEN** an unresolved Codex inline finding thread
- **AND** the thread's current commit is the pull request head
- **AND** the thread's original commit is older than the pull request head
- **WHEN** the label synchronizer evaluates the pull request
- **THEN** that thread blocks `🤖 codex: ok`
- **AND** the synchronizer records a needs-work reason that links to the thread

#### Scenario: unresolved inline thread belongs to the current head

- **GIVEN** an unresolved Codex inline finding thread
- **AND** the thread's original commit is the pull request head
- **WHEN** the label synchronizer evaluates the pull request
- **THEN** that thread blocks `🤖 codex: ok`
- **AND** the synchronizer records a needs-work reason that links to the thread

#### Scenario: unresolved inline thread mentions the current head explicitly

- **GIVEN** an unresolved Codex inline finding thread
- **AND** the thread body mentions the current pull request head
- **WHEN** the label synchronizer evaluates the pull request
- **THEN** that thread blocks `🤖 codex: ok`
- **AND** the synchronizer records a needs-work reason that links to the thread

### Requirement: Codex label sync MUST use check-run recency evidence

When multiple check runs have the same context name on a pull-request head, the label synchronizer MUST classify the current context from the newest run by
start or creation time. Completion time MUST NOT let an older superseded run
override a newer rerun that has already started.

#### Scenario: older duplicate run completes after a newer rerun starts

- **GIVEN** two check runs share the same name
- **AND** the older run started first but completes after the newer run starts
- **WHEN** the label synchronizer deduplicates check runs
- **THEN** it keeps the newer run
- **AND** a pending newer run keeps the pull request check state pending instead of failed
16 changes: 16 additions & 0 deletions openspec/changes/fix-codex-label-current-head-evidence/tasks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## 1. Codex Review Evidence

- [x] 1.1 Filter unresolved Codex inline review threads by current-head evidence.
- [x] 1.2 Preserve body-head fallback only when the review comment body mentions the current head.
- [x] 1.3 Treat threads with neither current-head commit metadata nor current-head body evidence as stale.

## 2. Check Run Evidence

- [x] 2.1 Deduplicate same-named check runs by start/creation recency.
- [x] 2.2 Keep completion time as a fallback when start/creation metadata is absent.
- [x] 2.3 Cover late-finishing superseded runs with regression tests.

## 3. Verification

- [x] 3.1 Run focused unit tests for the synchronizer.
- [x] 3.2 Run ruff format/check and ty on the synchronizer and tests.
19 changes: 19 additions & 0 deletions openspec/changes/ignore-resolved-codex-inline-findings/proposal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## Why

The Codex review label synchronizer can keep `🤖 codex: needs work` on a PR
after a maintainer resolves an inline Codex finding. The unresolved-thread check
already knows resolved review threads are no longer blockers, but the timeline
classifier still merges the old inline review comment and treats it as current
needs-work evidence.

## What Changes

- Ignore inline Codex finding comments from resolved or outdated review threads
when computing current-head review state.
- Keep unresolved inline Codex finding comments as needs-work evidence.
- Add regression coverage for both resolved and unresolved inline findings.

## Impact

- GitHub automation only.
- No application runtime behavior changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
## ADDED Requirements

### Requirement: Codex review label sync review-thread state

The Codex label synchronization script MUST treat unresolved, non-outdated
Codex inline review findings on the current head as needs-work evidence. It
MUST NOT treat inline Codex findings from resolved or outdated review threads as
active needs-work evidence.

#### Scenario: Resolved inline finding no longer blocks the ok label

- **WHEN** a current-head inline Codex finding comment belongs to a resolved
review thread
- **AND** a clean current-head Codex review exists
- **THEN** the script does not classify that inline finding as active
needs-work evidence

#### Scenario: Unresolved inline finding still blocks the ok label

- **WHEN** a current-head inline Codex finding comment belongs to an unresolved,
non-outdated review thread
- **THEN** the script classifies that inline finding as active needs-work
evidence
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
## Implementation

- [x] Filter current-head inline Codex finding comments by unresolved review-thread URLs.
- [x] Preserve unresolved inline finding comments as needs-work evidence.
- [x] Add unit coverage for resolved and unresolved inline findings.
- [x] Validate the focused test and OpenSpec gates.
Loading