Soju06 · ozpool · Jun 18, 2026 · Jun 19, 2026 · Jun 25, 2026 · chatgpt-codex-connector
diff --git a/app/core/utils/retry.py b/app/core/utils/retry.py
@@ -3,7 +3,45 @@
 import random
 import re
 
-_RETRY_PATTERN = re.compile(r"(?i)try again in\s*(\d+(?:\.\d+)?)\s*(s|ms|seconds?)")
+# Units the retry hint may use. Each literal is listed before any shorter
+# literal that prefixes it, so ``ms`` and ``minutes`` are tried before ``m``.
+_UNIT_ALTERNATION = r"ms|milliseconds?|hours?|hrs?|h|minutes?|mins?|m|seconds?|secs?|s"
+
+# A unit literal is only valid when it is not immediately followed by another
+# letter, otherwise the single-letter alternatives swallow the prefix of an
+# unsupported longer word (``m`` from ``month``, ``h`` from ``half``) and the
+# hint is silently mis-scaled. Digits still follow units in compound hints
+# (``6m0s``), so the boundary forbids letters only, not digits or whitespace.
+_UNIT_BOUNDARY = r"(?![A-Za-z])"
+
+# Seconds-per-unit for every literal ``_UNIT_ALTERNATION`` can capture.
+_UNIT_SECONDS: dict[str, float] = {
+    "ms": 0.001,
+    "millisecond": 0.001,
+    "milliseconds": 0.001,
+    "h": 3600.0,
+    "hr": 3600.0,
+    "hrs": 3600.0,
+    "hour": 3600.0,
+    "hours": 3600.0,
+    "m": 60.0,
+    "min": 60.0,
+    "mins": 60.0,
+    "minute": 60.0,
+    "minutes": 60.0,
+    "s": 1.0,
+    "sec": 1.0,
+    "secs": 1.0,
+    "second": 1.0,
+    "seconds": 1.0,
+}
+
+# Capture the contiguous run of ``<number><unit>`` components that immediately
+# follows "try again in" so compound hints such as ``6m0s`` or ``1h2m3s`` are
+# read in full instead of stopping at the first unit.
+_RETRY_PATTERN = re.compile(rf"(?i)try again in\s*((?:\d+(?:\.\d+)?\s*(?:{_UNIT_ALTERNATION}){_UNIT_BOUNDARY}\s*)+)")
+_DURATION_TOKEN = re.compile(rf"(?i)(\d+(?:\.\d+)?)\s*({_UNIT_ALTERNATION}){_UNIT_BOUNDARY}")
+
 _BACKOFF_INITIAL_DELAY_MS = 200
 _BACKOFF_FACTOR = 2.0
 _BACKOFF_JITTER_MIN = 0.9
@@ -14,11 +52,20 @@ def parse_retry_after(message: str) -> float | None:
     match = _RETRY_PATTERN.search(message or "")
     if not match:
         return None
-    value = float(match.group(1))
-    unit = match.group(2).lower()
-    if unit == "ms":
-        return value / 1000
-    return value
+    total = 0.0
+    matched = False
+    for value, unit in _DURATION_TOKEN.findall(match.group(1)):
+        multiplier = _UNIT_SECONDS.get(unit.lower())
+        if multiplier is None:
+            continue
+        if unit.lower() == "ms":
+            total += float(value) / 1000
+        else:
+            total += float(value) * multiplier
+        matched = True
+    if not matched:
+        return None
+    return total
 
 
 def backoff_seconds(attempt: int) -> float:

diff --git a/openspec/changes/honor-upstream-retry-after-hint-units/proposal.md b/openspec/changes/honor-upstream-retry-after-hint-units/proposal.md
@@ -0,0 +1,38 @@
+# Honor upstream Retry-After hint units in account cooldown
+
+## Problem
+
+When an upstream Codex/ChatGPT response rate-limits an account (HTTP 429), its
+message often carries a "try again in <duration>" hint. `parse_retry_after`
+only recognized second and millisecond units, so any minute, hour, or compound
+hint (for example `20m`, `6m0s`, `1h2m3s`) failed to match and returned `None`.
+`handle_rate_limit` then fell back to `backoff_seconds(state.error_count)`, a
+sub-second-to-few-second backoff, and set `cooldown_until` far earlier than the
+upstream asked. The balancer re-selected the still-rate-limited account almost
+immediately, re-sending traffic into the same 429 and amplifying the rate limit
+instead of waiting the cooldown out.
+
+## Solution
+
+Teach `parse_retry_after` to parse the full contiguous run of `<number><unit>`
+tokens after "try again in", summing hour, minute, second, and millisecond
+components (and their word forms). A longest-match-first unit alternation keeps
+`ms` distinct from `m` and `minutes` from `m`, and a unit directly followed by
+another letter (`month`) is rejected. When no token is recognized the function
+still returns `None`, so `handle_rate_limit` keeps its existing backoff
+fallback. The account then stays in cooldown for the duration the upstream
+actually requested.
+
+## Changes
+
+- Parse minute, hour, and compound `<num><unit>` retry hints in
+  `parse_retry_after`, in addition to seconds and milliseconds
+- Sum compound durations (for example `1h2m3s`) into a single seconds value
+- Preserve the `None` result for unparseable hints so the error-count backoff
+  fallback in `handle_rate_limit` is unchanged
+- Add unit-test coverage for minute, hour, compound, and word-form hints
+
+## Out of scope
+
+- Changing the `backoff_seconds` fallback schedule
+- Changing how `reset_at` is extracted or clamped
+- Changing the selector's user-visible "Try again in {N}s" hint ceiling
diff --git a/openspec/changes/honor-upstream-retry-after-hint-units/specs/account-routing/spec.md b/openspec/changes/honor-upstream-retry-after-hint-units/specs/account-routing/spec.md
@@ -0,0 +1,40 @@
+## ADDED Requirements
+
+### Requirement: Upstream rate-limit cooldown honors the Retry-After hint duration
+
+When an upstream rate-limit error carries a "try again in" hint, the account
+cooldown SHALL last for the full duration the hint expresses. The parser SHALL
+recognize hour, minute, second, and millisecond units, including their word
+forms, and SHALL sum compound hints such as `1h2m3s` into a single duration.
+A unit token SHALL be recognized only when it is not immediately followed by
+another letter, so an unsupported longer word whose prefix matches a unit (for
+example `month`, where `m` prefixes the word) is not mis-read as that shorter
+unit. When the hint contains no recognizable unit token, the system SHALL fall
+back to the error-count backoff schedule. A rate-limited account SHALL NOT be
+re-selected before its cooldown elapses.
+
+#### Scenario: Compound minute-and-second hint sets the full cooldown
+
+- **GIVEN** an upstream 429 whose message says "try again in 6m0s"
+- **WHEN** the balancer records the rate limit for the account
+- **THEN** the account cooldown lasts 360 seconds
+- **AND** the account is not re-selected until that cooldown elapses
+
+#### Scenario: Minutes-only hint is honored
+
+- **GIVEN** an upstream 429 whose message says "try again in 20m"
+- **WHEN** the balancer records the rate limit for the account
+- **THEN** the account cooldown lasts 1200 seconds
+
+#### Scenario: Unparseable hint falls back to backoff
+
+- **GIVEN** an upstream 429 whose message has no recognizable "try again in" duration
+- **WHEN** the balancer records the rate limit for the account
+- **THEN** the cooldown uses the error-count backoff schedule instead
+
+#### Scenario: Unsupported longer word is not mis-read as a shorter unit
+
+- **GIVEN** an upstream 429 whose message says "try again in 1 month"
+- **WHEN** the balancer records the rate limit for the account
+- **THEN** the `month` token is not read as a 1-minute hint
+- **AND** the cooldown uses the error-count backoff schedule instead
diff --git a/openspec/changes/honor-upstream-retry-after-hint-units/tasks.md b/openspec/changes/honor-upstream-retry-after-hint-units/tasks.md
@@ -0,0 +1,10 @@
+# Tasks
+
+- [x] Extend `parse_retry_after` to honor minute/hour and compound `<num><unit>` retry hints
+- [x] Keep the `None` fallback for unparseable hints so `handle_rate_limit` backoff is unchanged
+- [x] Add regression coverage for minute, hour, compound, and word-form hints in `tests/unit/test_retry.py`
+- [x] Require a non-letter boundary after each unit so an unsupported longer word (`month` -> `m`) is not mis-read as a shorter unit
+- [x] Add product-path regression coverage in `tests/unit/test_load_balancer.py` proving `handle_rate_limit` sets the cooldown from a word-unit hint and falls back to backoff for an unsupported longer word
+- [x] Document the account-routing cooldown requirement delta (proposal + ADDED requirement with GIVEN/WHEN/THEN scenarios, including the unit-boundary scenario)
+- [x] Run `uv run --frozen ruff check .` and `uv run --frozen ruff format --check .`
+- [x] Run `uv run --frozen pytest tests/unit/test_retry.py`
diff --git a/tests/unit/test_load_balancer.py b/tests/unit/test_load_balancer.py
@@ -843,6 +843,30 @@ def test_handle_rate_limit_uses_backoff_when_no_delay(monkeypatch):
     assert state.cooldown_until == pytest.approx(now + 0.2)
 
 
+def test_handle_rate_limit_cooldown_honors_word_unit_hint(monkeypatch):
+    now = 1_700_000_000.0
+    monkeypatch.setattr("app.core.balancer.logic.time.time", lambda: now)
+    state = AccountState("a", AccountStatus.ACTIVE, used_percent=5.0)
+    handle_rate_limit(state, {"message": "Try again in 2 minutes"})
+    assert state.cooldown_until is not None
+    assert state.cooldown_until - now == pytest.approx(120.0)
+
+
+def test_handle_rate_limit_cooldown_ignores_unsupported_longer_unit(monkeypatch):
+    # Regression for the externally failing product path: an unsupported word
+    # whose prefix is a real unit ("month" -> "m") must not be mis-read as a
+    # minute hint and persisted as a 60s cooldown. It has no usable hint, so the
+    # cooldown must fall back to backoff instead of a bogus parsed delay.
+    now = 1_700_000_000.0
+    monkeypatch.setattr("app.core.balancer.logic.time.time", lambda: now)
+    monkeypatch.setattr("app.core.balancer.logic.backoff_seconds", lambda _: 0.2)
+    state = AccountState("a", AccountStatus.ACTIVE, used_percent=5.0)
+    handle_rate_limit(state, {"message": "Try again in 1 month"})
+    # backoff (0.2), not a bogus 60s parsed from the "m" in "month".
+    assert state.cooldown_until is not None
+    assert state.cooldown_until - now == pytest.approx(0.2)
+
+
 def test_select_account_skips_cooldown_until_expired():
     now = 1_700_000_000.0
     states = [

diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py
@@ -17,3 +17,37 @@ def test_parse_retry_after_milliseconds():
 
 def test_parse_retry_after_missing():
     assert parse_retry_after("no retry info") is None
+
+
+def test_parse_retry_after_minutes():
+    assert parse_retry_after("Try again in 20m") == 1200.0
+
+
+def test_parse_retry_after_compound_minutes_seconds():
+    assert parse_retry_after("Please try again in 6m0s.") == 360.0
+    assert parse_retry_after("Try again in 1m30s") == 90.0
+
+
+def test_parse_retry_after_compound_hours():
+    assert parse_retry_after("Try again in 1h2m3s") == 3723.0
+
+
+def test_parse_retry_after_word_units():
+    assert parse_retry_after("Try again in 30 seconds") == 30.0
+    assert parse_retry_after("Try again in 2 minutes") == 120.0
+
+
+def test_parse_retry_after_rejects_unsupported_longer_unit():
+    # A supported unit literal must not match when it only prefixes a longer,
+    # unsupported word: "month" -> "m", "hippos" -> "h", "secondment" -> "sec".
+    assert parse_retry_after("Try again in 1 month") is None
+    assert parse_retry_after("Try again in 5 mo") is None
+    assert parse_retry_after("Try again in 2 hippos") is None
+    assert parse_retry_after("Try again in 3 secondment") is None
+
+
+def test_parse_retry_after_unit_boundary_keeps_digit_runs():
+    # The boundary forbids trailing letters only; digits still chain compound
+    # components, so "6m0s" and friends stay intact.
+    assert parse_retry_after("Try again in 6m0s") == 360.0
+    assert parse_retry_after("Try again in 1h2m3s") == 3723.0