Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,16 @@
"code",
"test"
]
},
{
"login": "KakatkarAkshay",
"name": "Akshay Kakatkar",
"avatar_url": "https://avatars.githubusercontent.com/u/49910222?v=4",
"profile": "https://github.com/KakatkarAkshay",
"contributions": [
"code",
"test"
]
}
],
"contributorsPerLine": 7,
Expand Down
38 changes: 36 additions & 2 deletions app/modules/proxy/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1716,7 +1716,7 @@ async def _build_models_response(api_key: ApiKeyData | None) -> Response:

if not models:
await _release_reservation(reservation)
return JSONResponse(content=ModelListResponse(data=[]).model_dump(mode="json"))
return JSONResponse(content=_dump_v1_models_response(ModelListResponse(data=[])))

items: list[ModelListItem] = []
for slug, model in models.items():
Expand Down Expand Up @@ -1745,7 +1745,19 @@ async def _build_models_response(api_key: ApiKeyData | None) -> Response:
)
)
await _release_reservation(reservation)
return JSONResponse(content=ModelListResponse(data=items).model_dump(mode="json"))
return JSONResponse(content=_dump_v1_models_response(ModelListResponse(data=items)))


def _dump_v1_models_response(response: ModelListResponse) -> dict[str, JsonValue]:
    """Dump a model list response to a JSON-ready dict without null speed-tier keys.

    The response is serialized with ``model_dump(mode="json")``. For every entry
    under ``"data"`` whose ``"metadata"`` is a dict, the keys
    ``additional_speed_tiers``, ``service_tiers`` and ``default_service_tier``
    are removed when their value is ``None``. All other keys, including other
    ``None``-valued ones, are left untouched.

    Args:
        response: The model list response to serialize.

    Returns:
        The dumped payload, pruned in place.
    """
    speed_tier_keys = ("additional_speed_tiers", "service_tiers", "default_service_tier")
    dumped = response.model_dump(mode="json")
    for entry in dumped["data"]:
        meta = entry.get("metadata")
        if isinstance(meta, dict):
            for name in speed_tier_keys:
                # Only strip keys that are present but unset; populated values stay.
                if name in meta and meta[name] is None:
                    del meta[name]
    return dumped


def _allowed_models_for_api_key(api_key: ApiKeyData | None) -> set[str] | None:
Expand Down Expand Up @@ -1896,9 +1908,31 @@ def _to_model_metadata(model: UpstreamModel) -> ModelMetadata:
supported_in_api=model.supported_in_api,
minimal_client_version=model.minimal_client_version,
priority=model.priority,
additional_speed_tiers=_raw_string_list(model.raw, "additional_speed_tiers"),
service_tiers=_raw_object_list(model.raw, "service_tiers"),
default_service_tier=_raw_optional_string(model.raw, "default_service_tier"),
)


def _raw_string_list(raw: Mapping[str, JsonValue], key: str) -> list[str] | None:
value = raw.get(key)
if not isinstance(value, list):
return None
return [item for item in value if isinstance(item, str)]


def _raw_object_list(raw: Mapping[str, JsonValue], key: str) -> list[dict[str, JsonValue]] | None:
value = raw.get(key)
if not isinstance(value, list):
return None
return [dict(cast(Mapping[str, JsonValue], item)) for item in value if isinstance(item, Mapping)]


def _raw_optional_string(raw: Mapping[str, JsonValue], key: str) -> str | None:
value = raw.get(key)
return value if isinstance(value, str) else None


@v1_router.post(
"/chat/completions",
response_model=ChatCompletionResult,
Expand Down
3 changes: 3 additions & 0 deletions app/modules/proxy/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ class ModelMetadata(BaseModel):
supported_in_api: bool = True
minimal_client_version: str | None = None
priority: int = 0
additional_speed_tiers: list[str] | None = None
service_tiers: list[dict[str, JsonValue]] | None = None
default_service_tier: str | None = None


class ModelListItem(BaseModel):
Expand Down
14 changes: 14 additions & 0 deletions openspec/changes/expose-v1-model-speed-tiers/proposal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## Why

OpenAI-compatible model discovery clients need to know when Codex models expose upstream speed tiers, such as GPT-5.5 Fast. The Codex-native `/backend-api/codex/models` endpoint already preserves these upstream fields, but `/v1/models` drops them from `metadata`.

## What Changes

- Preserve upstream speed-tier metadata on `/v1/models` metadata entries.
- Include `additional_speed_tiers`, `service_tiers`, and `default_service_tier` when upstream provides them.
- Keep existing model IDs and pricing/request behavior unchanged.

## Impact

- OpenAI-compatible clients can synthesize fast-mode model aliases from `/v1/models` metadata.
- No database migration or dashboard UI change.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## ADDED Requirements

### Requirement: OpenAI-compatible model metadata preserves speed tiers

When serving `GET /v1/models`, the system SHALL preserve upstream speed-tier metadata in each model's `metadata` object when upstream provides it. This includes `additional_speed_tiers`, `service_tiers`, and `default_service_tier`. The system MUST NOT invent speed tiers for models whose upstream catalog entry does not advertise them.

#### Scenario: /v1/models exposes upstream fast tier metadata

- **GIVEN** the upstream model catalog contains `gpt-5.5` with `additional_speed_tiers=["fast"]`
- **AND** the upstream model catalog includes a `service_tiers` entry with `id="priority"` and `name="Fast"`
- **WHEN** a client calls `GET /v1/models`
- **THEN** the `gpt-5.5` entry's metadata includes `additional_speed_tiers=["fast"]`
- **AND** the metadata includes the upstream `service_tiers` entry
- **AND** the metadata includes the upstream `default_service_tier` when present
5 changes: 5 additions & 0 deletions openspec/changes/expose-v1-model-speed-tiers/tasks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## Tasks

- [x] Add `/v1/models` metadata fields for upstream speed tiers.
- [x] Map the fields from the raw upstream payload kept in the refreshed model registry.
- [x] Add integration coverage for speed-tier metadata on `/v1/models`.
53 changes: 53 additions & 0 deletions tests/integration/test_v1_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,59 @@ async def test_v1_models_reports_backend_context_window(async_client):
assert codex_by_slug["gpt-5.5"]["max_context_window"] == 272_000


@pytest.mark.asyncio
async def test_v1_models_exposes_speed_tier_metadata(async_client):
    """Speed-tier fields in the upstream raw payload appear in ``/v1/models`` metadata."""
    expected_tier = {
        "id": "priority",
        "name": "Fast",
        "description": "1.5x speed, increased usage",
    }
    upstream_raw = {
        "additional_speed_tiers": ["fast"],
        "default_service_tier": "priority",
        # Hand the registry its own copy so the expectation below stays independent.
        "service_tiers": [dict(expected_tier)],
    }
    upstream_models = [_make_upstream_model("gpt-5.5", raw=upstream_raw)]
    await get_model_registry().update({"pro": upstream_models})

    response = await async_client.get("/v1/models")
    assert response.status_code == 200

    listed = response.json()["data"]
    model_entry = next(item for item in listed if item["id"] == "gpt-5.5")
    speed_metadata = model_entry["metadata"]

    assert speed_metadata["additional_speed_tiers"] == ["fast"]
    assert speed_metadata["default_service_tier"] == "priority"
    assert speed_metadata["service_tiers"] == [expected_tier]


@pytest.mark.asyncio
async def test_v1_models_omits_speed_tier_metadata_when_upstream_omits_it(async_client):
    """Without upstream speed-tier fields, the metadata must not contain those keys at all."""
    await get_model_registry().update({"pro": [_make_upstream_model("gpt-5.5")]})

    response = await async_client.get("/v1/models")
    assert response.status_code == 200

    listed = response.json()["data"]
    model_entry = next(item for item in listed if item["id"] == "gpt-5.5")
    speed_metadata = model_entry["metadata"]

    # The keys must be absent, not merely null.
    for absent_key in ("additional_speed_tiers", "default_service_tier", "service_tiers"):
        assert absent_key not in speed_metadata


@pytest.mark.asyncio
async def test_v1_models_does_not_promote_raw_max_context_window(async_client):
registry = get_model_registry()
Expand Down