From 08765fd45b562230dc61720e9f2264c20c54b13d Mon Sep 17 00:00:00 2001 From: Akshay Kakatkar Date: Tue, 2 Jun 2026 13:53:47 +0530 Subject: [PATCH 1/4] feat(models): expose speed tiers in v1 metadata --- app/modules/proxy/api.py | 22 +++++++++++ app/modules/proxy/schemas.py | 3 ++ .../expose-v1-model-speed-tiers/proposal.md | 14 +++++++ .../specs/model-catalog-compat/spec.md | 14 +++++++ .../expose-v1-model-speed-tiers/tasks.md | 5 +++ tests/integration/test_v1_models.py | 37 +++++++++++++++++++ 6 files changed, 95 insertions(+) create mode 100644 openspec/changes/expose-v1-model-speed-tiers/proposal.md create mode 100644 openspec/changes/expose-v1-model-speed-tiers/specs/model-catalog-compat/spec.md create mode 100644 openspec/changes/expose-v1-model-speed-tiers/tasks.md diff --git a/app/modules/proxy/api.py b/app/modules/proxy/api.py index c6cef928e..1ba797e87 100644 --- a/app/modules/proxy/api.py +++ b/app/modules/proxy/api.py @@ -1896,9 +1896,31 @@ def _to_model_metadata(model: UpstreamModel) -> ModelMetadata: supported_in_api=model.supported_in_api, minimal_client_version=model.minimal_client_version, priority=model.priority, + additional_speed_tiers=_raw_string_list(model.raw, "additional_speed_tiers"), + service_tiers=_raw_object_list(model.raw, "service_tiers"), + default_service_tier=_raw_optional_string(model.raw, "default_service_tier"), ) +def _raw_string_list(raw: Mapping[str, JsonValue], key: str) -> list[str]: + value = raw.get(key) + if not isinstance(value, list): + return [] + return [item for item in value if isinstance(item, str)] + + +def _raw_object_list(raw: Mapping[str, JsonValue], key: str) -> list[dict[str, JsonValue]]: + value = raw.get(key) + if not isinstance(value, list): + return [] + return [dict(item) for item in value if isinstance(item, Mapping)] + + +def _raw_optional_string(raw: Mapping[str, JsonValue], key: str) -> str | None: + value = raw.get(key) + return value if isinstance(value, str) else None + + @v1_router.post( 
"/chat/completions", response_model=ChatCompletionResult, diff --git a/app/modules/proxy/schemas.py b/app/modules/proxy/schemas.py index 0877b78ad..9246bc9f1 100644 --- a/app/modules/proxy/schemas.py +++ b/app/modules/proxy/schemas.py @@ -178,6 +178,9 @@ class ModelMetadata(BaseModel): supported_in_api: bool = True minimal_client_version: str | None = None priority: int = 0 + additional_speed_tiers: list[str] = [] + service_tiers: list[dict[str, JsonValue]] = [] + default_service_tier: str | None = None class ModelListItem(BaseModel): diff --git a/openspec/changes/expose-v1-model-speed-tiers/proposal.md b/openspec/changes/expose-v1-model-speed-tiers/proposal.md new file mode 100644 index 000000000..20fb978f9 --- /dev/null +++ b/openspec/changes/expose-v1-model-speed-tiers/proposal.md @@ -0,0 +1,14 @@ +## Why + +OpenAI-compatible model discovery clients need to know when Codex models expose upstream speed tiers, such as GPT-5.5 Fast. The Codex-native `/backend-api/codex/models` endpoint already preserves these upstream fields, but `/v1/models` drops them from `metadata`. + +## What Changes + +- Preserve upstream speed-tier metadata on `/v1/models` metadata entries. +- Include `additional_speed_tiers`, `service_tiers`, and `default_service_tier` when upstream provides them. +- Keep existing model IDs and pricing/request behavior unchanged. + +## Impact + +- OpenAI-compatible clients can synthesize fast-mode model aliases from `/v1/models` metadata. +- No database migration or dashboard UI change. 
diff --git a/openspec/changes/expose-v1-model-speed-tiers/specs/model-catalog-compat/spec.md b/openspec/changes/expose-v1-model-speed-tiers/specs/model-catalog-compat/spec.md new file mode 100644 index 000000000..888bfaee9 --- /dev/null +++ b/openspec/changes/expose-v1-model-speed-tiers/specs/model-catalog-compat/spec.md @@ -0,0 +1,14 @@ +## ADDED Requirements + +### Requirement: OpenAI-compatible model metadata preserves speed tiers + +When serving `GET /v1/models`, the system SHALL preserve upstream speed-tier metadata in each model's `metadata` object when upstream provides it. This includes `additional_speed_tiers`, `service_tiers`, and `default_service_tier`. The system MUST NOT invent speed tiers for models whose upstream catalog entry does not advertise them. + +#### Scenario: /v1/models exposes upstream fast tier metadata + +- **GIVEN** the upstream model catalog contains `gpt-5.5` with `additional_speed_tiers=["fast"]` +- **AND** the upstream model catalog includes a `service_tiers` entry with `id="priority"` and `name="Fast"` +- **WHEN** a client calls `GET /v1/models` +- **THEN** the `gpt-5.5` entry's metadata includes `additional_speed_tiers=["fast"]` +- **AND** the metadata includes the upstream `service_tiers` entry +- **AND** the metadata includes the upstream `default_service_tier` when present diff --git a/openspec/changes/expose-v1-model-speed-tiers/tasks.md b/openspec/changes/expose-v1-model-speed-tiers/tasks.md new file mode 100644 index 000000000..0d1d55e46 --- /dev/null +++ b/openspec/changes/expose-v1-model-speed-tiers/tasks.md @@ -0,0 +1,5 @@ +## Tasks + +- [x] Add `/v1/models` metadata fields for upstream speed tiers. +- [x] Map fields from the refreshed model registry raw upstream payload. +- [x] Add integration coverage for speed-tier metadata on `/v1/models`. 
diff --git a/tests/integration/test_v1_models.py b/tests/integration/test_v1_models.py index 868bbeb34..4d2f0a149 100644 --- a/tests/integration/test_v1_models.py +++ b/tests/integration/test_v1_models.py @@ -672,6 +672,43 @@ async def test_v1_models_reports_backend_context_window(async_client): assert codex_by_slug["gpt-5.5"]["max_context_window"] == 272_000 +@pytest.mark.asyncio +async def test_v1_models_exposes_speed_tier_metadata(async_client): + registry = get_model_registry() + models = [ + _make_upstream_model( + "gpt-5.5", + raw={ + "additional_speed_tiers": ["fast"], + "default_service_tier": "priority", + "service_tiers": [ + { + "id": "priority", + "name": "Fast", + "description": "1.5x speed, increased usage", + } + ], + }, + ) + ] + await registry.update({"pro": models}) + + resp = await async_client.get("/v1/models") + assert resp.status_code == 200 + entry = next(item for item in resp.json()["data"] if item["id"] == "gpt-5.5") + metadata = entry["metadata"] + + assert metadata["additional_speed_tiers"] == ["fast"] + assert metadata["default_service_tier"] == "priority" + assert metadata["service_tiers"] == [ + { + "id": "priority", + "name": "Fast", + "description": "1.5x speed, increased usage", + } + ] + + @pytest.mark.asyncio async def test_v1_models_does_not_promote_raw_max_context_window(async_client): registry = get_model_registry() From ab66ebc3103d78a66f482a1dcc91af90376c7474 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Wed, 3 Jun 2026 00:13:02 +0400 Subject: [PATCH 2/4] fix(models): type service tier metadata --- app/modules/proxy/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/proxy/api.py b/app/modules/proxy/api.py index 1ba797e87..3212528a8 100644 --- a/app/modules/proxy/api.py +++ b/app/modules/proxy/api.py @@ -1913,7 +1913,7 @@ def _raw_object_list(raw: Mapping[str, JsonValue], key: str) -> list[dict[str, J value = raw.get(key) if not isinstance(value, list): return [] - return 
[dict(item) for item in value if isinstance(item, Mapping)] + return [dict(cast(Mapping[str, JsonValue], item)) for item in value if isinstance(item, Mapping)] def _raw_optional_string(raw: Mapping[str, JsonValue], key: str) -> str | None: From 6ee5ecd8f6cee16ed3f24cd0c2b103c239d3cb62 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Wed, 3 Jun 2026 00:13:02 +0400 Subject: [PATCH 3/4] docs: add @KakatkarAkshay as contributor --- .all-contributorsrc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.all-contributorsrc b/.all-contributorsrc index 21c67e2ee..2a34d1fe8 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -622,6 +622,16 @@ "code", "test" ] + }, + { + "login": "KakatkarAkshay", + "name": "Akshay Kakatkar", + "avatar_url": "https://avatars.githubusercontent.com/u/49910222?v=4", + "profile": "https://github.com/KakatkarAkshay", + "contributions": [ + "code", + "test" + ] } ], "contributorsPerLine": 7, From 4a227a7a3bb51fcbe3b3a6dc9f6764587fdb4155 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Wed, 3 Jun 2026 00:39:14 +0400 Subject: [PATCH 4/4] fix(models): omit absent speed tier metadata --- app/modules/proxy/api.py | 24 ++++++++++++++++++------ app/modules/proxy/schemas.py | 4 ++-- tests/integration/test_v1_models.py | 16 ++++++++++++++++ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/app/modules/proxy/api.py b/app/modules/proxy/api.py index 3212528a8..da1de56af 100644 --- a/app/modules/proxy/api.py +++ b/app/modules/proxy/api.py @@ -1716,7 +1716,7 @@ async def _build_models_response(api_key: ApiKeyData | None) -> Response: if not models: await _release_reservation(reservation) - return JSONResponse(content=ModelListResponse(data=[]).model_dump(mode="json")) + return JSONResponse(content=_dump_v1_models_response(ModelListResponse(data=[]))) items: list[ModelListItem] = [] for slug, model in models.items(): @@ -1745,7 +1745,19 @@ async def _build_models_response(api_key: ApiKeyData | None) -> 
Response: ) ) await _release_reservation(reservation) - return JSONResponse(content=ModelListResponse(data=items).model_dump(mode="json")) + return JSONResponse(content=_dump_v1_models_response(ModelListResponse(data=items))) + + +def _dump_v1_models_response(response: ModelListResponse) -> dict[str, JsonValue]: + payload = response.model_dump(mode="json") + for item in payload["data"]: + metadata = item.get("metadata") + if not isinstance(metadata, dict): + continue + for key in ("additional_speed_tiers", "service_tiers", "default_service_tier"): + if metadata.get(key) is None: + metadata.pop(key, None) + return payload def _allowed_models_for_api_key(api_key: ApiKeyData | None) -> set[str] | None: @@ -1902,17 +1914,17 @@ def _to_model_metadata(model: UpstreamModel) -> ModelMetadata: ) -def _raw_string_list(raw: Mapping[str, JsonValue], key: str) -> list[str]: +def _raw_string_list(raw: Mapping[str, JsonValue], key: str) -> list[str] | None: value = raw.get(key) if not isinstance(value, list): - return [] + return None return [item for item in value if isinstance(item, str)] -def _raw_object_list(raw: Mapping[str, JsonValue], key: str) -> list[dict[str, JsonValue]]: +def _raw_object_list(raw: Mapping[str, JsonValue], key: str) -> list[dict[str, JsonValue]] | None: value = raw.get(key) if not isinstance(value, list): - return [] + return None return [dict(cast(Mapping[str, JsonValue], item)) for item in value if isinstance(item, Mapping)] diff --git a/app/modules/proxy/schemas.py b/app/modules/proxy/schemas.py index 9246bc9f1..6c9444a37 100644 --- a/app/modules/proxy/schemas.py +++ b/app/modules/proxy/schemas.py @@ -178,8 +178,8 @@ class ModelMetadata(BaseModel): supported_in_api: bool = True minimal_client_version: str | None = None priority: int = 0 - additional_speed_tiers: list[str] = [] - service_tiers: list[dict[str, JsonValue]] = [] + additional_speed_tiers: list[str] | None = None + service_tiers: list[dict[str, JsonValue]] | None = None 
default_service_tier: str | None = None diff --git a/tests/integration/test_v1_models.py b/tests/integration/test_v1_models.py index 4d2f0a149..10c158fd7 100644 --- a/tests/integration/test_v1_models.py +++ b/tests/integration/test_v1_models.py @@ -709,6 +709,22 @@ async def test_v1_models_exposes_speed_tier_metadata(async_client): ] +@pytest.mark.asyncio +async def test_v1_models_omits_speed_tier_metadata_when_upstream_omits_it(async_client): + registry = get_model_registry() + models = [_make_upstream_model("gpt-5.5")] + await registry.update({"pro": models}) + + resp = await async_client.get("/v1/models") + assert resp.status_code == 200 + entry = next(item for item in resp.json()["data"] if item["id"] == "gpt-5.5") + metadata = entry["metadata"] + + assert "additional_speed_tiers" not in metadata + assert "default_service_tier" not in metadata + assert "service_tiers" not in metadata + + @pytest.mark.asyncio async def test_v1_models_does_not_promote_raw_max_context_window(async_client): registry = get_model_registry()