diff --git a/.all-contributorsrc b/.all-contributorsrc index 21c67e2ee..2a34d1fe8 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -622,6 +622,16 @@ "code", "test" ] + }, + { + "login": "KakatkarAkshay", + "name": "Akshay Kakatkar", + "avatar_url": "https://avatars.githubusercontent.com/u/49910222?v=4", + "profile": "https://github.com/KakatkarAkshay", + "contributions": [ + "code", + "test" + ] } ], "contributorsPerLine": 7, diff --git a/app/modules/proxy/api.py b/app/modules/proxy/api.py index c6cef928e..da1de56af 100644 --- a/app/modules/proxy/api.py +++ b/app/modules/proxy/api.py @@ -1716,7 +1716,7 @@ async def _build_models_response(api_key: ApiKeyData | None) -> Response: if not models: await _release_reservation(reservation) - return JSONResponse(content=ModelListResponse(data=[]).model_dump(mode="json")) + return JSONResponse(content=_dump_v1_models_response(ModelListResponse(data=[]))) items: list[ModelListItem] = [] for slug, model in models.items(): @@ -1745,7 +1745,19 @@ async def _build_models_response(api_key: ApiKeyData | None) -> Response: ) ) await _release_reservation(reservation) - return JSONResponse(content=ModelListResponse(data=items).model_dump(mode="json")) + return JSONResponse(content=_dump_v1_models_response(ModelListResponse(data=items))) + + +def _dump_v1_models_response(response: ModelListResponse) -> dict[str, JsonValue]: + payload = response.model_dump(mode="json") + for item in payload["data"]: + metadata = item.get("metadata") + if not isinstance(metadata, dict): + continue + for key in ("additional_speed_tiers", "service_tiers", "default_service_tier"): + if metadata.get(key) is None: + metadata.pop(key, None) + return payload def _allowed_models_for_api_key(api_key: ApiKeyData | None) -> set[str] | None: @@ -1896,9 +1908,31 @@ def _to_model_metadata(model: UpstreamModel) -> ModelMetadata: supported_in_api=model.supported_in_api, minimal_client_version=model.minimal_client_version, priority=model.priority, + 
additional_speed_tiers=_raw_string_list(model.raw, "additional_speed_tiers"), + service_tiers=_raw_object_list(model.raw, "service_tiers"), + default_service_tier=_raw_optional_string(model.raw, "default_service_tier"), ) +def _raw_string_list(raw: Mapping[str, JsonValue], key: str) -> list[str] | None: + value = raw.get(key) + if not isinstance(value, list): + return None + return [item for item in value if isinstance(item, str)] + + +def _raw_object_list(raw: Mapping[str, JsonValue], key: str) -> list[dict[str, JsonValue]] | None: + value = raw.get(key) + if not isinstance(value, list): + return None + return [dict(cast(Mapping[str, JsonValue], item)) for item in value if isinstance(item, Mapping)] + + +def _raw_optional_string(raw: Mapping[str, JsonValue], key: str) -> str | None: + value = raw.get(key) + return value if isinstance(value, str) else None + + @v1_router.post( "/chat/completions", response_model=ChatCompletionResult, diff --git a/app/modules/proxy/schemas.py b/app/modules/proxy/schemas.py index 0877b78ad..6c9444a37 100644 --- a/app/modules/proxy/schemas.py +++ b/app/modules/proxy/schemas.py @@ -178,6 +178,9 @@ class ModelMetadata(BaseModel): supported_in_api: bool = True minimal_client_version: str | None = None priority: int = 0 + additional_speed_tiers: list[str] | None = None + service_tiers: list[dict[str, JsonValue]] | None = None + default_service_tier: str | None = None class ModelListItem(BaseModel): diff --git a/openspec/changes/expose-v1-model-speed-tiers/proposal.md b/openspec/changes/expose-v1-model-speed-tiers/proposal.md new file mode 100644 index 000000000..20fb978f9 --- /dev/null +++ b/openspec/changes/expose-v1-model-speed-tiers/proposal.md @@ -0,0 +1,14 @@ +## Why + +OpenAI-compatible model discovery clients need to know when Codex models expose upstream speed tiers, such as GPT-5.5 Fast. The Codex-native `/backend-api/codex/models` endpoint already preserves these upstream fields, but `/v1/models` drops them from `metadata`. 
+ +## What Changes + +- Preserve upstream speed-tier metadata on `/v1/models` metadata entries. +- Include `additional_speed_tiers`, `service_tiers`, and `default_service_tier` when upstream provides them. +- Keep existing model IDs and pricing/request behavior unchanged. + +## Impact + +- OpenAI-compatible clients can synthesize fast-mode model aliases from `/v1/models` metadata. +- No database migration or dashboard UI change. diff --git a/openspec/changes/expose-v1-model-speed-tiers/specs/model-catalog-compat/spec.md b/openspec/changes/expose-v1-model-speed-tiers/specs/model-catalog-compat/spec.md new file mode 100644 index 000000000..888bfaee9 --- /dev/null +++ b/openspec/changes/expose-v1-model-speed-tiers/specs/model-catalog-compat/spec.md @@ -0,0 +1,14 @@ +## ADDED Requirements + +### Requirement: OpenAI-compatible model metadata preserves speed tiers + +When serving `GET /v1/models`, the system SHALL preserve upstream speed-tier metadata in each model's `metadata` object when upstream provides it. This includes `additional_speed_tiers`, `service_tiers`, and `default_service_tier`. The system MUST NOT invent speed tiers for models whose upstream catalog entry does not advertise them. 
+ +#### Scenario: /v1/models exposes upstream fast tier metadata + +- **GIVEN** the upstream model catalog contains `gpt-5.5` with `additional_speed_tiers=["fast"]` +- **AND** the upstream model catalog includes a `service_tiers` entry with `id="priority"` and `name="Fast"` +- **WHEN** a client calls `GET /v1/models` +- **THEN** the `gpt-5.5` entry's metadata includes `additional_speed_tiers=["fast"]` +- **AND** the metadata includes the upstream `service_tiers` entry +- **AND** the metadata includes the upstream `default_service_tier` when present diff --git a/openspec/changes/expose-v1-model-speed-tiers/tasks.md b/openspec/changes/expose-v1-model-speed-tiers/tasks.md new file mode 100644 index 000000000..0d1d55e46 --- /dev/null +++ b/openspec/changes/expose-v1-model-speed-tiers/tasks.md @@ -0,0 +1,5 @@ +## Tasks + +- [x] Add `/v1/models` metadata fields for upstream speed tiers. +- [x] Map fields from the refreshed model registry raw upstream payload. +- [x] Add integration coverage for speed-tier metadata on `/v1/models`. 
diff --git a/tests/integration/test_v1_models.py b/tests/integration/test_v1_models.py index 868bbeb34..10c158fd7 100644 --- a/tests/integration/test_v1_models.py +++ b/tests/integration/test_v1_models.py @@ -672,6 +672,59 @@ async def test_v1_models_reports_backend_context_window(async_client): assert codex_by_slug["gpt-5.5"]["max_context_window"] == 272_000 +@pytest.mark.asyncio +async def test_v1_models_exposes_speed_tier_metadata(async_client): + registry = get_model_registry() + models = [ + _make_upstream_model( + "gpt-5.5", + raw={ + "additional_speed_tiers": ["fast"], + "default_service_tier": "priority", + "service_tiers": [ + { + "id": "priority", + "name": "Fast", + "description": "1.5x speed, increased usage", + } + ], + }, + ) + ] + await registry.update({"pro": models}) + + resp = await async_client.get("/v1/models") + assert resp.status_code == 200 + entry = next(item for item in resp.json()["data"] if item["id"] == "gpt-5.5") + metadata = entry["metadata"] + + assert metadata["additional_speed_tiers"] == ["fast"] + assert metadata["default_service_tier"] == "priority" + assert metadata["service_tiers"] == [ + { + "id": "priority", + "name": "Fast", + "description": "1.5x speed, increased usage", + } + ] + + +@pytest.mark.asyncio +async def test_v1_models_omits_speed_tier_metadata_when_upstream_omits_it(async_client): + registry = get_model_registry() + models = [_make_upstream_model("gpt-5.5")] + await registry.update({"pro": models}) + + resp = await async_client.get("/v1/models") + assert resp.status_code == 200 + entry = next(item for item in resp.json()["data"] if item["id"] == "gpt-5.5") + metadata = entry["metadata"] + + assert "additional_speed_tiers" not in metadata + assert "default_service_tier" not in metadata + assert "service_tiers" not in metadata + + @pytest.mark.asyncio async def test_v1_models_does_not_promote_raw_max_context_window(async_client): registry = get_model_registry()