diff --git a/app/db/alembic/versions/20260601_010000_add_api_key_usage_sections.py b/app/db/alembic/versions/20260601_010000_add_api_key_usage_sections.py
new file mode 100644
index 000000000..aae59206b
--- /dev/null
+++ b/app/db/alembic/versions/20260601_010000_add_api_key_usage_sections.py
@@ -0,0 +1,62 @@
+"""add api key usage sections
+
+Revision ID: 20260601_010000_add_api_key_usage_sections
+Revises: 20260602_060000_merge_account_workspace_and_failure_heads
+Create Date: 2026-06-01 01:00:00.000000
+"""
+
+from __future__ import annotations
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.engine import Connection
+
+revision = "20260601_010000_add_api_key_usage_sections"
+down_revision = "20260602_060000_merge_account_workspace_and_failure_heads"
+branch_labels = None
+depends_on = None
+
+
+def _table_exists(connection: Connection, table_name: str) -> bool:
+    """Return True when ``table_name`` exists on ``connection``."""
+    return sa.inspect(connection).has_table(table_name)
+
+
+def _columns(connection: Connection, table_name: str) -> set[str]:
+    """Return the column names of ``table_name``, or an empty set if the table is missing."""
+    inspector = sa.inspect(connection)
+    if not inspector.has_table(table_name):
+        return set()
+    return {str(column["name"]) for column in inspector.get_columns(table_name) if column.get("name") is not None}
+
+
+def upgrade() -> None:
+    """Add ``api_keys.usage_sections``; a no-op when the table is missing or the column exists."""
+    bind = op.get_bind()
+    if not _table_exists(bind, "api_keys"):
+        return
+    # Decide before opening the batch so an already-migrated table is never touched.
+    if "usage_sections" in _columns(bind, "api_keys"):
+        return
+
+    with op.batch_alter_table("api_keys") as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "usage_sections",
+                sa.Text(),
+                nullable=False,
+                server_default="upstream_limits,account_pool_usage",
+            )
+        )
+
+
+def downgrade() -> None:
+    """Drop ``api_keys.usage_sections``; a no-op when the table or the column is missing."""
+    bind = op.get_bind()
+    if not _table_exists(bind, "api_keys"):
+        return
+    if "usage_sections" not in _columns(bind, "api_keys"):
+        return
+
+    with op.batch_alter_table("api_keys") as batch_op:
+        batch_op.drop_column("usage_sections")
diff --git
a/app/db/alembic/versions/20260608_000000_add_hide_upstream_quota_from_api_keys.py b/app/db/alembic/versions/20260608_000000_add_hide_upstream_quota_from_api_keys.py
new file mode 100644
index 000000000..b3688f25d
--- /dev/null
+++ b/app/db/alembic/versions/20260608_000000_add_hide_upstream_quota_from_api_keys.py
@@ -0,0 +1,56 @@
+"""add hide upstream quota from api keys setting
+
+Revision ID: 20260608_000000_add_hide_upstream_quota_from_api_keys
+Revises: 20260607_000000_merge_weekly_monthly_useragent_heads
+Create Date: 2026-06-08 00:00:00.000000
+"""
+
+from __future__ import annotations
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.engine import Connection
+
+revision = "20260608_000000_add_hide_upstream_quota_from_api_keys"
+down_revision = "20260607_000000_merge_weekly_monthly_useragent_heads"
+branch_labels = None
+depends_on = None
+
+
+def _has_table(connection: Connection, table_name: str) -> bool:
+    """Return True when ``table_name`` exists on ``connection``."""
+    return sa.inspect(connection).has_table(table_name)
+
+
+def _columns(connection: Connection, table_name: str) -> set[str]:
+    """Return the column names of ``table_name``, or an empty set if the table is missing."""
+    inspector = sa.inspect(connection)  # one inspector serves both the existence check and the listing
+    if not inspector.has_table(table_name):
+        return set()
+    return {str(column["name"]) for column in inspector.get_columns(table_name) if column.get("name") is not None}
+
+
+def upgrade() -> None:
+    """Add ``dashboard_settings.hide_upstream_quota_from_api_keys`` when the table exists and lacks it."""
+    bind = op.get_bind()
+    dashboard_columns = _columns(bind, "dashboard_settings")
+    # _columns returns an empty set for a missing table, so the truthiness test doubles as the table check.
+    if dashboard_columns and "hide_upstream_quota_from_api_keys" not in dashboard_columns:
+        with op.batch_alter_table("dashboard_settings") as batch_op:
+            batch_op.add_column(
+                sa.Column(
+                    "hide_upstream_quota_from_api_keys",
+                    sa.Boolean(),
+                    server_default=sa.false(),
+                    nullable=False,
+                )
+            )
+
+
+def downgrade() -> None:
+    """Drop ``dashboard_settings.hide_upstream_quota_from_api_keys`` when the column is present."""
+    bind = op.get_bind()
+    dashboard_columns = _columns(bind, "dashboard_settings")
+    if dashboard_columns and "hide_upstream_quota_from_api_keys" in dashboard_columns:
+        with op.batch_alter_table("dashboard_settings") as batch_op:
+            batch_op.drop_column("hide_upstream_quota_from_api_keys")
diff --git
a/app/db/alembic/versions/20260608_010000_merge_api_key_usage_and_quota_visibility_heads.py b/app/db/alembic/versions/20260608_010000_merge_api_key_usage_and_quota_visibility_heads.py
new file mode 100644
index 000000000..8d6aed351
--- /dev/null
+++ b/app/db/alembic/versions/20260608_010000_merge_api_key_usage_and_quota_visibility_heads.py
@@ -0,0 +1,25 @@
+"""merge API key usage sections and quota visibility heads
+
+Revision ID: 20260608_010000_merge_api_key_usage_and_quota_visibility_heads
+Revises: 20260601_010000_add_api_key_usage_sections,
+    20260608_000000_add_hide_upstream_quota_from_api_keys
+Create Date: 2026-06-08 01:00:00.000000
+"""
+
+from __future__ import annotations
+
+revision = "20260608_010000_merge_api_key_usage_and_quota_visibility_heads"
+down_revision = (
+    "20260601_010000_add_api_key_usage_sections",
+    "20260608_000000_add_hide_upstream_quota_from_api_keys",
+)
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """No schema change: this revision only joins two migration heads."""
+
+
+def downgrade() -> None:
+    """No schema change: this revision only joins two migration heads."""
diff --git a/app/db/alembic/versions/20260616_000000_merge_api_key_quota_and_dashboard_guest_heads.py b/app/db/alembic/versions/20260616_000000_merge_api_key_quota_and_dashboard_guest_heads.py
new file mode 100644
index 000000000..4e4aafb55
--- /dev/null
+++ b/app/db/alembic/versions/20260616_000000_merge_api_key_quota_and_dashboard_guest_heads.py
@@ -0,0 +1,26 @@
+"""merge API key quota visibility and dashboard guest heads
+
+Revision ID: 20260616_000000_merge_api_key_quota_and_dashboard_guest_heads
+Revises:
+- 20260608_010000_merge_api_key_usage_and_quota_visibility_heads
+- 20260611_000000_merge_dashboard_guest_and_weekly_useragent_heads
+Create Date: 2026-06-16 00:00:00.000000
+"""
+
+from __future__ import annotations
+
+revision = "20260616_000000_merge_api_key_quota_and_dashboard_guest_heads"
+down_revision = (
+    "20260608_010000_merge_api_key_usage_and_quota_visibility_heads",
+    "20260611_000000_merge_dashboard_guest_and_weekly_useragent_heads",
+)
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """No schema change: this revision only joins two migration heads."""
+
+
+def downgrade() -> None:
+    """No schema change: this revision only joins two migration heads."""
diff --git a/app/db/models.py b/app/db/models.py
index 0eef688c8..318bc524b 100644
--- a/app/db/models.py
+++ b/app/db/models.py
@@ -478,6 +478,12 @@ class DashboardSettings(Base):
         default=False,
         nullable=False,
     )
+    hide_upstream_quota_from_api_keys: Mapped[bool] = mapped_column(
+        Boolean,
+        default=False,
+        server_default=false(),
+        nullable=False,
+    )
     totp_secret_encrypted: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
     totp_last_verified_step: Mapped[int | None] = mapped_column(Integer, nullable=True)
     http_responses_session_bridge_prompt_cache_idle_ttl_seconds: Mapped[int] = mapped_column(
@@ -625,6 +631,13 @@ class ApiKey(Base):
         server_default=false(),
         nullable=False,
     )
+    # NOT NULL with Python and server defaults, so the attribute is typed as plain ``str``.
+    usage_sections: Mapped[str] = mapped_column(
+        Text,
+        nullable=False,
+        default="upstream_limits,account_pool_usage",
+        server_default="upstream_limits,account_pool_usage",
+    )
     expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
     is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
     created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), nullable=False)
diff --git a/app/modules/api_keys/api.py b/app/modules/api_keys/api.py
index 92a729a98..cf8ca97fa 100644
--- a/app/modules/api_keys/api.py
+++ b/app/modules/api_keys/api.py
@@ -48,6 +48,7 @@ def _to_response(row: ApiKeyData) -> ApiKeyResponse:
         enforced_reasoning_effort=row.enforced_reasoning_effort,
         enforced_service_tier=row.enforced_service_tier,
         traffic_class=row.traffic_class,
+        usage_sections=row.usage_sections,
         expires_at=row.expires_at,
         is_active=row.is_active,
         account_assignment_scope_enabled=row.account_assignment_scope_enabled,
@@ -133,6 +134,11 @@ async def create_api_key(
             enforced_reasoning_effort=payload.enforced_reasoning_effort,
             enforced_service_tier=payload.enforced_service_tier,
             traffic_class=payload.traffic_class or "foreground",
+
usage_sections=(
+                payload.usage_sections
+                if payload.usage_sections is not None
+                else "upstream_limits,account_pool_usage"
+            ),
             expires_at=payload.expires_at,
             assigned_account_ids=payload.assigned_account_ids,
             limits=limit_inputs,
@@ -188,6 +194,8 @@ async def update_api_key(
             enforced_service_tier_set="enforced_service_tier" in fields,
             traffic_class=payload.traffic_class,
             traffic_class_set="traffic_class" in fields,
+            usage_sections=payload.usage_sections,
+            usage_sections_set="usage_sections" in fields,
             expires_at=payload.expires_at,
             expires_at_set="expires_at" in fields,
             is_active=payload.is_active,
diff --git a/app/modules/api_keys/repository.py b/app/modules/api_keys/repository.py
index 0c3fc0807..10ce906d0 100644
--- a/app/modules/api_keys/repository.py
+++ b/app/modules/api_keys/repository.py
@@ -5,7 +5,7 @@
 from datetime import datetime
 from enum import Enum
 
-from sqlalchemy import Integer, cast, delete, func, select, true, update
+from sqlalchemy import BigInteger, Integer, cast, delete, func, select, true, update
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import load_only, selectinload
 
@@ -256,6 +256,56 @@ async def get_usage_summary_by_key_id(self, key_id: str) -> ApiKeyUsageSummary:
             total_cost_usd=round(float(row.total_cost_usd or 0.0), 6),
         )
 
+    async def get_limit_usage_value(
+        self,
+        key_id: str,
+        *,
+        limit_type: LimitType,
+        since: datetime,
+        until: datetime,
+        model_filter: str | None,
+    ) -> int:
+        """Sum one usage metric over this key's request logs in ``[since, until)``.
+
+        Only rows with ``status == "success"`` that pass ``_exclude_warmup_clause()``
+        are counted, optionally restricted to ``model_filter``. Token limit types sum
+        token columns; ``COST_USD`` sums per-row floored micro-USD. ``CREDITS`` and
+        unrecognised limit types return 0.
+        """
+        if limit_type == LimitType.CREDITS:
+            return 0
+
+        # COALESCE order: output_tokens falls back to reasoning_tokens when NULL, then to 0.
+        if limit_type == LimitType.TOTAL_TOKENS:
+            value_expr = func.coalesce(RequestLog.input_tokens, 0) + func.coalesce(
+                RequestLog.output_tokens,
+                RequestLog.reasoning_tokens,
+                0,
+            )
+        elif limit_type == LimitType.INPUT_TOKENS:
+            value_expr = func.coalesce(RequestLog.input_tokens, 0)
+        elif limit_type == LimitType.OUTPUT_TOKENS:
+            value_expr = func.coalesce(RequestLog.output_tokens, RequestLog.reasoning_tokens, 0)
+        elif limit_type == LimitType.COST_USD:
+            # cost_usd is scaled to integer micro-USD per row before summing.
+            value_expr = cast(func.floor(func.coalesce(RequestLog.cost_usd, 0.0) * 1_000_000), BigInteger)
+        else:
+            return 0
+
+        stmt = select(func.coalesce(func.sum(value_expr), 0)).where(
+            RequestLog.api_key_id == key_id,
+            RequestLog.status == "success",
+            self._exclude_warmup_clause(),
+            RequestLog.requested_at >= since,
+            RequestLog.requested_at < until,
+        )
+        if model_filter is not None:
+            stmt = stmt.where(RequestLog.model == model_filter)
+
+        result = await self._session.execute(stmt)
+        value = result.scalar_one()
+        return int(value or 0)
+
     async def update(
         self,
         key_id: str,
@@ -267,6 +317,7 @@ async def update(
         enforced_reasoning_effort: str | None | _Unset = _UNSET,
         enforced_service_tier: str | None | _Unset = _UNSET,
         traffic_class: str | _Unset = _UNSET,
+        usage_sections: str | _Unset = _UNSET,
         account_assignment_scope_enabled: bool | _Unset = _UNSET,
         expires_at: datetime | None | _Unset = _UNSET,
         is_active: bool | _Unset = _UNSET,
@@ -298,6 +349,9 @@ async def update(
         if traffic_class is not _UNSET:
             assert isinstance(traffic_class, str)
             row.traffic_class = traffic_class
+        if usage_sections is not _UNSET:
+            assert isinstance(usage_sections, str)
+            row.usage_sections = usage_sections
         if account_assignment_scope_enabled is not _UNSET:
             assert isinstance(account_assignment_scope_enabled, bool)
             row.account_assignment_scope_enabled = account_assignment_scope_enabled
diff --git a/app/modules/api_keys/schemas.py b/app/modules/api_keys/schemas.py
index 03c4a694c..57c1a44b0 100644
--- a/app/modules/api_keys/schemas.py
+++ b/app/modules/api_keys/schemas.py
@@ -32,6 +32,7 @@ class ApiKeyCreateRequest(DashboardModel):
     enforced_reasoning_effort: str | None = Field(default=None, pattern=r"(?i)^(none|minimal|low|medium|high|xhigh)$")
     enforced_service_tier: str | None = Field(default=None, pattern=r"(?i)^(auto|default|priority|flex|fast)$")
     traffic_class: str | None = Field(default=None, pattern=r"(?i)^(foreground|opportunistic)$")
+    usage_sections: str | None = None
     weekly_token_limit: int
| None = Field(default=None, ge=1) expires_at: datetime | None = None assigned_account_ids: list[str] | None = None @@ -46,6 +47,7 @@ class ApiKeyUpdateRequest(DashboardModel): enforced_reasoning_effort: str | None = Field(default=None, pattern=r"(?i)^(none|minimal|low|medium|high|xhigh)$") enforced_service_tier: str | None = Field(default=None, pattern=r"(?i)^(auto|default|priority|flex|fast)$") traffic_class: str | None = Field(default=None, pattern=r"(?i)^(foreground|opportunistic)$") + usage_sections: str | None = None weekly_token_limit: int | None = Field(default=None, ge=1) expires_at: datetime | None = None is_active: bool | None = None @@ -71,6 +73,7 @@ class ApiKeyResponse(DashboardModel): enforced_reasoning_effort: str | None enforced_service_tier: str | None traffic_class: str + usage_sections: str = "upstream_limits,account_pool_usage" expires_at: datetime | None is_active: bool account_assignment_scope_enabled: bool = False diff --git a/app/modules/api_keys/service.py b/app/modules/api_keys/service.py index 14d6bf216..6f3a833c9 100644 --- a/app/modules/api_keys/service.py +++ b/app/modules/api_keys/service.py @@ -23,7 +23,7 @@ from app.core.utils.time import to_utc_naive, utcnow from app.db.models import Account, AccountStatus, ApiKey, ApiKeyLimit, LimitType, LimitWindow, UsageHistory from app.db.session import sqlite_writer_section -from app.modules.api_keys.limit_windows import advance_limit_reset, next_limit_reset +from app.modules.api_keys.limit_windows import advance_limit_reset, limit_window_delta, next_limit_reset from app.modules.api_keys.repository import ( _UNSET, ApiKeyTrendBucket, @@ -60,6 +60,15 @@ async def get_by_hash(self, key_hash: str) -> ApiKey | None: ... async def list_all(self) -> list[ApiKey]: ... async def list_usage_summary_by_key(self) -> dict[str, ApiKeyUsageSummary]: ... async def get_usage_summary_by_key_id(self, key_id: str) -> ApiKeyUsageSummary: ... 
+ async def get_limit_usage_value( + self, + key_id: str, + *, + limit_type: LimitType, + since: datetime, + until: datetime, + model_filter: str | None, + ) -> int: ... async def list_accounts_by_ids(self, account_ids: list[str]) -> list[Account]: ... async def list_all_accounts(self) -> list[Account]: ... @@ -74,6 +83,7 @@ async def update( enforced_reasoning_effort: str | None | _Unset = ..., enforced_service_tier: str | None | _Unset = ..., traffic_class: str | _Unset = ..., + usage_sections: str | _Unset = ..., account_assignment_scope_enabled: bool | _Unset = ..., expires_at: datetime | None | _Unset = ..., is_active: bool | _Unset = ..., @@ -257,6 +267,7 @@ class ApiKeyCreateData: enforced_reasoning_effort: str | None = None enforced_service_tier: str | None = None traffic_class: str = TRAFFIC_CLASS_FOREGROUND + usage_sections: str = "upstream_limits,account_pool_usage" expires_at: datetime | None = None assigned_account_ids: list[str] | None = None limits: list[LimitRuleInput] = field(default_factory=list) @@ -278,6 +289,8 @@ class ApiKeyUpdateData: enforced_service_tier_set: bool = False traffic_class: str | None = None traffic_class_set: bool = False + usage_sections: str | None = None + usage_sections_set: bool = False expires_at: datetime | None = None expires_at_set: bool = False is_active: bool | None = None @@ -304,6 +317,7 @@ class ApiKeyData: last_used_at: datetime | None apply_to_codex_model: bool = False traffic_class: str = TRAFFIC_CLASS_FOREGROUND + usage_sections: str = "upstream_limits,account_pool_usage" limits: list[LimitRuleData] = field(default_factory=list) usage_summary: "ApiKeyUsageSummaryData | None" = None account_assignment_scope_enabled: bool = False @@ -337,10 +351,14 @@ def _compute_pooled_credits( all_accounts: list[Account], primary_usage: dict[str, UsageHistory], secondary_usage: dict[str, UsageHistory], + account_assignment_scope_enabled: bool = False, ) -> PooledCreditData: import app.core.usage as usage_core from 
app.modules.usage.mappers import usage_history_to_window_row + if account_assignment_scope_enabled and not assigned_account_ids: + return PooledCreditData() + if assigned_account_ids: requested_account_ids = set(assigned_account_ids) else: @@ -419,6 +437,7 @@ async def create_key(self, payload: ApiKeyCreateData) -> ApiKeyCreatedData: enforced_reasoning_effort = _normalize_reasoning_effort(payload.enforced_reasoning_effort) enforced_service_tier = _normalize_service_tier(payload.enforced_service_tier) traffic_class = _normalize_traffic_class(payload.traffic_class) + usage_sections = _normalize_usage_sections(payload.usage_sections) _validate_model_enforcement(enforced_model=enforced_model, allowed_models=normalized_allowed_models) row = ApiKey( id=str(__import__("uuid").uuid4()), @@ -432,6 +451,7 @@ async def create_key(self, payload: ApiKeyCreateData) -> ApiKeyCreatedData: enforced_service_tier=enforced_service_tier, account_assignment_scope_enabled=bool(assigned_account_ids), traffic_class=traffic_class, + usage_sections=usage_sections, expires_at=expires_at, is_active=True, created_at=now, @@ -467,25 +487,35 @@ async def list_keys(self) -> list[ApiKeyData]: assigned_ids_by_key = { row.id: [a.account_id for a in getattr(row, "account_assignments", [])] for row in rows } - needs_all_accounts = any(not assigned_ids for assigned_ids in assigned_ids_by_key.values()) + needs_all_accounts = any( + not assigned_ids_by_key[row.id] and not row.account_assignment_scope_enabled for row in rows + ) if needs_all_accounts: all_accounts = await self._repository.list_all_accounts() primary_usage = await self._usage_repository.latest_by_account("primary") secondary_usage = await self._usage_repository.latest_by_account("secondary") else: all_account_ids = sorted({account_id for ids in assigned_ids_by_key.values() for account_id in ids}) - all_accounts = await self._repository.list_accounts_by_ids(all_account_ids) - primary_usage = await 
self._usage_repository.latest_by_account("primary", account_ids=all_account_ids) - secondary_usage = await self._usage_repository.latest_by_account( - "secondary", - account_ids=all_account_ids, - ) + if all_account_ids: + all_accounts = await self._repository.list_accounts_by_ids(all_account_ids) + primary_usage = await self._usage_repository.latest_by_account( + "primary", account_ids=all_account_ids + ) + secondary_usage = await self._usage_repository.latest_by_account( + "secondary", + account_ids=all_account_ids, + ) + else: + all_accounts = [] + primary_usage = {} + secondary_usage = {} for row in rows: pooled_by_key[row.id] = _compute_pooled_credits( assigned_account_ids=assigned_ids_by_key[row.id], all_accounts=all_accounts, primary_usage=primary_usage, secondary_usage=secondary_usage, + account_assignment_scope_enabled=row.account_assignment_scope_enabled, ) return [ @@ -541,6 +571,9 @@ async def update_key(self, key_id: str, payload: ApiKeyUpdateData) -> ApiKeyData traffic_class_update: str | _Unset = _UNSET if payload.traffic_class_set: traffic_class_update = _normalize_traffic_class(payload.traffic_class) + usage_sections: str | _Unset = _UNSET + if payload.usage_sections_set: + usage_sections = _normalize_usage_sections(payload.usage_sections) if payload.allowed_models_set or payload.enforced_model_set: effective_allowed_models = ( @@ -559,12 +592,13 @@ async def update_key(self, key_id: str, payload: ApiKeyUpdateData) -> ApiKeyData now = utcnow() existing_limits = await self._repository.get_limits_by_key(key_id) submitted_limits = payload.limits or [] - limit_rows = _build_limit_rows_for_update( + limit_rows = await _build_limit_rows_for_update( key_id=key_id, now=now, submitted_limits=submitted_limits, existing_limits=existing_limits, reset_usage=payload.reset_usage, + repository=self._repository, ) elif payload.reset_usage: now = utcnow() @@ -583,6 +617,7 @@ async def update_key(self, key_id: str, payload: ApiKeyUpdateData) -> ApiKeyData ), 
enforced_service_tier=(enforced_service_tier if payload.enforced_service_tier_set else _UNSET), traffic_class=traffic_class_update, + usage_sections=usage_sections, account_assignment_scope_enabled=account_assignment_scope_enabled, expires_at=expires_at if payload.expires_at_set else _UNSET, is_active=(payload.is_active if payload.is_active_set and payload.is_active is not None else _UNSET), @@ -613,6 +648,7 @@ async def update_key(self, key_id: str, payload: ApiKeyUpdateData) -> ApiKeyData or payload.enforced_reasoning_effort_set or payload.enforced_service_tier_set or payload.traffic_class_set + or payload.usage_sections_set or payload.expires_at_set or payload.is_active_set ): @@ -1139,6 +1175,29 @@ def _normalize_name(name: str) -> str: return normalized +_VALID_USAGE_SECTIONS = {"upstream_limits", "account_pool_usage"} +_DEFAULT_USAGE_SECTIONS = "upstream_limits,account_pool_usage" + + +def _normalize_usage_sections(raw: str | None) -> str: + if raw is None: + return _DEFAULT_USAGE_SECTIONS + if not raw.strip(): + return "" + sections = [s.strip() for s in raw.split(",") if s.strip()] + invalid = [s for s in sections if s not in _VALID_USAGE_SECTIONS] + if invalid: + raise ApiKeyValidationError(f"Invalid usage sections: {', '.join(invalid)}") + return ",".join(sections) + + +def _get_usage_sections_with_default(row: ApiKey) -> str: + value = getattr(row, "usage_sections", None) + if value is None: + return _DEFAULT_USAGE_SECTIONS + return value + + def _generate_plain_key() -> str: return f"sk-clb-{secrets.token_urlsafe(32)}" @@ -1469,6 +1528,7 @@ def _to_created_data(data: ApiKeyData, key: str) -> ApiKeyCreatedData: enforced_reasoning_effort=data.enforced_reasoning_effort, enforced_service_tier=data.enforced_service_tier, traffic_class=data.traffic_class, + usage_sections=data.usage_sections, expires_at=data.expires_at, is_active=data.is_active, created_at=data.created_at, @@ -1499,6 +1559,7 @@ def _to_api_key_data( 
enforced_reasoning_effort=_normalize_reasoning_effort_lenient(row.enforced_reasoning_effort), enforced_service_tier=_normalize_service_tier_lenient(row.enforced_service_tier), traffic_class=_normalize_traffic_class_lenient(getattr(row, "traffic_class", TRAFFIC_CLASS_FOREGROUND)), + usage_sections=_get_usage_sections_with_default(row), expires_at=row.expires_at, is_active=row.is_active, created_at=row.created_at, @@ -1544,13 +1605,14 @@ def _limit_input_to_row( ) -def _build_limit_rows_for_update( +async def _build_limit_rows_for_update( *, key_id: str, now: datetime, submitted_limits: list[LimitRuleInput], existing_limits: list[ApiKeyLimit], reset_usage: bool, + repository: ApiKeysRepositoryProtocol | None = None, ) -> list[ApiKeyLimit]: existing_by_key = {_limit_identity_from_row(limit): limit for limit in existing_limits} submitted_by_key = {_limit_identity_from_input(limit): limit for limit in submitted_limits} @@ -1561,9 +1623,14 @@ def _build_limit_rows_for_update( for submitted in submitted_limits: identity = _limit_identity_from_input(submitted) matched = existing_by_key.get(identity) - if matched is None or reset_usage: + if reset_usage: rows.append(_limit_input_to_row(submitted, key_id, now)) continue + if matched is None: + if repository is None: + raise TypeError("repository is required to backfill new API key limit usage") + rows.append(await _new_limit_input_to_backfilled_row(submitted, key_id, now, repository)) + continue rows.append( _limit_input_to_row( submitted, @@ -1576,6 +1643,32 @@ def _build_limit_rows_for_update( return rows +async def _new_limit_input_to_backfilled_row( + submitted: LimitRuleInput, + key_id: str, + now: datetime, + repository: ApiKeysRepositoryProtocol, +) -> ApiKeyLimit: + limit_type = LimitType(submitted.limit_type) + window = LimitWindow(submitted.limit_window) + reset_at = next_limit_reset(now, window) + since = now - limit_window_delta(window) + current_value = await repository.get_limit_usage_value( + key_id, + 
limit_type=limit_type, + since=since, + until=now, + model_filter=submitted.model_filter, + ) + return _limit_input_to_row( + submitted, + key_id, + now, + current_value=current_value, + reset_at=reset_at, + ) + + def _build_reset_limit_rows( *, key_id: str, diff --git a/app/modules/proxy/api.py b/app/modules/proxy/api.py index eea5e1f0c..4911a152f 100644 --- a/app/modules/proxy/api.py +++ b/app/modules/proxy/api.py @@ -100,6 +100,7 @@ ApiKeySelfLimitData, ApiKeysService, ApiKeyUsageReservationData, + _compute_pooled_credits, ) from app.modules.firewall.repository import FirewallRepository from app.modules.firewall.service import FirewallRepositoryPort, FirewallService @@ -121,6 +122,7 @@ validate_model_access, ) from app.modules.proxy.schemas import ( + AccountPoolUsageResponse, CodexModelEntry, CodexModelsResponse, FileCreateRequest, @@ -692,11 +694,22 @@ async def v1_models( @v1_router.get("/usage", response_model=V1UsageResponse) async def v1_usage( api_key: ApiKeyData = Security(validate_usage_api_key), -) -> V1UsageResponse: +) -> V1UsageResponse | JSONResponse: + usage_sections = _parse_usage_sections(api_key.usage_sections) async with get_background_session() as session: - service = ApiKeysService(ApiKeysRepository(session)) + service = ApiKeysService(ApiKeysRepository(session), usage_repository=UsageRepository(session)) usage = await service.get_key_usage_summary_for_self(api_key.id) - aggregate_limits = await _build_aggregate_credit_limits(session) + aggregate_limits = await _build_aggregate_credit_limits(session) if "upstream_limits" in usage_sections else {} + hide_upstream_limits = await _hide_upstream_quota_for_api_key_clients(api_key) + account_pool_usage = ( + await _build_account_pool_usage( + session, + assigned_account_ids=api_key.assigned_account_ids, + account_assignment_scope_enabled=api_key.account_assignment_scope_enabled, + ) + if "account_pool_usage" in usage_sections and not hide_upstream_limits + else None + ) if usage is None: raise 
ProxyAuthError("Invalid API key") @@ -707,7 +720,8 @@ async def v1_usage( cached_input_tokens=usage.cached_input_tokens, total_cost_usd=usage.total_cost_usd, limits=[_to_v1_usage_limit_response(limit) for limit in usage.limits], - upstream_limits=_ordered_aggregate_limits(aggregate_limits), + upstream_limits=[] if hide_upstream_limits else _ordered_aggregate_limits(aggregate_limits), + account_pool_usage=account_pool_usage, ) @@ -806,6 +820,45 @@ def _ordered_aggregate_limits(aggregate_limits: dict[str, V1UsageLimitResponse]) return [limit for window in ("5h", "7d", "monthly") if (limit := aggregate_limits.get(window)) is not None] +def _parse_usage_sections(raw: str) -> set[str]: + if not raw or not raw.strip(): + return set() + return {s.strip() for s in raw.split(",") if s.strip()} + + +async def _build_account_pool_usage( + session: AsyncSession, + *, + assigned_account_ids: list[str], + account_assignment_scope_enabled: bool, +) -> AccountPoolUsageResponse | None: + from app.modules.api_keys.repository import ApiKeysRepository + + repo = ApiKeysRepository(session) + usage_repo = UsageRepository(session) + if account_assignment_scope_enabled: + all_accounts = await repo.list_accounts_by_ids(assigned_account_ids) + usage_account_ids: list[str] | None = assigned_account_ids + else: + all_accounts = await repo.list_all_accounts() + usage_account_ids = None + + primary_usage = await usage_repo.latest_by_account("primary", account_ids=usage_account_ids) + secondary_usage = await usage_repo.latest_by_account("secondary", account_ids=usage_account_ids) + + data = _compute_pooled_credits( + assigned_account_ids=assigned_account_ids, + all_accounts=all_accounts, + primary_usage=primary_usage, + secondary_usage=secondary_usage, + account_assignment_scope_enabled=account_assignment_scope_enabled, + ) + return AccountPoolUsageResponse( + primary=data.remaining_percent_primary, + secondary=data.remaining_percent_secondary, + ) + + def _to_v1_usage_limit_response(limit: 
ApiKeySelfLimitData) -> V1UsageLimitResponse:
     current_value = max(0, min(limit.current_value, limit.max_value))
     return V1UsageLimitResponse(
@@ -846,6 +899,24 @@ async def _build_codex_usage_payload_for_api_key(api_key: ApiKeyData) -> RateLim
     )
 
 
+async def _hide_upstream_quota_for_api_key_clients(api_key: ApiKeyData | None) -> bool:
+    """Return True when ``api_key`` is set and cached settings enable ``hide_upstream_quota_from_api_keys``."""
+    if api_key is None:
+        return False
+    settings = await get_settings_cache().get()
+    return bool(getattr(settings, "hide_upstream_quota_from_api_keys", False))
+
+
+async def _rate_limit_headers_for_request(
+    context: ProxyContext,
+    api_key: ApiKeyData | None,
+) -> dict[str, str]:
+    """Return the service's rate-limit headers, or an empty dict when they are hidden from this caller."""
+    if await _hide_upstream_quota_for_api_key_clients(api_key):
+        return {}
+    return await context.service.rate_limit_headers()
+
+
 def _select_codex_usage_limit(
     limits: list[V1UsageLimitResponse],
     window: str,
@@ -1356,7 +1427,7 @@ async def _proxy_images_generation_request(
         # Re-raise so the global handler maps to 403.
         raise
 
-    rate_limit_headers = await context.service.rate_limit_headers()
+    rate_limit_headers = await _rate_limit_headers_for_request(context, api_key)
     reservation = await _enforce_request_limits(
         api_key,
         request_model=effective_model,
@@ -1552,7 +1623,7 @@ async def _proxy_images_edit_request(
 
     validate_model_access(api_key, effective_model)
 
-    rate_limit_headers = await context.service.rate_limit_headers()
+    rate_limit_headers = await _rate_limit_headers_for_request(context, api_key)
     reservation = await _enforce_request_limits(
         api_key,
         request_model=effective_model,
@@ -1952,7 +2023,7 @@ async def v1_chat_completions(
     effective_model = _effective_model_for_api_key(api_key, payload.model)
     validate_model_access(api_key, effective_model)
 
-    rate_limit_headers = await context.service.rate_limit_headers()
+    rate_limit_headers = await _rate_limit_headers_for_request(context, api_key)
     try:
         responses_shaped_payload = not payload.messages and payload.input is not None
         if not responses_shaped_payload:
@@ -2110,7 +2181,7 @@ async def _stream_responses(
) ) - rate_limit_headers = await context.service.rate_limit_headers() if include_rate_limit_headers else {} + rate_limit_headers = await _rate_limit_headers_for_request(context, api_key) if include_rate_limit_headers else {} bridge_active = prefer_http_bridge and proxy_service_module.get_settings().http_responses_session_bridge_enabled effective_headers = forwarded_headers or request.headers downstream_turn_state = ( @@ -2306,7 +2375,7 @@ async def _collect_responses( request_usage_budget=estimate_api_key_request_usage(payload), ) - rate_limit_headers = await context.service.rate_limit_headers() + rate_limit_headers = await _rate_limit_headers_for_request(context, api_key) bridge_active = prefer_http_bridge and proxy_service_module.get_settings().http_responses_session_bridge_enabled downstream_turn_state = ( proxy_affinity_module.ensure_http_downstream_turn_state(request.headers) if bridge_active else None @@ -2438,7 +2507,7 @@ async def _compact_responses( request_usage_budget=estimate_api_key_request_usage(payload), ) - rate_limit_headers = await context.service.rate_limit_headers() + rate_limit_headers = await _rate_limit_headers_for_request(context, api_key) try: result = await context.service.compact_responses( payload, @@ -2575,7 +2644,7 @@ async def _transcribe_request( request_model=_TRANSCRIPTION_MODEL, request_service_tier=None, ) - rate_limit_headers = await context.service.rate_limit_headers() + rate_limit_headers = await _rate_limit_headers_for_request(context, api_key) try: audio_bytes = await file.read() result = await context.service.transcribe( diff --git a/app/modules/proxy/schemas.py b/app/modules/proxy/schemas.py index bf48118a0..53b39c3c6 100644 --- a/app/modules/proxy/schemas.py +++ b/app/modules/proxy/schemas.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Optional + from pydantic import BaseModel, ConfigDict, Field from app.core.clients.files import OPENAI_FILE_UPLOAD_LIMIT_BYTES, OPENAI_FILE_USE_CASE @@ -216,6 
+218,13 @@ class V1UsageLimitResponse(BaseModel): source: str = "api_key_limit" +class AccountPoolUsageResponse(BaseModel): + model_config = ConfigDict(extra="forbid") + + primary: float | None = None + secondary: float | None = None + + class V1UsageResponse(BaseModel): model_config = ConfigDict(extra="forbid") @@ -225,6 +234,7 @@ class V1UsageResponse(BaseModel): total_cost_usd: float limits: list[V1UsageLimitResponse] upstream_limits: list[V1UsageLimitResponse] = [] + account_pool_usage: Optional[AccountPoolUsageResponse] = None class WarmupRequest(BaseModel): diff --git a/app/modules/settings/api.py b/app/modules/settings/api.py index b06797638..c13269d56 100644 --- a/app/modules/settings/api.py +++ b/app/modules/settings/api.py @@ -129,6 +129,7 @@ def _dashboard_settings_response(settings) -> DashboardSettingsResponse: totp_required_on_login=settings.totp_required_on_login, totp_configured=settings.totp_configured, api_key_auth_enabled=settings.api_key_auth_enabled, + hide_upstream_quota_from_api_keys=settings.hide_upstream_quota_from_api_keys, limit_warmup_enabled=settings.limit_warmup_enabled, limit_warmup_windows=settings.limit_warmup_windows, limit_warmup_model=settings.limit_warmup_model, @@ -534,6 +535,11 @@ async def update_settings( if payload.api_key_auth_enabled is not None else current.api_key_auth_enabled ), + hide_upstream_quota_from_api_keys=( + payload.hide_upstream_quota_from_api_keys + if payload.hide_upstream_quota_from_api_keys is not None + else current.hide_upstream_quota_from_api_keys + ), limit_warmup_enabled=( payload.limit_warmup_enabled if payload.limit_warmup_enabled is not None @@ -593,6 +599,7 @@ async def update_settings( "import_without_overwrite", "totp_required_on_login", "api_key_auth_enabled", + "hide_upstream_quota_from_api_keys", "limit_warmup_enabled", "limit_warmup_windows", "limit_warmup_model", diff --git a/app/modules/settings/repository.py b/app/modules/settings/repository.py index c16da3d40..88792af3e 100644 --- 
a/app/modules/settings/repository.py +++ b/app/modules/settings/repository.py @@ -41,6 +41,7 @@ async def get_or_create(self) -> DashboardSettings: bootstrap_token_encrypted=None, bootstrap_token_hash=None, api_key_auth_enabled=False, + hide_upstream_quota_from_api_keys=False, totp_secret_encrypted=None, totp_last_verified_step=None, sticky_reallocation_primary_budget_threshold_pct=95.0, @@ -91,6 +92,7 @@ async def update( import_without_overwrite: bool | None = None, totp_required_on_login: bool | None = None, api_key_auth_enabled: bool | None = None, + hide_upstream_quota_from_api_keys: bool | None = None, limit_warmup_enabled: bool | None = None, limit_warmup_windows: str | None = None, limit_warmup_model: str | None = None, @@ -148,6 +150,8 @@ async def update( settings.totp_required_on_login = totp_required_on_login if api_key_auth_enabled is not None: settings.api_key_auth_enabled = api_key_auth_enabled + if hide_upstream_quota_from_api_keys is not None: + settings.hide_upstream_quota_from_api_keys = hide_upstream_quota_from_api_keys if limit_warmup_enabled is not None: settings.limit_warmup_enabled = limit_warmup_enabled if limit_warmup_windows is not None: diff --git a/app/modules/settings/schemas.py b/app/modules/settings/schemas.py index 2452d560e..f7a1c6409 100644 --- a/app/modules/settings/schemas.py +++ b/app/modules/settings/schemas.py @@ -54,6 +54,7 @@ class DashboardSettingsResponse(DashboardModel): totp_required_on_login: bool totp_configured: bool api_key_auth_enabled: bool + hide_upstream_quota_from_api_keys: bool limit_warmup_enabled: bool limit_warmup_windows: str = Field(pattern=r"^(primary|secondary|both)$") limit_warmup_model: str = Field(min_length=1, max_length=128) @@ -96,6 +97,7 @@ class DashboardSettingsUpdateRequest(DashboardModel): import_without_overwrite: bool | None = None totp_required_on_login: bool | None = None api_key_auth_enabled: bool | None = None + hide_upstream_quota_from_api_keys: bool | None = None limit_warmup_enabled: 
bool | None = None limit_warmup_windows: str | None = Field(default=None, pattern=r"^(primary|secondary|both)$") limit_warmup_model: str | None = Field(default=None, min_length=1, max_length=128) diff --git a/app/modules/settings/service.py b/app/modules/settings/service.py index f2a5b10bb..20d100dc9 100644 --- a/app/modules/settings/service.py +++ b/app/modules/settings/service.py @@ -35,6 +35,7 @@ class DashboardSettingsData: totp_required_on_login: bool totp_configured: bool api_key_auth_enabled: bool + hide_upstream_quota_from_api_keys: bool limit_warmup_enabled: bool limit_warmup_windows: str limit_warmup_model: str @@ -70,6 +71,7 @@ class DashboardSettingsUpdateData: import_without_overwrite: bool totp_required_on_login: bool api_key_auth_enabled: bool + hide_upstream_quota_from_api_keys: bool limit_warmup_enabled: bool limit_warmup_windows: str limit_warmup_model: str @@ -114,6 +116,7 @@ async def get_settings(self) -> DashboardSettingsData: totp_required_on_login=row.totp_required_on_login, totp_configured=row.totp_secret_encrypted is not None, api_key_auth_enabled=row.api_key_auth_enabled, + hide_upstream_quota_from_api_keys=row.hide_upstream_quota_from_api_keys, limit_warmup_enabled=row.limit_warmup_enabled, limit_warmup_windows=row.limit_warmup_windows, limit_warmup_model=row.limit_warmup_model, @@ -156,6 +159,7 @@ async def update_settings(self, payload: DashboardSettingsUpdateData) -> Dashboa import_without_overwrite=payload.import_without_overwrite, totp_required_on_login=payload.totp_required_on_login, api_key_auth_enabled=payload.api_key_auth_enabled, + hide_upstream_quota_from_api_keys=payload.hide_upstream_quota_from_api_keys, limit_warmup_enabled=payload.limit_warmup_enabled, limit_warmup_windows=payload.limit_warmup_windows, limit_warmup_model=payload.limit_warmup_model, @@ -193,6 +197,7 @@ async def update_settings(self, payload: DashboardSettingsUpdateData) -> Dashboa totp_required_on_login=row.totp_required_on_login, 
totp_configured=row.totp_secret_encrypted is not None, api_key_auth_enabled=row.api_key_auth_enabled, + hide_upstream_quota_from_api_keys=row.hide_upstream_quota_from_api_keys, limit_warmup_enabled=row.limit_warmup_enabled, limit_warmup_windows=row.limit_warmup_windows, limit_warmup_model=row.limit_warmup_model, diff --git a/frontend/src/features/api-keys/components/api-key-create-dialog.tsx b/frontend/src/features/api-keys/components/api-key-create-dialog.tsx index 403065999..98414c247 100644 --- a/frontend/src/features/api-keys/components/api-key-create-dialog.tsx +++ b/frontend/src/features/api-keys/components/api-key-create-dialog.tsx @@ -27,6 +27,7 @@ import { ExpiryPicker } from "@/features/api-keys/components/expiry-picker"; import { LimitRulesEditor } from "@/features/api-keys/components/limit-rules-editor"; import { ModelMultiSelect } from "@/features/api-keys/components/model-multi-select"; import type { ApiKeyCreateRequest, LimitRuleCreate, ServiceTierType, TrafficClass } from "@/features/api-keys/schemas"; +import { UsageSectionsMultiSelect } from "@/features/api-keys/components/usage-sections-multi-select"; const formSchema = z.object({ name: z.string().min(1, "Name is required"), @@ -50,6 +51,7 @@ type ApiKeyCreateFormProps = { type ApiKeyCreateDraft = { selectedModels: string[]; selectedAccountIds: string[]; + usageSections: string; limitRules: LimitRuleCreate[]; expiresAt: Date | null; enforcedModel: string; @@ -62,6 +64,7 @@ type ApiKeyCreateDraft = { const initialApiKeyCreateDraft: ApiKeyCreateDraft = { selectedModels: [], selectedAccountIds: [], + usageSections: "upstream_limits,account_pool_usage", limitRules: [], expiresAt: null, enforcedModel: "", @@ -93,6 +96,7 @@ function ApiKeyCreateForm({ busy, onClose, onSubmit }: ApiKeyCreateFormProps) { allowedModels: draft.selectedModels.length > 0 ? draft.selectedModels : undefined, applyToCodexModel: draft.applyToCodexModel, ...(draft.selectedAccountIds.length > 0 ? 
{ assignedAccountIds: draft.selectedAccountIds } : {}), + usageSections: draft.usageSections, enforcedModel: draft.enforcedModel.trim() ? draft.enforcedModel.trim() : null, enforcedReasoningEffort: draft.enforcedReasoningEffort === "none" @@ -155,6 +159,11 @@ function ApiKeyCreateForm({ busy, onClose, onSubmit }: ApiKeyCreateFormProps) { updateDraft({ selectedAccountIds })} /> +
+ + updateDraft({ usageSections })} /> +
+
updateDraft({ selectedAccountIds })} />
+
+
Usage sections shown to client
+ updateDraft({ usageSections })} /> +
+
Enforced model
void; +}; + +export function ApiKeyQuotaPrivacyToggle({ + enabled, + disabled = false, + onChange, +}: ApiKeyQuotaPrivacyToggleProps) { + return ( +
+
+

Hide upstream quota

+

API-key clients only see the key's own quota and usage.

+
+ +
+ ); +} diff --git a/frontend/src/features/api-keys/components/api-keys-section.tsx b/frontend/src/features/api-keys/components/api-keys-section.tsx index 63d33db92..42d88cb6e 100644 --- a/frontend/src/features/api-keys/components/api-keys-section.tsx +++ b/frontend/src/features/api-keys/components/api-keys-section.tsx @@ -6,6 +6,7 @@ import { AlertMessage } from "@/components/alert-message"; import { Button } from "@/components/ui/button"; import { useDialogState } from "@/hooks/use-dialog-state"; import { ApiKeyAuthToggle } from "@/features/api-keys/components/api-key-auth-toggle"; +import { ApiKeyQuotaPrivacyToggle } from "@/features/api-keys/components/api-key-quota-privacy-toggle"; import { ApiKeyCreatedDialog } from "@/features/api-keys/components/api-key-created-dialog"; import { ApiKeyTable } from "@/features/api-keys/components/api-key-table"; import { useApiKeys } from "@/features/api-keys/hooks/use-api-keys"; @@ -21,14 +22,18 @@ const ApiKeyEditDialog = lazy(() => export type ApiKeysSectionProps = { apiKeyAuthEnabled: boolean; + hideUpstreamQuotaFromApiKeys: boolean; disabled?: boolean; onApiKeyAuthEnabledChange: (enabled: boolean) => void; + onHideUpstreamQuotaFromApiKeysChange: (enabled: boolean) => void; }; export function ApiKeysSection({ apiKeyAuthEnabled, + hideUpstreamQuotaFromApiKeys, disabled = false, onApiKeyAuthEnabledChange, + onHideUpstreamQuotaFromApiKeysChange, }: ApiKeysSectionProps) { const { apiKeysQuery, @@ -96,6 +101,12 @@ export function ApiKeysSection({ onChange={onApiKeyAuthEnabledChange} /> + + {mutationError ? 
{mutationError} : null} { + it("renders an explicitly empty selection with no checked sections", async () => { + const user = userEvent.setup(); + + renderWithProviders(); + + await user.click(screen.getByRole("button", { name: /None/i })); + + expect(screen.getByRole("menuitemcheckbox", { name: "Upstream limits" })).not.toBeChecked(); + expect(screen.getByRole("menuitemcheckbox", { name: "Account pool usage" })).not.toBeChecked(); + }); +}); diff --git a/frontend/src/features/api-keys/components/usage-sections-multi-select.tsx b/frontend/src/features/api-keys/components/usage-sections-multi-select.tsx new file mode 100644 index 000000000..2b1eac5ac --- /dev/null +++ b/frontend/src/features/api-keys/components/usage-sections-multi-select.tsx @@ -0,0 +1,84 @@ +import { useCallback, useMemo } from "react"; +import { ChevronsUpDown } from "lucide-react"; + +import { Button } from "@/components/ui/button"; +import { + DropdownMenu, + DropdownMenuCheckboxItem, + DropdownMenuContent, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; +import { USAGE_SECTION_LABELS, USAGE_SECTIONS, type UsageSection } from "@/features/api-keys/schemas"; + +export type UsageSectionsMultiSelectProps = { + value: string; + onChange: (value: string) => void; + placeholder?: string; +}; + +function parseSections(raw: string): Set { + const trimmed = raw.trim(); + if (trimmed === "") return new Set(); + + const sections = trimmed + .split(",") + .map((s) => s.trim()) + .filter((s): s is UsageSection => USAGE_SECTIONS.includes(s as UsageSection)); + if (sections.length === 0) return new Set(USAGE_SECTIONS); + return new Set(sections); +} + +function formatSections(raw: string, allSectionsLabel: string): string { + const sections = parseSections(raw); + if (sections.size === USAGE_SECTIONS.length) return allSectionsLabel; + if (sections.size === 0) return "None"; + return USAGE_SECTIONS.filter((s) => sections.has(s)) + .map((s) => USAGE_SECTION_LABELS[s]) + .join(", "); +} + +export 
function UsageSectionsMultiSelect({ + value, + onChange, + placeholder = "All sections", +}: UsageSectionsMultiSelectProps) { + const selected = useMemo(() => parseSections(value), [value]); + + const toggle = useCallback( + (section: UsageSection) => { + const next = new Set(selected); + if (next.has(section)) { + next.delete(section); + } else { + next.add(section); + } + onChange(Array.from(next).join(",")); + }, + [onChange, selected], + ); + + const label = value.trim() === "" ? "None" : formatSections(value, placeholder); + + return ( + + + + + + {USAGE_SECTIONS.map((section) => ( + toggle(section)} + onSelect={(event) => event.preventDefault()} + > + {USAGE_SECTION_LABELS[section]} + + ))} + + + ); +} diff --git a/frontend/src/features/api-keys/schemas.test.ts b/frontend/src/features/api-keys/schemas.test.ts index 1afe87e28..8a32a1cc0 100644 --- a/frontend/src/features/api-keys/schemas.test.ts +++ b/frontend/src/features/api-keys/schemas.test.ts @@ -81,6 +81,21 @@ describe("ApiKeySchema", () => { expect(parsed.pooledRemainingPercentSecondary).toBe(85.0); expect(parsed.pooledCapacityCreditsPrimary).toBe(225.0); }); + + it("defaults usage sections to both visible sections", () => { + const parsed = ApiKeySchema.parse({ + id: "key-1", + name: "Service Key", + keyPrefix: "sk-live", + allowedModels: null, + expiresAt: null, + isActive: true, + createdAt: ISO, + lastUsedAt: null, + }); + + expect(parsed.usageSections).toBe("upstream_limits,account_pool_usage"); + }); }); describe("ApiKeyCreateResponseSchema", () => { @@ -107,9 +122,11 @@ describe("ApiKeyCreateRequestSchema", () => { const parsed = ApiKeyCreateRequestSchema.parse({ name: "Scoped Key", assignedAccountIds: ["acc_primary"], + usageSections: "account_pool_usage", }); expect(parsed.assignedAccountIds).toEqual(["acc_primary"]); + expect(parsed.usageSections).toBe("account_pool_usage"); }); it("accepts opportunistic traffic class in create payload", () => { @@ -140,11 +157,13 @@ 
describe("ApiKeyUpdateRequestSchema", () => { weeklyTokenLimit: 50000, expiresAt: ISO, isActive: false, + usageSections: "upstream_limits", }); expect(parsed.name).toBe("Updated Key"); expect(parsed.applyToCodexModel).toBe(true); expect(parsed.isActive).toBe(false); + expect(parsed.usageSections).toBe("upstream_limits"); }); it("rejects invalid weeklyTokenLimit", () => { diff --git a/frontend/src/features/api-keys/schemas.ts b/frontend/src/features/api-keys/schemas.ts index 907b7e027..fd09713f8 100644 --- a/frontend/src/features/api-keys/schemas.ts +++ b/frontend/src/features/api-keys/schemas.ts @@ -54,6 +54,7 @@ export const ApiKeySchema = z.object({ .enum(SERVICE_TIERS) .nullable() .default(null), + usageSections: z.string().default("upstream_limits,account_pool_usage"), expiresAt: z.iso.datetime({ offset: true }).nullable(), isActive: z.boolean(), accountAssignmentScopeEnabled: z.boolean().default(false), @@ -67,6 +68,14 @@ export const ApiKeySchema = z.object({ pooledCapacityCreditsPrimary: z.number().default(0), }); +export const USAGE_SECTIONS = ["upstream_limits", "account_pool_usage"] as const; +export type UsageSection = (typeof USAGE_SECTIONS)[number]; + +export const USAGE_SECTION_LABELS: Record = { + upstream_limits: "Upstream limits", + account_pool_usage: "Account pool usage", +}; + export const ApiKeyCreateRequestSchema = z.object({ name: z.string().min(1).max(128), allowedModels: z.array(z.string()).optional(), @@ -81,6 +90,7 @@ export const ApiKeyCreateRequestSchema = z.object({ .enum(SERVICE_TIERS) .nullable() .optional(), + usageSections: z.string().optional(), weeklyTokenLimit: z.number().int().positive().nullable().optional(), expiresAt: z.iso.datetime({ offset: true }).nullable().optional(), assignedAccountIds: z.array(z.string()).optional(), @@ -105,6 +115,7 @@ export const ApiKeyUpdateRequestSchema = z.object({ .enum(SERVICE_TIERS) .nullable() .optional(), + usageSections: z.string().optional(), weeklyTokenLimit: 
z.number().int().positive().nullable().optional(), expiresAt: z.iso.datetime({ offset: true }).nullable().optional(), isActive: z.boolean().optional(), diff --git a/frontend/src/features/settings/components/settings-page.tsx b/frontend/src/features/settings/components/settings-page.tsx index 698a16a0a..2a138700b 100644 --- a/frontend/src/features/settings/components/settings-page.tsx +++ b/frontend/src/features/settings/components/settings-page.tsx @@ -144,10 +144,14 @@ export function SettingsPage() { void handleSave(buildSettingsUpdateRequest(settings, { apiKeyAuthEnabled: enabled })) } + onHideUpstreamQuotaFromApiKeysChange={(enabled) => + void handleSave(buildSettingsUpdateRequest(settings, { hideUpstreamQuotaFromApiKeys: enabled })) + } /> diff --git a/frontend/src/features/settings/payload.ts b/frontend/src/features/settings/payload.ts index 298db7fb6..c3184d3a5 100644 --- a/frontend/src/features/settings/payload.ts +++ b/frontend/src/features/settings/payload.ts @@ -34,6 +34,7 @@ export function buildSettingsUpdateRequest( limitWarmupMinAvailablePercent: settings.limitWarmupMinAvailablePercent, weeklyPaceWorkingDays: settings.weeklyPaceWorkingDays, guestAccessEnabled: settings.guestAccessEnabled, + hideUpstreamQuotaFromApiKeys: settings.hideUpstreamQuotaFromApiKeys, ...patch, }; if ( diff --git a/frontend/src/features/settings/schemas.test.ts b/frontend/src/features/settings/schemas.test.ts index 1f09786fb..7aaec169e 100644 --- a/frontend/src/features/settings/schemas.test.ts +++ b/frontend/src/features/settings/schemas.test.ts @@ -32,6 +32,7 @@ describe("DashboardSettingsSchema", () => { guestAccessEnabled: true, guestPasswordConfigured: false, apiKeyAuthEnabled: true, + hideUpstreamQuotaFromApiKeys: false, limitWarmupEnabled: false, limitWarmupWindows: "both", limitWarmupModel: "auto", @@ -59,6 +60,7 @@ describe("DashboardSettingsSchema", () => { expect(parsed.guestAccessEnabled).toBe(true); expect(parsed.guestPasswordConfigured).toBe(false); 
expect(parsed.apiKeyAuthEnabled).toBe(true); + expect(parsed.hideUpstreamQuotaFromApiKeys).toBe(false); expect(parsed.limitWarmupEnabled).toBe(false); expect(parsed.limitWarmupWindows).toBe("both"); }); @@ -72,6 +74,7 @@ describe("DashboardSettingsSchema", () => { stickyReallocationBudgetThresholdPct: 95, totpConfigured: false, apiKeyAuthEnabled: true, + hideUpstreamQuotaFromApiKeys: false, }); expect(parsed.upstreamStreamTransport).toBe("default"); @@ -157,6 +160,7 @@ describe("SettingsUpdateRequestSchema", () => { importWithoutOverwrite: true, totpRequiredOnLogin: true, apiKeyAuthEnabled: false, + hideUpstreamQuotaFromApiKeys: true, limitWarmupEnabled: true, limitWarmupWindows: "primary", limitWarmupModel: "gpt-5.1-codex-mini", @@ -182,6 +186,7 @@ describe("SettingsUpdateRequestSchema", () => { expect(parsed.weeklyPaceWorkingDays).toBe("0,1,2,3,4"); expect(parsed.totpRequiredOnLogin).toBe(true); expect(parsed.apiKeyAuthEnabled).toBe(false); + expect(parsed.hideUpstreamQuotaFromApiKeys).toBe(true); expect(parsed.limitWarmupEnabled).toBe(true); expect(parsed.limitWarmupWindows).toBe("primary"); }); @@ -208,6 +213,7 @@ describe("SettingsUpdateRequestSchema", () => { expect(parsed.importWithoutOverwrite).toBeUndefined(); expect(parsed.totpRequiredOnLogin).toBeUndefined(); expect(parsed.apiKeyAuthEnabled).toBeUndefined(); + expect(parsed.hideUpstreamQuotaFromApiKeys).toBeUndefined(); expect(parsed.relativeAvailabilityPower).toBeUndefined(); expect(parsed.relativeAvailabilityTopK).toBeUndefined(); expect(parsed.singleAccountId).toBeUndefined(); diff --git a/frontend/src/features/settings/schemas.ts b/frontend/src/features/settings/schemas.ts index 49ec8d0f3..108c3e32b 100644 --- a/frontend/src/features/settings/schemas.ts +++ b/frontend/src/features/settings/schemas.ts @@ -81,6 +81,7 @@ export const DashboardSettingsSchema = z totpRequiredOnLogin: z.boolean(), totpConfigured: z.boolean(), apiKeyAuthEnabled: z.boolean(), + hideUpstreamQuotaFromApiKeys: 
z.boolean().optional().default(false), limitWarmupEnabled: z.boolean().optional().default(false), limitWarmupWindows: LimitWarmupWindowsSchema.optional().default("both"), limitWarmupModel: LimitWarmupModelSchema.optional().default("auto"), @@ -142,6 +143,7 @@ export const SettingsUpdateRequestSchema = z.object({ importWithoutOverwrite: z.boolean().optional(), totpRequiredOnLogin: z.boolean().optional(), apiKeyAuthEnabled: z.boolean().optional(), + hideUpstreamQuotaFromApiKeys: z.boolean().optional(), limitWarmupEnabled: z.boolean().optional(), limitWarmupWindows: LimitWarmupWindowsSchema.optional(), limitWarmupModel: LimitWarmupModelSchema.optional(), diff --git a/frontend/src/test/mocks/factories.ts b/frontend/src/test/mocks/factories.ts index 33ac57db8..ec59e7846 100644 --- a/frontend/src/test/mocks/factories.ts +++ b/frontend/src/test/mocks/factories.ts @@ -447,6 +447,7 @@ export function createDashboardSettings( totpRequiredOnLogin: false, totpConfigured: true, apiKeyAuthEnabled: true, + hideUpstreamQuotaFromApiKeys: false, limitWarmupEnabled: false, limitWarmupWindows: "both", limitWarmupModel: "auto", diff --git a/frontend/src/test/mocks/handlers.ts b/frontend/src/test/mocks/handlers.ts index adaaec50b..247d59bd6 100644 --- a/frontend/src/test/mocks/handlers.ts +++ b/frontend/src/test/mocks/handlers.ts @@ -126,6 +126,7 @@ const SettingsPayloadSchema = z.looseObject({ totpRequiredOnLogin: z.boolean().optional(), totpConfigured: z.boolean().optional(), apiKeyAuthEnabled: z.boolean().optional(), + hideUpstreamQuotaFromApiKeys: z.boolean().optional(), }); const QuotaPlannerSettingsPayloadSchema = z.looseObject({ diff --git a/openspec/changes/add-account-pool-usage-to-v1-usage/.openspec.yaml b/openspec/changes/add-account-pool-usage-to-v1-usage/.openspec.yaml new file mode 100644 index 000000000..e7c42aca7 --- /dev/null +++ b/openspec/changes/add-account-pool-usage-to-v1-usage/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-27 diff --git 
a/openspec/changes/add-account-pool-usage-to-v1-usage/design.md b/openspec/changes/add-account-pool-usage-to-v1-usage/design.md new file mode 100644 index 000000000..3ead403b0 --- /dev/null +++ b/openspec/changes/add-account-pool-usage-to-v1-usage/design.md @@ -0,0 +1,43 @@ +## Context + +The `/v1/usage` endpoint currently returns `request_count`, `total_tokens`, `cached_input_tokens`, `total_cost_usd`, `limits[]` (API-key-level limits), and `upstream_limits[]` (aggregate Codex credit windows). There is no way for clients to see pooled account remaining capacity percentages. + +The codebase already computes pooled credit data (`PooledCreditData` in `app/modules/api_keys/service.py:316-318`) using `_compute_pooled_credits()` — this is used for the dashboard API key list sidebar display. The same computation can be leveraged for `/v1/usage`. + +We also want API key administrators to control which usage detail sections (`upstream_limits`, `account_pool_usage`) are visible to clients calling `/v1/usage`. + +## Goals / Non-Goals + +**Goals:** +- Add `account_pool_usage` object (`primary`, `secondary` float fields) to `GET /v1/usage` response +- Allow API key creators/editors to select which usage sections are exposed via a multi-select dropdown +- Store selection as `usage_sections` TEXT column with comma-separated values +- Default to all sections selected (`upstream_limits,account_pool_usage`) + +**Non-Goals:** +- Changing existing `limits[]` or `upstream_limits[]` structure +- Adding new usage statistics beyond remaining percent +- Changing the public `/v1/models` endpoint + +## Decisions + +### Decision 1: Store `usage_sections` as comma-separated TEXT + +**Rationale**: The existing codebase uses similar patterns (`allowed_models` as JSON array in TEXT). Comma-separated values are simpler than JSON for a fixed set of 2 options and don't require JSON parsing overhead. A migration sets the default to `"upstream_limits,account_pool_usage"`. 
+ +**Alternatives considered:** +- Two boolean columns (`show_upstream_limits`, `show_account_pool_usage`) — adds schema bloat for a small number of flags; the user explicitly requested TEXT with comma separator. +- JSON array — valid, but overkill for a fixed set of 2 values. + +### Decision 2: Compute `account_pool_usage` in the `/v1/usage` handler + +**Rationale**: The `/v1/usage` handler already has access to `ApiKeysService` and `UsageRepository`. It can reuse the existing `_compute_pooled_credits` helper or a similar computation path. This keeps the logic close to where it's consumed. + +### Decision 3: Conditional response based on `usage_sections` + +**Rationale**: The `/v1/usage` handler reads the API key's `usage_sections`, parses it into a set, and conditionally includes `upstream_limits` and `account_pool_usage` in the response. This is a simple filter pass — no complex orchestration needed. + +## Risks / Trade-offs + +- [Risk] Migration failure on large tables → Mitigation: adding a NOT NULL TEXT column with a server default is a lightweight ALTER TABLE operation. +- [Risk] Out-of-sync `usage_sections` values if enum changes → Mitigation: the set of options (`upstream_limits`, `account_pool_usage`) is intentionally small and unlikely to change; parsing is tolerant of unknown values. diff --git a/openspec/changes/add-account-pool-usage-to-v1-usage/proposal.md b/openspec/changes/add-account-pool-usage-to-v1-usage/proposal.md new file mode 100644 index 000000000..a6cff1f6e --- /dev/null +++ b/openspec/changes/add-account-pool-usage-to-v1-usage/proposal.md @@ -0,0 +1,28 @@ +## Why + +API key clients calling `/v1/usage` cannot see pooled account capacity usage. They only see their own key-level limits and aggregate upstream credit windows, but not the actual remaining percent of primary/secondary account pool capacity. Additionally, API key administrators cannot control which usage sections are exposed to clients — all sections are always returned. 
+ +## What Changes + +- Add `account_pool_usage` object to `GET /v1/usage` response with `primary` (float) and `secondary` (float) remaining_percent values +- Add a `usage_sections` field to the API key model (TEXT, comma-separated values) that controls which sections are returned in `/v1/usage` +- Add a multi-select dropdown in the API key create/edit UI labeled "Usage sections shown to client" with options `upstream_limits` and `account_pool_usage`, placed below "Assigned accounts" +- Default `usage_sections` to `"upstream_limits,account_pool_usage"` (select all) via migration +- `/v1/usage` response conditionally includes `account_pool_usage` and `upstream_limits` based on the key's `usage_sections` setting + +## Capabilities + +### New Capabilities + +- `account-pool-usage-v1-usage`: Expose pooled account remaining percentages on `/v1/usage` and allow API key admins to control which usage detail sections are visible to clients + +### Modified Capabilities + +- `api-keys`: API key model gains `usage_sections` TEXT field; create/update endpoints accept `usage_sections`; frontend forms include usage sections multi-select; `/v1/usage` response schema gains `account_pool_usage` and honors `usage_sections` filtering + +## Impact + +- **Database**: New `usage_sections` TEXT column on `api_keys` table (migration) +- **Backend**: `ApiKey` model, `ApiKeyCreateRequest`/`ApiKeyUpdateRequest`/`ApiKeyResponse` schemas, `ApiKeysService.create_key`/`update_key`, `GET /v1/usage` handler, `V1UsageResponse` schema +- **Frontend**: `ApiKeySchema`, `ApiKeyCreateRequestSchema`, `ApiKeyUpdateRequestSchema`, `api-key-create-dialog.tsx`, `api-key-edit-dialog.tsx` +- **No breaking changes**: New field defaults to both sections enabled, preserving existing behavior diff --git a/openspec/changes/add-account-pool-usage-to-v1-usage/specs/account-pool-usage-v1-usage/spec.md b/openspec/changes/add-account-pool-usage-to-v1-usage/specs/account-pool-usage-v1-usage/spec.md new file mode 100644 
index 000000000..35294d387 --- /dev/null +++ b/openspec/changes/add-account-pool-usage-to-v1-usage/specs/account-pool-usage-v1-usage/spec.md @@ -0,0 +1,65 @@ +## ADDED Requirements + +### Requirement: /v1/usage response includes account_pool_usage + +The system SHALL include an `account_pool_usage` object in the `GET /v1/usage` response containing pooled account remaining capacity percentages for the primary (5h) and secondary (7d/weekly) windows, when the API key's `usage_sections` setting includes `account_pool_usage`. + +The `account_pool_usage` object SHALL contain: +- `primary` (float or null): the remaining percent of primary (5h) pooled account capacity, or `null` if no primary capacity exists +- `secondary` (float or null): the remaining percent of secondary (7d) pooled account capacity, or `null` if no secondary capacity exists + +The computation SHALL use the same account scope as the API key's assigned accounts. When no accounts are assigned, all active (non-DEACTIVATED, non-PAUSED) accounts SHALL be considered. Computation SHALL mirror the existing `PooledCreditData` computation used for the dashboard API key list. 
+ +#### Scenario: Account pool usage with assigned accounts + +- **WHEN** a valid API key with assigned accounts calls `GET /v1/usage` +- **AND** the API key has `usage_sections` containing `account_pool_usage` +- **AND** pooled account capacity has remaining percentages of 75.0 (primary) and 90.0 (secondary) +- **THEN** the response includes `account_pool_usage: { primary: 75.0, secondary: 90.0 }` + +#### Scenario: Account pool usage when no primary capacity + +- **WHEN** a valid API key calls `GET /v1/usage` +- **AND** pooled primary capacity is 0 +- **THEN** `account_pool_usage.primary` is `null` + +#### Scenario: Account pool usage when the enabled pool has no active capacity-bearing accounts + +- **WHEN** a valid API key calls `GET /v1/usage` +- **AND** the API key has `usage_sections` containing `account_pool_usage` +- **AND** the scoped pool has no active or capacity-bearing accounts +- **THEN** the response includes `account_pool_usage: { primary: null, secondary: null }` + +#### Scenario: Account pool usage excluded by usage_sections + +- **WHEN** a valid API key calls `GET /v1/usage` +- **AND** the API key's `usage_sections` does NOT include `account_pool_usage` +- **THEN** `account_pool_usage` is `null` + +### Requirement: API key usage_sections controls which /v1/usage detail sections are returned + +The system SHALL store an API key's visible usage sections in a `usage_sections` TEXT field as a comma-separated list. The supported values SHALL be `upstream_limits` and `account_pool_usage`. The system SHALL parse this field when building the `/v1/usage` response and conditionally include the corresponding sections. 
+ +#### Scenario: Default usage_sections includes all sections + +- **WHEN** an API key is created without specifying `usage_sections` +- **THEN** the key's `usage_sections` SHALL be `"upstream_limits,account_pool_usage"` +- **AND** `/v1/usage` response includes both `upstream_limits` and `account_pool_usage` + +#### Scenario: upstream_limits excluded + +- **WHEN** an API key has `usage_sections` set to `"account_pool_usage"` +- **AND** a client calls `GET /v1/usage` +- **THEN** the response includes `account_pool_usage` but `upstream_limits` is an empty list + +#### Scenario: account_pool_usage excluded + +- **WHEN** an API key has `usage_sections` set to `"upstream_limits"` +- **AND** a client calls `GET /v1/usage` +- **THEN** the response includes `upstream_limits` and `account_pool_usage` is `null` + +#### Scenario: Empty usage_sections excludes both + +- **WHEN** an API key has `usage_sections` set to `""` +- **AND** a client calls `GET /v1/usage` +- **THEN** `upstream_limits` is an empty list AND `account_pool_usage` is `null` diff --git a/openspec/changes/add-account-pool-usage-to-v1-usage/specs/api-keys/spec.md b/openspec/changes/add-account-pool-usage-to-v1-usage/specs/api-keys/spec.md new file mode 100644 index 000000000..c188c0d9b --- /dev/null +++ b/openspec/changes/add-account-pool-usage-to-v1-usage/specs/api-keys/spec.md @@ -0,0 +1,140 @@ +## ADDED Requirements + +### Requirement: API key usage_sections controls visible /v1/usage detail sections + +The system SHALL accept an optional `usage_sections` field in `POST /api/api-keys` and `PATCH /api/api-keys/{id}`. The field SHALL be a comma-separated string of section names. Supported values SHALL be `upstream_limits` and `account_pool_usage`. When `usage_sections` is omitted during creation, the system SHALL default it to `"upstream_limits,account_pool_usage"`. + +The `ApiKeyResponse` SHALL include `usage_sections` as a string. 
+ +#### Scenario: Create key with explicit usage_sections + +- **WHEN** admin submits `POST /api/api-keys` with `{ "name": "dev-key", "usageSections": "upstream_limits" }` +- **THEN** the created key returns `usageSections: "upstream_limits"` + +#### Scenario: Create key without usage_sections defaults to all + +- **WHEN** admin submits `POST /api/api-keys` without `usageSections` +- **THEN** the created key returns `usageSections: "upstream_limits,account_pool_usage"` + +#### Scenario: Update key usage_sections + +- **WHEN** admin submits `PATCH /api/api-keys/{id}` with `{ "usageSections": "account_pool_usage" }` +- **THEN** the key returns `usageSections: "account_pool_usage"` + +#### Scenario: Reject unknown usage_sections values + +- **WHEN** admin submits `POST /api/api-keys` with `usageSections` containing an unsupported value +- **THEN** the system returns 400 + +## MODIFIED Requirements + +### Requirement: API Key creation + +The system SHALL allow the admin to create API keys via `POST /api/api-keys` with a `name` (required), `allowed_models` (optional list), `weekly_token_limit` (optional integer), `expires_at` (optional ISO 8601 datetime), `assigned_account_ids` (optional list), and `usage_sections` (optional comma-separated string, defaults to `"upstream_limits,account_pool_usage"`). The system MUST generate a key in the format `sk-clb-{48 hex chars}`, store only the `sha256` hash in the database, and return the plain key exactly once in the creation response. The system MUST accept timezone-aware ISO 8601 datetimes for `expiresAt`, normalize them to UTC naive for persistence, and return the expiration as UTC in API responses. + +When `assigned_account_ids` is omitted or empty, the created key SHALL remain unscoped and apply to all accounts. When `assigned_account_ids` is provided with one or more valid account IDs, the created key SHALL enable account-assignment scope and persist those assignments. 
+ +#### Scenario: Create unscoped key without assigned accounts + +- **WHEN** admin submits `POST /api/api-keys` without `assignedAccountIds` +- **THEN** the created key returns `accountAssignmentScopeEnabled = false` +- **AND** `assignedAccountIds = []` + +#### Scenario: Create scoped key with assigned accounts + +- **WHEN** admin submits `POST /api/api-keys` with `assignedAccountIds` containing valid account IDs +- **THEN** the created key returns `accountAssignmentScopeEnabled = true` +- **AND** `assignedAccountIds` matches the supplied accounts + +#### Scenario: Reject unknown assigned account IDs on create + +- **WHEN** admin submits `POST /api/api-keys` with an unknown account ID in `assignedAccountIds` +- **THEN** the system returns 400 + +#### Scenario: Create key and show plain key + +- **WHEN** admin submits `POST /api/api-keys` with a valid payload +- **THEN** the response contains the full plain key exactly once and the system never returns the plain key on subsequent reads + +#### Scenario: Create key with timezone-aware expiration + +- **WHEN** admin submits `POST /api/api-keys` with `{ "name": "dev-key", "expiresAt": "2025-12-31T00:00:00Z" }` +- **THEN** the system persists the expiration successfully without PostgreSQL datetime binding errors +- **AND** the response returns `expiresAt` representing the same UTC instant + +### Requirement: API Key update +The system SHALL allow updating key properties via `PATCH /api/api-keys/{id}`. Updatable fields: `name`, `allowedModels`, `weeklyTokenLimit`, `expiresAt`, `isActive`, `usageSections`. The key hash and prefix MUST NOT be modifiable. The system MUST accept timezone-aware ISO 8601 datetimes for `expiresAt` and normalize them to UTC naive before persistence. 
+ +#### Scenario: Update key with timezone-aware expiration +- **WHEN** admin submits `PATCH /api/api-keys/{id}` with `{ "expiresAt": "2025-12-31T00:00:00Z" }` +- **THEN** the system persists the expiration successfully without PostgreSQL datetime binding errors +- **AND** the response returns `expiresAt` representing the same UTC instant + +### Requirement: Frontend API Key management + +The SPA settings page SHALL include an API Key management section with: a toggle for `apiKeyAuthEnabled`, a key list table showing prefix/name/models/limit/usage/expiry/status, a create dialog (name, model selection, assigned-account selection, usage sections multi-select, weekly limit, expiry date), and key actions (edit, delete, regenerate). On key creation, the SPA MUST display the plain key in a copy-able dialog with a warning that it will not be shown again. + +#### Scenario: Create key with optional account scoping + +- **WHEN** an admin opens the create API key dialog +- **THEN** the dialog shows the Assigned accounts picker +- **AND** leaving the picker at `All accounts` creates an unscoped key +- **AND** selecting one or more accounts creates a scoped key for only those accounts + +#### Scenario: Create key with usage sections multi-select + +- **WHEN** an admin opens the create API key dialog +- **THEN** the dialog shows a "Usage sections shown to client" multi-select dropdown below the Assigned accounts picker +- **AND** the dropdown includes "Upstream limits" and "Account pool usage" options +- **AND** by default both options are selected + +#### Scenario: Create key and show plain key + +- **WHEN** admin creates a key via the UI +- **THEN** a dialog shows the full plain key with a copy button and a warning message + +### Requirement: API keys can read their own /v1/usage + +The system SHALL expose `GET /v1/usage` for self-service usage lookup by API-key clients. 
The route MUST require a valid API key in the `Authorization` header using the Bearer authentication scheme even when `api_key_auth_enabled` is false globally. The response MUST include only data for the authenticated key and MUST return: + +- `request_count` +- `total_tokens` +- `cached_input_tokens` +- `total_cost_usd` +- `limits[]` containing only limits configured on the authenticated API key, with `limit_type`, `limit_window`, `max_value`, `current_value`, `remaining_value`, `model_filter`, `reset_at`, and `source` +- `upstream_limits[]` containing aggregate upstream Codex credit windows when available, with the same fields and `source: "aggregate"`, subject to the key's `usage_sections` containing `upstream_limits` +- `account_pool_usage` containing `primary` and `secondary` remaining percentages (each a float, or `null` when no pooled value is available), subject to the key's `usage_sections` containing `account_pool_usage` + +Validation failures MUST use the existing OpenAI error envelope used by `/v1/*` routes. + +#### Scenario: Missing API key is rejected + +- **WHEN** a client calls `GET /v1/usage` without a Bearer token +- **THEN** the system returns 401 in the OpenAI error format + +#### Scenario: Invalid API key is rejected + +- **WHEN** a client calls `GET /v1/usage` with an unknown, expired, or inactive Bearer key +- **THEN** the system returns 401 in the OpenAI error format + +#### Scenario: Key with no usage returns zero totals + +- **WHEN** a valid API key with no request-log usage calls `GET /v1/usage` +- **THEN** the system returns `request_count: 0`, `total_tokens: 0`, `cached_input_tokens: 0`, `total_cost_usd: 0.0` + +#### Scenario: Usage is scoped to the authenticated key + +- **WHEN** multiple API keys have request-log history and one of them calls `GET /v1/usage` +- **THEN** the response includes only the usage totals and limits for that authenticated key + +#### Scenario: Upstream limits are separate from API-key limits + +- **WHEN** an API key with its own limit calls `GET /v1/usage` +- 
**AND** upstream Codex aggregate usage data exists +- **THEN** `limits[]` contains the API-key limit values +- **AND** `upstream_limits[]` contains the aggregate Codex credit windows + +#### Scenario: Self-usage works while global proxy auth is disabled + +- **WHEN** `api_key_auth_enabled` is false and a client calls `GET /v1/usage` with a valid Bearer key +- **THEN** the system still authenticates that key and returns the self-usage payload diff --git a/openspec/changes/add-account-pool-usage-to-v1-usage/tasks.md b/openspec/changes/add-account-pool-usage-to-v1-usage/tasks.md new file mode 100644 index 000000000..2733cfa9e --- /dev/null +++ b/openspec/changes/add-account-pool-usage-to-v1-usage/tasks.md @@ -0,0 +1,59 @@ +## 1. Database Migration + +- [x] 1.1 Add `usage_sections` TEXT column to `api_keys` table with default `"upstream_limits,account_pool_usage"` +- [x] 1.2 Create Alembic migration file with idempotent column check and batch_alter_table pattern + +## 2. Backend Model + +- [x] 2.1 Add `usage_sections` Mapped[str | None] column to ApiKey model in `app/db/models.py` + +## 3. Backend Dataclasses + +- [x] 3.1 Add `usage_sections: str = "upstream_limits,account_pool_usage"` to `ApiKeyCreateData` +- [x] 3.2 Add `usage_sections: str | None = None` and `usage_sections_set: bool = False` to `ApiKeyUpdateData` +- [x] 3.3 Add `usage_sections: str = "upstream_limits,account_pool_usage"` to `ApiKeyData` + +## 4. Backend API Schemas + +- [x] 4.1 Add `usage_sections: str | None = None` to `ApiKeyCreateRequest` in `app/modules/api_keys/schemas.py` +- [x] 4.2 Add `usage_sections: str | None = None` to `ApiKeyUpdateRequest` in `app/modules/api_keys/schemas.py` +- [x] 4.3 Add `usage_sections: str = "upstream_limits,account_pool_usage"` to `ApiKeyResponse` +- [x] 4.4 Add `AccountPoolUsageResponse` class and `account_pool_usage: AccountPoolUsageResponse | None = None` to `V1UsageResponse` in `app/modules/proxy/schemas.py` + +## 5. 
Backend Service Layer + +- [x] 5.1 Handle `usage_sections` in `create_key` — pass `payload.usage_sections` to ApiKey row +- [x] 5.2 Handle `usage_sections` in `update_key` — pass to repository update when set +- [x] 5.3 Add `usage_sections` to `_to_api_key_data` and `_to_created_data` converters + +## 6. Backend API Handlers + +- [x] 6.1 Pass `usage_sections` from `ApiKeyCreateRequest` to `ApiKeyCreateData` in `create_api_key` handler +- [x] 6.2 Handle `usage_sections` in `update_api_key` handler — set `usage_sections_set` and pass value +- [x] 6.3 Add `usage_sections` to `_to_response` in `app/modules/api_keys/api.py` +- [x] 6.4 Validate `usage_sections` values (only allow `upstream_limits`, `account_pool_usage`) on create/update + +## 7. /v1/usage Handler Changes + +- [x] 7.1 Compute `account_pool_usage` by reusing `_compute_pooled_credits` or inline computation in `/v1/usage` handler +- [x] 7.2 Parse `usage_sections` from the authenticated API key in the handler +- [x] 7.3 Conditionally include `account_pool_usage` and `upstream_limits` based on parsed sections + +## 8. Frontend Schemas + +- [x] 8.1 Add `usageSections` string field to `ApiKeySchema`, `ApiKeyCreateRequestSchema`, `ApiKeyUpdateRequestSchema` + +## 9. Frontend UI Components + +- [x] 9.1 Add "Usage sections shown to client" multi-select dropdown below "Assigned accounts" in `api-key-create-dialog.tsx` +- [x] 9.2 Add "Usage sections shown to client" multi-select dropdown below "Assigned accounts" in `api-key-edit-dialog.tsx` + +## 10. 
Tests + +- [x] 10.1 Add unit tests for `usage_sections` in `test_api_keys_service.py` +- [x] 10.2 Add unit tests for `usage_sections` in `test_api_keys_repository.py` +- [x] 10.3 Add integration tests for `/v1/usage` with `account_pool_usage` in `test_v1_usage.py` +- [x] 10.4 Add integration tests for API key create/update with `usage_sections` in `test_api_keys_api.py` +- [x] 10.5 Add frontend schema tests for `usageSections` in `schemas.test.ts` +- [x] 10.6 Add frontend component tests for usage sections dropdown +- [x] 10.7 Verify all existing tests pass diff --git a/openspec/changes/backfill-api-key-limit-current-usage/.openspec.yaml b/openspec/changes/backfill-api-key-limit-current-usage/.openspec.yaml new file mode 100644 index 000000000..12e66c27b --- /dev/null +++ b/openspec/changes/backfill-api-key-limit-current-usage/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-30 diff --git a/openspec/changes/backfill-api-key-limit-current-usage/design.md b/openspec/changes/backfill-api-key-limit-current-usage/design.md new file mode 100644 index 000000000..e0f10d733 --- /dev/null +++ b/openspec/changes/backfill-api-key-limit-current-usage/design.md @@ -0,0 +1,13 @@ +## Design + +New limit rules are initialized in the API key service during update. For each submitted rule without an existing `(limit_type, limit_window, model_filter)` match and without `resetUsage`, the service asks the repository to aggregate usage for the key inside the new rule's active window. + +The lookback window is derived from the same duration used for the new limit: + +- `reset_at = next_limit_reset(now, limit_window)` +- `since = now - limit_window_delta(limit_window)` +- `until = now` + +The repository computes usage from `request_logs` scoped to the API key, the time window, and the optional model filter. Token limits use token columns; cost limits convert each `cost_usd` row to truncated integer microdollars before summing, matching live cost-limit accrual. 
Credit limits are not derived from request logs and remain zero. + +Existing limit rows keep their current value, preserving the established update behavior. `resetUsage=true` still forces all submitted limit rows to zero. diff --git a/openspec/changes/backfill-api-key-limit-current-usage/proposal.md b/openspec/changes/backfill-api-key-limit-current-usage/proposal.md new file mode 100644 index 000000000..9ce54b7e1 --- /dev/null +++ b/openspec/changes/backfill-api-key-limit-current-usage/proposal.md @@ -0,0 +1,17 @@ +## Why + +When an API key has existing request-log usage and an admin later adds a limit rule, the new limit starts at `0`. This makes the dashboard and enforcement state disagree with actual usage in the current limit window. + +Issue #518 reports the visible case: a key has already used about 10k tokens, then a 100k daily token limit is added, but the limit shows `0/100k` instead of `10k/100k`. + +## What Changes + +- Backfill newly-added API key limit rules from existing request logs in the active window. +- Preserve current values for existing matching limits unless the admin explicitly resets usage. +- Keep `resetUsage=true` as the explicit way to start all submitted limits from zero. + +## Impact + +- Admins can add limits to an already-used API key without losing current-window usage visibility. +- Enforcement immediately accounts for current-window usage on newly-added rules. +- No schema, API shape, or frontend contract changes are required. 
diff --git a/openspec/changes/backfill-api-key-limit-current-usage/specs/api-keys/spec.md b/openspec/changes/backfill-api-key-limit-current-usage/specs/api-keys/spec.md new file mode 100644 index 000000000..014158150 --- /dev/null +++ b/openspec/changes/backfill-api-key-limit-current-usage/specs/api-keys/spec.md @@ -0,0 +1,77 @@ +## MODIFIED Requirements + +### Requirement: Limit update with usage state preservation +When updating API key limits, the system SHALL preserve existing usage state (`current_value`, `reset_at`) for unchanged limit rules. Limit comparison key is `(limit_type, limit_window, model_filter)`. + +- Matching existing rule: `current_value` and `reset_at` SHALL be preserved; only `max_value` is updated +- New rule (no match) without `resetUsage`: `current_value` SHALL be initialized from the API key's successful existing request-log usage in the new rule's current window, with a fresh `reset_at` +- New rule (no match) with `resetUsage`: `current_value=0` and fresh `reset_at` +- Removed rule (in existing but not in update): row is deleted + +Usage reset SHALL only occur via an explicit action (`resetUsage` field or dedicated endpoint), never as a side-effect of metadata or policy edits. 
+ +#### Scenario: Metadata-only edit preserves usage state + +- **WHEN** an API key PATCH updates only name or is_active +- **AND** `limits` field is not included in the payload +- **THEN** existing `current_value` and `reset_at` are unchanged + +#### Scenario: Same policy re-submission preserves usage state + +- **WHEN** an API key PATCH includes `limits` with identical rules (same type/window/filter/max_value) +- **THEN** existing `current_value` and `reset_at` are unchanged + +#### Scenario: max_value adjustment preserves counters + +- **WHEN** an API key PATCH changes only `max_value` for an existing matched limit rule +- **THEN** that rule's existing `current_value` and `reset_at` are unchanged + +#### Scenario: Adding a new limit backfills current-window usage + +- **WHEN** an API key has successful request-log usage in the active window +- **AND** an API key PATCH adds a limit rule that does not match any existing rule +- **AND** `resetUsage` is not true +- **THEN** the new rule's `current_value` reflects successful existing request-log usage for that rule's current window +- **AND** the new rule receives a fresh `reset_at` + +#### Scenario: resetUsage keeps new limits at zero + +- **WHEN** an API key has request-log usage in the active window +- **AND** an API key PATCH adds a limit rule that does not match any existing rule +- **AND** `resetUsage` is true +- **THEN** the new rule's `current_value` is `0` +- **AND** the new rule receives a fresh `reset_at` + +### Requirement: API Key update +The system SHALL allow updating key properties via `PATCH /api/api-keys/{id}`. Updatable fields: `name`, `allowedModels`, `weeklyTokenLimit`, `expiresAt`, `isActive`. The key hash and prefix MUST NOT be modifiable. The system MUST accept timezone-aware ISO 8601 datetimes for `expiresAt` and normalize them to UTC naive before persistence. 
+ +When a submitted API key limit rule does not match an existing rule by `limit_type`, `limit_window`, and `model_filter`, the system MUST initialize the new rule's `current_value` from the API key's successful existing request-log usage in that rule's current window. If `resetUsage` is true, the system MUST initialize submitted limits with `current_value: 0`. + +#### Scenario: Update key with timezone-aware expiration +- **WHEN** admin submits `PATCH /api/api-keys/{id}` with `{ "expiresAt": "2025-12-31T00:00:00Z" }` +- **THEN** the system persists the expiration successfully without PostgreSQL datetime binding errors +- **AND** the response returns `expiresAt` representing the same UTC instant + +#### Scenario: Update non-existent key + +- **WHEN** admin submits `PATCH /api/api-keys/{id}` with an unknown ID +- **THEN** the system returns 404 + +#### Scenario: Add token limit after current-window usage exists + +- **WHEN** an API key has successful request-log token usage in the active daily window +- **AND** the API key has error or incomplete request-log token usage in the same window +- **AND** admin submits `PATCH /api/api-keys/{id}` adding a daily `total_tokens` limit without `resetUsage` +- **THEN** the new limit's `current_value` includes only the successful current-window token usage + +#### Scenario: Add cost limit after current-window usage exists + +- **WHEN** an API key has successful request-log costs in the active daily window +- **AND** admin submits `PATCH /api/api-keys/{id}` adding a daily `cost_usd` limit without `resetUsage` +- **THEN** the new limit's `current_value` is the sum of each successful request log's `cost_usd` converted to truncated integer microdollars + +#### Scenario: Reset usage when adding a limit + +- **WHEN** an API key has request-log usage in the active window +- **AND** admin submits `PATCH /api/api-keys/{id}` adding a limit with `resetUsage: true` +- **THEN** the new limit's `current_value` is `0` diff --git 
a/openspec/changes/backfill-api-key-limit-current-usage/tasks.md b/openspec/changes/backfill-api-key-limit-current-usage/tasks.md new file mode 100644 index 000000000..777b6543e --- /dev/null +++ b/openspec/changes/backfill-api-key-limit-current-usage/tasks.md @@ -0,0 +1,17 @@ +## 1. Specs + +- [x] 1.1 Add an API-key update requirement for backfilling newly-added limit rules from current-window request logs. +- [x] 1.2 Validate OpenSpec changes. + +## 2. Implementation + +- [x] 2.1 Add repository support for aggregating API-key usage by limit type, window, and optional model filter. +- [x] 2.2 Initialize newly-added API key limit rules from the aggregate when `resetUsage` is false. +- [x] 2.3 Preserve existing matching limit values and explicit reset behavior. + +## 3. Verification + +- [x] 3.1 Add regression coverage for adding a total-token daily limit after existing usage. +- [x] 3.2 Add regression coverage for model-filtered limit backfill. +- [x] 3.3 Add regression coverage that `resetUsage=true` keeps new limits at zero. +- [x] 3.4 Run targeted tests and static checks. diff --git a/openspec/changes/hide-upstream-quota-from-api-keys/proposal.md b/openspec/changes/hide-upstream-quota-from-api-keys/proposal.md new file mode 100644 index 000000000..ee724a14e --- /dev/null +++ b/openspec/changes/hide-upstream-quota-from-api-keys/proposal.md @@ -0,0 +1,14 @@ +## Problem + +API-key clients can still see the owner's upstream Codex quota state through `/v1/usage` and upstream quota headers on proxy responses. That exposes account-level usage and reset timing to temporary or third-party users who only need the quota on their own API key. + +## Proposed change + +Add a dashboard setting that hides upstream quota details from API-key-authenticated requests while leaving dashboard/admin views unchanged. + +## Scope + +- Add a dashboard setting for quota privacy. 
+- Omit upstream quota details from `/v1/usage` when the requester authenticated with an API key and the setting is enabled. +- Omit upstream quota headers from proxy responses for API-key-authenticated requests when the setting is enabled. +- Keep existing dashboard and owner-facing quota views unchanged. diff --git a/openspec/changes/hide-upstream-quota-from-api-keys/specs/api-keys/spec.md b/openspec/changes/hide-upstream-quota-from-api-keys/specs/api-keys/spec.md new file mode 100644 index 000000000..71ee4641c --- /dev/null +++ b/openspec/changes/hide-upstream-quota-from-api-keys/specs/api-keys/spec.md @@ -0,0 +1,29 @@ +## ADDED Requirements + +### Requirement: API-key quota privacy toggle +The system SHALL provide a `hide_upstream_quota_from_api_keys` boolean in `DashboardSettings`, defaulting to `false`. The dashboard settings API SHALL accept and return this field. + +#### Scenario: Default preserves current behavior + +- **WHEN** the setting is not enabled +- **THEN** API-key-authenticated requests continue to receive upstream quota details exactly as they do today + +#### Scenario: API-key usage response hides upstream limits + +- **GIVEN** `hide_upstream_quota_from_api_keys` is `true` +- **WHEN** an API-key-authenticated client calls `GET /v1/usage` +- **THEN** the response SHALL omit upstream quota entries +- **AND** the response SHALL still include the API key's own quota data + +#### Scenario: Proxy responses hide upstream quota headers + +- **GIVEN** `hide_upstream_quota_from_api_keys` is `true` +- **WHEN** an API-key-authenticated client calls a protected proxy route that emits quota headers +- **THEN** the response SHALL NOT include `x-codex-primary-*`, `x-codex-secondary-*`, or `x-codex-credits-*` headers +- **AND** internal routing headers such as `x-codex-turn-state` SHALL remain unchanged + +#### Scenario: Dashboard views stay visible + +- **GIVEN** `hide_upstream_quota_from_api_keys` is `true` +- **WHEN** an owner views dashboard settings or 
owner-facing usage data without API-key authentication +- **THEN** upstream quota details SHALL remain visible diff --git a/openspec/changes/hide-upstream-quota-from-api-keys/specs/frontend-architecture/spec.md b/openspec/changes/hide-upstream-quota-from-api-keys/specs/frontend-architecture/spec.md new file mode 100644 index 000000000..bf439225f --- /dev/null +++ b/openspec/changes/hide-upstream-quota-from-api-keys/specs/frontend-architecture/spec.md @@ -0,0 +1,14 @@ +## ADDED Requirements + +### Requirement: API keys settings expose quota privacy toggle +The Settings page SHALL include a toggle in the API Keys section that controls `hide_upstream_quota_from_api_keys`. + +#### Scenario: Toggle is visible with the API keys controls + +- **WHEN** the Settings page renders the API Keys section +- **THEN** the quota privacy toggle SHALL be shown alongside the API key auth toggle + +#### Scenario: Toggle persists through settings save + +- **WHEN** the user changes the quota privacy toggle +- **THEN** the settings update request SHALL include `hideUpstreamQuotaFromApiKeys` diff --git a/openspec/changes/hide-upstream-quota-from-api-keys/tasks.md b/openspec/changes/hide-upstream-quota-from-api-keys/tasks.md new file mode 100644 index 000000000..cce195198 --- /dev/null +++ b/openspec/changes/hide-upstream-quota-from-api-keys/tasks.md @@ -0,0 +1,5 @@ +## Tasks + +- [x] Add the dashboard setting to backend models, settings API, and frontend settings forms. +- [x] Hide upstream quota details from API-key-authenticated `/v1/usage` and proxy responses when the setting is enabled. +- [x] Add regression tests for settings persistence, `/v1/usage`, and proxy header suppression. 
diff --git a/openspec/specs/api-keys/spec.md b/openspec/specs/api-keys/spec.md index 080013c89..f5cb25358 100644 --- a/openspec/specs/api-keys/spec.md +++ b/openspec/specs/api-keys/spec.md @@ -377,7 +377,7 @@ The service contract SHALL be typed explicitly: `enforce_limits_for_request(key_ When updating API key limits, the system SHALL preserve existing usage state (`current_value`, `reset_at`) for unchanged limit rules. Limit comparison key is `(limit_type, limit_window, model_filter)`. - Matching existing rule: `current_value` and `reset_at` SHALL be preserved; only `max_value` is updated -- New rule (no match): `current_value=0` and fresh `reset_at` +- New rule (no match): when `resetUsage` is false, `current_value` is initialized from successful request-log usage in the new rule's current window; when `resetUsage` is true, `current_value=0`; always with a fresh `reset_at` - Removed rule (in existing but not in update): row is deleted Usage reset SHALL only occur via an explicit action (`reset_usage` field or dedicated endpoint), never as a side-effect of metadata or policy edits. 
diff --git a/tests/integration/test_api_keys_api.py b/tests/integration/test_api_keys_api.py index 4aab900e9..5c03470b1 100644 --- a/tests/integration/test_api_keys_api.py +++ b/tests/integration/test_api_keys_api.py @@ -148,6 +148,26 @@ async def test_api_keys_crud_and_regenerate(async_client): assert listed_after_delete.json() == [] +@pytest.mark.asyncio +async def test_create_api_key_preserves_empty_usage_sections(async_client): + create = await async_client.post( + "/api/api-keys/", + json={ + "name": "hidden-usage-key", + "allowedModels": [], + "usageSections": "", + }, + ) + + assert create.status_code == 200 + payload = create.json() + assert payload["usageSections"] == "" + + listed = await async_client.get("/api/api-keys/") + assert listed.status_code == 200 + assert listed.json()[0]["usageSections"] == "" + + @pytest.mark.asyncio async def test_api_key_update_persists_assigned_account_ids(async_client): first_account_id = await _import_account(async_client, "acc-assigned-a", "assigned-a@example.com") @@ -280,8 +300,19 @@ async def test_deleted_assigned_accounts_do_not_fall_back_to_other_accounts(asyn listed = await async_client.get("/api/api-keys/") assert listed.status_code == 200 - assert listed.json()[0]["assignedAccountIds"] == [] - assert listed.json()[0]["accountAssignmentScopeEnabled"] is True + listed_key = listed.json()[0] + assert listed_key["assignedAccountIds"] == [] + assert listed_key["accountAssignmentScopeEnabled"] is True + assert listed_key["pooledCapacityCreditsPrimary"] == 0.0 + assert listed_key["pooledRemainingPercentPrimary"] is None + assert listed_key["pooledRemainingPercentSecondary"] is None + + usage = await async_client.get("/v1/usage", headers={"Authorization": f"Bearer {key}"}) + assert usage.status_code == 200 + assert usage.json()["account_pool_usage"] == { + "primary": None, + "secondary": None, + } called = False diff --git a/tests/integration/test_api_keys_trends_api.py b/tests/integration/test_api_keys_trends_api.py 
index fde3d19c8..82880c944 100644 --- a/tests/integration/test_api_keys_trends_api.py +++ b/tests/integration/test_api_keys_trends_api.py @@ -18,6 +18,12 @@ async def _create_api_key(async_client, *, name: str) -> str: return response.json()["id"] +def _only_limit(payload: dict) -> dict: + limits = payload["limits"] + assert len(limits) == 1 + return limits[0] + + async def _insert_request_logs(*rows: RequestLog) -> None: async with SessionLocal() as session: session.add_all(rows) @@ -43,6 +49,271 @@ async def test_api_key_detail_endpoints_return_404_for_missing_key(async_client, assert response.status_code == 404 +@pytest.mark.asyncio +async def test_adding_limit_backfills_current_window_usage(async_client, monkeypatch: pytest.MonkeyPatch): + key_id = await _create_api_key(async_client, name="limit-backfill-key") + other_key_id = await _create_api_key(async_client, name="limit-backfill-other-key") + now = datetime(2026, 4, 30, 12, 0, 0) + monkeypatch.setattr("app.modules.api_keys.service.utcnow", lambda: now) + + await _insert_request_logs( + RequestLog( + api_key_id=key_id, + request_id="req-limit-backfill-a", + requested_at=now - timedelta(hours=2), + model="gpt-5.1", + status="success", + input_tokens=8_000, + output_tokens=1_500, + cached_input_tokens=200, + cost_usd=0.25, + ), + RequestLog( + api_key_id=key_id, + request_id="req-limit-backfill-b", + requested_at=now - timedelta(minutes=30), + model="gpt-5.1", + status="success", + input_tokens=300, + output_tokens=None, + reasoning_tokens=200, + cached_input_tokens=20, + cost_usd=0.03, + ), + RequestLog( + api_key_id=key_id, + request_id="req-limit-backfill-error", + requested_at=now - timedelta(minutes=20), + model="gpt-5.1", + status="error", + error_code="response.incomplete", + input_tokens=40_000, + output_tokens=10_000, + cached_input_tokens=0, + cost_usd=5.0, + ), + RequestLog( + api_key_id=key_id, + request_id="req-limit-backfill-old", + requested_at=now - timedelta(days=2), + model="gpt-5.1", + 
status="success", + input_tokens=10_000, + output_tokens=10_000, + cached_input_tokens=0, + cost_usd=2.0, + ), + RequestLog( + api_key_id=other_key_id, + request_id="req-limit-backfill-other-key", + requested_at=now - timedelta(minutes=10), + model="gpt-5.1", + status="success", + input_tokens=50_000, + output_tokens=50_000, + cached_input_tokens=0, + cost_usd=10.0, + ), + ) + + response = await async_client.patch( + f"/api/api-keys/{key_id}", + json={ + "limits": [ + { + "limitType": "total_tokens", + "limitWindow": "daily", + "maxValue": 100_000, + } + ] + }, + ) + + assert response.status_code == 200 + limit = _only_limit(response.json()) + assert limit["currentValue"] == 10_000 + assert limit["maxValue"] == 100_000 + + +@pytest.mark.asyncio +async def test_adding_model_scoped_limit_backfills_only_matching_model(async_client, monkeypatch: pytest.MonkeyPatch): + key_id = await _create_api_key(async_client, name="limit-model-backfill-key") + now = datetime(2026, 4, 30, 12, 0, 0) + monkeypatch.setattr("app.modules.api_keys.service.utcnow", lambda: now) + + await _insert_request_logs( + RequestLog( + api_key_id=key_id, + request_id="req-limit-model-match", + requested_at=now - timedelta(hours=1), + model="gpt-5.1", + status="success", + input_tokens=400, + output_tokens=100, + cached_input_tokens=0, + cost_usd=0.04, + ), + RequestLog( + api_key_id=key_id, + request_id="req-limit-model-other", + requested_at=now - timedelta(minutes=45), + model="gpt-5.4", + status="success", + input_tokens=9_000, + output_tokens=1_000, + cached_input_tokens=0, + cost_usd=0.5, + ), + ) + + response = await async_client.patch( + f"/api/api-keys/{key_id}", + json={ + "limits": [ + { + "limitType": "total_tokens", + "limitWindow": "daily", + "maxValue": 50_000, + "modelFilter": "gpt-5.1", + } + ] + }, + ) + + assert response.status_code == 200 + limit = _only_limit(response.json()) + assert limit["currentValue"] == 500 + assert limit["modelFilter"] == "gpt-5.1" + + +@pytest.mark.asyncio 
+async def test_adding_cost_limit_uses_per_request_truncation( + async_client, + monkeypatch: pytest.MonkeyPatch, +): + key_id = await _create_api_key(async_client, name="cost-truncation-zero-key") + now = datetime(2026, 4, 30, 12, 0, 0) + monkeypatch.setattr("app.modules.api_keys.service.utcnow", lambda: now) + + await _insert_request_logs( + *[ + RequestLog( + api_key_id=key_id, + request_id=f"req-cost-truncation-zero-{index}", + requested_at=now - timedelta(minutes=index), + model="gpt-5.1", + status="success", + input_tokens=1, + output_tokens=1, + cached_input_tokens=0, + cost_usd=0.0000005, + ) + for index in range(1, 101) + ] + ) + + response = await async_client.patch( + f"/api/api-keys/{key_id}", + json={ + "limits": [ + { + "limitType": "cost_usd", + "limitWindow": "daily", + "maxValue": 1_000_000, + } + ] + }, + ) + + assert response.status_code == 200 + limit = _only_limit(response.json()) + assert limit["currentValue"] == 0 + assert limit["maxValue"] == 1_000_000 + + +@pytest.mark.asyncio +async def test_adding_cost_limit_backfills_truncated_microdollars( + async_client, + monkeypatch: pytest.MonkeyPatch, +): + key_id = await _create_api_key(async_client, name="cost-truncation-total-key") + now = datetime(2026, 4, 30, 12, 0, 0) + monkeypatch.setattr("app.modules.api_keys.service.utcnow", lambda: now) + + await _insert_request_logs( + *[ + RequestLog( + api_key_id=key_id, + request_id=f"req-cost-truncation-total-{index}", + requested_at=now - timedelta(minutes=index), + model="gpt-5.1", + status="success", + input_tokens=1, + output_tokens=1, + cached_input_tokens=0, + cost_usd=0.0000015, + ) + for index in range(1, 101) + ] + ) + + response = await async_client.patch( + f"/api/api-keys/{key_id}", + json={ + "limits": [ + { + "limitType": "cost_usd", + "limitWindow": "daily", + "maxValue": 1_000_000, + } + ] + }, + ) + + assert response.status_code == 200 + limit = _only_limit(response.json()) + assert limit["currentValue"] == 100 + assert 
limit["maxValue"] == 1_000_000 + + +@pytest.mark.asyncio +async def test_adding_limit_with_reset_usage_keeps_current_value_zero(async_client, monkeypatch: pytest.MonkeyPatch): + key_id = await _create_api_key(async_client, name="limit-reset-backfill-key") + now = datetime(2026, 4, 30, 12, 0, 0) + monkeypatch.setattr("app.modules.api_keys.service.utcnow", lambda: now) + + await _insert_request_logs( + RequestLog( + api_key_id=key_id, + request_id="req-limit-reset-existing", + requested_at=now - timedelta(hours=2), + model="gpt-5.1", + status="success", + input_tokens=2_000, + output_tokens=500, + cached_input_tokens=0, + cost_usd=0.1, + ) + ) + + response = await async_client.patch( + f"/api/api-keys/{key_id}", + json={ + "resetUsage": True, + "limits": [ + { + "limitType": "total_tokens", + "limitWindow": "daily", + "maxValue": 100_000, + } + ], + }, + ) + + assert response.status_code == 200 + assert _only_limit(response.json())["currentValue"] == 0 + + @pytest.mark.asyncio async def test_trends_returns_hourly_zero_filled_points_with_bucket_aggregation( async_client, diff --git a/tests/integration/test_proxy_compact.py b/tests/integration/test_proxy_compact.py index ae47b8c16..7025402f7 100644 --- a/tests/integration/test_proxy_compact.py +++ b/tests/integration/test_proxy_compact.py @@ -18,6 +18,8 @@ from app.core.utils.time import utcnow from app.db.models import Account, AccountStatus from app.db.session import SessionLocal +from app.modules.api_keys.repository import ApiKeysRepository +from app.modules.api_keys.service import ApiKeyCreateData, ApiKeysService from app.modules.proxy.rate_limit_cache import get_rate_limit_headers_cache from app.modules.usage.repository import AdditionalUsageRepository, UsageRepository @@ -93,6 +95,23 @@ def _session_call_json(session: _JsonSession) -> dict[str, object]: return cast(dict[str, object], session.calls[0]["json"]) +async def _create_api_key( + *, + name: str, + assigned_account_ids: list[str] | None = None, +) -> 
tuple[str, str]: + async with SessionLocal() as session: + service = ApiKeysService(ApiKeysRepository(session)) + created = await service.create_key( + ApiKeyCreateData( + name=name, + allowed_models=None, + assigned_account_ids=assigned_account_ids, + ) + ) + return created.id, created.key + + @pytest.mark.asyncio async def test_proxy_compact_no_accounts(async_client): payload = {"model": "gpt-5.1", "instructions": "hi", "input": []} @@ -271,6 +290,65 @@ async def fake_compact(payload, headers, access_token, account_id): assert response.headers.get("x-codex-credits-balance") == "8.75" +@pytest.mark.asyncio +async def test_proxy_compact_hides_upstream_quota_for_api_key_clients_when_setting_enabled(async_client, monkeypatch): + email = "compact-hidden@example.com" + raw_account_id = "acc_compact_hidden" + auth_json = _make_auth_json(raw_account_id, email) + files = {"auth_json": ("auth.json", json.dumps(auth_json), "application/json")} + response = await async_client.post("/api/accounts/import", files=files) + assert response.status_code == 200 + + expected_account_id = generate_unique_account_id(raw_account_id, email) + now = utcnow() + now_epoch = int(now.replace(tzinfo=timezone.utc).timestamp()) + + async with SessionLocal() as session: + usage_repo = UsageRepository(session) + await usage_repo.add_entry( + account_id=expected_account_id, + used_percent=25.0, + window="primary", + reset_at=now_epoch + 300, + window_minutes=5, + recorded_at=now, + credits_has=True, + credits_unlimited=False, + credits_balance=12.5, + ) + + _, key = await _create_api_key(name="compact-hidden", assigned_account_ids=[expected_account_id]) + + async def fake_compact(payload, headers, access_token, account_id): + return OpenAIResponsePayload.model_validate({"output": []}) + + monkeypatch.setattr(proxy_module, "core_compact_responses", fake_compact) + + settings = await async_client.put( + "/api/settings", + json={ + "apiKeyAuthEnabled": True, + "hideUpstreamQuotaFromApiKeys": True, + 
}, + ) + assert settings.status_code == 200 + + payload = {"model": "gpt-5.1", "instructions": "hi", "input": []} + response = await async_client.post( + "/backend-api/codex/responses/compact", + json=payload, + headers={"Authorization": f"Bearer {key}"}, + ) + assert response.status_code == 200 + assert response.json()["output"] == [] + assert response.headers.get("x-codex-primary-used-percent") is None + assert response.headers.get("x-codex-primary-window-minutes") is None + assert response.headers.get("x-codex-primary-reset-at") is None + assert response.headers.get("x-codex-credits-has-credits") is None + assert response.headers.get("x-codex-credits-unlimited") is None + assert response.headers.get("x-codex-credits-balance") is None + + @pytest.mark.asyncio async def test_proxy_compact_success_preserves_compaction_payload(async_client, monkeypatch): email = "compact-pass-through@example.com" diff --git a/tests/integration/test_settings_api.py b/tests/integration/test_settings_api.py index 6f1b18a89..584c32b4d 100644 --- a/tests/integration/test_settings_api.py +++ b/tests/integration/test_settings_api.py @@ -66,6 +66,7 @@ async def test_settings_api_get_and_update(async_client): assert payload["totpRequiredOnLogin"] is False assert payload["totpConfigured"] is False assert payload["apiKeyAuthEnabled"] is False + assert payload["hideUpstreamQuotaFromApiKeys"] is False assert payload["limitWarmupEnabled"] is False assert payload["limitWarmupWindows"] == "both" assert payload["limitWarmupModel"] == "auto" @@ -98,6 +99,7 @@ async def test_settings_api_get_and_update(async_client): "importWithoutOverwrite": False, "totpRequiredOnLogin": False, "apiKeyAuthEnabled": True, + "hideUpstreamQuotaFromApiKeys": True, "limitWarmupEnabled": True, "limitWarmupWindows": "primary", "limitWarmupModel": "gpt-5.1-codex-mini", @@ -131,6 +133,7 @@ async def test_settings_api_get_and_update(async_client): assert updated["totpRequiredOnLogin"] is False assert updated["totpConfigured"] 
is False assert updated["apiKeyAuthEnabled"] is True + assert updated["hideUpstreamQuotaFromApiKeys"] is True assert updated["limitWarmupEnabled"] is True assert updated["limitWarmupWindows"] == "primary" assert updated["limitWarmupModel"] == "gpt-5.1-codex-mini" @@ -164,6 +167,7 @@ async def test_settings_api_get_and_update(async_client): assert payload["totpRequiredOnLogin"] is False assert payload["totpConfigured"] is False assert payload["apiKeyAuthEnabled"] is True + assert payload["hideUpstreamQuotaFromApiKeys"] is True assert payload["limitWarmupEnabled"] is True assert payload["limitWarmupWindows"] == "primary" assert payload["limitWarmupModel"] == "gpt-5.1-codex-mini" @@ -351,6 +355,7 @@ async def test_settings_api_allows_partial_updates(async_client): assert updated["routingStrategy"] == original["routingStrategy"] assert updated["upstreamProxyRoutingEnabled"] == original["upstreamProxyRoutingEnabled"] assert updated["upstreamProxyDefaultPoolId"] == original["upstreamProxyDefaultPoolId"] + assert updated["hideUpstreamQuotaFromApiKeys"] == original["hideUpstreamQuotaFromApiKeys"] @pytest.mark.asyncio diff --git a/tests/integration/test_settings_audit_changed_fields.py b/tests/integration/test_settings_audit_changed_fields.py index 038eedd93..a696399c1 100644 --- a/tests/integration/test_settings_audit_changed_fields.py +++ b/tests/integration/test_settings_audit_changed_fields.py @@ -65,6 +65,7 @@ def _default_put_body() -> dict[str, Any]: ("importWithoutOverwrite", False, "import_without_overwrite"), ("apiKeyAuthEnabled", True, "api_key_auth_enabled"), ("weeklyPaceWorkingDays", "0,1,2,3,4", "weekly_pace_working_days"), + ("hideUpstreamQuotaFromApiKeys", True, "hide_upstream_quota_from_api_keys"), ], ) @pytest.mark.asyncio diff --git a/tests/integration/test_v1_usage.py b/tests/integration/test_v1_usage.py index 70b0ab926..b454473cd 100644 --- a/tests/integration/test_v1_usage.py +++ b/tests/integration/test_v1_usage.py @@ -18,6 +18,7 @@ async def 
_create_api_key( *, name: str, limits: list[LimitRuleInput] | None = None, + usage_sections: str = "upstream_limits,account_pool_usage", ) -> tuple[str, str]: async with SessionLocal() as session: service = ApiKeysService(ApiKeysRepository(session)) @@ -25,6 +26,7 @@ async def _create_api_key( ApiKeyCreateData( name=name, allowed_models=None, + usage_sections=usage_sections, limits=limits or [], ) ) @@ -290,6 +292,31 @@ async def test_v1_usage_returns_zero_usage_for_key_without_logs(async_client): "total_cost_usd": 0.0, "limits": [], "upstream_limits": [], + "account_pool_usage": { + "primary": None, + "secondary": None, + }, + } + + +@pytest.mark.asyncio +async def test_v1_usage_omits_disabled_account_pool_usage_section(async_client): + _, plain_key = await _create_api_key( + name="no-account-pool-usage", + usage_sections="upstream_limits", + ) + + response = await async_client.get("/v1/usage", headers={"Authorization": f"Bearer {plain_key}"}) + + assert response.status_code == 200 + assert response.json() == { + "request_count": 0, + "total_tokens": 0, + "cached_input_tokens": 0, + "total_cost_usd": 0.0, + "limits": [], + "upstream_limits": [], + "account_pool_usage": None, } @@ -466,6 +493,27 @@ async def test_v1_usage_returns_aggregate_credit_limits_when_upstream_usage_exis assert payload["upstream_limits"][1]["reset_at"].endswith("Z") +@pytest.mark.asyncio +async def test_v1_usage_hides_upstream_limits_for_api_key_clients_when_setting_enabled(async_client): + _, plain_key = await _create_api_key(name="hidden-upstream-aggregate") + now = utcnow() + await _seed_upstream_usage(now=now) + + settings = await async_client.put( + "/api/settings", + json={"hideUpstreamQuotaFromApiKeys": True}, + ) + assert settings.status_code == 200 + + response = await async_client.get("/v1/usage", headers={"Authorization": f"Bearer {plain_key}"}) + + assert response.status_code == 200 + payload = response.json() + assert payload["limits"] == [] + assert payload["upstream_limits"] 
== [] + assert payload["account_pool_usage"] is None + + @pytest.mark.asyncio async def test_v1_usage_returns_api_key_and_upstream_credit_limits_separately(async_client): key_id, plain_key = await _create_api_key( diff --git a/tests/unit/test_api_keys_repository.py b/tests/unit/test_api_keys_repository.py index 880bc4d96..91f6567bc 100644 --- a/tests/unit/test_api_keys_repository.py +++ b/tests/unit/test_api_keys_repository.py @@ -5,8 +5,9 @@ from unittest.mock import AsyncMock import pytest +from sqlalchemy.dialects.postgresql import dialect as postgresql_dialect -from app.db.models import LimitWindow +from app.db.models import LimitType, LimitWindow from app.modules.api_keys.repository import ApiKeyAccountCost, ApiKeysRepository pytestmark = pytest.mark.unit @@ -246,3 +247,41 @@ async def test_returns_totals_and_account_costs_from_single_execute(self) -> Non ), ] session.execute.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_cost_limit_backfill_uses_bigint_cast_for_microdollars() -> None: + session = AsyncMock() + repo = ApiKeysRepository(session) + since = datetime(2026, 5, 1, 0, 0, 0) + until = datetime(2026, 5, 8, 0, 0, 0) + int32_max = 2_147_483_647 + overflow_total = int32_max + 100 + executed_sql: list[str] = [] + + async def _execute(statement): + executed_sql.append( + str( + statement.compile( + dialect=postgresql_dialect(), + compile_kwargs={"literal_binds": True}, + ) + ) + ) + return SimpleNamespace(scalar_one=lambda: overflow_total) + + session.execute.side_effect = _execute + + value = await repo.get_limit_usage_value( + "key_1", + limit_type=LimitType.COST_USD, + since=since, + until=until, + model_filter=None, + ) + + assert value == overflow_total + assert value > int32_max + assert "BIGINT" in executed_sql[0] + assert "sum(CAST(floor(coalesce(request_logs.cost_usd, 0.0) * 1000000) AS BIGINT))" in executed_sql[0] + assert "request_logs.request_kind NOT IN ('warmup', 'limit_warmup')" in executed_sql[0] diff --git 
a/tests/unit/test_api_keys_service.py b/tests/unit/test_api_keys_service.py index 246b4e2b1..86156aac6 100644 --- a/tests/unit/test_api_keys_service.py +++ b/tests/unit/test_api_keys_service.py @@ -27,9 +27,11 @@ ApiKeysRepositoryProtocol, ApiKeysService, ApiKeyUpdateData, + ApiKeyValidationError, LimitRuleInput, _build_api_key_trends, _is_sqlite_database_locked, + _normalize_usage_sections, ) from app.modules.usage.repository import UsageRepository @@ -104,6 +106,18 @@ async def list_all_accounts(self) -> list[Account]: async def list_usage_summary_by_key(self) -> dict[str, ApiKeyUsageSummary]: return {} + async def get_limit_usage_value( + self, + key_id: str, + *, + limit_type: LimitType, + since: datetime, + until: datetime, + model_filter: str | None, + ) -> int: + del key_id, limit_type, since, until, model_filter + return 0 + async def update( self, key_id: str, @@ -115,6 +129,7 @@ async def update( enforced_reasoning_effort: str | None | _Unset = _UNSET, enforced_service_tier: str | None | _Unset = _UNSET, traffic_class: str | _Unset = _UNSET, + usage_sections: str | _Unset = _UNSET, account_assignment_scope_enabled: bool | _Unset = _UNSET, expires_at: datetime | None | _Unset = _UNSET, is_active: bool | _Unset = _UNSET, @@ -134,6 +149,7 @@ async def update( "enforced_reasoning_effort": enforced_reasoning_effort, "enforced_service_tier": enforced_service_tier, "traffic_class": traffic_class, + "usage_sections": usage_sections, "account_assignment_scope_enabled": account_assignment_scope_enabled, "expires_at": expires_at, "is_active": is_active, @@ -969,6 +985,51 @@ async def test_list_keys_falls_back_to_all_accounts_when_key_is_unassigned() -> assert usage_repo.calls == [("primary", None), ("secondary", None)] +@pytest.mark.asyncio +async def test_list_keys_keeps_scoped_empty_pool_when_assignments_are_missing() -> None: + repo = _FakeApiKeysRepository() + repo._accounts = { + "acc-a": Account( + id="acc-a", + chatgpt_account_id=None, + 
email="a@example.com", + plan_type="plus", + access_token_encrypted=b"access-a", + refresh_token_encrypted=b"refresh-a", + id_token_encrypted=b"id-a", + last_refresh=utcnow(), + status=AccountStatus.ACTIVE, + ), + } + usage_repo = _FakeUsageRepository( + primary={"acc-a": _make_usage_history("acc-a", used_percent=25.0)}, + secondary={"acc-a": _make_usage_history("acc-a", used_percent=10.0)}, + ) + service = ApiKeysService(repo, usage_repo) + + created = await service.create_key( + ApiKeyCreateData( + name="scoped-empty", + allowed_models=None, + expires_at=None, + assigned_account_ids=["acc-a"], + ) + ) + repo._account_assignments[created.id] = [] + + account_calls_before_list = len(repo.list_accounts_by_ids_calls) + listed = await service.list_keys() + account_calls_after_list = repo.list_accounts_by_ids_calls[account_calls_before_list:] + + assert listed[0].pooled_credits is not None + assert listed[0].pooled_credits.remaining_percent_primary is None + assert listed[0].pooled_credits.remaining_percent_secondary is None + assert listed[0].pooled_credits.capacity_credits_primary == 0.0 + assert repo.list_all_accounts_calls == 0 + assert account_calls_after_list == [] + assert usage_repo.calls == [] + + @pytest.mark.asyncio async def test_update_key_persists_apply_to_codex_model_flag() -> None: repo = _FakeApiKeysRepository() @@ -2012,3 +2073,111 @@ def test_build_api_key_trends_keeps_aligned_windows_at_168_buckets() -> None: assert trends.cost[-1].t == newest_bucket assert sum(point.v for point in trends.tokens) == pytest.approx(12.0) assert sum(point.v for point in trends.cost) == pytest.approx(0.3) + + +class TestNormalizeUsageSections: + def test_none_returns_default(self) -> None: + assert _normalize_usage_sections(None) == "upstream_limits,account_pool_usage" + + def test_empty_string_returns_empty(self) -> None: + assert _normalize_usage_sections("") == "" + + def test_whitespace_only_returns_empty(self) -> None: + assert _normalize_usage_sections(" ") == "" 
+ + def test_single_valid_section(self) -> None: + assert _normalize_usage_sections("upstream_limits") == "upstream_limits" + + def test_both_sections(self) -> None: + result = _normalize_usage_sections("upstream_limits,account_pool_usage") + sections = set(result.split(",")) + assert sections == {"upstream_limits", "account_pool_usage"} + + def test_spaces_around_commas(self) -> None: + result = _normalize_usage_sections(" upstream_limits , account_pool_usage ") + sections = set(result.split(",")) + assert sections == {"upstream_limits", "account_pool_usage"} + + def test_rejects_invalid_section(self) -> None: + with pytest.raises(ApiKeyValidationError, match="Invalid usage sections"): + _normalize_usage_sections("upstream_limits,invalid_section") + + def test_rejects_completely_invalid(self) -> None: + with pytest.raises(ApiKeyValidationError, match="Invalid usage sections"): + _normalize_usage_sections("garbage") + + +async def test_create_key_stores_usage_sections() -> None: + repo = _FakeApiKeysRepository() + service = ApiKeysService(repo) + created = await service.create_key( + ApiKeyCreateData( + name="my-key", + allowed_models=None, + usage_sections="upstream_limits", + ) + ) + assert created.usage_sections == "upstream_limits" + + +async def test_create_key_stores_empty_usage_sections() -> None: + repo = _FakeApiKeysRepository() + service = ApiKeysService(repo) + created = await service.create_key( + ApiKeyCreateData( + name="my-key", + allowed_models=None, + usage_sections="", + ) + ) + assert created.usage_sections == "" + repo = _FakeApiKeysRepository() + service = ApiKeysService(repo) + created = await service.create_key( + ApiKeyCreateData( + name="my-key", + allowed_models=None, + ) + ) + assert created.usage_sections == "upstream_limits,account_pool_usage" + + +async def test_update_key_usage_sections() -> None: + repo = _FakeApiKeysRepository() + service = ApiKeysService(repo) + now = utcnow() + repo.rows["key-1"] = ApiKey( + id="key-1", + 
name="test", + key_hash="abc", + key_prefix="sk-", + allowed_models=None, + apply_to_codex_model=False, + account_assignment_scope_enabled=False, + usage_sections="upstream_limits,account_pool_usage", + expires_at=None, + is_active=True, + created_at=now, + last_used_at=None, + ) + updated = await service.update_key( + "key-1", + ApiKeyUpdateData( + usage_sections="account_pool_usage", + usage_sections_set=True, + ), + ) + assert updated.usage_sections == "account_pool_usage" + + +async def test_create_key_rejects_invalid_usage_sections() -> None: + repo = _FakeApiKeysRepository() + service = ApiKeysService(repo) + with pytest.raises(ApiKeyValidationError, match="Invalid usage sections"): + await service.create_key( + ApiKeyCreateData( + name="my-key", + allowed_models=None, + usage_sections="bad_section", + ) + ) diff --git a/tests/unit/test_api_keys_validation_error.py b/tests/unit/test_api_keys_validation_error.py index 90a4ca303..8c4df6c1c 100644 --- a/tests/unit/test_api_keys_validation_error.py +++ b/tests/unit/test_api_keys_validation_error.py @@ -60,7 +60,8 @@ def test_limit_input_to_row_raises_typed_validation_error_for_credits_with_model assert "credits" in str(info.value).lower() -def test_build_limit_rows_for_update_raises_typed_validation_error_on_duplicate_rules() -> None: +@pytest.mark.asyncio +async def test_build_limit_rows_for_update_raises_typed_validation_error_on_duplicate_rules() -> None: rule = LimitRuleInput( limit_type=LimitType.TOTAL_TOKENS.value, limit_window=LimitWindow.DAILY.value, @@ -68,7 +69,7 @@ def test_build_limit_rows_for_update_raises_typed_validation_error_on_duplicate_ model_filter=None, ) with pytest.raises(ApiKeyValidationError) as info: - _build_limit_rows_for_update( + await _build_limit_rows_for_update( key_id="key-1", now=__import__("datetime").datetime(2026, 5, 15), submitted_limits=[rule, rule], diff --git a/tests/unit/test_pooled_credits.py b/tests/unit/test_pooled_credits.py index 309102b5c..6fe5f6553 100644 --- 
a/tests/unit/test_pooled_credits.py +++ b/tests/unit/test_pooled_credits.py @@ -124,6 +124,28 @@ def test_no_accounts_returns_null_percents(self) -> None: assert result.remaining_percent_secondary is None assert result.capacity_credits_primary == 0.0 + def test_scoped_empty_assignment_list_returns_empty_pool(self) -> None: + acc_a = _make_account("acc-a", "plus") + acc_b = _make_account("acc-b", "pro") + + result = _compute_pooled_credits( + assigned_account_ids=[], + all_accounts=[acc_a, acc_b], + primary_usage={ + "acc-a": _make_usage("acc-a", "primary", 20.0), + "acc-b": _make_usage("acc-b", "primary", 50.0), + }, + secondary_usage={ + "acc-a": _make_usage("acc-a", "secondary", 10.0, window_minutes=10080), + "acc-b": _make_usage("acc-b", "secondary", 30.0, window_minutes=10080), + }, + account_assignment_scope_enabled=True, + ) + + assert result.remaining_percent_primary is None + assert result.remaining_percent_secondary is None + assert result.capacity_credits_primary == 0.0 + def test_assigned_accounts_without_usage_history_still_count_capacity(self) -> None: acc_a = _make_account("acc-a", "plus") diff --git a/tests/unit/test_proxy_api_usage.py b/tests/unit/test_proxy_api_usage.py new file mode 100644 index 000000000..ce9b70911 --- /dev/null +++ b/tests/unit/test_proxy_api_usage.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from sqlalchemy.ext.asyncio import AsyncSession + +import app.modules.proxy.api as proxy_api_module +from app.db.models import Account, AccountStatus + +pytestmark = pytest.mark.unit + + +def _make_account(account_id: str, plan_type: str = "plus") -> Account: + return Account( + id=account_id, + chatgpt_account_id=f"workspace-{account_id}", + email=f"{account_id}@example.com", + plan_type=plan_type, + access_token_encrypted=b"a", + refresh_token_encrypted=b"b", + id_token_encrypted=b"c", + last_refresh=datetime.now(tz=timezone.utc), + status=AccountStatus.ACTIVE, + 
deactivation_reason=None, + ) + + +@pytest.mark.asyncio +async def test_build_account_pool_usage_limits_latest_queries_to_assigned_accounts(monkeypatch) -> None: + assigned_account_ids = ["acc-a", "acc-b"] + latest_calls: list[tuple[str, list[str] | None]] = [] + session = AsyncSession.__new__(AsyncSession) + + class FakeApiKeysRepository: + def __init__(self, _session: object) -> None: + pass + + async def list_accounts_by_ids(self, account_ids: list[str]) -> list[Account]: + assert account_ids == assigned_account_ids + return [_make_account(account_id) for account_id in account_ids] + + async def list_all_accounts(self) -> list[Account]: + raise AssertionError("scoped account pool usage should not load all accounts") + + class FakeUsageRepository: + def __init__(self, _session: object) -> None: + pass + + async def latest_by_account( + self, + window: str, + *, + account_ids: list[str] | None = None, + ) -> dict[str, object]: + latest_calls.append((window, account_ids)) + return {} + + monkeypatch.setattr("app.modules.api_keys.repository.ApiKeysRepository", FakeApiKeysRepository) + monkeypatch.setattr(proxy_api_module, "UsageRepository", FakeUsageRepository) + + result = await proxy_api_module._build_account_pool_usage( + session=session, + assigned_account_ids=assigned_account_ids, + account_assignment_scope_enabled=True, + ) + + assert latest_calls == [ + ("primary", assigned_account_ids), + ("secondary", assigned_account_ids), + ] + assert result is not None + assert result.primary == pytest.approx(100.0) + assert result.secondary == pytest.approx(100.0) + + +@pytest.mark.asyncio +async def test_build_account_pool_usage_returns_object_when_pool_has_no_capacity(monkeypatch) -> None: + session = AsyncSession.__new__(AsyncSession) + + class FakeApiKeysRepository: + def __init__(self, _session: object) -> None: + pass + + async def list_accounts_by_ids(self, _account_ids: list[str]) -> list[Account]: + raise AssertionError("unscoped account pool usage should not 
load assigned accounts") + + async def list_all_accounts(self) -> list[Account]: + return [] + + class FakeUsageRepository: + def __init__(self, _session: object) -> None: + pass + + async def latest_by_account( + self, + window: str, + *, + account_ids: list[str] | None = None, + ) -> dict[str, object]: + assert account_ids is None + return {} + + monkeypatch.setattr("app.modules.api_keys.repository.ApiKeysRepository", FakeApiKeysRepository) + monkeypatch.setattr(proxy_api_module, "UsageRepository", FakeUsageRepository) + + result = await proxy_api_module._build_account_pool_usage( + session=session, + assigned_account_ids=[], + account_assignment_scope_enabled=False, + ) + + assert result is not None + assert result.primary is None + assert result.secondary is None