@@ -36,8 +36,10 @@ interface ProviderConfig {
3636 providerOptionsTypeName : string
3737 /** Name of the input modalities type map */
3838 inputModalitiesTypeName : string
39- /** The full supports block to copy for new models */
40- referenceSupports : string
39+ /** The supports block template (minus input modalities, which come from OpenRouter) */
40+ referenceSupportsBody : string
41+ /** Valid input modality types for this provider's ModelMeta interface */
42+ validInputModalities : Array < InputModality >
4143 /** The satisfies type clause (after 'as const satisfies') */
4244 referenceSatisfies : string
4345 /** The type string for provider options map entries */
@@ -56,13 +58,11 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
5658 chatArrayName : 'OPENAI_CHAT_MODELS' ,
5759 providerOptionsTypeName : 'OpenAIChatModelProviderOptionsByName' ,
5860 inputModalitiesTypeName : 'OpenAIModelInputModalitiesByName' ,
59- referenceSupports : ` supports: {
60- input: ['text', 'image'],
61- output: ['text'],
61+ validInputModalities : [ 'text' , 'image' , 'audio' , 'video' ] ,
62+ referenceSupportsBody : ` output: ['text'],
6263 endpoints: ['chat', 'chat-completions'],
6364 features: ['streaming', 'function_calling', 'structured_outputs', 'distillation'],
64- tools: ['web_search', 'file_search', 'image_generation', 'code_interpreter', 'mcp'],
65- },` ,
65+ tools: ['web_search', 'file_search', 'image_generation', 'code_interpreter', 'mcp'],` ,
6666 referenceSatisfies :
6767 'ModelMeta<OpenAIBaseOptions & OpenAIReasoningOptions & OpenAIStructuredOutputOptions & OpenAIToolsOptions & OpenAIStreamingOptions & OpenAIMetadataOptions>' ,
6868 referenceProviderOptionsEntry :
@@ -80,11 +80,9 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
8080 chatArrayName : 'ANTHROPIC_MODELS' ,
8181 providerOptionsTypeName : 'AnthropicChatModelProviderOptionsByName' ,
8282 inputModalitiesTypeName : 'AnthropicModelInputModalitiesByName' ,
83- referenceSupports : ` supports: {
84- input: ['text', 'image', 'document'],
85- extended_thinking: true,
86- priority_tier: true,
87- },` ,
83+ validInputModalities : [ 'text' , 'image' , 'audio' , 'video' , 'document' ] ,
84+ referenceSupportsBody : ` extended_thinking: true,
85+ priority_tier: true,` ,
8886 referenceSatisfies :
8987 'ModelMeta<AnthropicContainerOptions & AnthropicContextManagementOptions & AnthropicMCPOptions & AnthropicServiceTierOptions & AnthropicStopSequencesOptions & AnthropicThinkingOptions & AnthropicToolChoiceOptions & AnthropicSamplingOptions>' ,
9088 referenceProviderOptionsEntry :
@@ -99,11 +97,9 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
9997 chatArrayName : 'GEMINI_MODELS' ,
10098 providerOptionsTypeName : 'GeminiChatModelProviderOptionsByName' ,
10199 inputModalitiesTypeName : 'GeminiModelInputModalitiesByName' ,
102- referenceSupports : ` supports: {
103- input: ['text', 'image', 'audio', 'video', 'document'],
104- output: ['text'],
105- capabilities: ['batch_api', 'caching', 'code_execution', 'file_search', 'function_calling', 'search_grounding', 'structured_output', 'thinking', 'url_context'],
106- },` ,
100+ validInputModalities : [ 'text' , 'image' , 'audio' , 'video' , 'document' ] ,
101+ referenceSupportsBody : ` output: ['text'],
102+ capabilities: ['batch_api', 'caching', 'code_execution', 'file_search', 'function_calling', 'search_grounding', 'structured_output', 'thinking', 'url_context'],` ,
107103 referenceSatisfies :
108104 'ModelMeta<GeminiToolConfigOptions & GeminiSafetyOptions & GeminiCommonConfigOptions & GeminiCachedContentOptions & GeminiStructuredOutputOptions & GeminiThinkingOptions & GeminiThinkingAdvancedOptions>' ,
109105 referenceProviderOptionsEntry :
@@ -118,11 +114,9 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
118114 chatArrayName : 'GROK_CHAT_MODELS' ,
119115 providerOptionsTypeName : 'GrokChatModelProviderOptionsByName' ,
120116 inputModalitiesTypeName : 'GrokModelInputModalitiesByName' ,
121- referenceSupports : ` supports: {
122- input: ['text', 'image'],
123- output: ['text'],
124- capabilities: ['reasoning', 'structured_outputs', 'tool_calling'],
125- },` ,
117+ validInputModalities : [ 'text' , 'image' , 'audio' , 'video' , 'document' ] ,
118+ referenceSupportsBody : ` output: ['text'],
119+ capabilities: ['reasoning', 'structured_outputs', 'tool_calling'],` ,
126120 referenceSatisfies : 'ModelMeta' ,
127121 referenceProviderOptionsEntry : 'GrokProviderOptions' ,
128122 hasBothNameAndId : false ,
@@ -134,6 +128,32 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
134128// Utility functions
135129// ---------------------------------------------------------------------------
136130
/** Input modality names emitted into generated model metadata. */
type InputModality = 'text' | 'image' | 'audio' | 'video' | 'document'

/**
 * Lookup from lower-cased OpenRouter modality names to our modality names.
 * Both 'file' and 'document' resolve to 'document'.
 */
const MODALITY_MAP: Record<string, InputModality> = {
  text: 'text',
  image: 'image',
  audio: 'audio',
  video: 'video',
  file: 'document',
  document: 'document',
}

/**
 * Map OpenRouter input modalities to our standard modality types.
 * The name mapping is the one used by the existing
 * convert-openrouter-models.ts script (per the original note here).
 *
 * Matching is case-insensitive. Unknown names are dropped, repeated results
 * are collapsed to their first occurrence, and 'text' is prepended when the
 * mapped list does not already contain it.
 *
 * @param modalities Raw modality names as reported by OpenRouter.
 * @returns Unique mapped modalities in first-seen order, always including 'text'.
 */
function mapInputModalities(modalities: Array<string>): Array<InputModality> {
  // A Set keeps insertion order while removing duplicates, e.g. when both
  // 'file' and 'document' are present and map to the same value.
  const seen = new Set<InputModality>()
  for (const raw of modalities) {
    const key = raw.toLowerCase()
    // Own-property check: a bare object lookup would resolve inherited names
    // such as 'constructor' to non-modality values that are not undefined.
    if (!Object.prototype.hasOwnProperty.call(MODALITY_MAP, key)) {
      continue
    }
    const modality = MODALITY_MAP[key]
    if (modality !== undefined) {
      seen.add(modality)
    }
  }

  const mapped = [...seen]
  // Ensure at least 'text' is present
  if (!mapped.includes('text')) {
    mapped.unshift('text')
  }
  return mapped
}
156+
137157/** Strip the provider prefix from an OpenRouter model ID */
138158function stripPrefix ( prefix : string , modelId : string ) : string {
139159 return modelId . slice ( prefix . length )
@@ -227,6 +247,24 @@ function isImageOnlyModel(model: OpenRouterModel): boolean {
227247 )
228248}
229249
/**
 * Prefixes of model families that must stay out of the chat model arrays.
 * They cover audio, music, video and image generation models which list
 * 'text' among their output modalities without being chat models.
 */
const NON_CHAT_MODEL_PREFIXES = [
  // Google
  'lyria-', // music generation
  'veo-', // video generation
  'imagen-', // image generation
  // OpenAI
  'sora-', // video generation
  'dall-e-', // image generation
  // Text-to-speech
  'tts-',
]

/**
 * Report whether a model ID (provider prefix already stripped) starts with
 * one of the non-chat family prefixes. The comparison is case-sensitive.
 */
function isNonChatModel(strippedId: string): boolean {
  for (const familyPrefix of NON_CHAT_MODEL_PREFIXES) {
    if (strippedId.startsWith(familyPrefix)) {
      return true
    }
  }
  return false
}
267+
230268// ---------------------------------------------------------------------------
231269// Model constant generation
232270// ---------------------------------------------------------------------------
@@ -243,6 +281,12 @@ function generateModelConstant(
243281 const inputCached = convertPrice ( model . pricing . input_cache_read )
244282 const outputNormal = convertPrice ( model . pricing . completion )
245283
284+ // Use actual input modalities from OpenRouter data, filtered to what this provider supports
285+ const inputModalities = mapInputModalities (
286+ model . architecture . input_modalities ,
287+ ) . filter ( ( m ) => config . validInputModalities . includes ( m ) )
288+ const inputModalitiesStr = inputModalities . map ( ( m ) => `'${ m } '` ) . join ( ', ' )
289+
246290 const lines : Array < string > = [ ]
247291 lines . push ( `const ${ constName } = {` )
248292
@@ -268,6 +312,12 @@ function generateModelConstant(
268312 )
269313 }
270314
315+ // supports block (actual input modalities + reference capabilities)
316+ lines . push ( ` supports: {` )
317+ lines . push ( ` input: [${ inputModalitiesStr } ],` )
318+ lines . push ( config . referenceSupportsBody )
319+ lines . push ( ` },` )
320+
271321 // pricing
272322 lines . push ( ` pricing: {` )
273323 lines . push ( ` input: {` )
@@ -281,9 +331,6 @@ function generateModelConstant(
281331 lines . push ( ` },` )
282332 lines . push ( ` },` )
283333
284- // supports block (copied from reference)
285- lines . push ( config . referenceSupports )
286-
287334 lines . push ( `} as const satisfies ${ config . referenceSatisfies } ` )
288335
289336 return lines . join ( '\n' )
@@ -429,6 +476,11 @@ async function main() {
429476 continue
430477 }
431478
479+ // Skip non-chat model families (audio/music/video/image generation)
480+ if ( isNonChatModel ( strippedId ) ) {
481+ continue
482+ }
483+
432484 // Normalize for comparison to handle dots-vs-dashes naming differences
433485 if (
434486 ! existingIds . has ( normalizeId ( strippedId ) ) &&
@@ -472,9 +524,11 @@ async function main() {
472524 // Insert constants before first export
473525 content = insertConstants ( content , constants )
474526
475- // All non-image-only models go into the chat array
476- // (models that output both text and image are still chat models)
477- const chatModels = filteredModels . filter ( ( { model } ) => outputsText ( model ) )
527+ // Filter to chat-eligible models: must output text and not be a non-chat model family
528+ const chatModels = filteredModels . filter (
529+ ( { model, strippedId } ) =>
530+ outputsText ( model ) && ! isNonChatModel ( strippedId ) ,
531+ )
478532
479533 if ( chatModels . length > 0 ) {
480534 content = addToArray (
0 commit comments