Skip to content

Commit c5f891d

Browse files
committed
fix: use actual input modalities, exclude non-chat models, reduce CI noise
- Use OpenRouter's actual input_modalities instead of blindly copying the reference model's modalities (fixes text-only models claiming image support)
- Filter modalities to only include types valid for each provider's interface
- Add blocklist for non-chat model families (lyria, veo, imagen, sora, dall-e, tts)
- Swap field order to supports-before-pricing, matching the existing convention
- CI only creates a PR when package files actually changed (not just openrouter.models.ts)
1 parent 07e63c2 commit c5f891d

File tree

2 files changed

+84
-30
lines changed

2 files changed

+84
-30
lines changed

.github/workflows/sync-models.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ jobs:
2929
- name: Fetch and sync model metadata
3030
run: pnpm generate:models:sync
3131

32-
- name: Check for changes
32+
- name: Check for package changes
3333
id: changes
3434
run: |
35-
if git diff --quiet; then
35+
if git diff --quiet -- packages/; then
3636
echo "changed=false" >> $GITHUB_OUTPUT
3737
else
3838
echo "changed=true" >> $GITHUB_OUTPUT

scripts/sync-provider-models.ts

Lines changed: 82 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ interface ProviderConfig {
3636
providerOptionsTypeName: string
3737
/** Name of the input modalities type map */
3838
inputModalitiesTypeName: string
39-
/** The full supports block to copy for new models */
40-
referenceSupports: string
39+
/** The supports block template (minus input modalities, which come from OpenRouter) */
40+
referenceSupportsBody: string
41+
/** Valid input modality types for this provider's ModelMeta interface */
42+
validInputModalities: Array<InputModality>
4143
/** The satisfies type clause (after 'as const satisfies') */
4244
referenceSatisfies: string
4345
/** The type string for provider options map entries */
@@ -56,13 +58,11 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
5658
chatArrayName: 'OPENAI_CHAT_MODELS',
5759
providerOptionsTypeName: 'OpenAIChatModelProviderOptionsByName',
5860
inputModalitiesTypeName: 'OpenAIModelInputModalitiesByName',
59-
referenceSupports: ` supports: {
60-
input: ['text', 'image'],
61-
output: ['text'],
61+
validInputModalities: ['text', 'image', 'audio', 'video'],
62+
referenceSupportsBody: ` output: ['text'],
6263
endpoints: ['chat', 'chat-completions'],
6364
features: ['streaming', 'function_calling', 'structured_outputs', 'distillation'],
64-
tools: ['web_search', 'file_search', 'image_generation', 'code_interpreter', 'mcp'],
65-
},`,
65+
tools: ['web_search', 'file_search', 'image_generation', 'code_interpreter', 'mcp'],`,
6666
referenceSatisfies:
6767
'ModelMeta<OpenAIBaseOptions & OpenAIReasoningOptions & OpenAIStructuredOutputOptions & OpenAIToolsOptions & OpenAIStreamingOptions & OpenAIMetadataOptions>',
6868
referenceProviderOptionsEntry:
@@ -80,11 +80,9 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
8080
chatArrayName: 'ANTHROPIC_MODELS',
8181
providerOptionsTypeName: 'AnthropicChatModelProviderOptionsByName',
8282
inputModalitiesTypeName: 'AnthropicModelInputModalitiesByName',
83-
referenceSupports: ` supports: {
84-
input: ['text', 'image', 'document'],
85-
extended_thinking: true,
86-
priority_tier: true,
87-
},`,
83+
validInputModalities: ['text', 'image', 'audio', 'video', 'document'],
84+
referenceSupportsBody: ` extended_thinking: true,
85+
priority_tier: true,`,
8886
referenceSatisfies:
8987
'ModelMeta<AnthropicContainerOptions & AnthropicContextManagementOptions & AnthropicMCPOptions & AnthropicServiceTierOptions & AnthropicStopSequencesOptions & AnthropicThinkingOptions & AnthropicToolChoiceOptions & AnthropicSamplingOptions>',
9088
referenceProviderOptionsEntry:
@@ -99,11 +97,9 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
9997
chatArrayName: 'GEMINI_MODELS',
10098
providerOptionsTypeName: 'GeminiChatModelProviderOptionsByName',
10199
inputModalitiesTypeName: 'GeminiModelInputModalitiesByName',
102-
referenceSupports: ` supports: {
103-
input: ['text', 'image', 'audio', 'video', 'document'],
104-
output: ['text'],
105-
capabilities: ['batch_api', 'caching', 'code_execution', 'file_search', 'function_calling', 'search_grounding', 'structured_output', 'thinking', 'url_context'],
106-
},`,
100+
validInputModalities: ['text', 'image', 'audio', 'video', 'document'],
101+
referenceSupportsBody: ` output: ['text'],
102+
capabilities: ['batch_api', 'caching', 'code_execution', 'file_search', 'function_calling', 'search_grounding', 'structured_output', 'thinking', 'url_context'],`,
107103
referenceSatisfies:
108104
'ModelMeta<GeminiToolConfigOptions & GeminiSafetyOptions & GeminiCommonConfigOptions & GeminiCachedContentOptions & GeminiStructuredOutputOptions & GeminiThinkingOptions & GeminiThinkingAdvancedOptions>',
109105
referenceProviderOptionsEntry:
@@ -118,11 +114,9 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
118114
chatArrayName: 'GROK_CHAT_MODELS',
119115
providerOptionsTypeName: 'GrokChatModelProviderOptionsByName',
120116
inputModalitiesTypeName: 'GrokModelInputModalitiesByName',
121-
referenceSupports: ` supports: {
122-
input: ['text', 'image'],
123-
output: ['text'],
124-
capabilities: ['reasoning', 'structured_outputs', 'tool_calling'],
125-
},`,
117+
validInputModalities: ['text', 'image', 'audio', 'video', 'document'],
118+
referenceSupportsBody: ` output: ['text'],
119+
capabilities: ['reasoning', 'structured_outputs', 'tool_calling'],`,
126120
referenceSatisfies: 'ModelMeta',
127121
referenceProviderOptionsEntry: 'GrokProviderOptions',
128122
hasBothNameAndId: false,
@@ -134,6 +128,32 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
134128
// Utility functions
135129
// ---------------------------------------------------------------------------
136130

131+
/** Input modality names emitted into generated `supports.input` arrays. */
type InputModality = 'text' | 'image' | 'audio' | 'video' | 'document'

/**
 * Lookup from lower-cased OpenRouter modality names to our modality types.
 * Both `file` and `document` collapse to `'document'`.
 */
const MODALITY_MAP: Record<string, InputModality> = {
  text: 'text',
  image: 'image',
  audio: 'audio',
  video: 'video',
  file: 'document',
  document: 'document',
}

/**
 * Map OpenRouter input modalities to our standard modality types.
 * The name mapping mirrors the existing convert-openrouter-models.ts script.
 *
 * @param modalities - Raw modality names; matching is case-insensitive.
 * @returns The recognised modalities in first-seen order, without duplicates.
 *   Unknown names are dropped, and `'text'` is prepended when it is absent.
 */
function mapInputModalities(modalities: Array<string>): Array<InputModality> {
  const mapped: Array<InputModality> = []

  for (const raw of modalities) {
    const key = raw.toLowerCase()

    // Own-property check: a bare `MODALITY_MAP[key]` would also resolve
    // inherited members such as 'constructor' or 'toString', letting a
    // function slip through the `undefined` filter.
    if (!Object.prototype.hasOwnProperty.call(MODALITY_MAP, key)) {
      continue
    }

    const modality = MODALITY_MAP[key]

    // 'file' and 'document' both map to 'document'; keep only the first
    // occurrence so the generated array has no repeated entries.
    if (modality !== undefined && !mapped.includes(modality)) {
      mapped.push(modality)
    }
  }

  // Ensure at least 'text' is present
  if (!mapped.includes('text')) {
    mapped.unshift('text')
  }

  return mapped
}
156+
137157
/** Strip the provider prefix from an OpenRouter model ID */
138158
function stripPrefix(prefix: string, modelId: string): string {
139159
return modelId.slice(prefix.length)
@@ -227,6 +247,24 @@ function isImageOnlyModel(model: OpenRouterModel): boolean {
227247
)
228248
}
229249

250+
/**
251+
* Non-chat model family prefixes to exclude from chat model arrays.
252+
* These are audio/music/video/image generation models that happen to
253+
* include 'text' in their output modalities but are not chat models.
254+
*/
255+
const NON_CHAT_MODEL_PREFIXES = [
256+
'lyria-', // Google music generation
257+
'veo-', // Google video generation
258+
'imagen-', // Google image generation
259+
'sora-', // OpenAI video generation
260+
'dall-e-', // OpenAI image generation
261+
'tts-', // Text-to-speech models
262+
]
263+
264+
/**
 * Report whether a model ID starts with any entry of NON_CHAT_MODEL_PREFIXES.
 * The comparison is a case-sensitive `startsWith` prefix test, so an ID that
 * only contains one of the prefixes further along is not matched.
 *
 * @param strippedId - Model ID to test; presumably already stripped of its
 *   provider prefix (verify against callers).
 * @returns `true` when the ID begins with a listed prefix, otherwise `false`.
 */
function isNonChatModel(strippedId: string): boolean {
265+
return NON_CHAT_MODEL_PREFIXES.some((p) => strippedId.startsWith(p))
266+
}
267+
230268
// ---------------------------------------------------------------------------
231269
// Model constant generation
232270
// ---------------------------------------------------------------------------
@@ -243,6 +281,12 @@ function generateModelConstant(
243281
const inputCached = convertPrice(model.pricing.input_cache_read)
244282
const outputNormal = convertPrice(model.pricing.completion)
245283

284+
// Use actual input modalities from OpenRouter data, filtered to what this provider supports
285+
const inputModalities = mapInputModalities(
286+
model.architecture.input_modalities,
287+
).filter((m) => config.validInputModalities.includes(m))
288+
const inputModalitiesStr = inputModalities.map((m) => `'${m}'`).join(', ')
289+
246290
const lines: Array<string> = []
247291
lines.push(`const ${constName} = {`)
248292

@@ -268,6 +312,12 @@ function generateModelConstant(
268312
)
269313
}
270314

315+
// supports block (actual input modalities + reference capabilities)
316+
lines.push(` supports: {`)
317+
lines.push(` input: [${inputModalitiesStr}],`)
318+
lines.push(config.referenceSupportsBody)
319+
lines.push(` },`)
320+
271321
// pricing
272322
lines.push(` pricing: {`)
273323
lines.push(` input: {`)
@@ -281,9 +331,6 @@ function generateModelConstant(
281331
lines.push(` },`)
282332
lines.push(` },`)
283333

284-
// supports block (copied from reference)
285-
lines.push(config.referenceSupports)
286-
287334
lines.push(`} as const satisfies ${config.referenceSatisfies}`)
288335

289336
return lines.join('\n')
@@ -429,6 +476,11 @@ async function main() {
429476
continue
430477
}
431478

479+
// Skip non-chat model families (audio/music/video/image generation)
480+
if (isNonChatModel(strippedId)) {
481+
continue
482+
}
483+
432484
// Normalize for comparison to handle dots-vs-dashes naming differences
433485
if (
434486
!existingIds.has(normalizeId(strippedId)) &&
@@ -472,9 +524,11 @@ async function main() {
472524
// Insert constants before first export
473525
content = insertConstants(content, constants)
474526

475-
// All non-image-only models go into the chat array
476-
// (models that output both text and image are still chat models)
477-
const chatModels = filteredModels.filter(({ model }) => outputsText(model))
527+
// Filter to chat-eligible models: must output text and not be a non-chat model family
528+
const chatModels = filteredModels.filter(
529+
({ model, strippedId }) =>
530+
outputsText(model) && !isNonChatModel(strippedId),
531+
)
478532

479533
if (chatModels.length > 0) {
480534
content = addToArray(

0 commit comments

Comments
 (0)