diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index 015c0e2e4..fa56e6596 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -517,6 +517,20 @@ function convertMessages( (b: { type?: string }) => b.type !== 'tool_use' && b.type !== 'thinking', ) + // Compute reasoning_content upfront so it is part of the object literal + // rather than a dynamic assignment. Some bundler/runtime paths may drop + // dynamically-added properties when the object flows through multiple + // transforms before JSON.stringify. + // + // DeepSeek requires reasoning_content on EVERY assistant message when + // thinking mode is active, even if the content is empty (e.g. the model + // produced no visible chain-of-thought for that turn). + const reasoningContent = preserveReasoningContent + ? ((thinkingBlock as { thinking?: string } | undefined)?.thinking ?? '') + : undefined + const hasThinkingBlock = thinkingBlock !== undefined + const shouldAttachReasoning = preserveReasoningContent + const assistantMsg: OpenAIMessage = { role: 'assistant', content: (() => { @@ -527,21 +541,9 @@ function convertMessages( ? c.map((p: { text?: string }) => p.text ?? '').join('') : '' })(), - } - - // Providers that validate reasoning continuity (Moonshot/Kimi Code: "thinking - // is enabled but reasoning_content is missing in assistant tool call - // message at index N" 400) need the original chain-of-thought echoed - // back on each assistant message that carries a tool_call. We kept - // the thinking block on the Anthropic side; re-attach it here as the - // `reasoning_content` field on the outgoing OpenAI-shaped message. - // Gated per-provider because other endpoints either ignore the field - // (harmless) or strict-reject unknown fields (harmful). - if (preserveReasoningContent) { - const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking - if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) { - assistantMsg.reasoning_content = thinkingText - } + ...(shouldAttachReasoning && { + reasoning_content: reasoningContent, + }), } if (toolUses.length > 0) { @@ -633,6 +635,9 @@ function convertMessages( ? c.map((p: { text?: string }) => p.text ?? '').join('') : '' })(), + ...(preserveReasoningContent && { + reasoning_content: '', + }), } if (assistantMsg.content) { @@ -659,6 +664,9 @@ function convertMessages( coalesced.push({ role: 'assistant', content: '[Tool execution interrupted by user]', + ...(preserveReasoningContent && { + reasoning_content: '', + }), }) } diff --git a/src/utils/conversationRecovery.hooks.test.ts b/src/utils/conversationRecovery.hooks.test.ts index b19ae2559..d9284ede8 100644 --- a/src/utils/conversationRecovery.hooks.test.ts +++ b/src/utils/conversationRecovery.hooks.test.ts @@ -70,7 +70,7 @@ test('loadConversationForResume rejects oversized transcripts before resume hook expect(hookSpy).not.toHaveBeenCalled() }) -test('deserializeMessagesWithInterruptDetection strips thinking blocks only for OpenAI-compatible providers', async () => { +test('deserializeMessagesWithInterruptDetection preserves thinking blocks for all providers (shim handles provider-specific filtering)', async () => { const serializedMessages = [ user(id(10), 'hello'), { @@ -120,13 +120,18 @@ test('deserializeMessagesWithInterruptDetection strips thinking blocks only for message => message.type === 'assistant', ) + // The first assistant message keeps its thinking block (the OpenAI shim will + // convert it to reasoning_content for DeepSeek/Moonshot as needed). + // The second assistant message is thinking-only and orphaned, so it is removed + // by filterOrphanedThinkingOnlyMessages — not by provider-specific stripping. expect(thirdPartyAssistantMessages).toHaveLength(2) expect(thirdPartyAssistantMessages[0]?.message?.content).toEqual([ + { type: 'thinking', thinking: 'secret reasoning' }, { type: 'text', text: 'visible reply' }, ]) expect( JSON.stringify(thirdPartyAssistantMessages.map(message => message.message?.content)), - ).not.toContain('secret reasoning') + ).toContain('secret reasoning') expect( JSON.stringify(thirdPartyAssistantMessages.map(message => message.message?.content)), ).not.toContain('only hidden reasoning') diff --git a/src/utils/conversationRecovery.ts b/src/utils/conversationRecovery.ts index 3d4ad44bc..773490f1e 100644 --- a/src/utils/conversationRecovery.ts +++ b/src/utils/conversationRecovery.ts @@ -24,7 +24,6 @@ import { type FileHistorySnapshot, } from './fileHistory.js' import { logError } from './log.js' -import { getAPIProvider } from './model/providers.js' import { createAssistantMessage, createUserMessage, @@ -178,25 +177,6 @@ export type DeserializeResult = { turnInterruptionState: TurnInterruptionState } -/** - * Remove thinking/redacted_thinking content blocks from assistant messages. - * Messages that become empty after stripping are removed entirely. - */ -function stripThinkingBlocks(messages: NormalizedMessage[]): NormalizedMessage[] { - return messages.reduce((acc, msg) => { - if (msg.type !== 'assistant' || !Array.isArray(msg.message?.content)) { - acc.push(msg) - return acc - } - const filtered = msg.message.content.filter( - (block: { type?: string }) => block.type !== 'thinking' && block.type !== 'redacted_thinking', - ) - if (filtered.length === 0) return acc - acc.push({ ...msg, message: { ...msg.message, content: filtered } }) - return acc - }, []) -} - /** * Deserializes messages from a log file into the format expected by the REPL. * Filters unresolved tool uses, orphaned thinking messages, and appends a @@ -247,19 +227,10 @@ export function deserializeMessagesWithInterruptDetection( filteredToolUses, ) as NormalizedMessage[] - // Strip thinking/redacted_thinking content blocks from assistant messages - // when resuming against a 3P provider. These Anthropic-specific blocks cause - // 400 errors or context corruption on OpenAI-compatible providers (issue #248 finding 5). - const provider = getAPIProvider() - const isThirdPartyProvider = provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex' && provider !== 'foundry' - const thinkingStripped = isThirdPartyProvider - ? stripThinkingBlocks(filteredThinking) - : filteredThinking - // Filter out assistant messages with only whitespace text content. // This can happen when model outputs "\n\n" before thinking, user cancels mid-stream. const filteredMessages = filterWhitespaceOnlyAssistantMessages( - thinkingStripped, + filteredThinking, ) as NormalizedMessage[] const internalState = detectTurnInterruption(filteredMessages)