From cdd3d4f316bef1be6f74e51d776b0248193e2c12 Mon Sep 17 00:00:00 2001 From: "nick.mesen" Date: Sat, 25 Apr 2026 01:50:38 -0600 Subject: [PATCH] When DeepSeek V4 thinking mode is enabled, the API appears to require `reasoning_content` to be present on every assistant message in the conversation history. In the DeepSeek scenarios I reproduced, omitting the property entirely causes a `400` error: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "The `reasoning_content` in the thinking mode must be passed back to the API." The original DeepSeek support introduced in commit `ff2a380` seems to handle the happy path correctly for assistant messages that contain a thinking block, but from the testing I did, it looks like a few edge cases may have been missed: 1. Assistant messages with array-based `content` but no thinking block (for example, when the model calls tools without emitting visible thinking). 2. Assistant messages where `content` is a plain string rather than an array, which appears to bypass the existing `convertMessages()` path. 3. The synthetic `"[Tool execution interrupted by user]"` message injected during coalescing, which is created outside `convertMessages()`. Additionally, `conversationRecovery.ts` was stripping all thinking blocks during deserialization for third-party providers, which seems to prevent the shim from converting them back into `reasoning_content`. Proposed changes in this PR: - `openaiShim.ts`: attach `reasoning_content` to assistant messages when `preserveReasoningContent` is `true`, using `""` when no thinking block is present. - `openaiShim.ts`: add `reasoning_content` to the synthetic interrupt message and to the string-content `else` branch. - `conversationRecovery.ts`: remove `stripThinkingBlocks()` and let the shim handle provider-specific filtering. 
These changes are gated behind `preserveReasoningContent`, which is currently enabled only for DeepSeek and Moonshot, so the expected impact on other providers should be limited. That said, my validation here was focused on DeepSeek, so I’m not fully sure whether there could be secondary effects in other provider paths. Tests: - Updated `conversationRecovery` test - `65` passing, `0` failing --- src/services/api/openaiShim.ts | 38 ++++++++++++-------- src/utils/conversationRecovery.hooks.test.ts | 9 +++-- src/utils/conversationRecovery.ts | 31 +--------------- 3 files changed, 31 insertions(+), 47 deletions(-) diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index 015c0e2e4..fa56e6596 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -517,6 +517,20 @@ function convertMessages( (b: { type?: string }) => b.type !== 'tool_use' && b.type !== 'thinking', ) + // Compute reasoning_content upfront so it is attached as part of the + // object literal rather than via a later conditional assignment; this + // keeps the attach logic in one place and makes the empty-string + // fallback for turns without a thinking block explicit. + // + // DeepSeek requires reasoning_content on EVERY assistant message when + // thinking mode is active, even if the content is empty (e.g. the model + // produced no visible chain-of-thought for that turn). + const reasoningContent = preserveReasoningContent + ? ((thinkingBlock as { thinking?: string } | undefined)?.thinking ?? '') : undefined + const hasThinkingBlock = thinkingBlock !== undefined + const shouldAttachReasoning = preserveReasoningContent + const assistantMsg: OpenAIMessage = { role: 'assistant', content: (() => { @@ -527,21 +541,9 @@ function convertMessages( ? c.map((p: { text?: string }) => p.text ?? 
'').join('') : '' })(), - } - - // Providers that validate reasoning continuity (Moonshot/Kimi Code: "thinking - // is enabled but reasoning_content is missing in assistant tool call - // message at index N" 400) need the original chain-of-thought echoed - // back on each assistant message that carries a tool_call. We kept - // the thinking block on the Anthropic side; re-attach it here as the - // `reasoning_content` field on the outgoing OpenAI-shaped message. - // Gated per-provider because other endpoints either ignore the field - // (harmless) or strict-reject unknown fields (harmful). - if (preserveReasoningContent) { - const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking - if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) { - assistantMsg.reasoning_content = thinkingText - } + ...(shouldAttachReasoning && { + reasoning_content: reasoningContent, + }), } if (toolUses.length > 0) { @@ -633,6 +635,9 @@ function convertMessages( ? c.map((p: { text?: string }) => p.text ?? 
'').join('') : '' })(), + ...(preserveReasoningContent && { + reasoning_content: '', + }), } if (assistantMsg.content) { @@ -659,6 +664,9 @@ function convertMessages( coalesced.push({ role: 'assistant', content: '[Tool execution interrupted by user]', + ...(preserveReasoningContent && { + reasoning_content: '', + }), }) } diff --git a/src/utils/conversationRecovery.hooks.test.ts b/src/utils/conversationRecovery.hooks.test.ts index b19ae2559..d9284ede8 100644 --- a/src/utils/conversationRecovery.hooks.test.ts +++ b/src/utils/conversationRecovery.hooks.test.ts @@ -70,7 +70,7 @@ test('loadConversationForResume rejects oversized transcripts before resume hook expect(hookSpy).not.toHaveBeenCalled() }) -test('deserializeMessagesWithInterruptDetection strips thinking blocks only for OpenAI-compatible providers', async () => { +test('deserializeMessagesWithInterruptDetection preserves thinking blocks for all providers (shim handles provider-specific filtering)', async () => { const serializedMessages = [ user(id(10), 'hello'), { @@ -120,13 +120,18 @@ test('deserializeMessagesWithInterruptDetection strips thinking blocks only for message => message.type === 'assistant', ) + // The first assistant message keeps its thinking block (the OpenAI shim will + // convert it to reasoning_content for DeepSeek/Moonshot as needed). + // The second assistant message is thinking-only and orphaned, so it is removed + // by filterOrphanedThinkingOnlyMessages — not by provider-specific stripping. 
expect(thirdPartyAssistantMessages).toHaveLength(2) expect(thirdPartyAssistantMessages[0]?.message?.content).toEqual([ + { type: 'thinking', thinking: 'secret reasoning' }, { type: 'text', text: 'visible reply' }, ]) expect( JSON.stringify(thirdPartyAssistantMessages.map(message => message.message?.content)), - ).not.toContain('secret reasoning') + ).toContain('secret reasoning') expect( JSON.stringify(thirdPartyAssistantMessages.map(message => message.message?.content)), ).not.toContain('only hidden reasoning') diff --git a/src/utils/conversationRecovery.ts b/src/utils/conversationRecovery.ts index 3d4ad44bc..773490f1e 100644 --- a/src/utils/conversationRecovery.ts +++ b/src/utils/conversationRecovery.ts @@ -24,7 +24,6 @@ import { type FileHistorySnapshot, } from './fileHistory.js' import { logError } from './log.js' -import { getAPIProvider } from './model/providers.js' import { createAssistantMessage, createUserMessage, @@ -178,25 +177,6 @@ export type DeserializeResult = { turnInterruptionState: TurnInterruptionState } -/** - * Remove thinking/redacted_thinking content blocks from assistant messages. - * Messages that become empty after stripping are removed entirely. - */ -function stripThinkingBlocks(messages: NormalizedMessage[]): NormalizedMessage[] { - return messages.reduce((acc, msg) => { - if (msg.type !== 'assistant' || !Array.isArray(msg.message?.content)) { - acc.push(msg) - return acc - } - const filtered = msg.message.content.filter( - (block: { type?: string }) => block.type !== 'thinking' && block.type !== 'redacted_thinking', - ) - if (filtered.length === 0) return acc - acc.push({ ...msg, message: { ...msg.message, content: filtered } }) - return acc - }, []) -} - /** * Deserializes messages from a log file into the format expected by the REPL. 
* Filters unresolved tool uses, orphaned thinking messages, and appends a @@ -247,19 +227,10 @@ export function deserializeMessagesWithInterruptDetection( filteredToolUses, ) as NormalizedMessage[] - // Strip thinking/redacted_thinking content blocks from assistant messages - // when resuming against a 3P provider. These Anthropic-specific blocks cause - // 400 errors or context corruption on OpenAI-compatible providers (issue #248 finding 5). - const provider = getAPIProvider() - const isThirdPartyProvider = provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex' && provider !== 'foundry' - const thinkingStripped = isThirdPartyProvider - ? stripThinkingBlocks(filteredThinking) - : filteredThinking - // Filter out assistant messages with only whitespace text content. // This can happen when model outputs "\n\n" before thinking, user cancels mid-stream. const filteredMessages = filterWhitespaceOnlyAssistantMessages( - thinkingStripped, + filteredThinking, ) as NormalizedMessage[] const internalState = detectTurnInterruption(filteredMessages)