From cdd3d4f316bef1be6f74e51d776b0248193e2c12 Mon Sep 17 00:00:00 2001 From: "nick.mesen" Date: Sat, 25 Apr 2026 01:50:38 -0600 Subject: [PATCH] When DeepSeek V4 thinking mode is enabled, the API appears to require `reasoning_content` to be present on every assistant message in the conversation history. In the DeepSeek scenarios I reproduced, omitting the property entirely causes a `400` error: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "The `reasoning_content` in the thinking mode must be passed back to the API." The original DeepSeek support introduced in commit `ff2a380` seems to handle the happy path correctly for assistant messages that contain a thinking block, but from the testing I did, it looks like a few edge cases may have been missed: 1. Assistant messages with array-based `content` but no thinking block (for example, when the model calls tools without emitting visible thinking). 2. Assistant messages where `content` is a plain string rather than an array, which appears to bypass the existing `convertMessages()` path. 3. The synthetic `"[Tool execution interrupted by user]"` message injected during coalescing, which is created outside `convertMessages()`. Additionally, `conversationRecovery.ts` was stripping all thinking blocks during deserialization for third-party providers, which seems to prevent the shim from converting them back into `reasoning_content`. Proposed changes in this PR: - `openaiShim.ts`: attach `reasoning_content` to assistant messages when `preserveReasoningContent` is `true`, using `""` when no thinking block is present. - `openaiShim.ts`: add `reasoning_content` to the synthetic interrupt message and to the string-content `else` branch. - `conversationRecovery.ts`: remove `stripThinkingBlocks()` and let the shim handle provider-specific filtering. 
These changes are gated behind `preserveReasoningContent`, which is currently enabled only for DeepSeek and Moonshot, so the expected impact on other providers should be limited. That said, my validation here was focused on DeepSeek, so I’m not fully sure whether there could be secondary effects in other provider paths. Tests: - Updated `conversationRecovery` test - `65` passing, `0` failing --- src/services/api/openaiShim.ts | 38 ++++++++++++-------- src/utils/conversationRecovery.hooks.test.ts | 9 +++-- src/utils/conversationRecovery.ts | 31 +--------------- 3 files changed, 31 insertions(+), 47 deletions(-) diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index 015c0e2e4..fa56e6596 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -517,6 +517,20 @@ function convertMessages( (b: { type?: string }) => b.type !== 'tool_use' && b.type !== 'thinking', ) + // Compute reasoning_content upfront so it is attached as part of the + // object literal rather than via a later conditional assignment; this + // keeps the attach logic in one place and makes the empty-string + // fallback for turns without a thinking block explicit. + // + // DeepSeek requires reasoning_content on EVERY assistant message when + // thinking mode is active, even if the content is empty (e.g. the model + // produced no visible chain-of-thought for that turn). + const reasoningContent = preserveReasoningContent + ? ((thinkingBlock as { thinking?: string } | undefined)?.thinking ?? '') : undefined + const hasThinkingBlock = thinkingBlock !== undefined + const shouldAttachReasoning = preserveReasoningContent + const assistantMsg: OpenAIMessage = { role: 'assistant', content: (() => { @@ -527,21 +541,9 @@ function convertMessages( ? c.map((p: { text?: string }) => p.text ?? 
'').join('') : '' })(), - } - - // Providers that validate reasoning continuity (Moonshot/Kimi Code: "thinking - // is enabled but reasoning_content is missing in assistant tool call - // message at index N" 400) need the original chain-of-thought echoed - // back on each assistant message that carries a tool_call. We kept - // the thinking block on the Anthropic side; re-attach it here as the - // `reasoning_content` field on the outgoing OpenAI-shaped message. - // Gated per-provider because other endpoints either ignore the field - // (harmless) or strict-reject unknown fields (harmful). - if (preserveReasoningContent) { - const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking - if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) { - assistantMsg.reasoning_content = thinkingText - } + ...(shouldAttachReasoning && { + reasoning_content: reasoningContent, + }), } if (toolUses.length > 0) { @@ -633,6 +635,9 @@ function convertMessages( ? c.map((p: { text?: string }) => p.text ?? 
'').join('') : '' })(), + ...(preserveReasoningContent && { + reasoning_content: '', + }), } if (assistantMsg.content) { @@ -659,6 +664,9 @@ function convertMessages( coalesced.push({ role: 'assistant', content: '[Tool execution interrupted by user]', + ...(preserveReasoningContent && { + reasoning_content: '', + }), }) } diff --git a/src/utils/conversationRecovery.hooks.test.ts b/src/utils/conversationRecovery.hooks.test.ts index b19ae2559..d9284ede8 100644 --- a/src/utils/conversationRecovery.hooks.test.ts +++ b/src/utils/conversationRecovery.hooks.test.ts @@ -70,7 +70,7 @@ test('loadConversationForResume rejects oversized transcripts before resume hook expect(hookSpy).not.toHaveBeenCalled() }) -test('deserializeMessagesWithInterruptDetection strips thinking blocks only for OpenAI-compatible providers', async () => { +test('deserializeMessagesWithInterruptDetection preserves thinking blocks for all providers (shim handles provider-specific filtering)', async () => { const serializedMessages = [ user(id(10), 'hello'), { @@ -120,13 +120,18 @@ test('deserializeMessagesWithInterruptDetection strips thinking blocks only for message => message.type === 'assistant', ) + // The first assistant message keeps its thinking block (the OpenAI shim will + // convert it to reasoning_content for DeepSeek/Moonshot as needed). + // The second assistant message is thinking-only and orphaned, so it is removed + // by filterOrphanedThinkingOnlyMessages — not by provider-specific stripping. 
expect(thirdPartyAssistantMessages).toHaveLength(2) expect(thirdPartyAssistantMessages[0]?.message?.content).toEqual([ + { type: 'thinking', thinking: 'secret reasoning' }, { type: 'text', text: 'visible reply' }, ]) expect( JSON.stringify(thirdPartyAssistantMessages.map(message => message.message?.content)), - ).not.toContain('secret reasoning') + ).toContain('secret reasoning') expect( JSON.stringify(thirdPartyAssistantMessages.map(message => message.message?.content)), ).not.toContain('only hidden reasoning') diff --git a/src/utils/conversationRecovery.ts b/src/utils/conversationRecovery.ts index 3d4ad44bc..773490f1e 100644 --- a/src/utils/conversationRecovery.ts +++ b/src/utils/conversationRecovery.ts @@ -24,7 +24,6 @@ import { type FileHistorySnapshot, } from './fileHistory.js' import { logError } from './log.js' -import { getAPIProvider } from './model/providers.js' import { createAssistantMessage, createUserMessage, @@ -178,25 +177,6 @@ export type DeserializeResult = { turnInterruptionState: TurnInterruptionState } -/** - * Remove thinking/redacted_thinking content blocks from assistant messages. - * Messages that become empty after stripping are removed entirely. - */ -function stripThinkingBlocks(messages: NormalizedMessage[]): NormalizedMessage[] { - return messages.reduce((acc, msg) => { - if (msg.type !== 'assistant' || !Array.isArray(msg.message?.content)) { - acc.push(msg) - return acc - } - const filtered = msg.message.content.filter( - (block: { type?: string }) => block.type !== 'thinking' && block.type !== 'redacted_thinking', - ) - if (filtered.length === 0) return acc - acc.push({ ...msg, message: { ...msg.message, content: filtered } }) - return acc - }, []) -} - /** * Deserializes messages from a log file into the format expected by the REPL. 
* Filters unresolved tool uses, orphaned thinking messages, and appends a @@ -247,19 +227,10 @@ export function deserializeMessagesWithInterruptDetection( filteredToolUses, ) as NormalizedMessage[] - // Strip thinking/redacted_thinking content blocks from assistant messages - // when resuming against a 3P provider. These Anthropic-specific blocks cause - // 400 errors or context corruption on OpenAI-compatible providers (issue #248 finding 5). - const provider = getAPIProvider() - const isThirdPartyProvider = provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex' && provider !== 'foundry' - const thinkingStripped = isThirdPartyProvider - ? stripThinkingBlocks(filteredThinking) - : filteredThinking - // Filter out assistant messages with only whitespace text content. // This can happen when model outputs "\n\n" before thinking, user cancels mid-stream. const filteredMessages = filterWhitespaceOnlyAssistantMessages( - thinkingStripped, + filteredThinking, ) as NormalizedMessage[] const internalState = detectTurnInterruption(filteredMessages)