QwenLM · Jerry2003826 · May 25, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/packages/core/src/core/openaiContentGenerator/converter.test.ts b/packages/core/src/core/openaiContentGenerator/converter.test.ts
@@ -1812,6 +1812,89 @@ describe('OpenAIContentConverter', () => {
 
       expect(response.candidates).toEqual([]);
     });
+
+    it('maps DeepSeek prompt cache hit tokens into cached content token count', () => {
+      const response = converter.convertOpenAIResponseToGemini(
+        {
+          object: 'chat.completion',
+          id: 'chatcmpl-deepseek-cache',
+          created: 123,
+          model: 'deepseek-v4-pro',
+          choices: [],
+          usage: {
+            prompt_tokens: 21225,
+            completion_tokens: 45,
+            total_tokens: 21270,
+            prompt_cache_hit_tokens: 21120,
+          },
+        } as unknown as OpenAI.Chat.ChatCompletion,
+        requestContext,
+      );
+
+      expect(response.usageMetadata).toEqual(
+        expect.objectContaining({
+          promptTokenCount: 21225,
+          candidatesTokenCount: 45,
+          totalTokenCount: 21270,
+          cachedContentTokenCount: 21120,
+        }),
+      );
+    });
+
+    it('prefers standard cached token fields over provider-specific fallbacks', () => {
+      const response = converter.convertOpenAIResponseToGemini(
+        {
+          object: 'chat.completion',
+          id: 'chatcmpl-cache-precedence',
+          created: 123,
+          model: 'deepseek-v4-pro',
+          choices: [],
+          usage: {
+            prompt_tokens: 400,
+            completion_tokens: 20,
+            total_tokens: 420,
+            prompt_tokens_details: { cached_tokens: 100 },
+            cached_tokens: 200,
+            prompt_cache_hit_tokens: 300,
+          },
+        } as unknown as OpenAI.Chat.ChatCompletion,
+        requestContext,
+      );
+
+      expect(response.usageMetadata).toEqual(
+        expect.objectContaining({
+          cachedContentTokenCount: 100,
+        }),
+      );
+    });
+
+    it('maps DeepSeek prompt cache hit tokens from streaming chunks', () => {
+      const chunk = converter.convertOpenAIChunkToGemini(
+        {
+          object: 'chat.completion.chunk',
+          id: 'chunk-deepseek-cache',
+          created: 456,
+          choices: [],
+          model: 'deepseek-v4-pro',
+          usage: {
+            prompt_tokens: 21225,
+            completion_tokens: 45,
+            total_tokens: 21270,
+            prompt_cache_hit_tokens: 21120,
+          },
+        } as unknown as OpenAI.Chat.ChatCompletionChunk,
+        withStreamParser(),
+      );
+
+      expect(chunk.usageMetadata).toEqual(
+        expect.objectContaining({
+          promptTokenCount: 21225,
+          candidatesTokenCount: 45,
+          totalTokenCount: 21270,
+          cachedContentTokenCount: 21120,
+        }),
+      );
+    });
   });
 
   describe('OpenAI -> Gemini reasoning content', () => {

diff --git a/packages/core/src/core/openaiContentGenerator/converter.ts b/packages/core/src/core/openaiContentGenerator/converter.ts
@@ -36,6 +36,7 @@ const debugLogger = createDebugLogger('CONVERTER');
  */
 interface ExtendedCompletionUsage extends OpenAI.CompletionUsage {
   cached_tokens?: number;
+  prompt_cache_hit_tokens?: number;
 }
 
 export interface ExtendedChatCompletionAssistantMessageParam
@@ -69,6 +70,16 @@ export interface ExtendedCompletionChunkDelta
 // so it preserves catch-rate without silently suppressing legitimate chunks.
 const CUMULATIVE_DELTA_EXACT_REPEAT_MIN_LENGTH = 64;
 
+function getCachedPromptTokens(usage: OpenAI.CompletionUsage): number {
+  const extendedUsage = usage as ExtendedCompletionUsage;
+  return (
+    usage.prompt_tokens_details?.cached_tokens ??
+    extendedUsage.cached_tokens ??
+    extendedUsage.prompt_cache_hit_tokens ??
+    0
+  );
+}
+
 // Once this many bytes have been emitted without entering cumulative mode the
 // stream is almost certainly a standard incremental provider. Stop growing
 // emittedText beyond this point to bound per-stream memory and CPU. The true
@@ -1121,13 +1132,8 @@ export function convertOpenAIResponseToGemini(
     const promptTokens = usage.prompt_tokens || 0;
     const completionTokens = usage.completion_tokens || 0;
     const totalTokens = usage.total_tokens || 0;
-    // Support both formats: prompt_tokens_details.cached_tokens (OpenAI standard)
-    // and cached_tokens (some models return it at top level)
-    const extendedUsage = usage as ExtendedCompletionUsage;
-    const cachedTokens =
-      usage.prompt_tokens_details?.cached_tokens ??
-      extendedUsage.cached_tokens ??
-      0;
+    // Support OpenAI and provider-specific cache usage fields.
+    const cachedTokens = getCachedPromptTokens(usage);
     const thinkingTokens =
       usage.completion_tokens_details?.reasoning_tokens || 0;
 
@@ -1320,13 +1326,8 @@ export function convertOpenAIChunkToGemini(
     const totalTokens = usage.total_tokens || 0;
     const thinkingTokens =
       usage.completion_tokens_details?.reasoning_tokens || 0;
-    // Support both formats: prompt_tokens_details.cached_tokens (OpenAI standard)
-    // and cached_tokens (some models return it at top level)
-    const extendedUsage = usage as ExtendedCompletionUsage;
-    const cachedTokens =
-      usage.prompt_tokens_details?.cached_tokens ??
-      extendedUsage.cached_tokens ??
-      0;
+    // Support OpenAI and provider-specific cache usage fields.
+    const cachedTokens = getCachedPromptTokens(usage);
 
     // If we only have total tokens but no breakdown, estimate the split
     // Typically input is ~70% and output is ~30% for most conversations

diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.test.ts b/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
@@ -424,6 +424,168 @@ describe('ContentGenerationPipeline', () => {
       );
     });
 
+    it('sorts DeepSeek tools by function name before sending the wire request', async () => {
+      mockContentGeneratorConfig.baseUrl = 'https://api.deepseek.com/v1';
+      const request: GenerateContentParameters = {
+        model: 'deepseek-v4-pro',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+        config: {
+          tools: [
+            {
+              functionDeclarations: [
+                {
+                  name: 'placeholder',
+                  description: 'Placeholder function',
+                  parameters: { type: Type.OBJECT, properties: {} },
+                },
+              ],
+            },
+          ],
+        },
+      };
+
+      const mockMessages = [
+        { role: 'user', content: 'Hello' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockTools = [
+        { type: 'function', function: { name: 'zeta' } },
+        { type: 'function', function: { name: 'alpha' } },
+        { type: 'function', function: { name: 'bravo' } },
+      ] as OpenAI.Chat.ChatCompletionTool[];
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertGeminiToolsToOpenAI as Mock).mockResolvedValue(
+        mockTools,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        new GenerateContentResponse(),
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue({
+        id: 'response-id',
+        choices: [],
+      } as unknown as OpenAI.Chat.ChatCompletion);
+
+      await pipeline.execute(request, 'test-prompt-id');
+
+      const apiCall = (mockClient.chat.completions.create as Mock).mock
+        .calls[0][0];
+      expect(
+        apiCall.tools.map(
+          (tool: OpenAI.Chat.ChatCompletionTool) => tool.function.name,
+        ),
+      ).toEqual(['alpha', 'bravo', 'zeta']);
+    });
+
+    it('preserves tool order for non-DeepSeek hostnames', async () => {
+      mockContentGeneratorConfig.baseUrl = 'https://example.test/v1';
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+        config: {
+          tools: [
+            {
+              functionDeclarations: [
+                {
+                  name: 'placeholder',
+                  description: 'Placeholder function',
+                  parameters: { type: Type.OBJECT, properties: {} },
+                },
+              ],
+            },
+          ],
+        },
+      };
+
+      const mockMessages = [
+        { role: 'user', content: 'Hello' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockTools = [
+        { type: 'function', function: { name: 'zeta' } },
+        { type: 'function', function: { name: 'alpha' } },
+      ] as OpenAI.Chat.ChatCompletionTool[];
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertGeminiToolsToOpenAI as Mock).mockResolvedValue(
+        mockTools,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        new GenerateContentResponse(),
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue({
+        id: 'response-id',
+        choices: [],
+      } as unknown as OpenAI.Chat.ChatCompletion);
+
+      await pipeline.execute(request, 'test-prompt-id');
+
+      const apiCall = (mockClient.chat.completions.create as Mock).mock
+        .calls[0][0];
+      expect(
+        apiCall.tools.map(
+          (tool: OpenAI.Chat.ChatCompletionTool) => tool.function.name,
+        ),
+      ).toEqual(['zeta', 'alpha']);
+    });
+
+    it('sorts self-hosted DeepSeek tools by function name before sending the wire request', async () => {
+      mockContentGeneratorConfig.baseUrl = 'https://example.test/v1';
+      mockContentGeneratorConfig.model = 'deepseek-v4-pro';
+      const request: GenerateContentParameters = {
+        model: 'deepseek-v4-pro',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+        config: {
+          tools: [
+            {
+              functionDeclarations: [
+                {
+                  name: 'placeholder',
+                  description: 'Placeholder function',
+                  parameters: { type: Type.OBJECT, properties: {} },
+                },
+              ],
+            },
+          ],
+        },
+      };
+
+      const mockMessages = [
+        { role: 'user', content: 'Hello' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockTools = [
+        { type: 'function', function: { name: 'zeta' } },
+        { type: 'function', function: { name: 'alpha' } },
+        { type: 'function', function: { name: 'bravo' } },
+      ] as OpenAI.Chat.ChatCompletionTool[];
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertGeminiToolsToOpenAI as Mock).mockResolvedValue(
+        mockTools,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        new GenerateContentResponse(),
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue({
+        id: 'response-id',
+        choices: [],
+      } as unknown as OpenAI.Chat.ChatCompletion);
+
+      await pipeline.execute(request, 'test-prompt-id');
+
+      const apiCall = (mockClient.chat.completions.create as Mock).mock
+        .calls[0][0];
+      expect(
+        apiCall.tools.map(
+          (tool: OpenAI.Chat.ChatCompletionTool) => tool.function.name,
+        ),
+      ).toEqual(['alpha', 'bravo', 'zeta']);
+    });
+
     it('should skip empty tools array in request', async () => {
       // Arrange — tools: [] should NOT be included in the API request
       const request: GenerateContentParameters = {

diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts
@@ -11,14 +11,34 @@ import {
 } from '@google/genai';
 import type { ContentGeneratorConfig } from '../contentGenerator.js';
 import { OpenAIContentConverter } from './converter.js';
-import { isDeepSeekHostname } from './provider/deepseek.js';
+import { isDeepSeekHostname, isDeepSeekProvider } from './provider/deepseek.js';
 import { openaiRequestCaptureContext } from './requestCaptureContext.js';
 import { StreamingToolCallParser } from './streamingToolCallParser.js';
 import { TaggedThinkingParser } from './taggedThinkingParser.js';
 import type { PipelineConfig, RequestContext } from './types.js';
 import { redactProxyError } from '../../utils/runtimeFetchOptions.js';
 import { runtimeDiagnostics } from '../../utils/runtimeDiagnostics.js';
 
+function compareStableStrings(left: string, right: string): number {
+  if (left < right) return -1;
+  if (left > right) return 1;
+  return 0;
+}
+
+function getToolSortKey(tool: OpenAI.Chat.ChatCompletionTool): string {
+  return tool.function.name;
+}
+
+function sortToolsForCacheStableRequest(
+  request: OpenAI.Chat.ChatCompletionCreateParams,
+): void {
+  if (!request.tools || request.tools.length < 2) return;
+
+  request.tools = [...request.tools].sort((left, right) =>
+    compareStableStrings(getToolSortKey(left), getToolSortKey(right)),
+  );
+}
+
 /**
  * Error thrown when the API returns an error embedded as stream content
  * instead of a proper HTTP error. Some providers (e.g., certain OpenAI-compatible
@@ -367,6 +387,16 @@ export class ContentGenerationPipeline {
       }
     }
 
+    // DeepSeek's KV cache is prefix-exact: a different tool order changes the
+    // serialized prompt prefix even when the tool set and schemas are identical.
+    // Gate on broad provider detection because cache-prefix stability follows
+    // the DeepSeek model/protocol even for self-hosted deployments. The
+    // narrower hostname gate above is only for DeepSeek's official V4
+    // `thinking` wire shape, which self-hosted servers may reject.
+    if (isDeepSeekProvider(this.contentGeneratorConfig)) {
+      sortToolsForCacheStableRequest(providerRequest);
+    }
+
     return providerRequest;
   }