fix(ollama): strip inline kimi cloud reasoning leak (#86515)

clawsweeper[bot] · jason-allen-oneal · osolmaz · web-flow · commit c4bce00727ec · 2026-05-25T15:16:42.000Z
Summary: - This PR adds an Ollama Kimi-cloud visible-content sanitizer for streamed and final assistant replies, updates stream handling and regression tests, and adds a changelog entry. - PR surface: Source +183, Tests +473, Docs +1. Total +657 across 7 files. - Reproducibility: yes. from source and the linked report: current main appends Ollama `message.content` direc ... payload described in the issue would be shown. I did not run a live vendor repro in this read-only review. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(ollama): sanitize kimi inline reasoning in stream events - PR branch already contained follow-up commit before automerge: fix(ollama): buffer kimi cloud stream reasoning - PR branch already contained follow-up commit before automerge: fix(ollama): cover kimi inline boundary variants - PR branch already contained follow-up commit before automerge: fix(ollama): preserve text start partial state - PR branch already contained follow-up commit before automerge: fix(ollama): bound kimi stream sanitizer hold - PR branch already contained follow-up commit before automerge: fix(ollama): keep kimi sanitizer deltas append-only Validation: - ClawSweeper review passed for head b709229. - Required merge gates passed before the squash merge. Prepared head SHA: b709229 Review: #86515 (comment) Co-authored-by: Jason O'Neal <jason.allen.oneal@gmail.com> Co-authored-by: Onur Solmaz <2453968+osolmaz@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: osolmaz Co-authored-by: osolmaz <2453968+osolmaz@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -75,6 +75,7 @@ Docs: https://docs.openclaw.ai
 - Codex harness: make subscription usage-limit errors without reset times explain that OpenClaw cannot determine the reset and point users to wait until Codex is available, use another Codex account, or switch to another configured model/provider. Thanks @amknight.
 - Google Vertex: support production ADC modes such as Workload Identity Federation, service-account credentials, and metadata-server ADC for the native Vertex transport. (#83971) Thanks @damianFelixPago.
 - Telegram: route normal `[telegram][diag]` polling diagnostics through `runtime.log` while keeping non-diag warnings and persistence failures on `runtime.error`, so healthy polling startup no longer looks like an error. Fixes #82957. (#82958) Thanks @galiniliev.
+- Providers/Ollama: strip inline Kimi cloud reasoning prefixes from streamed and final visible replies while keeping ordinary Kimi answers append-only. (#86286) Thanks @jason-allen-oneal.
 
 - Gateway: require Talk secret authority before setup-code handoff can include Talk secrets. (#85690) Thanks @ngutman.
 - Agents: keep fallback error reporting scoped to the active model candidate so stale prior-provider quota/auth text is not reported for later fallback attempts. (#86134) thanks @zhangguiping-xydt.
diff --git a/extensions/ollama/src/model-behavior.ts b/extensions/ollama/src/model-behavior.ts
@@ -0,0 +1,13 @@
+import { isOllamaCloudKimiModelRef } from "./sanitizers/kimi-inline-reasoning.js";
+
+/**
+ * Returns whether `modelId` is an Ollama cloud Kimi reference, as judged by
+ * `isOllamaCloudKimiModelRef`. By its name this gates the compat Moonshot thinking wrapper;
+ * the call sites are not shown here.
+ *
+ * @param modelId - Model reference forwarded unchanged to `isOllamaCloudKimiModelRef`.
+ * @returns Whatever `isOllamaCloudKimiModelRef` reports for `modelId`.
+ */
+export function shouldWrapOllamaCompatMoonshotThinking(modelId: string): boolean {
+  const isKimiCloudModel = isOllamaCloudKimiModelRef(modelId);
+  return isKimiCloudModel;
+}
diff --git a/extensions/ollama/src/sanitizers/kimi-inline-reasoning.ts b/extensions/ollama/src/sanitizers/kimi-inline-reasoning.ts
@@ -0,0 +1,112 @@
+import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
+import type {
+  OllamaVisibleContentSanitizer,
+  OllamaVisibleContentStreamResolution,
+} from "./visible-content-contract.js";
+
+/** Minimum trimmed length of the text before the boundary for it to count as reasoning. */
+const INLINE_REASONING_MIN_PREFIX_CHARS = 80;
+/** Longest streamed text that may be withheld while the outcome is still undecided. */
+const INLINE_REASONING_MAX_PENDING_CHARS = 512;
+/** U+FE0F at the start of the text or right after whitespace, plus trailing whitespace. */
+const INLINE_REASONING_BOUNDARY_RE = /(^|\s)\uFE0F\s*/u;
+
+/**
+ * Result of one resolution pass. `bypassInlineReasoning` asks the stateful sanitizer to stop
+ * inspecting later stream updates because the hold limit was exceeded.
+ */
+type InlineReasoningVisibleTextResolution =
+  | { kind: "visible"; text: string; bypassInlineReasoning?: boolean }
+  | { kind: "pending" };
+
+/**
+ * Checks whether a model reference names a Kimi cloud model.
+ *
+ * The id is normalized with `normalizeLowercaseStringOrEmpty`, everything up to and including
+ * the first `/` is dropped, and the remainder must start with `kimi-k` and contain `:cloud`.
+ *
+ * @param modelId - Model reference, with or without a leading `<segment>/`.
+ * @returns `true` when the remainder satisfies both checks.
+ */
+export function isOllamaCloudKimiModelRef(modelId: string): boolean {
+  const normalizedModelId = normalizeLowercaseStringOrEmpty(modelId);
+  const slashIndex = normalizedModelId.indexOf("/");
+  const normalizedWireModelId =
+    slashIndex === -1 ? normalizedModelId : normalizedModelId.slice(slashIndex + 1);
+  return normalizedWireModelId.startsWith("kimi-k") && normalizedWireModelId.includes(":cloud");
+}
+
+/**
+ * Decides which part of `text` may be shown.
+ *
+ * - A boundary preceded by at least `INLINE_REASONING_MIN_PREFIX_CHARS` trimmed characters
+ *   strips the prefix and the boundary; only the trimmed remainder is visible.
+ * - Otherwise the text is undecided: a final text is shown unchanged, a streamed text is held
+ *   back as `pending` until it grows past `INLINE_REASONING_MAX_PENDING_CHARS`, at which point
+ *   it is shown unchanged with `bypassInlineReasoning` set.
+ *
+ * @param params.text - Text to inspect (assumed to be the reply accumulated so far).
+ * @param params.final - `true` when no further text will follow.
+ * @returns The visible text, or `pending` while a streamed text is still being held.
+ */
+function resolveInlineReasoningVisibleText(params: {
+  text: string;
+  final: boolean;
+}): InlineReasoningVisibleTextResolution {
+  const { text, final: isFinal } = params;
+  const match = INLINE_REASONING_BOUNDARY_RE.exec(text);
+  if (match) {
+    // Group 1 is empty (start of text) or one whitespace character; it stays with the prefix.
+    const boundaryStartIndex = match.index + match[1].length;
+    const boundaryEndIndex = match.index + match[0].length;
+    const prefix = text.slice(0, boundaryStartIndex).trim();
+    if (prefix.length >= INLINE_REASONING_MIN_PREFIX_CHARS) {
+      return { kind: "visible", text: text.slice(boundaryEndIndex).trim() };
+    }
+  }
+
+  if (isFinal) {
+    return { kind: "visible", text };
+  }
+  // The hold limit also covers a boundary with a short prefix; without it such a stream would
+  // stay pending until the final update no matter how long it grows.
+  if (text.length > INLINE_REASONING_MAX_PENDING_CHARS) {
+    return { kind: "visible", text, bypassInlineReasoning: true };
+  }
+  return { kind: "pending" };
+}
+
+/**
+ * Creates a stateful sanitizer that removes an inline reasoning prefix from reply text.
+ *
+ * The returned object keeps one flag: once a stream update exceeds the hold limit without being
+ * stripped, every later `resolveStreamText` call returns its text unchanged.
+ * `sanitizeFinalText` ignores that flag and always evaluates the given text as final.
+ *
+ * @returns A new sanitizer with its own bypass state.
+ */
+export function createKimiInlineReasoningSanitizer(): OllamaVisibleContentSanitizer {
+  let bypassInlineReasoning = false;
+
+  return {
+    resolveStreamText(params): OllamaVisibleContentStreamResolution {
+      if (bypassInlineReasoning) {
+        return { kind: "visible", text: params.text };
+      }
+
+      const resolution = resolveInlineReasoningVisibleText(params);
+      if (resolution.kind === "pending") {
+        return resolution;
+      }
+      if (resolution.bypassInlineReasoning) {
+        // A later boundary would otherwise replace text already returned as visible.
+        bypassInlineReasoning = true;
+      }
+      return { kind: "visible", text: resolution.text };
+    },
+    sanitizeFinalText(text) {
+      const resolution = resolveInlineReasoningVisibleText({ text, final: true });
+      return resolution.kind === "visible" ? resolution.text : text;
+    },
+  };
+}
diff --git a/extensions/ollama/src/sanitizers/visible-content-contract.ts b/extensions/ollama/src/sanitizers/visible-content-contract.ts
@@ -0,0 +1,15 @@
+/**
+ * Outcome of one streaming sanitizer pass: `pending` carries no text, `visible` carries the
+ * text to show.
+ */
+export type OllamaVisibleContentStreamResolution =
+  | { kind: "pending" }
+  | { kind: "visible"; text: string };
+
+/** Contract implemented by each visible-content sanitizer. */
+export type OllamaVisibleContentSanitizer = {
+  /** Resolves streamed `text`; `final` presumably marks the last update of the stream. */
+  resolveStreamText(params: { text: string; final: boolean }): OllamaVisibleContentStreamResolution;
+  /** Maps a complete reply text to the text that should be shown. */
+  sanitizeFinalText(text: string): string;
+};
diff --git a/extensions/ollama/src/sanitizers/visible-content.ts b/extensions/ollama/src/sanitizers/visible-content.ts
@@ -0,0 +1,42 @@
+import {
+  createKimiInlineReasoningSanitizer,
+  isOllamaCloudKimiModelRef,
+} from "./kimi-inline-reasoning.js";
+import type { OllamaVisibleContentSanitizer } from "./visible-content-contract.js";
+
+/** Pass-through sanitizer: streamed text is always visible and final text is returned as-is. */
+const noopVisibleContentSanitizer: OllamaVisibleContentSanitizer = {
+  resolveStreamText: ({ text }) => ({ kind: "visible", text }),
+  sanitizeFinalText: (text) => text,
+};
+
+/**
+ * Picks the visible-content sanitizer for a model.
+ *
+ * @param modelId - Model reference checked with `isOllamaCloudKimiModelRef`.
+ * @returns A new Kimi inline-reasoning sanitizer when the check passes; otherwise the shared
+ *   pass-through sanitizer.
+ */
+export function createOllamaVisibleContentSanitizer(
+  modelId: string,
+): OllamaVisibleContentSanitizer {
+  return isOllamaCloudKimiModelRef(modelId)
+    ? createKimiInlineReasoningSanitizer()
+    : noopVisibleContentSanitizer;
+}
+
+/**
+ * Sanitizes a complete reply in one step.
+ *
+ * @param params.modelId - Model reference used to select the sanitizer.
+ * @param params.text - Complete reply text.
+ * @returns `text` after the selected sanitizer's `sanitizeFinalText`.
+ */
+export function sanitizeOllamaFinalVisibleContent(params: {
+  modelId: string;
+  text: string;
+}): string {
+  const { modelId, text } = params;
+  const sanitizer = createOllamaVisibleContentSanitizer(modelId);
+  return sanitizer.sanitizeFinalText(text);
+}
diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts
diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts