fix: yield diagnostic event drains (#82937)

galiniliev · clawsweeper[bot] · web-flow · commit 5d799c2d2052 · 2026-05-20T02:55:17.000Z
Summary: - The branch caps async diagnostic drains at 100 events per turn, adds pending/full-drain diagnostic helpers, ... rminal diagnostics to inspect pending events, and adds regression coverage plus changelog/baseline updates. - Reproducibility: yes, from source inspection. Current main drains the entire async diagnostic queue in one s ... ck, and the PR body supplies a focused 250-event after-fix probe showing 100/200/250 delivery across turns. Automerge notes: - PR branch already contained follow-up commit before automerge: fix: yield diagnostic event drains Validation: - ClawSweeper review passed for head 9561093. - Required merge gates passed before the squash merge. Prepared head SHA: 9561093 Review: #82937 (comment) Co-authored-by: Galin Iliev <galini@microsoft.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
 
 - Agents/code mode: spell out the `exec` tool's JavaScript/TypeScript, no Node module, and catalog-bridge constraints in model-visible schema text so agents can use enabled tools without trial-and-error. (#84269) Thanks @Kaspre.
 - Codex: give `image_generate` dynamic-tool calls a 120s default watchdog when no per-call or configured image timeout is set, so image generation no longer falls back to the generic 30s bridge timeout. (#84254) Thanks @moritzmmayerhofer.
+- Codex: avoid duplicate dynamic-tool terminal diagnostics while a large diagnostic backlog is still draining, and stop waiting on that drain before returning tool responses. (#82937) Thanks @galiniliev.
 - CLI/message: include a stable top-level `messageId` in `openclaw message --json` output when channel sends return one. (#84191) Thanks @100menotu001.
 - Gateway/agents: use an agent's `identity.name` in Gateway agent summaries when `agents.list[].name` is unset, so configured agent labels remain visible in clients. (#84355; refs #57835) Thanks @luoyanglang.
 - Plugins/hooks: apply a default 30-second timeout to `before_compaction` and `after_compaction` hooks so a hung plugin handler no longer blocks compaction completion. (#84153)
diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-474c461709084ddd4014112d891c64abf3e062a417dbffae82be1cf54206283b  plugin-sdk-api-baseline.json
-cb7ad8a96c541d1ed7295c4bde6fb6a679e5d3481ed66778610ef897a3152484  plugin-sdk-api-baseline.jsonl
+6468950bae79f48709683957c5b140f634425f02f292bc5981e12c6565044b48  plugin-sdk-api-baseline.json
+2b329a3747a80498d1bc974a64d56a637bde6d2e6f7415f82cdcaeebb8f703af  plugin-sdk-api-baseline.jsonl
diff --git a/extensions/codex/src/app-server/run-attempt.test.ts b/extensions/codex/src/app-server/run-attempt.test.ts
@@ -14,9 +14,11 @@ import {
   type EmbeddedRunAttemptParams,
 } from "openclaw/plugin-sdk/agent-harness-runtime";
 import {
+  emitDiagnosticEvent,
   emitTrustedDiagnosticEvent,
   onInternalDiagnosticEvent,
   resetDiagnosticEventsForTest,
+  waitForDiagnosticEventsDrained,
   type DiagnosticEventPayload,
 } from "openclaw/plugin-sdk/diagnostic-runtime";
 import {
@@ -80,7 +82,19 @@ type RunCodexAppServerAttemptOptions = NonNullable<
 >;
 
 function flushDiagnosticEvents() {
-  return new Promise<void>((resolve) => setImmediate(resolve));
+  return waitForDiagnosticEventsDrained();
+}
+
+function emitAsyncDiagnosticBacklog(count: number): void {
+  for (let index = 0; index < count; index += 1) {
+    emitDiagnosticEvent({
+      type: "model.call.started",
+      runId: `backlog-run-${index}`,
+      callId: `backlog-call-${index}`,
+      provider: "openai",
+      model: "gpt-5.4",
+    });
+  }
 }
 
 function activeDiagnosticToolKeys(events: DiagnosticEventPayload[]): Set<string> {
@@ -2521,6 +2535,7 @@ describe("runCodexAppServerAttempt", () => {
 
     const run = runCodexAppServerAttempt(params);
     await harness.waitForMethod("thread/start");
+    emitAsyncDiagnosticBacklog(150);
 
     const toolResult = (await harness.handleServerRequest({
       id: "request-echo-error-tool",
diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts
@@ -51,6 +51,7 @@ import {
 import { markAuthProfileBlockedUntil, resolveAgentDir } from "openclaw/plugin-sdk/agent-runtime";
 import {
   emitTrustedDiagnosticEvent,
+  hasPendingInternalDiagnosticEvent,
   onInternalDiagnosticEvent,
   type DiagnosticEventPayload,
 } from "openclaw/plugin-sdk/diagnostic-runtime";
@@ -2183,8 +2184,15 @@ export async function runCodexAppServerAttempt(
             },
           });
         }
-        await waitForDiagnosticEventDrain();
-        if (!terminalDiagnosticObserved) {
+        if (
+          !terminalDiagnosticObserved &&
+          !hasPendingDynamicToolTerminalDiagnostic({
+            call,
+            runId: params.runId,
+            sessionId: params.sessionId,
+            sessionKey: params.sessionKey,
+          })
+        ) {
           emitDynamicToolTerminalDiagnostic({
             response,
             call,
@@ -2196,8 +2204,15 @@ export async function runCodexAppServerAttempt(
         }
         return protocolResponse as JsonValue;
       } catch (error) {
-        await waitForDiagnosticEventDrain();
-        if (!terminalDiagnosticObserved) {
+        if (
+          !terminalDiagnosticObserved &&
+          !hasPendingDynamicToolTerminalDiagnostic({
+            call,
+            runId: params.runId,
+            sessionId: params.sessionId,
+            sessionKey: params.sessionKey,
+          })
+        ) {
           emitDynamicToolErrorDiagnostic({
             call,
             runId: params.runId,
@@ -2948,10 +2963,6 @@ function toCodexDynamicToolProtocolResponse(
   };
 }
 
-function waitForDiagnosticEventDrain(): Promise<void> {
-  return new Promise((resolve) => setImmediate(resolve));
-}
-
 type TerminalToolExecutionDiagnostic = Extract<
   DiagnosticEventPayload,
   { type: "tool.execution.blocked" | "tool.execution.completed" | "tool.execution.error" }
@@ -2996,6 +3007,26 @@ function isMatchingDynamicToolTerminalDiagnostic(params: {
   );
 }
 
+function hasPendingDynamicToolTerminalDiagnostic(params: {
+  call: CodexDynamicToolCallParams;
+  runId?: string;
+  sessionId?: string;
+  sessionKey?: string;
+}): boolean {
+  return hasPendingInternalDiagnosticEvent((event) => {
+    if (!isDynamicToolTerminalDiagnosticEvent(event)) {
+      return false;
+    }
+    return isMatchingDynamicToolTerminalDiagnostic({
+      event,
+      call: params.call,
+      runId: params.runId,
+      sessionId: params.sessionId,
+      sessionKey: params.sessionKey,
+    });
+  });
+}
+
 function resolveDynamicToolCallTimeoutMs(params: {
   call: CodexDynamicToolCallParams;
   config: EmbeddedRunAttemptParams["config"];
diff --git a/src/infra/diagnostic-events.test.ts b/src/infra/diagnostic-events.test.ts
@@ -3,11 +3,14 @@ import {
   emitDiagnosticEvent,
   emitTrustedDiagnosticEvent,
   formatDiagnosticTraceparentForPropagation,
+  hasPendingInternalDiagnosticEvent,
   isDiagnosticsEnabled,
   onInternalDiagnosticEvent,
   onDiagnosticEvent,
   resetDiagnosticEventsForTest,
   setDiagnosticsEnabledForProcess,
+  waitForDiagnosticEventsDrained,
+  type DiagnosticEventPayload,
 } from "./diagnostic-events.js";
 import {
   createDiagnosticTraceContext,
@@ -415,6 +418,224 @@ describe("diagnostic-events", () => {
     expect(events).toEqual(["tool.execution.started", "model.call.started"]);
   });
 
+  it("yields between large high-frequency diagnostic event bursts", async () => {
+    const events: string[] = [];
+    onDiagnosticEvent((event) => {
+      events.push(event.type);
+    });
+
+    for (let index = 0; index < 250; index += 1) {
+      emitDiagnosticEvent({
+        type: "model.call.started",
+        runId: `run-${index}`,
+        callId: `call-${index}`,
+        provider: "openai",
+        model: "gpt-5.4",
+      });
+    }
+
+    expect(events).toStrictEqual([]);
+    await new Promise<void>((resolve) => setImmediate(resolve));
+    expect(events).toHaveLength(100);
+    await new Promise<void>((resolve) => setImmediate(resolve));
+    expect(events).toHaveLength(200);
+    await new Promise<void>((resolve) => setImmediate(resolve));
+    expect(events).toHaveLength(250);
+  });
+
+  it("waits for all queued high-frequency diagnostic events to drain", async () => {
+    const events: string[] = [];
+    onDiagnosticEvent((event) => {
+      events.push(event.type);
+    });
+
+    for (let index = 0; index < 250; index += 1) {
+      emitDiagnosticEvent({
+        type: "model.call.started",
+        runId: `run-${index}`,
+        callId: `call-${index}`,
+        provider: "openai",
+        model: "gpt-5.4",
+      });
+    }
+
+    await waitForDiagnosticEventsDrained();
+
+    expect(events).toHaveLength(250);
+  });
+
+  it("reports pending async diagnostic events before they drain", async () => {
+    emitTrustedDiagnosticEvent({
+      type: "tool.execution.error",
+      runId: "run-pending",
+      toolName: "exec",
+      toolCallId: "call-pending",
+      durationMs: 1,
+      errorCategory: "test",
+    });
+
+    expect(
+      hasPendingInternalDiagnosticEvent(
+        (event, metadata) =>
+          metadata.trusted &&
+          event.type === "tool.execution.error" &&
+          event.toolCallId === "call-pending",
+      ),
+    ).toBe(true);
+
+    await waitForDiagnosticEventsDrained();
+
+    expect(
+      hasPendingInternalDiagnosticEvent((event) => event.type === "tool.execution.error"),
+    ).toBe(false);
+  });
+
+  it("passes immutable pending diagnostic copies to queue inspectors", async () => {
+    const events: DiagnosticEventPayload[] = [];
+    onInternalDiagnosticEvent((event) => {
+      events.push(event);
+    });
+
+    emitTrustedDiagnosticEvent({
+      type: "tool.execution.error",
+      runId: "run-immutable",
+      toolName: "exec",
+      toolCallId: "call-immutable",
+      durationMs: 1,
+      errorCategory: "test",
+    });
+
+    let mutationErrors = 0;
+    expect(
+      hasPendingInternalDiagnosticEvent((event, metadata) => {
+        try {
+          (event as { type: string }).type = "model.usage";
+        } catch {
+          mutationErrors += 1;
+        }
+        try {
+          (metadata as { trusted: boolean }).trusted = false;
+        } catch {
+          mutationErrors += 1;
+        }
+        return (
+          metadata.trusted &&
+          event.type === "tool.execution.error" &&
+          event.toolCallId === "call-immutable"
+        );
+      }),
+    ).toBe(true);
+    expect(mutationErrors).toBe(2);
+
+    await waitForDiagnosticEventsDrained();
+
+    expect(events).toMatchObject([
+      {
+        type: "tool.execution.error",
+        toolCallId: "call-immutable",
+      },
+    ]);
+  });
+
+  it("skips uncloneable pending diagnostics during queue inspection", async () => {
+    emitDiagnosticEvent({
+      type: "model.call.started",
+      runId: "run-uncloneable",
+      callId: "call-uncloneable",
+      provider: "openai",
+      model: "gpt-5.4",
+      badValue: () => undefined,
+    } as never);
+    emitTrustedDiagnosticEvent({
+      type: "tool.execution.error",
+      runId: "run-cloneable",
+      toolName: "exec",
+      toolCallId: "call-cloneable",
+      durationMs: 1,
+      errorCategory: "test",
+    });
+
+    expect(
+      hasPendingInternalDiagnosticEvent(
+        (event, metadata) =>
+          metadata.trusted &&
+          event.type === "tool.execution.error" &&
+          event.toolCallId === "call-cloneable",
+      ),
+    ).toBe(true);
+  });
+
+  it("preserves trusted terminal tool diagnostics when the async queue is full", async () => {
+    const events: DiagnosticEventPayload[] = [];
+    onInternalDiagnosticEvent((event) => {
+      events.push(event);
+    });
+
+    emitTrustedDiagnosticEvent({
+      type: "tool.execution.completed",
+      runId: "run-saturation-first",
+      toolName: "exec",
+      toolCallId: "call-saturation-first",
+      durationMs: 1,
+    });
+
+    for (let index = 0; index < 9_999; index += 1) {
+      emitDiagnosticEvent({
+        type: "model.call.started",
+        runId: `saturation-run-${index}`,
+        callId: `saturation-call-${index}`,
+        provider: "openai",
+        model: "gpt-5.4",
+      });
+    }
+
+    emitTrustedDiagnosticEvent({
+      type: "tool.execution.error",
+      runId: "run-saturation-second",
+      toolName: "exec",
+      toolCallId: "call-saturation-second",
+      durationMs: 1,
+      errorCategory: "test",
+    });
+
+    expect(
+      hasPendingInternalDiagnosticEvent(
+        (event, metadata) =>
+          metadata.trusted &&
+          event.type === "tool.execution.error" &&
+          event.toolCallId === "call-saturation-second",
+      ),
+    ).toBe(true);
+
+    await waitForDiagnosticEventsDrained();
+
+    expect(
+      events
+        .filter(
+          (
+            event,
+          ): event is Extract<
+            DiagnosticEventPayload,
+            { type: "tool.execution.completed" | "tool.execution.error" }
+          > => event.type === "tool.execution.completed" || event.type === "tool.execution.error",
+        )
+        .map((event) => ({
+          type: event.type,
+          toolCallId: event.toolCallId,
+        })),
+    ).toEqual([
+      {
+        type: "tool.execution.completed",
+        toolCallId: "call-saturation-first",
+      },
+      {
+        type: "tool.execution.error",
+        toolCallId: "call-saturation-second",
+      },
+    ]);
+    expect(events.filter((event) => event.type === "model.call.started")).toHaveLength(9_998);
+  });
+
   it("keeps log records off the public diagnostic event stream", async () => {
     const publicEvents: string[] = [];
     const internalEvents: string[] = [];
diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts
diff --git a/src/plugin-sdk/diagnostic-runtime.ts b/src/plugin-sdk/diagnostic-runtime.ts