Skip to content

Commit cead2ea

Browse files
authored
feat(diagnostics): emit tool execution events
Emit structured diagnostic events for tool execution lifecycle, with trace context, safe parameter summaries, and non-message error metadata.
1 parent 447105a commit cead2ea

7 files changed

Lines changed: 261 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
1313
- Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc.
1414
- Diagnostics/OTEL: pass immutable per-run diagnostic trace context through agent and tool hook contexts, and parent exported diagnostic spans from validated context without retaining global trace state. Thanks @vincentkoc.
1515
- Diagnostics/OTEL: make exporter startup restart-safe so config reloads do not retain stale SDKs, log transports, or diagnostic event listeners. Thanks @vincentkoc.
16+
- Diagnostics: emit structured tool execution diagnostic events with trace context, timing, and redacted error metadata. Thanks @vincentkoc.
1617
- Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it.
1718
- Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent.
1819
- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
c0f788d1895ced2ffdad9f82e6afc592171e6651c61c0fc5083f0040437cda6d plugin-sdk-api-baseline.json
2-
70e320157331080b98f9c2acae58e89ad1dc70b48adad265225a7eb76b6ac29f plugin-sdk-api-baseline.jsonl
1+
c57d43f93ec2930b099dd5c5777f201f1bdd1ab432eeb4049b6e62ff23fe8112 plugin-sdk-api-baseline.json
2+
ece1ea689914c4070b587551e86c6bed6598feba90457ab489222e168b2d9298 plugin-sdk-api-baseline.jsonl

extensions/diagnostics-otel/src/service.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
812812
recordHeartbeat(evt);
813813
return;
814814
case "tool.loop":
815+
case "tool.execution.started":
816+
case "tool.execution.completed":
817+
case "tool.execution.error":
815818
case "diagnostic.memory.sample":
816819
case "diagnostic.memory.pressure":
817820
case "payload.large":

src/agents/pi-tools.before-tool-call.e2e.test.ts

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
22
import {
33
onDiagnosticEvent,
44
resetDiagnosticEventsForTest,
5+
type DiagnosticEventPayload,
56
type DiagnosticToolLoopEvent,
67
} from "../infra/diagnostic-events.js";
78
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
@@ -85,6 +86,22 @@ describe("before_tool_call loop detection behavior", () => {
8586
}
8687
}
8788

89+
/**
 * Test helper: subscribes to diagnostic events, collects only those whose
 * `type` starts with "tool.execution.", and hands the live array to `run`.
 * The listener is always detached afterwards, even when `run` rejects.
 */
async function withToolExecutionEvents(
  run: (emitted: DiagnosticEventPayload[]) => Promise<void>,
) {
  const captured: DiagnosticEventPayload[] = [];
  const unsubscribe = onDiagnosticEvent((event) => {
    if (!event.type.startsWith("tool.execution.")) {
      return;
    }
    captured.push(event);
  });
  try {
    await run(captured);
  } finally {
    unsubscribe();
  }
}
104+
88105
function createPingPongTools(options?: { withProgress?: boolean }) {
89106
const readExecute = options?.withProgress
90107
? vi.fn().mockImplementation(async (toolCallId: string) => ({
@@ -331,6 +348,115 @@ describe("before_tool_call loop detection behavior", () => {
331348
});
332349
});
333350
});
351+
352+
it("emits diagnostic tool execution events without parameter values", async () => {
353+
const trace = {
354+
traceId: "4bf92f3577b34da6a3ce929d0e0e4736",
355+
spanId: "00f067aa0ba902b7",
356+
traceFlags: "01",
357+
};
358+
const execute = vi.fn().mockResolvedValue({
359+
content: [{ type: "text", text: "ok" }],
360+
});
361+
const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
362+
agentId: "main",
363+
sessionKey: "session-key",
364+
sessionId: "session-id",
365+
runId: "run-1",
366+
trace,
367+
loopDetection: { enabled: false },
368+
});
369+
370+
await withToolExecutionEvents(async (emitted) => {
371+
await tool.execute(
372+
"tool-call-1",
373+
{ command: "pwd", token: "sk-1234567890abcdef1234567890abcdef" },
374+
undefined,
375+
undefined,
376+
);
377+
378+
expect(emitted.map((evt) => evt.type)).toEqual([
379+
"tool.execution.started",
380+
"tool.execution.completed",
381+
]);
382+
expect(emitted[0]).toMatchObject({
383+
type: "tool.execution.started",
384+
runId: "run-1",
385+
sessionKey: "session-key",
386+
sessionId: "session-id",
387+
toolName: "exec",
388+
toolCallId: "tool-call-1",
389+
paramsSummary: {
390+
kind: "object",
391+
},
392+
trace,
393+
});
394+
expect(emitted[0]?.trace).not.toBe(trace);
395+
expect(Object.isFrozen(emitted[0]?.trace)).toBe(true);
396+
expect(emitted[1]).toMatchObject({
397+
type: "tool.execution.completed",
398+
durationMs: expect.any(Number),
399+
});
400+
expect(JSON.stringify(emitted)).not.toContain("sk-1234567890abcdef1234567890abcdef");
401+
expect(JSON.stringify(emitted)).not.toContain("pwd");
402+
});
403+
});
404+
405+
it("emits diagnostic tool execution error events with redacted errors", async () => {
406+
const execute = vi
407+
.fn()
408+
.mockRejectedValue(new Error("failed with key sk-1234567890abcdef1234567890abcdef"));
409+
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
410+
agentId: "main",
411+
sessionKey: "session-key",
412+
loopDetection: { enabled: false },
413+
});
414+
415+
await withToolExecutionEvents(async (emitted) => {
416+
await expect(
417+
tool.execute("tool-call-error", { path: "/tmp/file" }, undefined, undefined),
418+
).rejects.toThrow("failed with key");
419+
420+
expect(emitted.map((evt) => evt.type)).toEqual([
421+
"tool.execution.started",
422+
"tool.execution.error",
423+
]);
424+
expect(emitted[1]).toMatchObject({
425+
type: "tool.execution.error",
426+
toolName: "read",
427+
toolCallId: "tool-call-error",
428+
durationMs: expect.any(Number),
429+
errorCategory: "Error",
430+
});
431+
expect(JSON.stringify(emitted[1])).not.toContain("sk-1234567890abcdef1234567890abcdef");
432+
});
433+
});
434+
435+
it("summarizes hostile object params without enumerating keys", async () => {
436+
const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] });
437+
const tool = wrapToolWithBeforeToolCallHook({ name: "bash", execute } as any, {
438+
agentId: "main",
439+
sessionKey: "session-key",
440+
loopDetection: { enabled: false },
441+
});
442+
const params = new Proxy(
443+
{},
444+
{
445+
ownKeys() {
446+
throw new Error("should not enumerate params");
447+
},
448+
},
449+
);
450+
451+
await withToolExecutionEvents(async (emitted) => {
452+
await tool.execute("tool-call-proxy", params, undefined, undefined);
453+
454+
expect(emitted[0]).toMatchObject({
455+
type: "tool.execution.started",
456+
paramsSummary: { kind: "object" },
457+
});
458+
});
459+
});
334460
});
335461

336462
describe("before_tool_call requireApproval handling", () => {

src/agents/pi-tools.before-tool-call.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
2+
import {
3+
emitDiagnosticEvent,
4+
type DiagnosticToolParamsSummary,
5+
} from "../infra/diagnostic-events.js";
26
import {
37
freezeDiagnosticTraceContext,
48
type DiagnosticTraceContext,
@@ -81,6 +85,57 @@ function unwrapErrorCause(err: unknown): unknown {
8185
return err;
8286
}
8387

88+
/**
 * Describes tool-call params for diagnostics without reading any values.
 *
 * Only the top-level kind is reported, plus `length` for arrays and strings.
 * Object keys are never enumerated, so hostile proxies cannot trigger traps
 * such as `ownKeys`.
 *
 * @param params - Raw params passed to the tool.
 * @returns A value-free summary of the params' shape.
 */
function summarizeToolParams(params: unknown): DiagnosticToolParamsSummary {
  // Arrays report as "object" under typeof, so they are peeled off first.
  if (Array.isArray(params)) {
    return { kind: "array", length: params.length };
  }
  switch (typeof params) {
    case "undefined":
      return { kind: "undefined" };
    case "object":
      // typeof null is "object"; keep null as its own kind.
      return params === null ? { kind: "null" } : { kind: "object" };
    case "string":
      return { kind: "string", length: params.length };
    case "number":
      return { kind: "number" };
    case "boolean":
      return { kind: "boolean" };
    default:
      // function, symbol, bigint
      return { kind: "other" };
  }
}
112+
113+
/**
 * Classifies a thrown value for diagnostic events without exposing its message.
 *
 * Returns the Error's `name`, trimmed and capped at 64 characters, when it is
 * a non-blank string. Otherwise returns the JavaScript `typeof` of the value.
 *
 * This runs while a tool failure is being reported, so it must not throw:
 * `name` is typed as `string` but can hold anything at runtime, and calling
 * `.trim()` on a non-string would replace the original tool error.
 *
 * @param err - The thrown value (already unwrapped by the caller).
 * @returns A short, bounded category string.
 */
function errorCategory(err: unknown): string {
  if (err instanceof Error) {
    const name: unknown = err.name;
    if (typeof name === "string") {
      const trimmed = name.trim();
      if (trimmed) {
        // Bounded so an arbitrary subclass name cannot bloat the event.
        return trimmed.slice(0, 64);
      }
    }
  }
  return typeof err;
}
119+
120+
/**
 * Extracts a short machine-readable code from a thrown value.
 *
 * Checks `code`, then `status`, then `statusCode`, and returns the first one
 * that is usable: a finite number (stringified) or a non-blank string
 * (trimmed and capped at 64 characters). An unusable earlier field, such as
 * a blank string, NaN or an object, no longer hides a valid later one.
 *
 * @param err - The thrown value (already unwrapped by the caller).
 * @returns The code string, or `undefined` when no usable field exists.
 */
function diagnosticErrorCode(err: unknown): string | undefined {
  if (!err || typeof err !== "object") {
    return undefined;
  }
  const candidate = err as { code?: unknown; status?: unknown; statusCode?: unknown };
  // Properties are read lazily and in priority order; later fields are only
  // touched when the earlier ones are unusable.
  for (const key of ["code", "status", "statusCode"] as const) {
    const value = candidate[key];
    if (typeof value === "number" && Number.isFinite(value)) {
      return String(value);
    }
    if (typeof value === "string") {
      const trimmed = value.trim();
      if (trimmed) {
        return trimmed.slice(0, 64);
      }
    }
  }
  return undefined;
}
138+
84139
function shouldEmitLoopWarning(state: SessionState, warningKey: string, count: number): boolean {
85140
if (!state.toolLoopWarningBuckets) {
86141
state.toolLoopWarningBuckets = new Map();
@@ -415,8 +470,27 @@ export function wrapToolWithBeforeToolCallHook(
415470
}
416471
}
417472
const normalizedToolName = normalizeToolName(toolName || "tool");
473+
const eventBase = {
474+
...(ctx?.runId && { runId: ctx.runId }),
475+
...(ctx?.sessionKey && { sessionKey: ctx.sessionKey }),
476+
...(ctx?.sessionId && { sessionId: ctx.sessionId }),
477+
...(ctx?.trace && { trace: freezeDiagnosticTraceContext(ctx.trace) }),
478+
toolName: normalizedToolName,
479+
...(toolCallId && { toolCallId }),
480+
paramsSummary: summarizeToolParams(outcome.params),
481+
};
482+
emitDiagnosticEvent({
483+
type: "tool.execution.started",
484+
...eventBase,
485+
});
486+
const startedAt = Date.now();
418487
try {
419488
const result = await execute(toolCallId, outcome.params, signal, onUpdate);
489+
emitDiagnosticEvent({
490+
type: "tool.execution.completed",
491+
...eventBase,
492+
durationMs: Date.now() - startedAt,
493+
});
420494
await recordLoopOutcome({
421495
ctx,
422496
toolName: normalizedToolName,
@@ -426,6 +500,15 @@ export function wrapToolWithBeforeToolCallHook(
426500
});
427501
return result;
428502
} catch (err) {
503+
const cause = unwrapErrorCause(err);
504+
const errorCode = diagnosticErrorCode(cause);
505+
emitDiagnosticEvent({
506+
type: "tool.execution.error",
507+
...eventBase,
508+
durationMs: Date.now() - startedAt,
509+
errorCategory: errorCategory(cause),
510+
...(errorCode ? { errorCode } : {}),
511+
});
429512
await recordLoopOutcome({
430513
ctx,
431514
toolName: normalizedToolName,

src/infra/diagnostic-events.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,37 @@ export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & {
154154
pairedToolName?: string;
155155
};
156156

157+
/**
 * Value-free description of a tool call's params, discriminated by `kind`.
 * Arrays and strings additionally carry their `length`; no variant carries
 * keys or values.
 */
export type DiagnosticToolParamsSummary =
158+
| { kind: "object" }
159+
| { kind: "array"; length: number }
160+
| { kind: "string"; length: number }
161+
| { kind: "number" | "boolean" | "null" | "undefined" | "other" };
162+
163+
/**
 * Fields shared by the three `tool.execution.*` events. Only `toolName` is
 * required; the identifiers and the params summary are optional.
 */
type DiagnosticToolExecutionBaseEvent = DiagnosticBaseEvent & {
164+
runId?: string;
165+
sessionKey?: string;
166+
sessionId?: string;
167+
toolName: string;
168+
toolCallId?: string;
169+
// Shape-only summary of the call's params; see DiagnosticToolParamsSummary.
paramsSummary?: DiagnosticToolParamsSummary;
170+
};
171+
172+
/** Tool execution event tagged `tool.execution.started`; adds no fields beyond the base. */
export type DiagnosticToolExecutionStartedEvent = DiagnosticToolExecutionBaseEvent & {
173+
type: "tool.execution.started";
174+
};
175+
176+
/** Tool execution event tagged `tool.execution.completed`, with the elapsed time. */
export type DiagnosticToolExecutionCompletedEvent = DiagnosticToolExecutionBaseEvent & {
177+
type: "tool.execution.completed";
178+
// Elapsed time in milliseconds.
durationMs: number;
179+
};
180+
181+
/**
 * Tool execution event tagged `tool.execution.error`. Carries the elapsed
 * time plus a category and optional code; it has no field for the error
 * message.
 */
export type DiagnosticToolExecutionErrorEvent = DiagnosticToolExecutionBaseEvent & {
182+
type: "tool.execution.error";
183+
// Elapsed time in milliseconds.
durationMs: number;
184+
// Classification of the thrown value, e.g. an Error name.
errorCategory: string;
185+
// Optional machine-readable code taken from the error, when one exists.
errorCode?: string;
186+
};
187+
157188
export type DiagnosticMemoryUsage = {
158189
rssBytes: number;
159190
heapTotalBytes: number;
@@ -204,6 +235,9 @@ export type DiagnosticEventPayload =
204235
| DiagnosticRunAttemptEvent
205236
| DiagnosticHeartbeatEvent
206237
| DiagnosticToolLoopEvent
238+
| DiagnosticToolExecutionStartedEvent
239+
| DiagnosticToolExecutionCompletedEvent
240+
| DiagnosticToolExecutionErrorEvent
207241
| DiagnosticMemorySampleEvent
208242
| DiagnosticMemoryPressureEvent
209243
| DiagnosticPayloadLargeEvent;

src/logging/diagnostic-stability.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,18 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi
235235
record.count = event.count;
236236
record.pairedToolName = event.pairedToolName;
237237
break;
238+
case "tool.execution.started":
239+
record.toolName = event.toolName;
240+
break;
241+
case "tool.execution.completed":
242+
record.toolName = event.toolName;
243+
record.durationMs = event.durationMs;
244+
break;
245+
case "tool.execution.error":
246+
record.toolName = event.toolName;
247+
record.durationMs = event.durationMs;
248+
record.reason = event.errorCategory;
249+
break;
238250
case "diagnostic.memory.sample":
239251
record.memory = copyMemory(event.memory);
240252
break;

0 commit comments

Comments
 (0)