Skip to content

Commit 1ccd84b

Browse files
committed
feat(diagnostics-otel): capture tool input/output content via trusted channel
The `diagnostics.otel.captureContent.{toolInputs,toolOutputs}` options were documented and wired into config but never produced any span content. This change emits tool arguments and results over the trusted private-data diagnostic channel (mirroring the model-content path), and has the OTel exporter bound, redact, and truncate them before span export. Raw tool content never travels on the public event bus. Scope: the core embedded-runner tool path (the canonical producer); the Codex (async-batched) and Claude CLI harnesses remain follow-ups tracked by the issue. Refs #77391
1 parent 8b84e95 commit 1ccd84b

10 files changed

Lines changed: 327 additions & 61 deletions

File tree

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
1cd5bcc75461c64d39a00918a50d033e66ae7ec199d8029f7cccaaa2eeb16f22 plugin-sdk-api-baseline.json
2-
a5d3b43c3710c4238958b1b3163e652ac34bdc7b82215c6294ce61b72188d75e plugin-sdk-api-baseline.jsonl
1+
ae06e87a060aaa9618e2b245553d90402c0fbbe1ebc864928dc7f771cede7c6d plugin-sdk-api-baseline.json
2+
8ae4665726d0a8e2e80587ab0b98afce6718861a996daef2fac207066c29dd4f plugin-sdk-api-baseline.jsonl

docs/gateway/opentelemetry.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,13 @@ When any subkey is enabled, model and tool spans get bounded, redacted
161161
`captureContent: true` only for broad diagnostics captures where OTLP log
162162
message bodies are also approved for export.
163163

164+
`toolInputs`/`toolOutputs` content is captured for the built-in agent runtime's
165+
tool executions (`openclaw.content.tool_input` on completed/error spans,
166+
`openclaw.content.tool_output` on completed spans). External harness tool calls
167+
(Codex, Claude CLI) emit `tool.execution.*` spans without content payloads.
168+
Captured content travels on a trusted, listener-only channel and is never placed
169+
on the public diagnostic event bus.
170+
164171
## Sampling and flushing
165172

166173
- **Traces:** `diagnostics.otel.sampleRate` (root-span only, `0.0` drops all,

extensions/diagnostics-otel/src/service.test.ts

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,22 @@ function emitTrustedModelCallCompletedWithContent(
408408
);
409409
}
410410

411+
function emitTrustedToolExecutionCompletedWithContent(
412+
event: Omit<
413+
Extract<Parameters<typeof emitDiagnosticEvent>[0], { type: "tool.execution.completed" }>,
414+
"type"
415+
>,
416+
toolContent: NonNullable<DiagnosticEventPrivateData["toolContent"]>,
417+
) {
418+
emitTrustedDiagnosticEventWithPrivateData(
419+
{
420+
type: "tool.execution.completed",
421+
...event,
422+
},
423+
{ toolContent },
424+
);
425+
}
426+
411427
afterAll(() => {
412428
vi.doUnmock("@opentelemetry/api");
413429
vi.doUnmock("@opentelemetry/sdk-node");
@@ -3991,15 +4007,18 @@ describe("diagnostics-otel service", () => {
39914007
systemPrompt: "private system prompt",
39924008
},
39934009
);
3994-
emitDiagnosticEvent({
3995-
type: "tool.execution.completed",
3996-
runId: "run-1",
3997-
toolName: "read",
3998-
toolCallId: "tool-1",
3999-
durationMs: 20,
4000-
toolInput: "private tool input",
4001-
toolOutput: "private tool output",
4002-
} as Parameters<typeof emitDiagnosticEvent>[0]);
4010+
emitTrustedToolExecutionCompletedWithContent(
4011+
{
4012+
runId: "run-1",
4013+
toolName: "read",
4014+
toolCallId: "tool-1",
4015+
durationMs: 20,
4016+
},
4017+
{
4018+
toolInput: "private tool input",
4019+
toolOutput: "private tool output",
4020+
},
4021+
);
40034022
await flushDiagnosticEvents();
40044023

40054024
const modelOptions = startedSpanOptions("openclaw.model.call");
@@ -4052,15 +4071,18 @@ describe("diagnostics-otel service", () => {
40524071
systemPrompt: "system prompt",
40534072
},
40544073
);
4055-
emitDiagnosticEvent({
4056-
type: "tool.execution.completed",
4057-
runId: "run-1",
4058-
toolName: "read",
4059-
toolCallId: "tool-1",
4060-
durationMs: 20,
4061-
toolInput: "tool input",
4062-
toolOutput: `${"x".repeat(4077)} Bearer ${"a".repeat(80)}`, // pragma: allowlist secret
4063-
} as Parameters<typeof emitDiagnosticEvent>[0]);
4074+
emitTrustedToolExecutionCompletedWithContent(
4075+
{
4076+
runId: "run-1",
4077+
toolName: "read",
4078+
toolCallId: "tool-1",
4079+
durationMs: 20,
4080+
},
4081+
{
4082+
toolInput: "tool input",
4083+
toolOutput: `${"x".repeat(4077)} Bearer ${"a".repeat(80)}`, // pragma: allowlist secret
4084+
},
4085+
);
40644086
await flushDiagnosticEvents();
40654087

40664088
const modelCall = telemetryState.tracer.startSpan.mock.calls.find(

extensions/diagnostics-otel/src/service.ts

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ type OtelModelCallContent = {
109109
toolDefinitions?: unknown;
110110
};
111111

112+
type OtelToolCallContent = {
113+
toolInput?: unknown;
114+
toolOutput?: unknown;
115+
};
116+
112117
type MessageDeliveryDiagnosticEvent = Extract<
113118
DiagnosticEventPayload,
114119
{
@@ -910,14 +915,14 @@ function assignOtelModelContentAttributes(
910915

911916
function assignOtelToolContentAttributes(
912917
attributes: Record<string, string | number | boolean>,
913-
event: Record<string, unknown>,
918+
content: OtelToolCallContent | undefined,
914919
policy: OtelContentCapturePolicy,
915920
): void {
916921
if (policy.toolInputs) {
917-
assignOtelContentAttribute(attributes, "openclaw.content.tool_input", event.toolInput);
922+
assignOtelContentAttribute(attributes, "openclaw.content.tool_input", content?.toolInput);
918923
}
919924
if (policy.toolOutputs) {
920-
assignOtelContentAttribute(attributes, "openclaw.content.tool_output", event.toolOutput);
925+
assignOtelContentAttribute(attributes, "openclaw.content.tool_output", content?.toolOutput);
921926
}
922927
}
923928

@@ -3045,6 +3050,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
30453050
const recordToolExecutionCompleted = (
30463051
evt: Extract<DiagnosticEventPayload, { type: "tool.execution.completed" }>,
30473052
metadata: DiagnosticEventMetadata,
3053+
toolContent?: OtelToolCallContent,
30483054
) => {
30493055
const attrs = toolExecutionBaseAttrs(evt);
30503056
toolExecutionDurationHistogram.record(evt.durationMs, attrs);
@@ -3055,11 +3061,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
30553061
...toolExecutionBaseAttrs(evt),
30563062
};
30573063
addRunAttrs(spanAttrs, evt);
3058-
assignOtelToolContentAttributes(
3059-
spanAttrs,
3060-
evt as unknown as Record<string, unknown>,
3061-
contentCapturePolicy,
3062-
);
3064+
assignOtelToolContentAttributes(spanAttrs, toolContent, contentCapturePolicy);
30633065
const span =
30643066
takeTrackedTrustedSpan(evt, metadata) ??
30653067
spanWithDuration("openclaw.tool.execution", spanAttrs, evt.durationMs, {
@@ -3073,6 +3075,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
30733075
const recordToolExecutionError = (
30743076
evt: Extract<DiagnosticEventPayload, { type: "tool.execution.error" }>,
30753077
metadata: DiagnosticEventMetadata,
3078+
toolContent?: OtelToolCallContent,
30763079
) => {
30773080
const attrs = {
30783081
...toolExecutionBaseAttrs(evt),
@@ -3090,11 +3093,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
30903093
if (evt.errorCode) {
30913094
spanAttrs["openclaw.errorCode"] = lowCardinalityAttr(evt.errorCode, "other");
30923095
}
3093-
assignOtelToolContentAttributes(
3094-
spanAttrs,
3095-
evt as unknown as Record<string, unknown>,
3096-
contentCapturePolicy,
3097-
);
3096+
assignOtelToolContentAttributes(spanAttrs, toolContent, contentCapturePolicy);
30983097
const span =
30993098
takeTrackedTrustedSpan(evt, metadata) ??
31003099
spanWithDuration("openclaw.tool.execution", spanAttrs, evt.durationMs, {
@@ -3425,10 +3424,10 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
34253424
recordToolExecutionStarted(evt, metadata);
34263425
return;
34273426
case "tool.execution.completed":
3428-
recordToolExecutionCompleted(evt, metadata);
3427+
recordToolExecutionCompleted(evt, metadata, privateData.toolContent);
34293428
return;
34303429
case "tool.execution.error":
3431-
recordToolExecutionError(evt, metadata);
3430+
recordToolExecutionError(evt, metadata, privateData.toolContent);
34323431
return;
34333432
case "tool.execution.blocked":
34343433
recordToolExecutionBlocked(evt, metadata);

src/agents/agent-tools.before-tool-call.e2e.test.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
99
import {
1010
onInternalDiagnosticEvent,
1111
onDiagnosticEvent,
12+
onTrustedInternalDiagnosticEvent,
1213
resetDiagnosticEventsForTest,
1314
type DiagnosticEventPayload,
15+
type DiagnosticEventPrivateData,
1416
type DiagnosticToolLoopEvent,
1517
} from "../infra/diagnostic-events.js";
1618
import { MAX_PLUGIN_APPROVAL_TIMEOUT_MS } from "../infra/plugin-approvals.js";
@@ -1761,3 +1763,142 @@ describe("before_tool_call requireApproval handling", () => {
17611763
expect(onResolution).toHaveBeenCalledWith("cancelled");
17621764
});
17631765
});
1766+
1767+
describe("before_tool_call tool content private-data capture", () => {
1768+
type TrustedToolEvent = {
1769+
event: DiagnosticEventPayload;
1770+
privateData: DiagnosticEventPrivateData;
1771+
};
1772+
1773+
beforeEach(() => {
1774+
resetDiagnosticSessionStateForTest();
1775+
resetDiagnosticEventsForTest();
1776+
});
1777+
1778+
async function withTrustedToolEvents(
1779+
run: (emitted: TrustedToolEvent[], flush: () => Promise<void>) => Promise<void>,
1780+
) {
1781+
const emitted: TrustedToolEvent[] = [];
1782+
const stop = onTrustedInternalDiagnosticEvent((event, _metadata, privateData) => {
1783+
if (event.type.startsWith("tool.execution.")) {
1784+
emitted.push({ event, privateData });
1785+
}
1786+
});
1787+
const flush = () =>
1788+
new Promise<void>((resolve) => {
1789+
setImmediate(resolve);
1790+
});
1791+
try {
1792+
await run(emitted, flush);
1793+
} finally {
1794+
stop();
1795+
}
1796+
}
1797+
1798+
function configWithToolContent(
1799+
fields: { toolInputs?: boolean; toolOutputs?: boolean } = {
1800+
toolInputs: true,
1801+
toolOutputs: true,
1802+
},
1803+
) {
1804+
return {
1805+
diagnostics: {
1806+
enabled: true,
1807+
otel: {
1808+
enabled: true,
1809+
traces: true,
1810+
captureContent: { enabled: true, ...fields },
1811+
},
1812+
},
1813+
} as unknown as import("../config/types.openclaw.js").OpenClawConfig;
1814+
}
1815+
1816+
it("attaches tool input/output to private data when opted in", async () => {
1817+
const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "file body" }] });
1818+
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
1819+
agentId: "main",
1820+
sessionKey: "session-key",
1821+
runId: "run-1",
1822+
loopDetection: { enabled: false },
1823+
config: configWithToolContent(),
1824+
});
1825+
1826+
await withTrustedToolEvents(async (emitted, flush) => {
1827+
await tool.execute("call-1", { path: "/etc/secret" }, undefined, undefined);
1828+
await flush();
1829+
1830+
const completed = emitted.find((e) => e.event.type === "tool.execution.completed");
1831+
expect(completed?.privateData.toolContent?.toolInput).toEqual({ path: "/etc/secret" });
1832+
expect(completed?.privateData.toolContent?.toolOutput).toEqual({
1833+
content: [{ type: "text", text: "file body" }],
1834+
});
1835+
// Public event payload must never carry raw params/results.
1836+
expect(JSON.stringify(completed?.event)).not.toContain("/etc/secret");
1837+
expect(JSON.stringify(completed?.event)).not.toContain("file body");
1838+
});
1839+
});
1840+
1841+
it("omits tool content from private data when capture is not configured", async () => {
1842+
const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] });
1843+
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
1844+
agentId: "main",
1845+
sessionKey: "session-key",
1846+
runId: "run-1",
1847+
loopDetection: { enabled: false },
1848+
});
1849+
1850+
await withTrustedToolEvents(async (emitted, flush) => {
1851+
await tool.execute("call-1", { path: "/etc/secret" }, undefined, undefined);
1852+
await flush();
1853+
1854+
const completed = emitted.find((e) => e.event.type === "tool.execution.completed");
1855+
expect(completed).toBeDefined();
1856+
expect(completed?.privateData.toolContent).toBeUndefined();
1857+
});
1858+
});
1859+
1860+
it("captures only opted-in fields and clones away from live params", async () => {
1861+
const liveParams = { path: "/etc/secret" };
1862+
const execute = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "out" }] });
1863+
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
1864+
agentId: "main",
1865+
sessionKey: "session-key",
1866+
runId: "run-1",
1867+
loopDetection: { enabled: false },
1868+
config: configWithToolContent({ toolInputs: true, toolOutputs: false }),
1869+
});
1870+
1871+
await withTrustedToolEvents(async (emitted, flush) => {
1872+
await tool.execute("call-1", liveParams, undefined, undefined);
1873+
await flush();
1874+
1875+
const completed = emitted.find((e) => e.event.type === "tool.execution.completed");
1876+
expect(completed?.privateData.toolContent?.toolInput).toEqual({ path: "/etc/secret" });
1877+
expect(completed?.privateData.toolContent?.toolOutput).toBeUndefined();
1878+
// Captured snapshot is a clone, not the live params object.
1879+
expect(completed?.privateData.toolContent?.toolInput).not.toBe(liveParams);
1880+
});
1881+
});
1882+
1883+
it("attaches tool input but not output on execution errors", async () => {
1884+
const execute = vi.fn().mockRejectedValue(new Error("boom"));
1885+
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
1886+
agentId: "main",
1887+
sessionKey: "session-key",
1888+
runId: "run-1",
1889+
loopDetection: { enabled: false },
1890+
config: configWithToolContent(),
1891+
});
1892+
1893+
await withTrustedToolEvents(async (emitted, flush) => {
1894+
await expect(
1895+
tool.execute("call-1", { path: "/etc/secret" }, undefined, undefined),
1896+
).rejects.toThrow("boom");
1897+
await flush();
1898+
1899+
const errored = emitted.find((e) => e.event.type === "tool.execution.error");
1900+
expect(errored?.privateData.toolContent?.toolInput).toEqual({ path: "/etc/secret" });
1901+
expect(errored?.privateData.toolContent?.toolOutput).toBeUndefined();
1902+
});
1903+
});
1904+
});

0 commit comments

Comments
 (0)