Skip to content

Commit 50da306

Browse files
committed
fix(telemetry): bound message diagnostics labels
1 parent 111df16 commit 50da306

7 files changed

Lines changed: 135 additions & 27 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai
4141

4242
### Fixes
4343

44+
- Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers.
4445
- Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent.
4546
- Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc.
4647
- Release validation: let Windows packaged-upgrade checks continue after the shipped 2026.5.2 updater hits its native-module swap cleanup fallback, verifying the fallback-installed candidate through package metadata and downstream smoke instead of crashing on the immediate update-status probe. Thanks @vincentkoc.

docs/gateway/opentelemetry.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -268,11 +268,11 @@ heartbeat tick. For the config knob and defaults, see
268268
- `openclaw.exec`
269269
- `openclaw.exec.target`, `openclaw.exec.mode`, `openclaw.outcome`, `openclaw.failureKind`, `openclaw.exec.command_length`, `openclaw.exec.exit_code`, `openclaw.exec.timed_out`
270270
- `openclaw.webhook.processed`
271-
- `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`
271+
- `openclaw.channel`, `openclaw.webhook`
272272
- `openclaw.webhook.error`
273-
- `openclaw.channel`, `openclaw.webhook`, `openclaw.chatId`, `openclaw.error`
273+
- `openclaw.channel`, `openclaw.webhook`, `openclaw.error`
274274
- `openclaw.message.processed`
275-
- `openclaw.channel`, `openclaw.outcome`, `openclaw.chatId`, `openclaw.messageId`, `openclaw.reason`
275+
- `openclaw.channel`, `openclaw.outcome`, `openclaw.reason`
276276
- `openclaw.message.delivery`
277277
- `openclaw.channel`, `openclaw.delivery.kind`, `openclaw.outcome`, `openclaw.errorCategory`, `openclaw.delivery.result_count`
278278
- `openclaw.session.stuck`

extensions/diagnostics-otel/src/service.test.ts

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,7 @@ describe("diagnostics-otel service", () => {
296296
type: "webhook.processed",
297297
channel: "telegram",
298298
updateType: "telegram-post",
299+
chatId: "chat-should-not-export",
299300
durationMs: 120,
300301
});
301302
emitDiagnosticEvent({
@@ -307,7 +308,10 @@ describe("diagnostics-otel service", () => {
307308
emitDiagnosticEvent({
308309
type: "message.processed",
309310
channel: "telegram",
311+
chatId: "chat-should-not-export",
312+
messageId: "message-should-not-export",
310313
outcome: "completed",
314+
reason: "progress draft / message tool 123",
311315
durationMs: 55,
312316
});
313317
emitDiagnosticEvent({
@@ -348,6 +352,33 @@ describe("diagnostics-otel service", () => {
348352
expect(spanNames).toContain("openclaw.webhook.processed");
349353
expect(spanNames).toContain("openclaw.message.processed");
350354
expect(spanNames).toContain("openclaw.session.stuck");
355+
const webhookSpanCall = telemetryState.tracer.startSpan.mock.calls.find(
356+
(call) => call[0] === "openclaw.webhook.processed",
357+
);
358+
expect(webhookSpanCall?.[1]).toEqual({
359+
attributes: expect.not.objectContaining({
360+
"openclaw.chatId": expect.anything(),
361+
}),
362+
startTime: expect.any(Number),
363+
});
364+
const messageSpanCall = telemetryState.tracer.startSpan.mock.calls.find(
365+
(call) => call[0] === "openclaw.message.processed",
366+
);
367+
expect(messageSpanCall?.[1]).toEqual({
368+
attributes: expect.objectContaining({
369+
"openclaw.channel": "telegram",
370+
"openclaw.outcome": "completed",
371+
"openclaw.reason": "unknown",
372+
}),
373+
startTime: expect.any(Number),
374+
});
375+
expect(messageSpanCall?.[1]).toEqual({
376+
attributes: expect.not.objectContaining({
377+
"openclaw.chatId": expect.anything(),
378+
"openclaw.messageId": expect.anything(),
379+
}),
380+
startTime: expect.any(Number),
381+
});
351382

352383
emitDiagnosticEvent({
353384
type: "log.record",
@@ -2387,6 +2418,7 @@ describe("diagnostics-otel service", () => {
23872418
for (const call of deliverySpanCalls) {
23882419
expect(call[1]).toEqual({
23892420
attributes: expect.not.objectContaining({
2421+
"openclaw.chatId": expect.anything(),
23902422
"openclaw.sessionKey": expect.anything(),
23912423
"openclaw.messageId": expect.anything(),
23922424
"openclaw.conversationId": expect.anything(),
@@ -2406,6 +2438,46 @@ describe("diagnostics-otel service", () => {
24062438
await service.stop?.(ctx);
24072439
});
24082440

2441+
test("bounds unsafe message delivery attributes before export", async () => {
2442+
const service = createDiagnosticsOtelService();
2443+
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
2444+
await service.start(ctx);
2445+
2446+
emitDiagnosticEvent({
2447+
type: "message.delivery.completed",
2448+
channel: "discord/custom",
2449+
deliveryKind: "progress draft" as never,
2450+
durationMs: 20,
2451+
resultCount: 1,
2452+
sessionKey: "session-secret",
2453+
});
2454+
await flushDiagnosticEvents();
2455+
2456+
expect(
2457+
telemetryState.histograms.get("openclaw.message.delivery.duration_ms")?.record,
2458+
).toHaveBeenCalledWith(
2459+
20,
2460+
expect.objectContaining({
2461+
"openclaw.channel": "unknown",
2462+
"openclaw.delivery.kind": "other",
2463+
"openclaw.outcome": "completed",
2464+
}),
2465+
);
2466+
const deliverySpanCall = telemetryState.tracer.startSpan.mock.calls.find(
2467+
(call) => call[0] === "openclaw.message.delivery",
2468+
);
2469+
expect(deliverySpanCall?.[1]).toMatchObject({
2470+
attributes: {
2471+
"openclaw.channel": "unknown",
2472+
"openclaw.delivery.kind": "other",
2473+
"openclaw.outcome": "completed",
2474+
"openclaw.delivery.result_count": 1,
2475+
},
2476+
startTime: expect.any(Number),
2477+
});
2478+
await service.stop?.(ctx);
2479+
});
2480+
24092481
test("does not export model or tool content unless capture is explicitly enabled", async () => {
24102482
const service = createDiagnosticsOtelService();
24112483
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });

extensions/diagnostics-otel/src/service.ts

Lines changed: 12 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@ import {
3131
const DEFAULT_SERVICE_NAME = "openclaw";
3232
const DROPPED_OTEL_ATTRIBUTE_KEYS = new Set([
3333
"openclaw.callId",
34+
"openclaw.chatId",
35+
"openclaw.messageId",
3436
"openclaw.parentSpanId",
3537
"openclaw.runId",
3638
"openclaw.sessionId",
@@ -1262,8 +1264,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
12621264
evt: Extract<DiagnosticEventPayload, { type: "webhook.processed" }>,
12631265
) => {
12641266
const attrs = {
1265-
"openclaw.channel": evt.channel ?? "unknown",
1266-
"openclaw.webhook": evt.updateType ?? "unknown",
1267+
"openclaw.channel": lowCardinalityAttr(evt.channel),
1268+
"openclaw.webhook": lowCardinalityAttr(evt.updateType),
12671269
};
12681270
if (typeof evt.durationMs === "number") {
12691271
webhookDurationHistogram.record(evt.durationMs, attrs);
@@ -1272,9 +1274,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
12721274
return;
12731275
}
12741276
const spanAttrs: Record<string, string | number> = { ...attrs };
1275-
if (evt.chatId !== undefined) {
1276-
spanAttrs["openclaw.chatId"] = String(evt.chatId);
1277-
}
12781277
const span = spanWithDuration("openclaw.webhook.processed", spanAttrs, evt.durationMs);
12791278
span.end();
12801279
};
@@ -1283,8 +1282,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
12831282
evt: Extract<DiagnosticEventPayload, { type: "webhook.error" }>,
12841283
) => {
12851284
const attrs = {
1286-
"openclaw.channel": evt.channel ?? "unknown",
1287-
"openclaw.webhook": evt.updateType ?? "unknown",
1285+
"openclaw.channel": lowCardinalityAttr(evt.channel),
1286+
"openclaw.webhook": lowCardinalityAttr(evt.updateType),
12881287
};
12891288
webhookErrorCounter.add(1, attrs);
12901289
if (!tracesEnabled) {
@@ -1295,9 +1294,6 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
12951294
...attrs,
12961295
"openclaw.error": redactedError,
12971296
};
1298-
if (evt.chatId !== undefined) {
1299-
spanAttrs["openclaw.chatId"] = String(evt.chatId);
1300-
}
13011297
const span = tracer.startSpan("openclaw.webhook.error", {
13021298
attributes: spanAttrs,
13031299
});
@@ -1309,8 +1305,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
13091305
evt: Extract<DiagnosticEventPayload, { type: "message.queued" }>,
13101306
) => {
13111307
const attrs = {
1312-
"openclaw.channel": evt.channel ?? "unknown",
1313-
"openclaw.source": evt.source ?? "unknown",
1308+
"openclaw.channel": lowCardinalityAttr(evt.channel),
1309+
"openclaw.source": lowCardinalityAttr(evt.source),
13141310
};
13151311
messageQueuedCounter.add(1, attrs);
13161312
if (typeof evt.queueDepth === "number") {
@@ -1322,7 +1318,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
13221318
evt: Extract<DiagnosticEventPayload, { type: "message.processed" }>,
13231319
) => {
13241320
const attrs = {
1325-
"openclaw.channel": evt.channel ?? "unknown",
1321+
"openclaw.channel": lowCardinalityAttr(evt.channel),
13261322
"openclaw.outcome": evt.outcome ?? "unknown",
13271323
};
13281324
messageProcessedCounter.add(1, attrs);
@@ -1333,14 +1329,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
13331329
return;
13341330
}
13351331
const spanAttrs: Record<string, string | number> = { ...attrs };
1336-
if (evt.chatId !== undefined) {
1337-
spanAttrs["openclaw.chatId"] = String(evt.chatId);
1338-
}
1339-
if (evt.messageId !== undefined) {
1340-
spanAttrs["openclaw.messageId"] = String(evt.messageId);
1341-
}
13421332
if (evt.reason) {
1343-
spanAttrs["openclaw.reason"] = redactSensitiveText(evt.reason);
1333+
spanAttrs["openclaw.reason"] = lowCardinalityAttr(evt.reason, "unknown");
13441334
}
13451335
const span = spanWithDuration("openclaw.message.processed", spanAttrs, evt.durationMs);
13461336
if (evt.outcome === "error" && evt.error) {
@@ -1352,8 +1342,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
13521342
const messageDeliveryAttrs = (
13531343
evt: MessageDeliveryDiagnosticEvent,
13541344
): Record<string, string> => ({
1355-
"openclaw.channel": evt.channel,
1356-
"openclaw.delivery.kind": evt.deliveryKind,
1345+
"openclaw.channel": lowCardinalityAttr(evt.channel),
1346+
"openclaw.delivery.kind": lowCardinalityAttr(evt.deliveryKind, "other"),
13571347
});
13581348

13591349
const recordMessageDeliveryStarted = (

extensions/diagnostics-prometheus/src/service.test.ts

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,49 @@ describe("diagnostics-prometheus service", () => {
8787
expect(rendered).not.toContain("sk-secret");
8888
});
8989

90+
it("bounds messaging labels without exporting raw chat identifiers", () => {
91+
const store = __test__.createPrometheusMetricStore();
92+
93+
__test__.recordDiagnosticEvent(
94+
store,
95+
{
96+
...baseEvent(),
97+
type: "message.processed",
98+
channel: "telegram/custom",
99+
chatId: "chat-should-not-export",
100+
messageId: "message-should-not-export",
101+
outcome: "completed",
102+
reason: "progress draft / message tool 123",
103+
durationMs: 25,
104+
},
105+
trusted,
106+
);
107+
__test__.recordDiagnosticEvent(
108+
store,
109+
{
110+
...baseEvent(),
111+
type: "message.delivery.error",
112+
channel: "discord/custom",
113+
deliveryKind: "progress draft" as never,
114+
durationMs: 50,
115+
errorCategory: "TimeoutError",
116+
},
117+
trusted,
118+
);
119+
120+
const rendered = __test__.renderPrometheusMetrics(store);
121+
122+
expect(rendered).toContain(
123+
'openclaw_message_processed_total{channel="unknown",outcome="completed",reason="none"} 1',
124+
);
125+
expect(rendered).toContain(
126+
'openclaw_message_delivery_total{channel="unknown",delivery_kind="other",error_category="TimeoutError",outcome="error"} 1',
127+
);
128+
expect(rendered).not.toContain("chat-should-not-export");
129+
expect(rendered).not.toContain("message-should-not-export");
130+
expect(rendered).not.toContain("progress draft");
131+
});
132+
90133
it("caps metric series growth and reports dropped series", () => {
91134
const store = __test__.createPrometheusMetricStore();
92135

extensions/diagnostics-prometheus/src/service.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -504,7 +504,7 @@ function recordDiagnosticEvent(
504504
"Outbound message delivery attempts by outcome.",
505505
{
506506
channel: lowCardinalityLabel(evt.channel),
507-
delivery_kind: evt.deliveryKind,
507+
delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
508508
error_category:
509509
evt.type === "message.delivery.error"
510510
? lowCardinalityLabel(evt.errorCategory, "other")
@@ -517,7 +517,7 @@ function recordDiagnosticEvent(
517517
"Outbound message delivery duration in seconds.",
518518
{
519519
channel: lowCardinalityLabel(evt.channel),
520-
delivery_kind: evt.deliveryKind,
520+
delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
521521
error_category:
522522
evt.type === "message.delivery.error"
523523
? lowCardinalityLabel(evt.errorCategory, "other")

scripts/qa-otel-smoke.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,8 @@ const REQUIRED_SPAN_NAMES = [
8888
] as const;
8989
const DISALLOWED_ATTRIBUTE_KEYS = new Set([
9090
"openclaw.runId",
91+
"openclaw.chatId",
92+
"openclaw.messageId",
9193
"openclaw.sessionKey",
9294
"openclaw.sessionId",
9395
"openclaw.callId",

0 commit comments

Comments
 (0)