Skip to content

Commit 8fade9d

Browse files
authored
feat(diagnostics): attach trace context to otel logs (#70961)
* feat(diagnostics): attach trace context to otel logs * fix(diagnostics): satisfy trace flags lint
1 parent 48b9452 commit 8fade9d

3 files changed

Lines changed: 160 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
77
### Changes
88

99
- Diagnostics/OTEL: add a lightweight diagnostic trace-context carrier for future span correlation without adding OTEL SDK state to core. Thanks @vincentkoc.
10+
- Diagnostics/OTEL: attach diagnostic trace context to exported OTEL logs so log records can correlate with future spans without adding retained process state. Thanks @vincentkoc.
1011
- Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it.
1112
- Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent.
1213
- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.

extensions/diagnostics-otel/src/service.test.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ const telemetryState = vi.hoisted(() => {
1010
end: vi.fn(),
1111
setStatus: vi.fn(),
1212
})),
13+
setSpanContext: vi.fn((_ctx: unknown, spanContext: unknown) => ({ spanContext })),
1314
};
1415
const meter = {
1516
createCounter: vi.fn((name: string) => {
@@ -33,11 +34,19 @@ const logShutdown = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
3334
const traceExporterCtor = vi.hoisted(() => vi.fn());
3435

3536
vi.mock("@opentelemetry/api", () => ({
37+
context: {
38+
active: () => ({}),
39+
},
3640
metrics: {
3741
getMeter: () => telemetryState.meter,
3842
},
3943
trace: {
4044
getTracer: () => telemetryState.tracer,
45+
setSpanContext: telemetryState.tracer.setSpanContext,
46+
},
47+
TraceFlags: {
48+
NONE: 0,
49+
SAMPLED: 1,
4150
},
4251
SpanStatusCode: {
4352
ERROR: 2,
@@ -110,6 +119,8 @@ import { createDiagnosticsOtelService } from "./service.js";
110119
const OTEL_TEST_STATE_DIR = "/tmp/openclaw-diagnostics-otel-test";
111120
const OTEL_TEST_ENDPOINT = "http://otel-collector:4318";
112121
const OTEL_TEST_PROTOCOL = "http/protobuf";
122+
const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736";
123+
const SPAN_ID = "00f067aa0ba902b7";
113124

114125
function createLogger() {
115126
return {
@@ -181,6 +192,7 @@ describe("diagnostics-otel service", () => {
181192
telemetryState.counters.clear();
182193
telemetryState.histograms.clear();
183194
telemetryState.tracer.startSpan.mockClear();
195+
telemetryState.tracer.setSpanContext.mockClear();
184196
telemetryState.meter.createCounter.mockClear();
185197
telemetryState.meter.createHistogram.mockClear();
186198
sdkStart.mockClear();
@@ -336,6 +348,42 @@ describe("diagnostics-otel service", () => {
336348
}
337349
});
338350

351+
// Verifies that a `trace` object carried in a log argument is surfaced three ways:
// as openclaw.* attributes, as a span context handed to trace.setSpanContext,
// and as the `context` of the emitted log record.
test("attaches diagnostic trace context to exported logs", async () => {
  // Matcher shared by the span-context assertions below.
  const expectedIds = { traceId: TRACE_ID, spanId: SPAN_ID };

  const exportedLog = await emitAndCaptureLog({
    0: '{"subsystem":"diagnostic"}',
    1: {
      trace: {
        traceId: TRACE_ID,
        spanId: SPAN_ID,
        traceFlags: "01",
      },
    },
    2: "traceable log",
    _meta: { logLevelName: "INFO", date: new Date() },
  });

  // The raw hex identifiers are exported verbatim as log attributes.
  expect(exportedLog?.attributes).toMatchObject({
    "openclaw.traceId": TRACE_ID,
    "openclaw.spanId": SPAN_ID,
    "openclaw.traceFlags": "01",
  });

  // The "01" flags string is converted to the numeric sampled flag (1 in the mock).
  const expectedSpanContext = expect.objectContaining({
    ...expectedIds,
    traceFlags: 1,
    isRemote: true,
  });
  expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith(
    expect.anything(),
    expectedSpanContext,
  );

  // The mocked setSpanContext returns `{ spanContext }`, which becomes the record context.
  expect(exportedLog?.context).toEqual({
    spanContext: expect.objectContaining(expectedIds),
  });
});
386+
339387
test("redacts sensitive reason in session.state metric attributes", async () => {
340388
const service = createDiagnosticsOtelService();
341389
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });

extensions/diagnostics-otel/src/service.ts

Lines changed: 111 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1-
import { metrics, trace, SpanStatusCode } from "@opentelemetry/api";
2-
import type { SeverityNumber } from "@opentelemetry/api-logs";
1+
import {
2+
context as otelContextApi,
3+
metrics,
4+
trace,
5+
SpanStatusCode,
6+
TraceFlags,
7+
} from "@opentelemetry/api";
8+
import type { LogRecord, SeverityNumber } from "@opentelemetry/api-logs";
39
import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-proto";
410
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-proto";
511
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto";
@@ -9,8 +15,19 @@ import { PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
915
import { NodeSDK } from "@opentelemetry/sdk-node";
1016
import { ParentBasedSampler, TraceIdRatioBasedSampler } from "@opentelemetry/sdk-trace-base";
1117
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
12-
import type { DiagnosticEventPayload, OpenClawPluginService } from "../api.js";
13-
import { onDiagnosticEvent, redactSensitiveText, registerLogTransport } from "../api.js";
18+
import type {
19+
DiagnosticEventPayload,
20+
DiagnosticTraceContext,
21+
OpenClawPluginService,
22+
} from "../api.js";
23+
import {
24+
isValidDiagnosticSpanId,
25+
isValidDiagnosticTraceFlags,
26+
isValidDiagnosticTraceId,
27+
onDiagnosticEvent,
28+
redactSensitiveText,
29+
registerLogTransport,
30+
} from "../api.js";
1431

1532
const DEFAULT_SERVICE_NAME = "openclaw";
1633

@@ -62,6 +79,83 @@ function redactOtelAttributes(attributes: Record<string, string | number | boole
6279
return redactedAttributes;
6380
}
6481

82+
/**
 * Validates an arbitrary value as a diagnostic trace context and returns a
 * fresh object containing only the recognised fields.
 *
 * @param value - Candidate value of unknown shape.
 * @returns A normalized trace context, or `undefined` when `value` is not a
 *   non-array object, its `traceId` fails validation, or any optional field
 *   that is present fails its validator.
 */
function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefined {
  const isObjectLike = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isObjectLike) {
    return undefined;
  }

  const { traceId, spanId, parentSpanId, traceFlags } = value as Partial<DiagnosticTraceContext>;

  // traceId is mandatory; everything else is optional.
  if (!isValidDiagnosticTraceId(traceId)) {
    return undefined;
  }

  // Optional fields are only validated when they are actually present.
  const optionalFieldsAreValid =
    (spanId === undefined || isValidDiagnosticSpanId(spanId)) &&
    (parentSpanId === undefined || isValidDiagnosticSpanId(parentSpanId)) &&
    (traceFlags === undefined || isValidDiagnosticTraceFlags(traceFlags));
  if (!optionalFieldsAreValid) {
    return undefined;
  }

  // Omit absent optional keys entirely instead of emitting them as undefined.
  return {
    traceId,
    ...(spanId ? { spanId } : {}),
    ...(parentSpanId ? { parentSpanId } : {}),
    ...(traceFlags ? { traceFlags } : {}),
  };
}
106+
107+
/**
 * Pulls a trace context out of a value that either is a trace context itself
 * or is an object carrying one under its `trace` property.
 *
 * @param value - Candidate value of unknown shape.
 * @returns The normalized trace context, preferring `value` itself over
 *   `value.trace`; `undefined` when neither normalizes.
 */
function extractTraceContext(value: unknown): DiagnosticTraceContext | undefined {
  const asContext = normalizeTraceContext(value);
  if (asContext !== undefined) {
    return asContext;
  }

  // Only non-array objects can carry a nested `trace` property.
  const canHoldNestedTrace = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  return canHoldNestedTrace
    ? normalizeTraceContext((value as { trace?: unknown }).trace)
    : undefined;
}
117+
118+
/**
 * Locates the first usable trace context attached to a log record.
 *
 * @param bindings - Logger bindings, checked before any argument.
 * @param numericArgs - Positional log arguments, checked in order.
 * @returns The first trace context found, or `undefined` when none of the
 *   candidates yields one.
 */
function findLogTraceContext(
  bindings: Record<string, unknown> | undefined,
  numericArgs: unknown[],
): DiagnosticTraceContext | undefined {
  // Bindings take precedence, followed by the arguments in positional order.
  const candidates: unknown[] = [bindings, ...numericArgs];
  for (const candidate of candidates) {
    const found = extractTraceContext(candidate);
    if (found) {
      return found;
    }
  }
  return undefined;
}
134+
135+
/**
 * Converts a hexadecimal trace-flags string into an OTEL `TraceFlags` value.
 *
 * Only the sampled bit is honoured; every other bit is discarded. A missing
 * value is treated as "00".
 *
 * @param traceFlags - Hex flags string, or `undefined`.
 * @returns `TraceFlags.SAMPLED` when the sampled bit is set, otherwise
 *   `TraceFlags.NONE`.
 */
function traceFlagsToOtel(traceFlags: string | undefined): TraceFlags {
  const flagBits = Number.parseInt(traceFlags ?? "00", 16);
  // An unparsable string yields NaN, which the bitwise AND coerces to 0.
  const isSampled = (flagBits & TraceFlags.SAMPLED) !== 0;
  if (isSampled) {
    return TraceFlags.SAMPLED;
  }
  return TraceFlags.NONE;
}
139+
140+
/**
 * Copies trace identifiers onto a log attribute bag under `openclaw.*` keys.
 *
 * Mutates `attributes` in place. `openclaw.traceId` is always written when a
 * trace context is supplied; the span id, parent span id and trace flags are
 * written only when they hold a truthy value.
 *
 * @param attributes - Attribute bag to extend.
 * @param traceContext - Trace context to copy from; a falsy value leaves
 *   `attributes` untouched.
 */
function addTraceAttributes(
  attributes: Record<string, string | number | boolean>,
  traceContext: DiagnosticTraceContext | undefined,
): void {
  if (!traceContext) {
    return;
  }

  attributes["openclaw.traceId"] = traceContext.traceId;

  // Optional fields, listed in the order their keys should be inserted.
  const optionalEntries = [
    ["openclaw.spanId", traceContext.spanId],
    ["openclaw.parentSpanId", traceContext.parentSpanId],
    ["openclaw.traceFlags", traceContext.traceFlags],
  ] as const;

  for (const [attributeKey, fieldValue] of optionalEntries) {
    if (fieldValue) {
      attributes[attributeKey] = fieldValue;
    }
  }
}
158+
65159
export function createDiagnosticsOtelService(): OpenClawPluginService {
66160
let sdk: NodeSDK | null = null;
67161
let logProvider: LoggerProvider | null = null;
@@ -294,6 +388,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
294388
// ignore malformed json bindings
295389
}
296390
}
391+
const traceContext = findLogTraceContext(bindings, numericArgs);
297392

298393
let message = "";
299394
if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") {
@@ -343,15 +438,25 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
343438
if (meta?.path?.filePathWithLine) {
344439
attributes["openclaw.code.location"] = meta.path.filePathWithLine;
345440
}
441+
addTraceAttributes(attributes, traceContext);
346442

347443
// OTLP can leave the host boundary, so redact string fields before export.
348-
otelLogger.emit({
444+
const logRecord: LogRecord = {
349445
body: redactSensitiveText(message),
350446
severityText: logLevelName,
351447
severityNumber,
352448
attributes: redactOtelAttributes(attributes),
353449
timestamp: meta?.date ?? new Date(),
354-
});
450+
};
451+
if (traceContext?.spanId) {
452+
logRecord.context = trace.setSpanContext(otelContextApi.active(), {
453+
traceId: traceContext.traceId,
454+
spanId: traceContext.spanId,
455+
traceFlags: traceFlagsToOtel(traceContext.traceFlags),
456+
isRemote: true,
457+
});
458+
}
459+
otelLogger.emit(logRecord);
355460
} catch (err) {
356461
ctx.logger.error(`diagnostics-otel: log transport failed: ${formatError(err)}`);
357462
}

0 commit comments

Comments
 (0)