1- import { monitorEventLoopDelay } from "node:perf_hooks" ;
1+ import { monitorEventLoopDelay , performance } from "node:perf_hooks" ;
22import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/run-state.js" ;
33import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js" ;
44import type { CanvasHostServer } from "../canvas-host/server.js" ;
@@ -24,6 +24,10 @@ import {
2424 isDiagnosticsEnabled ,
2525 setDiagnosticsEnabledForProcess ,
2626} from "../infra/diagnostic-events.js" ;
27+ import {
28+ emitDiagnosticsTimelineEvent ,
29+ isDiagnosticsTimelineEnabled ,
30+ } from "../infra/diagnostics-timeline.js" ;
2731import { isTruthyEnvValue , isVitestRuntimeEnv , logAcceptedEnvOption } from "../infra/env.js" ;
2832import { ensureOpenClawCliOnPath } from "../infra/path-env.js" ;
2933import { setGatewaySigusr1RestartPolicy , setPreRestartDeferralCheck } from "../infra/restart.js" ;
@@ -164,30 +168,80 @@ const gatewayRuntime = runtimeForLogger(log);
164168const canvasRuntime = runtimeForLogger ( logCanvas ) ;
165169
166170function createGatewayStartupTrace ( ) {
167- const enabled = isTruthyEnvValue ( process . env . OPENCLAW_GATEWAY_STARTUP_TRACE ) ;
168- const eventLoopDelay = enabled ? monitorEventLoopDelay ( { resolution : 10 } ) : undefined ;
171+ const logEnabled = isTruthyEnvValue ( process . env . OPENCLAW_GATEWAY_STARTUP_TRACE ) ;
172+ const timelineEnabled = isDiagnosticsTimelineEnabled ( ) ;
173+ const eventLoopTimelineEnabled =
174+ timelineEnabled && isTruthyEnvValue ( process . env . OPENCLAW_DIAGNOSTICS_EVENT_LOOP ) ;
175+ const eventLoopDelay =
176+ logEnabled || eventLoopTimelineEnabled ? monitorEventLoopDelay ( { resolution : 10 } ) : undefined ;
169177 eventLoopDelay ?. enable ( ) ;
170178 const started = performance . now ( ) ;
171179 let last = started ;
180+ let spanSequence = 0 ;
172181 const formatMetric = ( key : string , value : number | string ) =>
173182 `${ key } =${ typeof value === "number" ? value . toFixed ( 1 ) : value } ` ;
174- const readEventLoopMaxMs = ( ) => {
183+ const mapTimelineName = ( name : string ) => {
184+ switch ( name ) {
185+ case "config.snapshot" :
186+ return "config.load" ;
187+ case "config.auth" :
188+ case "config.final-snapshot" :
189+ case "runtime.config" :
190+ return "config.normalize" ;
191+ case "plugins.bootstrap" :
192+ return "plugins.load" ;
193+ case "runtime.post-attach" :
194+ case "ready" :
195+ return "gateway.ready" ;
196+ default :
197+ return name ;
198+ }
199+ } ;
200+ const takeEventLoopSample = ( ) => {
175201 if ( ! eventLoopDelay ) {
176- return 0 ;
202+ return undefined ;
177203 }
178- const maxMs = eventLoopDelay . max / 1_000_000 ;
204+ const sample = {
205+ p50Ms : eventLoopDelay . percentile ( 50 ) / 1_000_000 ,
206+ p95Ms : eventLoopDelay . percentile ( 95 ) / 1_000_000 ,
207+ p99Ms : eventLoopDelay . percentile ( 99 ) / 1_000_000 ,
208+ maxMs : eventLoopDelay . max / 1_000_000 ,
209+ } ;
179210 eventLoopDelay . reset ( ) ;
180- return maxMs ;
211+ return sample ;
212+ } ;
213+ const emitEventLoopTimelineSample = (
214+ activeSpanName : string ,
215+ sample : ReturnType < typeof takeEventLoopSample > ,
216+ ) => {
217+ if ( ! eventLoopTimelineEnabled ) {
218+ return ;
219+ }
220+ if ( ! sample ) {
221+ return ;
222+ }
223+ emitDiagnosticsTimelineEvent ( {
224+ type : "eventLoop.sample" ,
225+ name : "eventLoop" ,
226+ phase : "startup" ,
227+ activeSpanName : mapTimelineName ( activeSpanName ) ,
228+ attributes :
229+ activeSpanName === mapTimelineName ( activeSpanName )
230+ ? undefined
231+ : { traceName : activeSpanName } ,
232+ ...sample ,
233+ } ) ;
181234 } ;
182235 const emit = (
183236 name : string ,
184237 durationMs : number ,
185238 totalMs : number ,
239+ eventLoopSample : ReturnType < typeof takeEventLoopSample > ,
186240 extras : ReadonlyArray < readonly [ string , number | string ] > = [ ] ,
187241 ) => {
188- if ( enabled ) {
242+ if ( logEnabled ) {
189243 const metrics = [
190- `eventLoopMax=${ readEventLoopMaxMs ( ) . toFixed ( 1 ) } ms` ,
244+ `eventLoopMax=${ ( eventLoopSample ?. maxMs ?? 0 ) . toFixed ( 1 ) } ms` ,
191245 ...extras . map ( ( [ key , value ] ) => formatMetric ( key , value ) ) ,
192246 ] . join ( " " ) ;
193247 log . info (
@@ -198,27 +252,78 @@ function createGatewayStartupTrace() {
198252 return {
199253 mark ( name : string ) {
200254 const now = performance . now ( ) ;
201- emit ( name , now - last , now - started ) ;
255+ const eventLoopSample = takeEventLoopSample ( ) ;
256+ emit ( name , now - last , now - started , eventLoopSample ) ;
257+ emitDiagnosticsTimelineEvent ( {
258+ type : "mark" ,
259+ name : mapTimelineName ( name ) ,
260+ phase : "startup" ,
261+ durationMs : now - started ,
262+ attributes : name === mapTimelineName ( name ) ? undefined : { traceName : name } ,
263+ } ) ;
264+ emitEventLoopTimelineSample ( name , eventLoopSample ) ;
202265 last = now ;
203266 if ( name === "ready" ) {
204267 eventLoopDelay ?. disable ( ) ;
205268 }
206269 } ,
207270 detail ( name : string , metrics : ReadonlyArray < readonly [ string , number | string ] > ) {
208- if ( ! enabled ) {
209- return ;
271+ const attributes = Object . fromEntries ( metrics ) ;
272+ if ( logEnabled ) {
273+ log . info (
274+ `startup trace: ${ name } ${ metrics . map ( ( [ key , value ] ) => formatMetric ( key , value ) ) . join ( " " ) } ` ,
275+ ) ;
210276 }
211- log . info (
212- `startup trace: ${ name } ${ metrics . map ( ( [ key , value ] ) => formatMetric ( key , value ) ) . join ( " " ) } ` ,
213- ) ;
277+ emitDiagnosticsTimelineEvent ( {
278+ type : "mark" ,
279+ name : mapTimelineName ( name ) ,
280+ phase : "startup" ,
281+ attributes : {
282+ traceName : name ,
283+ ...attributes ,
284+ } ,
285+ } ) ;
214286 } ,
215287 async measure < T > ( name : string , run : ( ) => Promise < T > | T ) : Promise < T > {
216288 const before = performance . now ( ) ;
289+ const spanId = `gateway-startup-${ ++ spanSequence } ` ;
290+ emitDiagnosticsTimelineEvent ( {
291+ type : "span.start" ,
292+ name : mapTimelineName ( name ) ,
293+ phase : "startup" ,
294+ spanId,
295+ attributes : name === mapTimelineName ( name ) ? undefined : { traceName : name } ,
296+ } ) ;
217297 try {
218- return await run ( ) ;
298+ const result = await run ( ) ;
299+ const now = performance . now ( ) ;
300+ emitDiagnosticsTimelineEvent ( {
301+ type : "span.end" ,
302+ name : mapTimelineName ( name ) ,
303+ phase : "startup" ,
304+ spanId,
305+ durationMs : now - before ,
306+ attributes : name === mapTimelineName ( name ) ? undefined : { traceName : name } ,
307+ } ) ;
308+ return result ;
309+ } catch ( error ) {
310+ const now = performance . now ( ) ;
311+ emitDiagnosticsTimelineEvent ( {
312+ type : "span.error" ,
313+ name : mapTimelineName ( name ) ,
314+ phase : "startup" ,
315+ spanId,
316+ durationMs : now - before ,
317+ attributes : name === mapTimelineName ( name ) ? undefined : { traceName : name } ,
318+ errorName : error instanceof Error ? error . name : typeof error ,
319+ errorMessage : error instanceof Error ? error . message : String ( error ) ,
320+ } ) ;
321+ throw error ;
219322 } finally {
220323 const now = performance . now ( ) ;
221- emit ( name , now - before , now - started ) ;
324+ const eventLoopSample = takeEventLoopSample ( ) ;
325+ emit ( name , now - before , now - started , eventLoopSample ) ;
326+ emitEventLoopTimelineSample ( name , eventLoopSample ) ;
222327 last = now ;
223328 }
224329 } ,
0 commit comments