问题描述
难以追踪单次请求的完整链路:
- 无法确定延迟瓶颈在哪个阶段
- 错误定位困难
- 缺乏工具调用的详细耗时
- LLM 调用的耗时、token 用量和缓存命中情况无法精确统计
建议方案
追踪上下文
// ai/tracing/context.go
// TracingContext holds everything recorded for one traced request: its
// identifiers, the attributes of the request, and the spans collected for
// phases, tool calls and LLM calls.
//
// It embeds a sync.Mutex by value, so it must be passed by pointer and never
// copied.
type TracingContext struct {
	// Trace identifiers.
	TraceID, SpanID, ParentID string

	StartTime time.Time

	// Attributes of the traced request.
	UserID    int32
	SessionID string
	AgentType string

	// Spans for each stage of the request.
	// Phases is presumably keyed by phase name — confirm against StartPhase.
	Phases    map[string]*PhaseSpan
	ToolCalls []ToolCallSpan
	LLMCalls  []LLMCallSpan

	// NOTE(review): which of the fields above mu guards is not visible in
	// this section.
	mu sync.Mutex
}
// PhaseSpan is the record of one named phase of a request.
type PhaseSpan struct {
	Name string

	// Timing of the phase.
	StartTime, EndTime time.Time
	Duration           time.Duration

	// Metadata carries free-form key/value details attached to the phase.
	Metadata map[string]any
	// Error is the error text for the phase.
	// NOTE(review): presumably empty when the phase succeeded — confirm.
	Error string
}
// ToolCallSpan is the record of a single tool invocation.
type ToolCallSpan struct {
	ToolName string

	// Timing of the call.
	StartTime time.Time
	Duration  time.Duration

	// Input and Output hold the tool's input and output as strings.
	Input, Output string

	// Outcome of the call.
	Success bool
	Error   string
}
// LLMCallSpan is the record of a single LLM call.
type LLMCallSpan struct {
	// Purpose says what the call was for, e.g. "planning", "synthesis".
	Purpose string
	Model   string

	// Timing of the call.
	StartTime time.Time
	Duration  time.Duration

	// Token counts for the call.
	InputTokens, OutputTokens int

	CacheHit bool
}
埋点示例
// ExecuteWithCallback shows where the tracing hooks go in the agent's request
// flow: a planning phase, a retrieval phase with one tool-call span per tool,
// and a synthesis phase. The "..." arguments are placeholders carried over
// from the sketch.
//
// It returns the planning or synthesis error if either step fails. A tool
// error is recorded on that tool's span and does not abort the request.
func (p *AmazingParrot) ExecuteWithCallback(...) error {
	trace := tracing.NewContext()
	trace.SetUserID(p.userID)
	trace.SetAgentType(p.Name())
	defer trace.Finish()
	// Export is deferred so that failed requests are exported too. Deferred
	// calls run last-in-first-out, so Export still runs before Finish.
	// NOTE(review): if Finish is what stamps the end time, Export should run
	// after it instead — confirm against the tracing package.
	defer trace.Export()

	// Phase 1: Planning
	trace.StartPhase("planning")
	plan, err := p.planRetrieval(ctx, ...)
	if err != nil {
		// Close the phase and stop here: plan must not be read after a
		// failed planning call.
		trace.EndPhase("planning", map[string]any{"error": err.Error()})
		return err
	}
	trace.EndPhase("planning", map[string]any{
		"needs_memo":     plan.needsMemoSearch,
		"needs_schedule": plan.needsScheduleQuery,
	})

	// Phase 2: Retrieval
	trace.StartPhase("retrieval")
	for _, tool := range tools {
		toolSpan := trace.StartToolCall(tool.Name())
		// A separate name keeps the tool error from shadowing err; it is
		// handed to the span rather than returned.
		result, toolErr := tool.Execute(ctx, input)
		trace.EndToolCall(toolSpan, result, toolErr)
	}
	trace.EndPhase("retrieval", nil)

	// Phase 3: Synthesis
	trace.StartPhase("synthesis")
	answer, err := p.synthesizeAnswer(ctx, ...)
	if err != nil {
		trace.EndPhase("synthesis", map[string]any{"error": err.Error()})
		return err
	}
	trace.EndPhase("synthesis", nil)

	// NOTE(review): the sketch never consumes answer; presumably it is handed
	// to the callback in the real implementation.
	_ = answer
	return nil
}
导出器
// ai/tracing/exporter.go
// Exporter is the interface for sinks that export a TracingContext.
type Exporter interface {
// Export exports the given trace and returns an error on failure.
Export(trace *TracingContext) error
}
// LogExporter is the log-based exporter: its Export method writes a trace
// summary to logger.
type LogExporter struct {
// logger receives the "trace completed" record.
logger *slog.Logger
}
// Export logs a single "trace completed" info record for trace. The record
// carries the trace ID, the total duration in milliseconds, the phase
// summary, and the number of tool calls and LLM calls. It always returns nil.
func (e *LogExporter) Export(trace *TracingContext) error {
	log := e.logger

	// Collect the attributes in the order they appear in the record.
	traceID := trace.TraceID
	totalMs := trace.TotalDuration().Milliseconds()
	phases := trace.PhaseSummary()
	toolCalls := len(trace.ToolCalls)
	llmCalls := len(trace.LLMCalls)

	attrs := []any{
		"trace_id", traceID,
		"total_duration_ms", totalMs,
		"phases", phases,
		"tool_calls", toolCalls,
		"llm_calls", llmCalls,
	}
	log.Info("trace completed", attrs...)
	return nil
}
// JaegerExporter is the optional Jaeger/Zipkin exporter.
// NOTE(review): no Export method is shown for this type in this section.
type JaegerExporter struct {
client *jaeger.Client
}
查询 API
// GET /api/v1/ai/traces?user_id=123&limit=10
//
// TraceQueryResponse is the JSON response body of the trace query endpoint
// above.
type TraceQueryResponse struct {
// Traces is the list of trace summaries in the response.
Traces []TraceSummary `json:"traces"`
// NOTE(review): presumably the total number of matching traces, which may
// exceed len(Traces) when limit applies — confirm.
Total int `json:"total"`
}
// TraceSummary is one entry of TraceQueryResponse.Traces.
type TraceSummary struct {
TraceID string `json:"trace_id"`
StartTime time.Time `json:"start_time"`
// NOTE(review): time.Duration has no custom JSON encoding, so this is
// emitted as an integer number of nanoseconds; confirm that is the unit
// API clients expect.
TotalDuration time.Duration `json:"total_duration"`
AgentType string `json:"agent_type"`
Success bool `json:"success"`
// The PhaseSummary type is not defined in this section.
Phases []PhaseSummary `json:"phases"`
}
文件变更
ai/tracing/context.go - 追踪上下文
ai/tracing/exporter.go - 导出器接口和实现
ai/tracing/storage.go - trace 存储
ai/agent/amazing_parrot.go - 埋点
server/router/api/v1/ai_trace.go - 查询 API
验收标准
预估工时
2-3 天
相关 Issue
- 依赖: 无
- 相关: #XX (Prometheus 指标)
问题描述
难以追踪单次请求的完整链路:
建议方案
追踪上下文
埋点示例
导出器
查询 API
文件变更
ai/tracing/context.go - 追踪上下文
ai/tracing/exporter.go - 导出器接口和实现
ai/tracing/storage.go - trace 存储
ai/agent/amazing_parrot.go - 埋点
server/router/api/v1/ai_trace.go - 查询 API
验收标准
预估工时
2-3 天
相关 Issue