-
Notifications
You must be signed in to change notification settings - Fork 367
Expand file tree
/
Copy pathmetrics.go
More file actions
263 lines (227 loc) · 9.25 KB
/
metrics.go
File metadata and controls
263 lines (227 loc) · 9.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
package workflow
import (
"encoding/json"
"sort"
"strings"
"time"
"github.com/github/gh-aw/pkg/logger"
"github.com/github/gh-aw/pkg/typeutil"
)
// metricsLog is the package-scoped logger used by the metrics-extraction
// helpers in this file.
var metricsLog = logger.New("workflow:metrics")
// ToolCallInfo represents aggregated statistics for a single tool, collected
// across every invocation of that tool observed in an agent run's logs.
type ToolCallInfo struct {
	Name          string        // Prettified tool name (e.g., "github::search_issues", "bash")
	CallCount     int           // Number of times this tool was called
	MaxInputSize  int           // Maximum input size in tokens for any call
	MaxOutputSize int           // Maximum output size in tokens for any call
	MaxDuration   time.Duration // Maximum execution duration for any call
}
// LogMetrics represents metrics extracted from agent log files: token usage,
// estimated cost, turn counts, per-tool call statistics, and inter-turn timing.
type LogMetrics struct {
	TokenUsage          int           // Total tokens consumed (input + output, including cache tokens where reported)
	EstimatedCost       float64       // Estimated cost in USD, when the logs report one
	Turns               int           // Number of turns needed to complete the task
	ToolCalls           []ToolCallInfo // Tool call statistics, sorted by tool name
	ToolSequences       [][]string    // Sequences of tool calls preserving order
	AvgTimeBetweenTurns time.Duration // Mean time between consecutive LLM API calls (computed from per-turn timestamps when available)
	MaxTimeBetweenTurns time.Duration // Maximum time between any two consecutive LLM API calls
	MedianTimeBetweenTurns time.Duration // Median time between consecutive LLM API calls
	// StdDevTimeBetweenTurns is the sample standard deviation (Bessel's correction, n-1
	// denominator) of inter-turn intervals, treating the observed turns as a sample of
	// the agent's execution behaviour rather than an exhaustive population.
	StdDevTimeBetweenTurns time.Duration
	// Timestamp removed - use GitHub API timestamps instead of parsing from logs
}
// ExtractJSONMetrics extracts token-usage and cost metrics from a single
// streaming JSON log line. Lines that are empty, or that contain no parseable
// JSON object even after heuristic recovery, yield a zero-valued LogMetrics.
//
// The verbose parameter is currently unused; it is retained so the signature
// stays backward compatible with existing callers.
func ExtractJSONMetrics(line string, verbose bool) LogMetrics {
	var metrics LogMetrics

	trimmed := strings.TrimSpace(line)
	if trimmed == "" {
		return metrics
	}

	metricsLog.Printf("Extracting metrics from JSON line: line_length=%d", len(trimmed))

	// If the line isn't a clean JSON object, try to extract a JSON object
	// substring between the first '{' and the last '}'.
	jsonStr := trimmed
	if !strings.HasPrefix(trimmed, "{") || !strings.HasSuffix(trimmed, "}") {
		// Named start/end (rather than "close") to avoid shadowing the
		// predeclared close builtin.
		start := strings.Index(trimmed, "{")
		end := strings.LastIndex(trimmed, "}")
		if start == -1 || end == -1 || end <= start {
			return metrics
		}
		jsonStr = trimmed[start : end+1]
	}

	// Try to parse as a generic JSON object.
	var jsonData map[string]any
	if err := json.Unmarshal([]byte(jsonStr), &jsonData); err != nil {
		// Relaxed fallback: noisy logs sometimes emit JSON-like objects with
		// single quotes. Replacing them with double quotes is a last-resort
		// heuristic and can corrupt values that contain apostrophes.
		relaxed := strings.ReplaceAll(jsonStr, "'", "\"")
		if err2 := json.Unmarshal([]byte(relaxed), &jsonData); err2 != nil {
			return metrics
		}
	}

	// Extract token usage from the various possible fields and structures.
	if tokens := ExtractJSONTokenUsage(jsonData); tokens > 0 {
		metrics.TokenUsage = tokens
	}
	// Extract cost information from the various possible fields.
	if cost := ExtractJSONCost(jsonData); cost > 0 {
		metrics.EstimatedCost = cost
	}
	return metrics
}
// ExtractJSONTokenUsage extracts a total token count from parsed JSON log
// data. Sources are checked in priority order: explicit top-level
// input/output fields, top-level aggregate fields, a nested "usage" object
// (Claude and OpenAI API formats, including cache token fields), and finally
// a streaming "delta" wrapper. Returns 0 when no token information is found.
func ExtractJSONTokenUsage(data map[string]any) int {
	// Highest priority: explicit input+output sums at the top level.
	inTop := typeutil.ConvertToInt(data["input_tokens"])
	outTop := typeutil.ConvertToInt(data["output_tokens"])
	if inTop > 0 || outTop > 0 {
		total := inTop + outTop
		if metricsLog.Enabled() {
			metricsLog.Printf("Token usage extracted: input=%d, output=%d, total=%d", inTop, outTop, total)
		}
		return total
	}

	// Next: top-level fields that carry a single aggregate value.
	aggregateFields := []string{"tokens", "token_count", "total_tokens"}
	for _, name := range aggregateFields {
		if raw, ok := data[name]; ok {
			if count := typeutil.ConvertToInt(raw); count > 0 {
				return count
			}
		}
	}

	// Next: a nested usage object (Claude and OpenAI API formats).
	if usageMap, ok := data["usage"].(map[string]any); ok {
		// Claude format: {"usage": {"input_tokens": 10, "output_tokens": 5,
		//   "cache_creation_input_tokens": 100, "cache_read_input_tokens": 200}}
		in := typeutil.ConvertToInt(usageMap["input_tokens"])
		out := typeutil.ConvertToInt(usageMap["output_tokens"])
		cacheCreate := typeutil.ConvertToInt(usageMap["cache_creation_input_tokens"])
		cacheRead := typeutil.ConvertToInt(usageMap["cache_read_input_tokens"])
		// OpenAI format: {"usage": {"prompt_tokens": 100, "completion_tokens": 50}}
		// Fall back to the OpenAI field names when the Claude fields are absent.
		if in == 0 {
			in = typeutil.ConvertToInt(usageMap["prompt_tokens"])
		}
		if out == 0 {
			out = typeutil.ConvertToInt(usageMap["completion_tokens"])
		}
		if total := in + out + cacheCreate + cacheRead; total > 0 {
			return total
		}
		// Generic aggregate fields nested inside the usage object.
		for _, name := range aggregateFields {
			if raw, ok := usageMap[name]; ok {
				if count := typeutil.ConvertToInt(raw); count > 0 {
					return count
				}
			}
		}
	}

	// Last: streaming deltas that embed their own usage object.
	if deltaMap, ok := data["delta"].(map[string]any); ok {
		if usageMap, ok := deltaMap["usage"].(map[string]any); ok {
			in := typeutil.ConvertToInt(usageMap["input_tokens"])
			out := typeutil.ConvertToInt(usageMap["output_tokens"])
			if in > 0 || out > 0 {
				return in + out
			}
		}
	}

	return 0
}
// ExtractJSONCost extracts a cost value (in USD) from parsed JSON log data.
// It prefers an explicit top-level "total_cost_usd" field, then scans the
// other common cost field names at the top level, and finally looks inside a
// nested "billing" object. Returns 0 when no positive cost is found.
func ExtractJSONCost(data map[string]any) float64 {
	// Common cost field names, checked in order.
	costFields := []string{"total_cost_usd", "cost", "price", "amount", "total_cost", "estimated_cost"}

	// Prefer an explicit total_cost_usd at the top level (with debug logging).
	if raw, ok := data["total_cost_usd"]; ok {
		if cost := typeutil.ConvertToFloat(raw); cost > 0 {
			if metricsLog.Enabled() {
				metricsLog.Printf("Cost extracted: value=%.6f", cost)
			}
			return cost
		}
	}

	// Scan the remaining top-level cost fields.
	for _, name := range costFields {
		if raw, ok := data[name]; ok {
			if cost := typeutil.ConvertToFloat(raw); cost > 0 {
				return cost
			}
		}
	}

	// Check a nested billing object for the same field names.
	if billingMap, ok := data["billing"].(map[string]any); ok {
		for _, name := range costFields {
			if raw, ok := billingMap[name]; ok {
				if cost := typeutil.ConvertToFloat(raw); cost > 0 {
					return cost
				}
			}
		}
	}

	return 0
}
// FinalizeToolMetricsOptions holds the options for FinalizeToolMetrics.
type FinalizeToolMetricsOptions struct {
	Metrics         *LogMetrics               // Destination metrics; mutated in place
	ToolCallMap     map[string]*ToolCallInfo  // Per-tool statistics accumulated during parsing
	CurrentSequence []string                  // In-progress tool-call sequence to flush, if any
	Turns           int                       // Final turn count to record on Metrics
	TokenUsage      int                       // Final token usage to record on Metrics
}
// FinalizeToolMetrics completes the metric collection process: it flushes any
// in-progress tool-call sequence, records the final token usage and turn
// count, and converts the per-tool map into a name-sorted slice. It is called
// by engine-specific ParseLogMetrics implementations to avoid code duplication.
func FinalizeToolMetrics(opts FinalizeToolMetricsOptions) {
	// Flush the trailing sequence, if one is still open.
	if len(opts.CurrentSequence) > 0 {
		opts.Metrics.ToolSequences = append(opts.Metrics.ToolSequences, opts.CurrentSequence)
	}

	opts.Metrics.TokenUsage = opts.TokenUsage
	opts.Metrics.Turns = opts.Turns

	// Flatten the per-tool map into the metrics slice, then sort by name so
	// the output ordering is deterministic despite random map iteration.
	for _, info := range opts.ToolCallMap {
		opts.Metrics.ToolCalls = append(opts.Metrics.ToolCalls, *info)
	}
	sort.Slice(opts.Metrics.ToolCalls, func(a, b int) bool {
		return opts.Metrics.ToolCalls[a].Name < opts.Metrics.ToolCalls[b].Name
	})

	metricsLog.Printf("FinalizeToolMetrics: turns=%d, tokenUsage=%d, toolCalls=%d, sequences=%d",
		opts.Metrics.Turns, opts.Metrics.TokenUsage, len(opts.Metrics.ToolCalls), len(opts.Metrics.ToolSequences))
}
// FinalizeToolCallsAndSequence completes the tool call and sequence
// finalization without touching token usage or turn counts. Use it when the
// engine extracts those values from structured result entries rather than
// accumulating them during line-by-line log parsing; it is the lighter
// counterpart of FinalizeToolMetrics.
func FinalizeToolCallsAndSequence(
	metrics *LogMetrics,
	toolCallMap map[string]*ToolCallInfo,
	currentSequence []string,
) {
	// Flush the trailing sequence, if one is still open.
	if len(currentSequence) > 0 {
		metrics.ToolSequences = append(metrics.ToolSequences, currentSequence)
	}

	// Flatten the per-tool map into the metrics slice, then sort by name so
	// the output ordering is deterministic despite random map iteration.
	for _, info := range toolCallMap {
		metrics.ToolCalls = append(metrics.ToolCalls, *info)
	}
	sort.Slice(metrics.ToolCalls, func(a, b int) bool {
		return metrics.ToolCalls[a].Name < metrics.ToolCalls[b].Name
	})

	metricsLog.Printf("FinalizeToolCallsAndSequence: toolCalls=%d, sequences=%d", len(metrics.ToolCalls), len(metrics.ToolSequences))
}