Skip to content

Commit 45d3b3f

Browse files
refactor: introduced execution plan and decoupling
1 parent c81dd2a commit 45d3b3f

14 files changed

Lines changed: 597 additions & 169 deletions

internal/auditlog/auditlog_test.go

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -413,18 +413,21 @@ func TestMiddleware_UsesIngressTooLargeFlagWithoutReadingStream(t *testing.T) {
413413
}
414414
}
415415

416-
func TestMiddleware_AppliesRequestModelResolution(t *testing.T) {
416+
func TestMiddleware_PrefersExecutionPlanOverLegacyResolution(t *testing.T) {
417417
e := echo.New()
418418
logger := &capturingLogger{
419419
cfg: Config{Enabled: true},
420420
}
421421

422422
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"anthropic/claude-opus-4-6"}`))
423-
req = req.WithContext(core.WithRequestModelResolution(req.Context(), &core.RequestModelResolution{
424-
RequestedModel: "anthropic/claude-opus-4-6",
425-
ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
426-
ProviderType: "openai",
427-
AliasApplied: true,
423+
req = req.WithContext(core.WithExecutionPlan(req.Context(), &core.ExecutionPlan{
424+
ProviderType: "openai",
425+
Resolution: &core.RequestModelResolution{
426+
RequestedModel: "anthropic/claude-opus-4-6",
427+
ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
428+
ProviderType: "openai",
429+
AliasApplied: true,
430+
},
428431
}))
429432

430433
rec := httptest.NewRecorder()
@@ -457,6 +460,43 @@ func TestMiddleware_AppliesRequestModelResolution(t *testing.T) {
457460
}
458461
}
459462

463+
func TestMiddleware_DoesNotApplyModelMetadataWithoutExecutionPlan(t *testing.T) {
464+
e := echo.New()
465+
logger := &capturingLogger{
466+
cfg: Config{Enabled: true},
467+
}
468+
469+
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"legacy-only"}`))
470+
471+
rec := httptest.NewRecorder()
472+
c := e.NewContext(req, rec)
473+
474+
handler := Middleware(logger)(func(c *echo.Context) error {
475+
return c.NoContent(http.StatusNoContent)
476+
})
477+
478+
if err := handler(c); err != nil {
479+
t.Fatalf("handler returned error: %v", err)
480+
}
481+
if len(logger.entries) != 1 {
482+
t.Fatalf("len(entries) = %d, want 1", len(logger.entries))
483+
}
484+
485+
entry := logger.entries[0]
486+
if entry.Model != "" {
487+
t.Fatalf("Model = %q, want empty", entry.Model)
488+
}
489+
if entry.ResolvedModel != "" {
490+
t.Fatalf("ResolvedModel = %q, want empty", entry.ResolvedModel)
491+
}
492+
if entry.Provider != "" {
493+
t.Fatalf("Provider = %q, want empty", entry.Provider)
494+
}
495+
if entry.AliasUsed {
496+
t.Fatal("AliasUsed = true, want false")
497+
}
498+
}
499+
460500
func TestLoggerClose(t *testing.T) {
461501
store := &mockStore{}
462502
cfg := Config{

internal/auditlog/middleware.go

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ func Middleware(logger LoggerInterface) echo.MiddlewareFunc {
106106
// Execute the handler
107107
err := next(c)
108108

109-
applyRequestModelResolution(entry, c.Request().Context())
109+
applyExecutionPlan(entry, c.Request().Context())
110110

111111
// Calculate duration
112112
entry.DurationNs = time.Since(start).Nanoseconds()
@@ -156,34 +156,35 @@ func Middleware(logger LoggerInterface) echo.MiddlewareFunc {
156156
}
157157
}
158158

159-
func applyRequestModelResolution(entry *LogEntry, ctx context.Context) {
159+
func applyExecutionPlan(entry *LogEntry, ctx context.Context) {
160160
if entry == nil || ctx == nil {
161161
return
162162
}
163163

164-
resolution := core.GetRequestModelResolution(ctx)
165-
if resolution == nil {
166-
return
164+
if plan := core.GetExecutionPlan(ctx); plan != nil {
165+
enrichEntryWithExecutionPlan(entry, plan)
167166
}
168-
169-
enrichEntryWithResolution(entry, resolution)
170167
}
171168

172-
func enrichEntryWithResolution(entry *LogEntry, resolution *core.RequestModelResolution) {
173-
if entry == nil || resolution == nil {
169+
func enrichEntryWithExecutionPlan(entry *LogEntry, plan *core.ExecutionPlan) {
170+
if entry == nil || plan == nil {
174171
return
175172
}
176173

177-
if requestedModel := resolution.RequestedQualifiedModel(); requestedModel != "" {
174+
if requestedModel := plan.RequestedQualifiedModel(); requestedModel != "" {
178175
entry.Model = requestedModel
179176
}
180-
if resolvedModel := resolution.ResolvedQualifiedModel(); resolvedModel != "" {
177+
if resolvedModel := plan.ResolvedQualifiedModel(); resolvedModel != "" {
181178
entry.ResolvedModel = resolvedModel
182179
}
183-
if strings.TrimSpace(resolution.ProviderType) != "" {
184-
entry.Provider = strings.TrimSpace(resolution.ProviderType)
180+
if providerType := strings.TrimSpace(plan.ProviderType); providerType != "" {
181+
entry.Provider = providerType
182+
} else if plan.Resolution != nil && strings.TrimSpace(plan.Resolution.ProviderType) != "" {
183+
entry.Provider = strings.TrimSpace(plan.Resolution.ProviderType)
184+
}
185+
if plan.Resolution != nil {
186+
entry.AliasUsed = plan.Resolution.AliasApplied
185187
}
186-
entry.AliasUsed = resolution.AliasApplied
187188
}
188189

189190
func captureRequestBodyForLogging(entry *LogEntry, req *http.Request) {
@@ -336,9 +337,10 @@ func EnrichEntry(c *echo.Context, model, provider string) {
336337
entry.Provider = provider
337338
}
338339

339-
// EnrichEntryWithResolution attaches resolved model and alias metadata to the live audit entry.
340-
// This is used before handler execution completes so streaming audit entries inherit the same data.
341-
func EnrichEntryWithResolution(c *echo.Context, resolution *core.RequestModelResolution) {
340+
// EnrichEntryWithExecutionPlan attaches execution-plan metadata to the live
341+
// audit entry. This is preferred over resolution-only enrichment once planning
342+
// has completed for the request.
343+
func EnrichEntryWithExecutionPlan(c *echo.Context, plan *core.ExecutionPlan) {
342344
entryVal := c.Get(string(LogEntryKey))
343345
if entryVal == nil {
344346
return
@@ -349,7 +351,7 @@ func EnrichEntryWithResolution(c *echo.Context, resolution *core.RequestModelRes
349351
return
350352
}
351353

352-
enrichEntryWithResolution(entry, resolution)
354+
enrichEntryWithExecutionPlan(entry, plan)
353355
}
354356

355357
// EnrichEntryWithError adds error information to the log entry.

internal/core/context.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ const (
1212
requestSnapshotKey contextKey = "request-snapshot"
1313
// whiteBoxPromptKey stores the best-effort semantic extraction for the request.
1414
whiteBoxPromptKey contextKey = "white-box-prompt"
15-
// requestModelResolutionKey stores the resolved request selector chosen for execution.
16-
requestModelResolutionKey contextKey = "request-model-resolution"
15+
// executionPlanKey stores the request-scoped execution plan chosen for handling.
16+
executionPlanKey contextKey = "execution-plan"
1717
// batchPreparationMetadataKey stores request-scoped batch preprocessing metadata.
1818
batchPreparationMetadataKey contextKey = "batch-preparation-metadata"
1919

@@ -68,16 +68,16 @@ func GetWhiteBoxPrompt(ctx context.Context) *WhiteBoxPrompt {
6868
return nil
6969
}
7070

71-
// WithRequestModelResolution returns a new context with the resolved request selector attached.
72-
func WithRequestModelResolution(ctx context.Context, resolution *RequestModelResolution) context.Context {
73-
return context.WithValue(ctx, requestModelResolutionKey, resolution)
71+
// WithExecutionPlan returns a new context with the execution plan attached.
72+
func WithExecutionPlan(ctx context.Context, plan *ExecutionPlan) context.Context {
73+
return context.WithValue(ctx, executionPlanKey, plan)
7474
}
7575

76-
// GetRequestModelResolution retrieves the resolved request selector from the context.
77-
func GetRequestModelResolution(ctx context.Context) *RequestModelResolution {
78-
if v := ctx.Value(requestModelResolutionKey); v != nil {
79-
if resolution, ok := v.(*RequestModelResolution); ok {
80-
return resolution
76+
// GetExecutionPlan retrieves the execution plan from the context.
77+
func GetExecutionPlan(ctx context.Context) *ExecutionPlan {
78+
if v := ctx.Value(executionPlanKey); v != nil {
79+
if plan, ok := v.(*ExecutionPlan); ok {
80+
return plan
8181
}
8282
}
8383
return nil

internal/core/execution_plan.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package core
2+
3+
// ExecutionMode names the way the gateway intends to execute a request.
type ExecutionMode string

// Execution modes that can be stored in ExecutionPlan.Mode.
const (
	ExecutionModeTranslated  = ExecutionMode("translated")
	ExecutionModePassthrough = ExecutionMode("passthrough")
	ExecutionModeNativeBatch = ExecutionMode("native_batch")
	ExecutionModeNativeFile  = ExecutionMode("native_file")
)
12+
13+
// CapabilitySet lists, one flag per behavior, which gateway behaviors are
// valid for a request. The zero value advertises no capabilities.
//
// The set is deliberately small and pragmatic for the initial planning slice.
type CapabilitySet struct {
	SemanticExtraction bool
	AliasResolution    bool
	Guardrails         bool
	RequestPatching    bool
	UsageTracking      bool
	ResponseCaching    bool
	Streaming          bool
	Passthrough        bool
}
25+
26+
// CapabilitiesForEndpoint returns the current capability set for one endpoint.
27+
func CapabilitiesForEndpoint(desc EndpointDescriptor) CapabilitySet {
28+
switch desc.Operation {
29+
case "chat_completions", "responses":
30+
return CapabilitySet{
31+
SemanticExtraction: true,
32+
AliasResolution: true,
33+
Guardrails: true,
34+
RequestPatching: true,
35+
UsageTracking: true,
36+
ResponseCaching: true,
37+
Streaming: true,
38+
}
39+
case "embeddings":
40+
return CapabilitySet{
41+
SemanticExtraction: true,
42+
AliasResolution: true,
43+
UsageTracking: true,
44+
ResponseCaching: true,
45+
}
46+
case "batches":
47+
return CapabilitySet{
48+
SemanticExtraction: true,
49+
AliasResolution: true,
50+
Guardrails: true,
51+
RequestPatching: true,
52+
UsageTracking: true,
53+
}
54+
case "files":
55+
return CapabilitySet{
56+
SemanticExtraction: true,
57+
}
58+
case "provider_passthrough":
59+
return CapabilitySet{
60+
SemanticExtraction: true,
61+
Passthrough: true,
62+
}
63+
default:
64+
return CapabilitySet{}
65+
}
66+
}
67+
68+
// ExecutionPlan is the request-scoped control-plane result consumed by later
69+
// execution stages. It carries the resolved execution mode, endpoint
70+
// capabilities, and any model routing decision already made for the request.
71+
type ExecutionPlan struct {
72+
RequestID string
73+
Endpoint EndpointDescriptor
74+
Mode ExecutionMode
75+
Capabilities CapabilitySet
76+
ProviderType string
77+
Resolution *RequestModelResolution
78+
}
79+
80+
// RequestedQualifiedModel returns the requested model selector when present.
81+
func (p *ExecutionPlan) RequestedQualifiedModel() string {
82+
if p == nil || p.Resolution == nil {
83+
return ""
84+
}
85+
return p.Resolution.RequestedQualifiedModel()
86+
}
87+
88+
// ResolvedQualifiedModel returns the resolved model selector when present.
89+
func (p *ExecutionPlan) ResolvedQualifiedModel() string {
90+
if p == nil || p.Resolution == nil {
91+
return ""
92+
}
93+
return p.Resolution.ResolvedQualifiedModel()
94+
}

internal/responsecache/middleware_test.go

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,39 @@ func TestSimpleCacheMiddleware_DifferentBodyDifferentKey(t *testing.T) {
8989
func TestHashRequest_ResolvedModelChangesKey(t *testing.T) {
9090
body := []byte(`{"model":"anthropic/claude-opus-4-6","messages":[{"role":"user","content":"hi"}]}`)
9191

92-
first := hashRequest("/v1/chat/completions", body, &core.RequestModelResolution{
93-
ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
92+
first := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
93+
Mode: core.ExecutionModeTranslated,
94+
Resolution: &core.RequestModelResolution{
95+
ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
96+
},
9497
})
95-
second := hashRequest("/v1/chat/completions", body, &core.RequestModelResolution{
96-
ResolvedSelector: core.ModelSelector{Provider: "anthropic", Model: "claude-opus-4-6"},
98+
second := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
99+
Mode: core.ExecutionModeTranslated,
100+
Resolution: &core.RequestModelResolution{
101+
ResolvedSelector: core.ModelSelector{Provider: "anthropic", Model: "claude-opus-4-6"},
102+
},
97103
})
98104

99105
if first == second {
100106
t.Fatal("resolved model should affect cache key")
101107
}
102108
}
103109

110+
func TestHashRequest_ModeChangesKey(t *testing.T) {
111+
body := []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}`)
112+
113+
first := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
114+
Mode: core.ExecutionModeTranslated,
115+
})
116+
second := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
117+
Mode: core.ExecutionModePassthrough,
118+
})
119+
120+
if first == second {
121+
t.Fatal("execution mode should affect cache key")
122+
}
123+
}
124+
104125
func TestSimpleCacheMiddleware_SkipsStreaming(t *testing.T) {
105126
store := cache.NewMapStore()
106127
defer store.Close()

internal/responsecache/simple.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func (m *simpleCacheMiddleware) Middleware() echo.MiddlewareFunc {
5656
if isStreamingRequest(path, body) {
5757
return next(c)
5858
}
59-
key := hashRequest(path, body, core.GetRequestModelResolution(c.Request().Context()))
59+
key := hashRequest(path, body, core.GetExecutionPlan(c.Request().Context()))
6060
ctx := c.Request().Context()
6161
cached, err := m.store.Get(ctx, key)
6262
if err != nil {
@@ -132,12 +132,16 @@ func isStreamingRequest(path string, body []byte) bool {
132132
return p.Stream != nil && *p.Stream
133133
}
134134

135-
func hashRequest(path string, body []byte, resolution *core.RequestModelResolution) string {
135+
func hashRequest(path string, body []byte, plan *core.ExecutionPlan) string {
136136
h := sha256.New()
137137
h.Write([]byte(path))
138138
h.Write([]byte{0})
139-
if resolution != nil {
140-
h.Write([]byte(resolution.ResolvedQualifiedModel()))
139+
if plan != nil {
140+
h.Write([]byte(plan.Mode))
141+
h.Write([]byte{0})
142+
h.Write([]byte(plan.ProviderType))
143+
h.Write([]byte{0})
144+
h.Write([]byte(plan.ResolvedQualifiedModel()))
141145
h.Write([]byte{0})
142146
}
143147
h.Write(body)

0 commit comments

Comments
 (0)