ENTERPILOT
diff --git a/internal/auditlog/auditlog_test.go b/internal/auditlog/auditlog_test.go
Lines changed: 46 additions & 6 deletions
diff --git a/internal/auditlog/middleware.go b/internal/auditlog/middleware.go
Lines changed: 20 additions & 18 deletions
diff --git a/internal/core/context.go b/internal/core/context.go
Lines changed: 10 additions & 10 deletions
diff --git a/internal/core/execution_plan.go b/internal/core/execution_plan.go
Lines changed: 94 additions & 0 deletions
diff --git a/internal/responsecache/middleware_test.go b/internal/responsecache/middleware_test.go
Lines changed: 25 additions & 4 deletions
diff --git a/internal/responsecache/simple.go b/internal/responsecache/simple.go
Lines changed: 8 additions & 4 deletions
@@ -413,18 +413,21 @@ func TestMiddleware_UsesIngressTooLargeFlagWithoutReadingStream(t *testing.T) {
 	}
 }
 
-func TestMiddleware_AppliesRequestModelResolution(t *testing.T) {
+func TestMiddleware_PrefersExecutionPlanOverLegacyResolution(t *testing.T) {
 	e := echo.New()
 	logger := &capturingLogger{
 		cfg: Config{Enabled: true},
 	}
 
 	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"anthropic/claude-opus-4-6"}`))
-	req = req.WithContext(core.WithRequestModelResolution(req.Context(), &core.RequestModelResolution{
-		RequestedModel:   "anthropic/claude-opus-4-6",
-		ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
-		ProviderType:     "openai",
-		AliasApplied:     true,
+	req = req.WithContext(core.WithExecutionPlan(req.Context(), &core.ExecutionPlan{
+		ProviderType: "openai",
+		Resolution: &core.RequestModelResolution{
+			RequestedModel:   "anthropic/claude-opus-4-6",
+			ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
+			ProviderType:     "openai",
+			AliasApplied:     true,
+		},
 	}))
 
 	rec := httptest.NewRecorder()
@@ -457,6 +460,43 @@ func TestMiddleware_AppliesRequestModelResolution(t *testing.T) {
 	}
 }
 
+func TestMiddleware_DoesNotApplyModelMetadataWithoutExecutionPlan(t *testing.T) {
+	e := echo.New()
+	logger := &capturingLogger{
+		cfg: Config{Enabled: true},
+	}
+
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"legacy-only"}`)) // no execution plan is attached to this request's context
+
+	rec := httptest.NewRecorder()
+	c := e.NewContext(req, rec)
+
+	handler := Middleware(logger)(func(c *echo.Context) error {
+		return c.NoContent(http.StatusNoContent)
+	})
+
+	if err := handler(c); err != nil {
+		t.Fatalf("handler returned error: %v", err)
+	}
+	if len(logger.entries) != 1 {
+		t.Fatalf("len(entries) = %d, want 1", len(logger.entries))
+	}
+
+	entry := logger.entries[0]
+	if entry.Model != "" {
+		t.Fatalf("Model = %q, want empty", entry.Model)
+	}
+	if entry.ResolvedModel != "" {
+		t.Fatalf("ResolvedModel = %q, want empty", entry.ResolvedModel)
+	}
+	if entry.Provider != "" {
+		t.Fatalf("Provider = %q, want empty", entry.Provider)
+	}
+	if entry.AliasUsed {
+		t.Fatal("AliasUsed = true, want false")
+	}
+}
+
 func TestLoggerClose(t *testing.T) {
 	store := &mockStore{}
 	cfg := Config{
 
@@ -106,7 +106,7 @@ func Middleware(logger LoggerInterface) echo.MiddlewareFunc {
 			// Execute the handler
 			err := next(c)
 
-			applyRequestModelResolution(entry, c.Request().Context())
+			applyExecutionPlan(entry, c.Request().Context())
 
 			// Calculate duration
 			entry.DurationNs = time.Since(start).Nanoseconds()
@@ -156,34 +156,35 @@ func Middleware(logger LoggerInterface) echo.MiddlewareFunc {
 	}
 }
 
-func applyRequestModelResolution(entry *LogEntry, ctx context.Context) {
+func applyExecutionPlan(entry *LogEntry, ctx context.Context) {
 	if entry == nil || ctx == nil {
 		return
 	}
 
-	resolution := core.GetRequestModelResolution(ctx)
-	if resolution == nil {
-		return
+	if plan := core.GetExecutionPlan(ctx); plan != nil {
+		enrichEntryWithExecutionPlan(entry, plan)
 	}
-
-	enrichEntryWithResolution(entry, resolution)
 }
 
-func enrichEntryWithResolution(entry *LogEntry, resolution *core.RequestModelResolution) {
-	if entry == nil || resolution == nil {
+func enrichEntryWithExecutionPlan(entry *LogEntry, plan *core.ExecutionPlan) {
+	if entry == nil || plan == nil {
 		return
 	}
 
-	if requestedModel := resolution.RequestedQualifiedModel(); requestedModel != "" {
+	if requestedModel := plan.RequestedQualifiedModel(); requestedModel != "" {
 		entry.Model = requestedModel
 	}
-	if resolvedModel := resolution.ResolvedQualifiedModel(); resolvedModel != "" {
+	if resolvedModel := plan.ResolvedQualifiedModel(); resolvedModel != "" {
 		entry.ResolvedModel = resolvedModel
 	}
-	if strings.TrimSpace(resolution.ProviderType) != "" {
-		entry.Provider = strings.TrimSpace(resolution.ProviderType)
+	if providerType := strings.TrimSpace(plan.ProviderType); providerType != "" {
+		entry.Provider = providerType
+	} else if plan.Resolution != nil && strings.TrimSpace(plan.Resolution.ProviderType) != "" {
+		entry.Provider = strings.TrimSpace(plan.Resolution.ProviderType)
+	}
+	if plan.Resolution != nil {
+		entry.AliasUsed = plan.Resolution.AliasApplied
 	}
-	entry.AliasUsed = resolution.AliasApplied
 }
 
 func captureRequestBodyForLogging(entry *LogEntry, req *http.Request) {
@@ -336,9 +337,10 @@ func EnrichEntry(c *echo.Context, model, provider string) {
 	entry.Provider = provider
 }
 
-// EnrichEntryWithResolution attaches resolved model and alias metadata to the live audit entry.
-// This is used before handler execution completes so streaming audit entries inherit the same data.
-func EnrichEntryWithResolution(c *echo.Context, resolution *core.RequestModelResolution) {
+// EnrichEntryWithExecutionPlan attaches execution-plan metadata to the live
+// audit entry. This is preferred over resolution-only enrichment once planning
+// has completed for the request.
+func EnrichEntryWithExecutionPlan(c *echo.Context, plan *core.ExecutionPlan) {
 	entryVal := c.Get(string(LogEntryKey))
 	if entryVal == nil {
 		return
@@ -349,7 +351,7 @@ func EnrichEntryWithResolution(c *echo.Context, resolution *core.RequestModelRes
 		return
 	}
 
-	enrichEntryWithResolution(entry, resolution)
+	enrichEntryWithExecutionPlan(entry, plan)
 }
 
 // EnrichEntryWithError adds error information to the log entry.
 
@@ -12,8 +12,8 @@ const (
 	requestSnapshotKey contextKey = "request-snapshot"
 	// whiteBoxPromptKey stores the best-effort semantic extraction for the request.
 	whiteBoxPromptKey contextKey = "white-box-prompt"
-	// requestModelResolutionKey stores the resolved request selector chosen for execution.
-	requestModelResolutionKey contextKey = "request-model-resolution"
+	// executionPlanKey stores the request-scoped execution plan chosen for handling.
+	executionPlanKey contextKey = "execution-plan"
 	// batchPreparationMetadataKey stores request-scoped batch preprocessing metadata.
 	batchPreparationMetadataKey contextKey = "batch-preparation-metadata"
 
@@ -68,16 +68,16 @@ func GetWhiteBoxPrompt(ctx context.Context) *WhiteBoxPrompt {
 	return nil
 }
 
-// WithRequestModelResolution returns a new context with the resolved request selector attached.
-func WithRequestModelResolution(ctx context.Context, resolution *RequestModelResolution) context.Context {
-	return context.WithValue(ctx, requestModelResolutionKey, resolution)
+// WithExecutionPlan returns a new context with the execution plan attached.
+func WithExecutionPlan(ctx context.Context, plan *ExecutionPlan) context.Context {
+	return context.WithValue(ctx, executionPlanKey, plan)
 }
 
-// GetRequestModelResolution retrieves the resolved request selector from the context.
-func GetRequestModelResolution(ctx context.Context) *RequestModelResolution {
-	if v := ctx.Value(requestModelResolutionKey); v != nil {
-		if resolution, ok := v.(*RequestModelResolution); ok {
-			return resolution
+// GetExecutionPlan retrieves the execution plan from the context.
+func GetExecutionPlan(ctx context.Context) *ExecutionPlan {
+	if v := ctx.Value(executionPlanKey); v != nil {
+		if plan, ok := v.(*ExecutionPlan); ok {
+			return plan
 		}
 	}
 	return nil
 
@@ -0,0 +1,94 @@
+package core
+
+// ExecutionMode describes how the gateway intends to execute a request.
+type ExecutionMode string
+
+const (
+	ExecutionModeTranslated  ExecutionMode = "translated"
+	ExecutionModePassthrough ExecutionMode = "passthrough"
+	ExecutionModeNativeBatch ExecutionMode = "native_batch"
+	ExecutionModeNativeFile  ExecutionMode = "native_file"
+)
+
+// CapabilitySet advertises the gateway behaviors that are valid for a request.
+// This is intentionally small and pragmatic for the initial planning slice.
+type CapabilitySet struct {
+	SemanticExtraction bool
+	AliasResolution    bool
+	Guardrails         bool
+	RequestPatching    bool
+	UsageTracking      bool
+	ResponseCaching    bool
+	Streaming          bool
+	Passthrough        bool
+}
+
+// CapabilitiesForEndpoint returns the capability set for desc.Operation; an unrecognized operation yields the zero CapabilitySet.
+func CapabilitiesForEndpoint(desc EndpointDescriptor) CapabilitySet {
+	switch desc.Operation {
+	case "chat_completions", "responses":
+		return CapabilitySet{
+			SemanticExtraction: true,
+			AliasResolution:    true,
+			Guardrails:         true,
+			RequestPatching:    true,
+			UsageTracking:      true,
+			ResponseCaching:    true,
+			Streaming:          true,
+		}
+	case "embeddings":
+		return CapabilitySet{
+			SemanticExtraction: true,
+			AliasResolution:    true,
+			UsageTracking:      true,
+			ResponseCaching:    true,
+		}
+	case "batches":
+		return CapabilitySet{
+			SemanticExtraction: true,
+			AliasResolution:    true,
+			Guardrails:         true,
+			RequestPatching:    true,
+			UsageTracking:      true,
+		}
+	case "files":
+		return CapabilitySet{
+			SemanticExtraction: true,
+		}
+	case "provider_passthrough":
+		return CapabilitySet{
+			SemanticExtraction: true,
+			Passthrough:        true,
+		}
+	default: // unknown operation: every capability stays false
+		return CapabilitySet{}
+	}
+}
+
+// ExecutionPlan is the request-scoped control-plane result consumed by later
+// execution stages. It carries the resolved execution mode, endpoint
+// capabilities, and any model routing decision already made for the request.
+type ExecutionPlan struct {
+	RequestID    string
+	Endpoint     EndpointDescriptor
+	Mode         ExecutionMode
+	Capabilities CapabilitySet
+	ProviderType string
+	Resolution   *RequestModelResolution // may be nil; the qualified-model accessors then return ""
+}
+
+// RequestedQualifiedModel returns p.Resolution's requested model selector, or "" when p or p.Resolution is nil.
+func (p *ExecutionPlan) RequestedQualifiedModel() string {
+	if p == nil || p.Resolution == nil {
+		return ""
+	}
+	return p.Resolution.RequestedQualifiedModel()
+}
+
+// ResolvedQualifiedModel returns p.Resolution's resolved model selector, or "" when p or p.Resolution is nil.
+func (p *ExecutionPlan) ResolvedQualifiedModel() string {
+	if p == nil || p.Resolution == nil {
+		return ""
+	}
+	return p.Resolution.ResolvedQualifiedModel()
+}
@@ -89,18 +89,39 @@ func TestSimpleCacheMiddleware_DifferentBodyDifferentKey(t *testing.T) {
 func TestHashRequest_ResolvedModelChangesKey(t *testing.T) {
 	body := []byte(`{"model":"anthropic/claude-opus-4-6","messages":[{"role":"user","content":"hi"}]}`)
 
-	first := hashRequest("/v1/chat/completions", body, &core.RequestModelResolution{
-		ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
+	first := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
+		Mode: core.ExecutionModeTranslated,
+		Resolution: &core.RequestModelResolution{
+			ResolvedSelector: core.ModelSelector{Provider: "openai", Model: "gpt-5-nano"},
+		},
 	})
-	second := hashRequest("/v1/chat/completions", body, &core.RequestModelResolution{
-		ResolvedSelector: core.ModelSelector{Provider: "anthropic", Model: "claude-opus-4-6"},
+	second := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
+		Mode: core.ExecutionModeTranslated,
+		Resolution: &core.RequestModelResolution{
+			ResolvedSelector: core.ModelSelector{Provider: "anthropic", Model: "claude-opus-4-6"},
+		},
 	})
 
 	if first == second {
 		t.Fatal("resolved model should affect cache key")
 	}
 }
 
+func TestHashRequest_ModeChangesKey(t *testing.T) {
+	body := []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}`)
+
+	first := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
+		Mode: core.ExecutionModeTranslated,
+	})
+	second := hashRequest("/v1/chat/completions", body, &core.ExecutionPlan{
+		Mode: core.ExecutionModePassthrough,
+	})
+
+	if first == second { // path and body are identical; the plans differ only in Mode
+		t.Fatal("execution mode should affect cache key")
+	}
+}
+
 func TestSimpleCacheMiddleware_SkipsStreaming(t *testing.T) {
 	store := cache.NewMapStore()
 	defer store.Close()
 
@@ -56,7 +56,7 @@ func (m *simpleCacheMiddleware) Middleware() echo.MiddlewareFunc {
 			if isStreamingRequest(path, body) {
 				return next(c)
 			}
-			key := hashRequest(path, body, core.GetRequestModelResolution(c.Request().Context()))
+			key := hashRequest(path, body, core.GetExecutionPlan(c.Request().Context()))
 			ctx := c.Request().Context()
 			cached, err := m.store.Get(ctx, key)
 			if err != nil {
@@ -132,12 +132,16 @@ func isStreamingRequest(path string, body []byte) bool {
 	return p.Stream != nil && *p.Stream
 }
 
-func hashRequest(path string, body []byte, resolution *core.RequestModelResolution) string {
+func hashRequest(path string, body []byte, plan *core.ExecutionPlan) string {
 	h := sha256.New()
 	h.Write([]byte(path))
 	h.Write([]byte{0})
-	if resolution != nil {
-		h.Write([]byte(resolution.ResolvedQualifiedModel()))
+	if plan != nil {
+		h.Write([]byte(plan.Mode))
+		h.Write([]byte{0})
+		h.Write([]byte(plan.ProviderType))
+		h.Write([]byte{0})
+		h.Write([]byte(plan.ResolvedQualifiedModel()))
 		h.Write([]byte{0})
 	}
 	h.Write(body)
Original file line number	Diff line number	Diff line change
`@@ -106,7 +106,7 @@ func Middleware(logger LoggerInterface) echo.MiddlewareFunc {`
`106`	`106`	`// Execute the handler`
`107`	`107`	`err := next(c)`
`108`	`108`
`109`		`- applyRequestModelResolution(entry, c.Request().Context())`
	`109`	`+ applyExecutionPlan(entry, c.Request().Context())`
`110`	`110`
`111`	`111`	`// Calculate duration`
`112`	`112`	`entry.DurationNs = time.Since(start).Nanoseconds()`
`@@ -156,34 +156,35 @@ func Middleware(logger LoggerInterface) echo.MiddlewareFunc {`
`156`	`156`	`}`
`157`	`157`	`}`
`158`	`158`
`159`		`-func applyRequestModelResolution(entry *LogEntry, ctx context.Context) {`
	`159`	`+func applyExecutionPlan(entry *LogEntry, ctx context.Context) {`
`160`	`160`	`if entry == nil \|\| ctx == nil {`
`161`	`161`	`return`
`162`	`162`	`}`
`163`	`163`
`164`		`- resolution := core.GetRequestModelResolution(ctx)`
`165`		`- if resolution == nil {`
`166`		`- return`
	`164`	`+ if plan := core.GetExecutionPlan(ctx); plan != nil {`
	`165`	`+ enrichEntryWithExecutionPlan(entry, plan)`
`167`	`166`	`}`
`168`		`-`
`169`		`- enrichEntryWithResolution(entry, resolution)`
`170`	`167`	`}`
`171`	`168`
`172`		`-func enrichEntryWithResolution(entry *LogEntry, resolution *core.RequestModelResolution) {`
`173`		`- if entry == nil \|\| resolution == nil {`
	`169`	`+func enrichEntryWithExecutionPlan(entry *LogEntry, plan *core.ExecutionPlan) {`
	`170`	`+ if entry == nil \|\| plan == nil {`
`174`	`171`	`return`
`175`	`172`	`}`
`176`	`173`
`177`		`- if requestedModel := resolution.RequestedQualifiedModel(); requestedModel != "" {`
	`174`	`+ if requestedModel := plan.RequestedQualifiedModel(); requestedModel != "" {`
`178`	`175`	`entry.Model = requestedModel`
`179`	`176`	`}`
`180`		`- if resolvedModel := resolution.ResolvedQualifiedModel(); resolvedModel != "" {`
	`177`	`+ if resolvedModel := plan.ResolvedQualifiedModel(); resolvedModel != "" {`
`181`	`178`	`entry.ResolvedModel = resolvedModel`
`182`	`179`	`}`
`183`		`- if strings.TrimSpace(resolution.ProviderType) != "" {`
`184`		`- entry.Provider = strings.TrimSpace(resolution.ProviderType)`
	`180`	`+ if providerType := strings.TrimSpace(plan.ProviderType); providerType != "" {`
	`181`	`+ entry.Provider = providerType`
	`182`	`+ } else if plan.Resolution != nil && strings.TrimSpace(plan.Resolution.ProviderType) != "" {`
	`183`	`+ entry.Provider = strings.TrimSpace(plan.Resolution.ProviderType)`
	`184`	`+ }`
	`185`	`+ if plan.Resolution != nil {`
	`186`	`+ entry.AliasUsed = plan.Resolution.AliasApplied`
`185`	`187`	`}`
`186`		`- entry.AliasUsed = resolution.AliasApplied`
`187`	`188`	`}`
`188`	`189`
`189`	`190`	`func captureRequestBodyForLogging(entry *LogEntry, req *http.Request) {`
`@@ -336,9 +337,10 @@ func EnrichEntry(c *echo.Context, model, provider string) {`
`336`	`337`	`entry.Provider = provider`
`337`	`338`	`}`
`338`	`339`
`339`		`-// EnrichEntryWithResolution attaches resolved model and alias metadata to the live audit entry.`
`340`		`-// This is used before handler execution completes so streaming audit entries inherit the same data.`
`341`		`-func EnrichEntryWithResolution(c *echo.Context, resolution *core.RequestModelResolution) {`
	`340`	`+// EnrichEntryWithExecutionPlan attaches execution-plan metadata to the live`
	`341`	`+// audit entry. This is preferred over resolution-only enrichment once planning`
	`342`	`+// has completed for the request.`
	`343`	`+func EnrichEntryWithExecutionPlan(c *echo.Context, plan *core.ExecutionPlan) {`
`342`	`344`	`entryVal := c.Get(string(LogEntryKey))`
`343`	`345`	`if entryVal == nil {`
`344`	`346`	`return`
`@@ -349,7 +351,7 @@ func EnrichEntryWithResolution(c *echo.Context, resolution *core.RequestModelRes`
`349`	`351`	`return`
`350`	`352`	`}`
`351`	`353`
`352`		`- enrichEntryWithResolution(entry, resolution)`
	`354`	`+ enrichEntryWithExecutionPlan(entry, plan)`
`353`	`355`	`}`
`354`	`356`
`355`	`357`	`// EnrichEntryWithError adds error information to the log entry.`