Skip to content

Commit bc04271

Browse files
fix(release): adapt gpt-5 chat params and batch aliases
1 parent ca63f7e commit bc04271

11 files changed

Lines changed: 636 additions & 15 deletions

File tree

docs/advanced/configuration.mdx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ The most common way to configure GOModel. Set any of the variables below to over
5555
| `REDIS_TTL_MODELS` | TTL in seconds for model cache | `86400` (24h) |
5656
| `REDIS_TTL_RESPONSES` | TTL in seconds for response cache | `3600` (1h) |
5757

58+
<Tip>
59+
See [Cache](/features/cache) for exact-cache behavior, response headers,
60+
analytics endpoints, and the note that `user_path` alone does not partition
61+
the exact cache.
62+
</Tip>
63+
5864
#### Storage
5965

6066
Storage is shared by audit logging, usage tracking, and future features like IAM.

docs/docs.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
"group": "Getting Started",
1616
"pages": ["getting-started/quickstart"]
1717
},
18+
{
19+
"group": "Features",
20+
"pages": ["features/cache"]
21+
},
1822
{
1923
"group": "Guides",
2024
"pages": [

docs/features/cache.mdx

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
---
2+
title: "Cache"
3+
description: "How GOModel response caching works, how to enable it, and what is included in the exact-cache key."
4+
---
5+
6+
## Overview
7+
8+
GOModel ships with an exact-match response cache for non-streaming requests on:
9+
10+
- `/v1/chat/completions`
11+
- `/v1/responses`
12+
- `/v1/embeddings`
13+
14+
When a response is served from the exact cache, GOModel returns:
15+
16+
```http
17+
X-Cache: HIT (exact)
18+
```
19+
20+
Semantic caching is planned separately. The exact layer is the cache behavior
21+
available today.
22+
23+
## Enable the exact cache
24+
25+
Point response caching at Redis:
26+
27+
```yaml
28+
cache:
29+
response:
30+
simple:
31+
redis:
32+
url: redis://localhost:6379
33+
ttl: 3600
34+
```
35+
36+
You can also configure it with environment variables:
37+
38+
- `REDIS_URL`
39+
- `REDIS_KEY_RESPONSES`
40+
- `REDIS_TTL_RESPONSES`
41+
42+
## What the exact cache keys on
43+
44+
The exact cache hashes:
45+
46+
- the request path
47+
- the resolved execution plan context used for execution —
  specifically execution mode, provider type, and resolved model
49+
- the final request body
50+
51+
This means guardrails and workflows affect cache keys when they change the
52+
resolved execution plan or the final body sent through execution.
53+
54+
You can bypass caching per request with:
55+
56+
```http
57+
Cache-Control: no-cache
58+
```
59+
60+
or:
61+
62+
```http
63+
Cache-Control: no-store
64+
```
65+
66+
## `user_path` behavior
67+
68+
`user_path` is not added to the exact-cache key by itself.
69+
70+
That is intentional. If two requests end up with the same path, resolved
71+
execution plan, and final request body, they can share the same exact-cache
72+
entry even when they originate from different `user_path` values.
73+
74+
<Note>
75+
If you need tenant or path-specific cache behavior, use a scoped workflow or
76+
otherwise make the final request differ for that scope. `user_path` alone is
77+
not an exact-cache partition key.
78+
</Note>
79+
80+
Common patterns:
81+
82+
- disable cache in a scoped workflow
83+
- use different scoped workflows for different `user_path` values
84+
- include scope-specific context so the final request body differs
85+
86+
## Cache analytics
87+
88+
When response caching and usage tracking are enabled, the admin API exposes a
89+
cached-only overview at:
90+
91+
```text
92+
/admin/api/v1/cache/overview
93+
```
94+
95+
Cached usage entries are also visible in the regular usage log and summary
96+
endpoints.

docs/getting-started/quickstart.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ Use one of those model IDs in your requests.
7979

8080
## Next Steps
8181

82+
- Understand response caching: [Cache](/features/cache)
8283
- Configure production settings: [Configuration](/advanced/configuration)
8384
- Add request policies: [Guardrails](/advanced/guardrails)
8485
- Connect OpenClaw: [Using GOModel with OpenClaw](/guides/openclaw)

internal/aliases/batch_preparer_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,40 @@ func TestBatchPreparerRejectsAliasResolvedToDifferentProvider(t *testing.T) {
9393
t.Fatalf("len(fileCreates) = %d, want 0", len(inner.fileCreates))
9494
}
9595
}
96+
97+
func TestBatchPreparerRejectsAliasResolvedToDifferentProviderWithExplicitProviderField(t *testing.T) {
98+
catalog := newTestCatalog()
99+
catalog.add("anthropic/claude-3-7-sonnet", "anthropic", core.Model{ID: "claude-3-7-sonnet", Object: "model"})
100+
101+
service, err := NewService(newMemoryStore(Alias{Name: "smart", TargetModel: "claude-3-7-sonnet", TargetProvider: "anthropic", Enabled: true}), catalog)
102+
if err != nil {
103+
t.Fatalf("NewService() error = %v", err)
104+
}
105+
if err := service.Refresh(context.Background()); err != nil {
106+
t.Fatalf("Refresh() error = %v", err)
107+
}
108+
109+
inner := newProviderMock()
110+
inner.supported["anthropic/claude-3-7-sonnet"] = true
111+
inner.providerType["anthropic/claude-3-7-sonnet"] = "anthropic"
112+
inner.fileContent = &core.FileContentResponse{
113+
ID: "file_source",
114+
Filename: "batch.jsonl",
115+
Data: []byte("{\"custom_id\":\"1\",\"method\":\"POST\",\"url\":\"/v1/chat/completions\",\"body\":{\"model\":\"smart\",\"provider\":\"openai\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}}\n"),
116+
}
117+
118+
preparer := NewBatchPreparer(inner, service)
119+
_, err = preparer.PrepareBatchRequest(context.Background(), "openai", &core.BatchRequest{
120+
InputFileID: "file_source",
121+
Endpoint: "/v1/chat/completions",
122+
})
123+
if err == nil {
124+
t.Fatal("PrepareBatchRequest() error = nil, want provider mismatch")
125+
}
126+
if !strings.Contains(err.Error(), `native batch supports a single provider per batch`) {
127+
t.Fatalf("PrepareBatchRequest() error = %v, want mixed-provider validation error", err)
128+
}
129+
if len(inner.fileCreates) != 0 {
130+
t.Fatalf("len(fileCreates) = %d, want 0", len(inner.fileCreates))
131+
}
132+
}

internal/aliases/service.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,24 @@ func (s *Service) Resolve(model, provider string) (Resolution, bool, error) {
122122
}
123123

124124
// resolveRequested maps a requested model selector to a Resolution, applying
// alias lookups. The bool result reports whether an alias changed the
// selection; on error the zero Resolution is returned.
func (s *Service) resolveRequested(requested core.RequestedModelSelector) (Resolution, bool, error) {
	selector, err := requested.Normalize()
	if err != nil {
		return Resolution{}, false, err
	}

	if requested.ExplicitProvider {
		// An explicit provider normally pins a concrete model, but when the
		// provider-qualified selector is not in the catalog, fall back to
		// alias resolution by bare model name — the alias may legitimately
		// target a different provider than the one the caller specified.
		if !s.catalog.Supports(selector.QualifiedModel()) {
			if resolution, ok := s.resolveAlias(requested.Model); ok {
				// Keep the caller's normalized selection as Requested while
				// reporting the alias target as the resolution.
				resolution.Requested = selector
				return resolution, true, nil
			}
		}
		// Catalog-known selector, or no alias match: pass through unchanged.
		return Resolution{Requested: selector, Resolved: selector}, false, nil
	}

	// No explicit provider: alias names take precedence over pass-through.
	if resolution, ok := s.resolveAlias(requested.Model); ok {
		return resolution, true, nil
	}
	return Resolution{Requested: selector, Resolved: selector}, false, nil
}
137145

internal/aliases/service_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,35 @@ func TestServiceResolveAliasWithExplicitProviderAndSlashModel(t *testing.T) {
235235
}
236236
}
237237

238+
func TestServiceResolveAliasWithExplicitProviderFallbackWhenConcreteSelectorMissing(t *testing.T) {
239+
catalog := newTestCatalog()
240+
catalog.add("anthropic/claude-3-7-sonnet", "anthropic", core.Model{ID: "claude-3-7-sonnet", Object: "model"})
241+
242+
service, err := NewService(newMemoryStore(Alias{
243+
Name: "smart",
244+
TargetModel: "claude-3-7-sonnet",
245+
TargetProvider: "anthropic",
246+
Enabled: true,
247+
}), catalog)
248+
if err != nil {
249+
t.Fatalf("NewService() error = %v", err)
250+
}
251+
if err := service.Refresh(context.Background()); err != nil {
252+
t.Fatalf("Refresh() error = %v", err)
253+
}
254+
255+
selector, changed, err := service.ResolveModel(core.NewRequestedModelSelector("smart", "openai"))
256+
if err != nil {
257+
t.Fatalf("ResolveModel() error = %v", err)
258+
}
259+
if !changed {
260+
t.Fatal("ResolveModel() changed = false, want true")
261+
}
262+
if got := selector.QualifiedModel(); got != "anthropic/claude-3-7-sonnet" {
263+
t.Fatalf("resolved selector = %q, want anthropic/claude-3-7-sonnet", got)
264+
}
265+
}
266+
238267
func TestServiceUpsertRejectsQualifiedAliasChainsAndSelfTargets(t *testing.T) {
239268
catalog := newTestCatalog()
240269
catalog.add("gpt-4o", "openai", core.Model{ID: "gpt-4o", Object: "model"})

internal/providers/openai/openai.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,29 @@ func isOSeriesModel(model string) bool {
8989
return len(m) >= 2 && m[0] == 'o' && m[1] >= '0' && m[1] <= '9'
9090
}
9191

92-
// adaptForOSeries rewrites a ChatRequest body for OpenAI o-series models,
93-
// mapping max_tokens -> max_completion_tokens and dropping temperature while
94-
// preserving all unknown top-level JSON fields.
95-
func adaptForOSeries(req *core.ChatRequest) (any, error) {
92+
// isGPT5Model reports whether model names a GPT-5 family model: "gpt-5"
// itself or any "gpt-5-" variant, matched case-insensitively and ignoring
// surrounding whitespace.
func isGPT5Model(model string) bool {
	name := strings.ToLower(strings.TrimSpace(model))
	if name == "gpt-5" {
		return true
	}
	return strings.HasPrefix(name, "gpt-5-")
}
96+
97+
// isReasoningChatModel reports whether the model follows OpenAI's reasoning
98+
// chat parameter rules for max_completion_tokens and temperature handling.
99+
func isReasoningChatModel(model string) bool {
100+
return isOSeriesModel(model) || isGPT5Model(model)
101+
}
102+
103+
// adaptForReasoningChat rewrites a ChatRequest body for OpenAI reasoning chat
104+
// models, mapping max_tokens -> max_completion_tokens and dropping temperature
105+
// while preserving all unknown top-level JSON fields.
106+
func adaptForReasoningChat(req *core.ChatRequest) (any, error) {
96107
body, err := json.Marshal(req)
97108
if err != nil {
98-
return nil, core.NewInvalidRequestError("failed to marshal o-series request: "+err.Error(), err)
109+
return nil, core.NewInvalidRequestError("failed to marshal reasoning request: "+err.Error(), err)
99110
}
100111

101112
var raw map[string]json.RawMessage
102113
if err := json.Unmarshal(body, &raw); err != nil {
103-
return nil, core.NewInvalidRequestError("failed to decode o-series request payload: "+err.Error(), err)
114+
return nil, core.NewInvalidRequestError("failed to decode reasoning request payload: "+err.Error(), err)
104115
}
105116
if maxTokens, ok := raw["max_tokens"]; ok {
106117
raw["max_completion_tokens"] = maxTokens
@@ -113,8 +124,8 @@ func adaptForOSeries(req *core.ChatRequest) (any, error) {
113124
// chatRequestBody returns the appropriate request body for the model.
114125
// Reasoning models get parameter adaptation; others pass through as-is.
115126
func chatRequestBody(req *core.ChatRequest) (any, error) {
116-
if isOSeriesModel(req.Model) {
117-
return adaptForOSeries(req)
127+
if isReasoningChatModel(req.Model) {
128+
return adaptForReasoningChat(req)
118129
}
119130
return req, nil
120131
}

0 commit comments

Comments
 (0)