Skip to content

Commit bc04271

Browse files
fix(release): adapt gpt-5 chat params and batch aliases
1 parent ca63f7e commit bc04271

11 files changed

Lines changed: 636 additions & 15 deletions

File tree

docs/advanced/configuration.mdx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ The most common way to configure GOModel. Set any of the variables below to over
5555
| `REDIS_TTL_MODELS` | TTL in seconds for model cache | `86400` (24h) |
5656
| `REDIS_TTL_RESPONSES` | TTL in seconds for response cache | `3600` (1h) |
5757

58+
<Tip>
59+
See [Cache](/features/cache) for exact-cache behavior, response headers,
60+
analytics endpoints, and the note that `user_path` alone does not partition
61+
the exact cache.
62+
</Tip>
63+
5864
#### Storage
5965

6066
Storage is shared by audit logging, usage tracking, and future features like IAM.

docs/docs.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
"group": "Getting Started",
1616
"pages": ["getting-started/quickstart"]
1717
},
18+
{
19+
"group": "Features",
20+
"pages": ["features/cache"]
21+
},
1822
{
1923
"group": "Guides",
2024
"pages": [

docs/features/cache.mdx

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
---
2+
title: "Cache"
3+
description: "How GOModel response caching works, how to enable it, and what is included in the exact-cache key."
4+
---
5+
6+
## Overview
7+
8+
GOModel ships with an exact-match response cache for non-streaming requests on:
9+
10+
- `/v1/chat/completions`
11+
- `/v1/responses`
12+
- `/v1/embeddings`
13+
14+
When a response is served from the exact cache, GOModel returns:
15+
16+
```http
17+
X-Cache: HIT (exact)
18+
```
19+
20+
Semantic caching is planned separately. The exact layer is the cache behavior
21+
available today.
22+
23+
## Enable the exact cache
24+
25+
Point response caching at Redis:
26+
27+
```yaml
28+
cache:
29+
response:
30+
simple:
31+
redis:
32+
url: redis://localhost:6379
33+
ttl: 3600
34+
```
35+
36+
You can also configure it with environment variables:
37+
38+
- `REDIS_URL`
39+
- `REDIS_KEY_RESPONSES`
40+
- `REDIS_TTL_RESPONSES`
41+
42+
## What the exact cache keys on
43+
44+
The exact cache hashes:
45+
46+
- the request path
47+
- the resolved execution plan context used for execution —
  specifically execution mode, provider type, and resolved model
49+
- the final request body
50+
51+
This means guardrails and workflows affect cache keys when they change the
52+
resolved execution plan or the final body sent through execution.
53+
54+
You can bypass caching per request with:
55+
56+
```http
57+
Cache-Control: no-cache
58+
```
59+
60+
or:
61+
62+
```http
63+
Cache-Control: no-store
64+
```
65+
66+
## `user_path` behavior
67+
68+
`user_path` is not added to the exact-cache key by itself.
69+
70+
That is intentional. If two requests end up with the same path, resolved
71+
execution plan, and final request body, they can share the same exact-cache
72+
entry even when they originate from different `user_path` values.
73+
74+
<Note>
75+
If you need tenant or path-specific cache behavior, use a scoped workflow or
76+
otherwise make the final request differ for that scope. `user_path` alone is
77+
not an exact-cache partition key.
78+
</Note>
79+
80+
Common patterns:
81+
82+
- disable cache in a scoped workflow
83+
- use different scoped workflows for different `user_path` values
84+
- include scope-specific context so the final request body differs
85+
86+
## Cache analytics
87+
88+
When response caching and usage tracking are enabled, the admin API exposes a
89+
cached-only overview at:
90+
91+
```text
92+
/admin/api/v1/cache/overview
93+
```
94+
95+
Cached usage entries are also visible in the regular usage log and summary
96+
endpoints.

docs/getting-started/quickstart.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ Use one of those model IDs in your requests.
7979

8080
## Next Steps
8181

82+
- Understand response caching: [Cache](/features/cache)
8283
- Configure production settings: [Configuration](/advanced/configuration)
8384
- Add request policies: [Guardrails](/advanced/guardrails)
8485
- Connect OpenClaw: [Using GOModel with OpenClaw](/guides/openclaw)

internal/aliases/batch_preparer_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,40 @@ func TestBatchPreparerRejectsAliasResolvedToDifferentProvider(t *testing.T) {
9393
t.Fatalf("len(fileCreates) = %d, want 0", len(inner.fileCreates))
9494
}
9595
}
96+
97+
func TestBatchPreparerRejectsAliasResolvedToDifferentProviderWithExplicitProviderField(t *testing.T) {
98+
catalog := newTestCatalog()
99+
catalog.add("anthropic/claude-3-7-sonnet", "anthropic", core.Model{ID: "claude-3-7-sonnet", Object: "model"})
100+
101+
service, err := NewService(newMemoryStore(Alias{Name: "smart", TargetModel: "claude-3-7-sonnet", TargetProvider: "anthropic", Enabled: true}), catalog)
102+
if err != nil {
103+
t.Fatalf("NewService() error = %v", err)
104+
}
105+
if err := service.Refresh(context.Background()); err != nil {
106+
t.Fatalf("Refresh() error = %v", err)
107+
}
108+
109+
inner := newProviderMock()
110+
inner.supported["anthropic/claude-3-7-sonnet"] = true
111+
inner.providerType["anthropic/claude-3-7-sonnet"] = "anthropic"
112+
inner.fileContent = &core.FileContentResponse{
113+
ID: "file_source",
114+
Filename: "batch.jsonl",
115+
Data: []byte("{\"custom_id\":\"1\",\"method\":\"POST\",\"url\":\"/v1/chat/completions\",\"body\":{\"model\":\"smart\",\"provider\":\"openai\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}}\n"),
116+
}
117+
118+
preparer := NewBatchPreparer(inner, service)
119+
_, err = preparer.PrepareBatchRequest(context.Background(), "openai", &core.BatchRequest{
120+
InputFileID: "file_source",
121+
Endpoint: "/v1/chat/completions",
122+
})
123+
if err == nil {
124+
t.Fatal("PrepareBatchRequest() error = nil, want provider mismatch")
125+
}
126+
if !strings.Contains(err.Error(), `native batch supports a single provider per batch`) {
127+
t.Fatalf("PrepareBatchRequest() error = %v, want mixed-provider validation error", err)
128+
}
129+
if len(inner.fileCreates) != 0 {
130+
t.Fatalf("len(fileCreates) = %d, want 0", len(inner.fileCreates))
131+
}
132+
}

internal/aliases/service.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,24 @@ func (s *Service) Resolve(model, provider string) (Resolution, bool, error) {
122122
}
123123

124124
// resolveRequested maps a requested model selector to a Resolution, applying
// alias lookups. The bool result reports whether an alias changed the
// selection; on error the zero Resolution is returned.
func (s *Service) resolveRequested(requested core.RequestedModelSelector) (Resolution, bool, error) {
	selector, err := requested.Normalize()
	if err != nil {
		return Resolution{}, false, err
	}

	if requested.ExplicitProvider {
		// An explicit provider normally pins a concrete model, but when the
		// provider-qualified selector is not in the catalog, fall back to
		// alias resolution by bare model name — the alias may legitimately
		// target a different provider than the one the caller specified.
		if !s.catalog.Supports(selector.QualifiedModel()) {
			if resolution, ok := s.resolveAlias(requested.Model); ok {
				// Keep the caller's normalized selection as Requested while
				// reporting the alias target as the resolution.
				resolution.Requested = selector
				return resolution, true, nil
			}
		}
		// Catalog-known selector, or no alias match: pass through unchanged.
		return Resolution{Requested: selector, Resolved: selector}, false, nil
	}

	// No explicit provider: alias names take precedence over pass-through.
	if resolution, ok := s.resolveAlias(requested.Model); ok {
		return resolution, true, nil
	}
	return Resolution{Requested: selector, Resolved: selector}, false, nil
}
137145

internal/aliases/service_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,35 @@ func TestServiceResolveAliasWithExplicitProviderAndSlashModel(t *testing.T) {
235235
}
236236
}
237237

238+
func TestServiceResolveAliasWithExplicitProviderFallbackWhenConcreteSelectorMissing(t *testing.T) {
239+
catalog := newTestCatalog()
240+
catalog.add("anthropic/claude-3-7-sonnet", "anthropic", core.Model{ID: "claude-3-7-sonnet", Object: "model"})
241+
242+
service, err := NewService(newMemoryStore(Alias{
243+
Name: "smart",
244+
TargetModel: "claude-3-7-sonnet",
245+
TargetProvider: "anthropic",
246+
Enabled: true,
247+
}), catalog)
248+
if err != nil {
249+
t.Fatalf("NewService() error = %v", err)
250+
}
251+
if err := service.Refresh(context.Background()); err != nil {
252+
t.Fatalf("Refresh() error = %v", err)
253+
}
254+
255+
selector, changed, err := service.ResolveModel(core.NewRequestedModelSelector("smart", "openai"))
256+
if err != nil {
257+
t.Fatalf("ResolveModel() error = %v", err)
258+
}
259+
if !changed {
260+
t.Fatal("ResolveModel() changed = false, want true")
261+
}
262+
if got := selector.QualifiedModel(); got != "anthropic/claude-3-7-sonnet" {
263+
t.Fatalf("resolved selector = %q, want anthropic/claude-3-7-sonnet", got)
264+
}
265+
}
266+
238267
func TestServiceUpsertRejectsQualifiedAliasChainsAndSelfTargets(t *testing.T) {
239268
catalog := newTestCatalog()
240269
catalog.add("gpt-4o", "openai", core.Model{ID: "gpt-4o", Object: "model"})

internal/providers/openai/openai.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,29 @@ func isOSeriesModel(model string) bool {
8989
return len(m) >= 2 && m[0] == 'o' && m[1] >= '0' && m[1] <= '9'
9090
}
9191

92-
// adaptForOSeries rewrites a ChatRequest body for OpenAI o-series models,
93-
// mapping max_tokens -> max_completion_tokens and dropping temperature while
94-
// preserving all unknown top-level JSON fields.
95-
func adaptForOSeries(req *core.ChatRequest) (any, error) {
92+
// isGPT5Model reports whether model names a GPT-5 family model: "gpt-5"
// itself or any "gpt-5-" variant, matched case-insensitively and ignoring
// surrounding whitespace.
func isGPT5Model(model string) bool {
	name := strings.ToLower(strings.TrimSpace(model))
	if name == "gpt-5" {
		return true
	}
	return strings.HasPrefix(name, "gpt-5-")
}
96+
97+
// isReasoningChatModel reports whether the model follows OpenAI's reasoning
98+
// chat parameter rules for max_completion_tokens and temperature handling.
99+
func isReasoningChatModel(model string) bool {
100+
return isOSeriesModel(model) || isGPT5Model(model)
101+
}
102+
103+
// adaptForReasoningChat rewrites a ChatRequest body for OpenAI reasoning chat
104+
// models, mapping max_tokens -> max_completion_tokens and dropping temperature
105+
// while preserving all unknown top-level JSON fields.
106+
func adaptForReasoningChat(req *core.ChatRequest) (any, error) {
96107
body, err := json.Marshal(req)
97108
if err != nil {
98-
return nil, core.NewInvalidRequestError("failed to marshal o-series request: "+err.Error(), err)
109+
return nil, core.NewInvalidRequestError("failed to marshal reasoning request: "+err.Error(), err)
99110
}
100111

101112
var raw map[string]json.RawMessage
102113
if err := json.Unmarshal(body, &raw); err != nil {
103-
return nil, core.NewInvalidRequestError("failed to decode o-series request payload: "+err.Error(), err)
114+
return nil, core.NewInvalidRequestError("failed to decode reasoning request payload: "+err.Error(), err)
104115
}
105116
if maxTokens, ok := raw["max_tokens"]; ok {
106117
raw["max_completion_tokens"] = maxTokens
@@ -113,8 +124,8 @@ func adaptForOSeries(req *core.ChatRequest) (any, error) {
113124
// chatRequestBody returns the appropriate request body for the model.
114125
// Reasoning models get parameter adaptation; others pass through as-is.
115126
func chatRequestBody(req *core.ChatRequest) (any, error) {
116-
if isOSeriesModel(req.Model) {
117-
return adaptForOSeries(req)
127+
if isReasoningChatModel(req.Model) {
128+
return adaptForReasoningChat(req)
118129
}
119130
return req, nil
120131
}

0 commit comments

Comments
 (0)