Skip to content

Commit e3a5ffa

Browse files
authored
Merge pull request #83 from ENTERPILOT/me/gom-33-anthropic-reasoning-error-when-i-request-an-opus-model
fix(me/gom-33): anthropic reasoning error when i request an opus model
2 parents 42a9d43 + 94290a9 commit e3a5ffa

2 files changed

Lines changed: 680 additions & 263 deletions

File tree

internal/providers/anthropic/anthropic.go

Lines changed: 99 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ type Provider struct {
4141
func New(apiKey string, opts providers.ProviderOptions) core.Provider {
4242
p := &Provider{apiKey: apiKey}
4343
cfg := llmclient.Config{
44-
ProviderName: "anthropic",
45-
BaseURL: defaultBaseURL,
46-
Retry: opts.Resilience.Retry,
47-
Hooks: opts.Hooks,
44+
ProviderName: "anthropic",
45+
BaseURL: defaultBaseURL,
46+
Retry: opts.Resilience.Retry,
47+
Hooks: opts.Hooks,
4848
CircuitBreaker: opts.Resilience.CircuitBreaker,
4949
}
5050
p.client = llmclient.New(cfg, p.setHeaders)
@@ -80,21 +80,43 @@ func (p *Provider) setHeaders(req *http.Request) {
8080
}
8181
}
8282

83-
// anthropicThinking represents the thinking configuration for Anthropic's
// extended thinking.
// For 4.6 models the gateway sends {type: "adaptive"} and omits
// budget_tokens (hence the omitempty tag); for older models it sends
// {type: "enabled", budget_tokens: N}.
type anthropicThinking struct {
	Type         string `json:"type"`
	BudgetTokens int    `json:"budget_tokens,omitempty"`
}

// anthropicOutputConfig controls the effort level for adaptive thinking on
// 4.6 models; it is serialized as the request's output_config object.
type anthropicOutputConfig struct {
	Effort string `json:"effort,omitempty"`
}
8895

8996
// anthropicRequest represents the Anthropic Messages API request format.
// Thinking and OutputConfig are populated by applyReasoning when the caller
// supplies a reasoning effort; both are omitted from the JSON otherwise.
type anthropicRequest struct {
	Model        string                 `json:"model"`
	Messages     []anthropicMessage     `json:"messages"`
	MaxTokens    int                    `json:"max_tokens"`
	Temperature  *float64               `json:"temperature,omitempty"`
	System       string                 `json:"system,omitempty"`
	Stream       bool                   `json:"stream,omitempty"`
	Thinking     *anthropicThinking     `json:"thinking,omitempty"`
	OutputConfig *anthropicOutputConfig `json:"output_config,omitempty"`
}
107+
108+
// adaptiveThinkingPrefixes lists the model-name prefixes that use Anthropic's
// adaptive thinking mode (the 4.6-generation Opus and Sonnet families).
var adaptiveThinkingPrefixes = []string{
	"claude-opus-4-6",
	"claude-sonnet-4-6",
}

// isAdaptiveThinkingModel reports whether model supports adaptive thinking:
// either an exact prefix match or a dash-separated variant of one, such as
// a dated snapshot like "claude-opus-4-6-20260115".
func isAdaptiveThinkingModel(model string) bool {
	for _, p := range adaptiveThinkingPrefixes {
		if model == p {
			return true
		}
		if strings.HasPrefix(model, p+"-") {
			return true
		}
	}
	return false
}
99121

100122
// anthropicMessage represents a message in Anthropic format
@@ -159,29 +181,62 @@ type anthropicModelsResponse struct {
159181
LastID string `json:"last_id"`
160182
}
161183

162-
// reasoningEffortToBudgetTokens maps OpenAI reasoning effort levels to Anthropic budget tokens
163-
func reasoningEffortToBudgetTokens(effort string) int {
184+
// normalizeEffort clamps effort to the gateway-supported values. Anthropic
// Opus 4.6 supports "max" for adaptive thinking, but the gateway's public
// type core.Reasoning.Effort only exposes "low", "medium", and "high", so
// "max" is intentionally rejected; any unsupported value is downgraded to
// "low" and logged via slog.Warn.
func normalizeEffort(effort string) string {
	switch effort {
	case "low", "medium", "high":
		return effort
	}
	slog.Warn("invalid reasoning effort, defaulting to 'low'", "effort", effort)
	return "low"
}
198+
199+
// applyReasoning configures thinking and effort on an anthropicRequest.
200+
// Opus 4.6 and Sonnet 4.6 use adaptive thinking with output_config.effort.
201+
// Older models and Haiku 4.6 use manual thinking with budget_tokens.
202+
func applyReasoning(req *anthropicRequest, model, effort string) {
203+
if isAdaptiveThinkingModel(model) {
204+
req.Thinking = &anthropicThinking{Type: "adaptive"}
205+
req.OutputConfig = &anthropicOutputConfig{Effort: normalizeEffort(effort)}
206+
} else {
207+
budget := reasoningEffortToBudgetTokens(effort)
208+
req.Thinking = &anthropicThinking{
209+
Type: "enabled",
210+
BudgetTokens: budget,
211+
}
212+
if req.MaxTokens <= budget {
213+
adjusted := budget + 1024
214+
slog.Info("MaxTokens adjusted for extended thinking",
215+
"original", req.MaxTokens, "adjusted", adjusted)
216+
req.MaxTokens = adjusted
217+
}
218+
}
219+
220+
if req.Temperature != nil {
221+
if *req.Temperature != 1.0 {
222+
slog.Warn("temperature overridden to nil; extended thinking requires temperature=1",
223+
"original_temperature", *req.Temperature)
224+
req.Temperature = nil
225+
}
226+
}
227+
}
228+
229+
func reasoningEffortToBudgetTokens(effort string) int {
230+
switch normalizeEffort(effort) {
167231
case "medium":
168232
return 10000
169233
case "high":
170234
return 20000
171235
default:
172-
slog.Warn("inappropriate reasoning effort, defaulting to 'low'", "effort", effort)
173236
return 5000
174237
}
175238
}
176239

177-
// logMaxTokensAdjustment logs when MaxTokens is adjusted to meet Anthropic requirements
178-
func logMaxTokensAdjustment(original, adjusted int, reason string) {
179-
slog.Info("MaxTokens adjusted to meet Anthropic extended thinking requirements",
180-
"original", original,
181-
"adjusted", adjusted,
182-
"reason", reason)
183-
}
184-
185240
// convertToAnthropicRequest converts core.ChatRequest to Anthropic format
186241
func convertToAnthropicRequest(req *core.ChatRequest) *anthropicRequest {
187242
anthropicReq := &anthropicRequest{
@@ -196,27 +251,10 @@ func convertToAnthropicRequest(req *core.ChatRequest) *anthropicRequest {
196251
anthropicReq.MaxTokens = *req.MaxTokens
197252
}
198253

199-
// Map reasoning effort to Anthropic extended thinking
200254
if req.Reasoning != nil && req.Reasoning.Effort != "" {
201-
budget := reasoningEffortToBudgetTokens(req.Reasoning.Effort)
202-
anthropicReq.Thinking = &anthropicThinking{
203-
Type: "enabled",
204-
BudgetTokens: budget,
205-
}
206-
// Ensure MaxTokens is at least the budget tokens
207-
if anthropicReq.MaxTokens < budget {
208-
logMaxTokensAdjustment(anthropicReq.MaxTokens, budget,
209-
"extended thinking budget_tokens must be <= max_tokens")
210-
anthropicReq.MaxTokens = budget
211-
}
212-
// Extended thinking requires temperature to be unset (defaults to 1)
213-
if anthropicReq.Temperature != nil {
214-
slog.Warn("temperature overridden to nil, reasoning requires unset temperature")
215-
anthropicReq.Temperature = nil
216-
}
255+
applyReasoning(anthropicReq, req.Model, req.Reasoning.Effort)
217256
}
218257

219-
// Extract system message if present and convert messages
220258
for _, msg := range req.Messages {
221259
if msg.Role == "system" {
222260
anthropicReq.System = msg.Content
@@ -233,10 +271,7 @@ func convertToAnthropicRequest(req *core.ChatRequest) *anthropicRequest {
233271

234272
// convertFromAnthropicResponse converts Anthropic response to core.ChatResponse
235273
func convertFromAnthropicResponse(resp *anthropicResponse) *core.ChatResponse {
236-
content := ""
237-
if len(resp.Content) > 0 {
238-
content = resp.Content[0].Text
239-
}
274+
content := extractTextContent(resp.Content)
240275

241276
finishReason := resp.StopReason
242277
if finishReason == "" {
@@ -527,24 +562,8 @@ func convertResponsesRequestToAnthropic(req *core.ResponsesRequest) *anthropicRe
527562
anthropicReq.MaxTokens = *req.MaxOutputTokens
528563
}
529564

530-
// Map reasoning effort to Anthropic extended thinking
531565
if req.Reasoning != nil && req.Reasoning.Effort != "" {
532-
budget := reasoningEffortToBudgetTokens(req.Reasoning.Effort)
533-
anthropicReq.Thinking = &anthropicThinking{
534-
Type: "enabled",
535-
BudgetTokens: budget,
536-
}
537-
// Ensure MaxTokens is at least the budget tokens
538-
if anthropicReq.MaxTokens < budget {
539-
logMaxTokensAdjustment(anthropicReq.MaxTokens, budget,
540-
"extended thinking budget_tokens must be <= max_tokens")
541-
anthropicReq.MaxTokens = budget
542-
}
543-
// Extended thinking requires temperature to be unset (defaults to 1)
544-
if anthropicReq.Temperature != nil {
545-
slog.Warn("temperature overridden to nil, reasoning requires unset temperature")
546-
anthropicReq.Temperature = nil
547-
}
566+
applyReasoning(anthropicReq, req.Model, req.Reasoning.Effort)
548567
}
549568

550569
// Set system instruction if provided
@@ -597,12 +616,22 @@ func extractContentFromResponsesInput(content interface{}) string {
597616
return ""
598617
}
599618

619+
// extractTextContent returns the text from the last "text" content block.
620+
// When extended thinking is enabled, Anthropic returns: [text("\n\n"), thinking(...), text(answer)].
621+
// Taking the last text block ensures we get the actual answer, not the empty preamble.
622+
func extractTextContent(blocks []anthropicContent) string {
623+
last := ""
624+
for _, b := range blocks {
625+
if b.Type == "text" {
626+
last = b.Text
627+
}
628+
}
629+
return last
630+
}
631+
600632
// convertAnthropicResponseToResponses converts an Anthropic response to ResponsesResponse
601633
func convertAnthropicResponseToResponses(resp *anthropicResponse, model string) *core.ResponsesResponse {
602-
content := ""
603-
if len(resp.Content) > 0 {
604-
content = resp.Content[0].Text
605-
}
634+
content := extractTextContent(resp.Content)
606635

607636
return &core.ResponsesResponse{
608637
ID: resp.ID,
@@ -842,3 +871,4 @@ func (sc *responsesStreamConverter) convertEvent(event *anthropicStreamEvent) st
842871

843872
return ""
844873
}
874+

0 commit comments

Comments
 (0)