
Commit 4f6df74

feat: adapt max_tokens → max_completion_tokens for OpenAI o-series models (#89)
OpenAI o-series models (o1, o3, o4) reject max_tokens and require max_completion_tokens instead; they also don't support temperature. The OpenAI provider now auto-detects these models and translates parameters before forwarding, so clients can always use max_tokens uniformly (Postel's Law).

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent: 317e4bf · commit: 4f6df74
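As a rough, self-contained sketch of the translation this commit performs on the wire (the clientRequest and oSeriesRequest types below are hypothetical stand-ins for illustration, not the gateway's real core.ChatRequest):

package main

import (
	"encoding/json"
	"fmt"
)

// clientRequest mimics what a client may send to the gateway
// (only the fields relevant to the translation are shown).
type clientRequest struct {
	Model       string   `json:"model"`
	MaxTokens   *int     `json:"max_tokens,omitempty"`
	Temperature *float64 `json:"temperature,omitempty"`
}

// oSeriesRequest mimics the adapted body forwarded to OpenAI: max_tokens
// becomes max_completion_tokens, and temperature is dropped entirely.
type oSeriesRequest struct {
	Model               string `json:"model"`
	MaxCompletionTokens *int   `json:"max_completion_tokens,omitempty"`
}

func main() {
	n, temp := 1000, 0.7
	in := clientRequest{Model: "o3-mini", MaxTokens: &n, Temperature: &temp}
	out := oSeriesRequest{Model: in.Model, MaxCompletionTokens: in.MaxTokens}

	before, _ := json.Marshal(in)
	after, _ := json.Marshal(out)
	fmt.Println(string(before)) // {"model":"o3-mini","max_tokens":1000,"temperature":0.7}
	fmt.Println(string(after))  // {"model":"o3-mini","max_completion_tokens":1000}
}

Note that temperature simply disappears from the forwarded body rather than being rejected upstream; that is the "liberal in, conservative out" trade the commit message describes.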

5 files changed: 269 additions & 20 deletions


CLAUDE.md

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ Guidance for AI models (like Claude) working with this codebase.
 
 - **Module:** `gomodel` | **Go:** 1.25.0 | **Repo:** https://github.com/ENTERPILOT/GOModel
 - **Stage:** Development—backward compatibility is not a concern
+- **Design philosophy:** [Postel's Law](https://en.wikipedia.org/wiki/Robustness_principle) (the Robustness Principle) — *"Be conservative in what you send, be liberal in what you accept."* The gateway accepts client requests generously (e.g. `max_tokens` for any model) and adapts them to each provider's specific requirements before forwarding (e.g. translating `max_tokens` → `max_completion_tokens` for OpenAI reasoning models).
 
 ## Commands

go.mod

Lines changed: 0 additions & 5 deletions
@@ -36,7 +36,6 @@ require (
 	github.com/containerd/log v0.1.0 // indirect
 	github.com/containerd/platforms v0.2.1 // indirect
 	github.com/cpuguy83/dockercfg v0.3.2 // indirect
-	github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/distribution/reference v0.6.0 // indirect
@@ -86,14 +85,11 @@ require (
 	github.com/prometheus/common v0.67.4 // indirect
 	github.com/prometheus/procfs v0.19.2 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
-	github.com/russross/blackfriday/v2 v2.0.1 // indirect
 	github.com/shirou/gopsutil/v4 v4.25.6 // indirect
-	github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
 	github.com/sirupsen/logrus v1.9.3 // indirect
 	github.com/swaggo/files/v2 v2.0.0 // indirect
 	github.com/tklauser/go-sysconf v0.3.12 // indirect
 	github.com/tklauser/numcpus v0.6.1 // indirect
-	github.com/urfave/cli/v2 v2.3.0 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/valyala/fasttemplate v1.2.2 // indirect
 	github.com/xdg-go/pbkdf2 v1.0.0 // indirect
@@ -123,5 +119,4 @@
 	modernc.org/libc v1.67.6 // indirect
 	modernc.org/mathutil v1.7.1 // indirect
 	modernc.org/memory v1.11.0 // indirect
-	sigs.k8s.io/yaml v1.3.0 // indirect
 )

go.sum

Lines changed: 0 additions & 13 deletions
@@ -4,7 +4,6 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8af
 github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
 github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8=
 github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
-github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
@@ -31,8 +30,6 @@ github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpS
 github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
 github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
 github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
-github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
-github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
 github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
@@ -184,13 +181,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
-github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
-github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
-github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/shirou/gopsutil/v4 v4.25.6 h1:kLysI2JsKorfaFPcYmcJqbzROzsBWEOAtw6A7dIfqXs=
 github.com/shirou/gopsutil/v4 v4.25.6/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
-github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
-github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -222,8 +214,6 @@ github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFA
 github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
 github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
 github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
-github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
-github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
 github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=
@@ -335,7 +325,6 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -372,5 +361,3 @@ modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
 modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
 modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
 modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
-sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
-sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=

internal/providers/openai/openai.go

Lines changed: 47 additions & 2 deletions
@@ -5,6 +5,7 @@ import (
 	"context"
 	"io"
 	"net/http"
+	"strings"
 
 	"gomodel/internal/core"
 	"gomodel/internal/llmclient"
@@ -84,13 +85,56 @@ func isValidClientRequestID(id string) bool {
 	return true
 }
 
+// isOSeriesModel reports whether the model is an OpenAI o-series model
+// (o1, o3, o4) that requires max_completion_tokens instead of max_tokens
+// and does not support the temperature parameter.
+func isOSeriesModel(model string) bool {
+	m := strings.ToLower(model)
+	// Match o1, o3, o4 families (e.g. o3-mini, o4-mini, o3, o1-preview).
+	// Non-reasoning models like gpt-4o start with "gpt-", not "o".
+	return len(m) >= 2 && m[0] == 'o' && m[1] >= '0' && m[1] <= '9'
+}
+
+// oSeriesChatRequest is the JSON body sent to OpenAI for o-series models.
+// It uses max_completion_tokens (required) instead of max_tokens (rejected).
+type oSeriesChatRequest struct {
+	Model               string              `json:"model"`
+	Messages            []core.Message      `json:"messages"`
+	Stream              bool                `json:"stream,omitempty"`
+	StreamOptions       *core.StreamOptions `json:"stream_options,omitempty"`
+	Reasoning           *core.Reasoning     `json:"reasoning,omitempty"`
+	MaxCompletionTokens *int                `json:"max_completion_tokens,omitempty"`
+}
+
+// adaptForOSeries converts a ChatRequest into an oSeriesChatRequest,
+// mapping max_tokens → max_completion_tokens and dropping temperature.
+func adaptForOSeries(req *core.ChatRequest) *oSeriesChatRequest {
+	return &oSeriesChatRequest{
+		Model:               req.Model,
+		Messages:            req.Messages,
+		Stream:              req.Stream,
+		StreamOptions:       req.StreamOptions,
+		Reasoning:           req.Reasoning,
+		MaxCompletionTokens: req.MaxTokens,
+	}
+}
+
+// chatRequestBody returns the appropriate request body for the model.
+// Reasoning models get parameter adaptation; others pass through as-is.
+func chatRequestBody(req *core.ChatRequest) any {
+	if isOSeriesModel(req.Model) {
+		return adaptForOSeries(req)
+	}
+	return req
+}
+
 // ChatCompletion sends a chat completion request to OpenAI
 func (p *Provider) ChatCompletion(ctx context.Context, req *core.ChatRequest) (*core.ChatResponse, error) {
 	var resp core.ChatResponse
 	err := p.client.Do(ctx, llmclient.Request{
 		Method:   http.MethodPost,
 		Endpoint: "/chat/completions",
-		Body:     req,
+		Body:     chatRequestBody(req),
 	}, &resp)
 	if err != nil {
 		return nil, err
@@ -104,10 +148,11 @@ func (p *Provider) ChatCompletion(ctx context.Context, req *core.ChatRequest) (*
 
 // StreamChatCompletion returns a raw response body for streaming (caller must close)
 func (p *Provider) StreamChatCompletion(ctx context.Context, req *core.ChatRequest) (io.ReadCloser, error) {
+	streamReq := req.WithStreaming()
 	return p.client.DoStream(ctx, llmclient.Request{
		Method:   http.MethodPost,
 		Endpoint: "/chat/completions",
-		Body:     req.WithStreaming(),
+		Body:     chatRequestBody(streamReq),
 	})
 }
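Since the detection heuristic is a plain prefix check, it can be exercised in isolation. The driver below copies isOSeriesModel verbatim from the diff above; the main function is purely illustrative:

package main

import (
	"fmt"
	"strings"
)

// isOSeriesModel, copied from the diff above: a model is treated as
// o-series when its lowercased name starts with 'o' followed by a digit.
func isOSeriesModel(model string) bool {
	m := strings.ToLower(model)
	return len(m) >= 2 && m[0] == 'o' && m[1] >= '0' && m[1] <= '9'
}

func main() {
	for _, model := range []string{"o3-mini", "o1-preview", "gpt-4o", "O4-MINI", "openai"} {
		fmt.Printf("%-11s → %v\n", model, isOSeriesModel(model))
	}
	// o3-mini     → true
	// o1-preview  → true
	// gpt-4o      → false
	// O4-MINI     → true
	// openai      → false
}

Note that "gpt-4o" ends in "o" but starts with "gpt-", so it falls through to the unmodified pass-through path, while the ToLower call makes the check case-insensitive.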

internal/providers/openai/openai_test.go

Lines changed: 221 additions & 0 deletions
@@ -751,6 +751,227 @@ func TestResponsesWithContext(t *testing.T) {
 	}
 }
 
+func TestIsOSeriesModel(t *testing.T) {
+	tests := []struct {
+		model    string
+		expected bool
+	}{
+		{"o3-mini", true},
+		{"o4-mini", true},
+		{"o3", true},
+		{"o4", true},
+		{"o1-preview", true},
+		{"o1-mini", true},
+		{"o3-mini-2025-01-31", true},
+		{"gpt-4o", false},
+		{"gpt-4o-mini", false},
+		{"gpt-4", false},
+		{"gpt-3.5-turbo", false},
+		{"claude-3-opus", false},
+		{"", false},
+		{"o", false},
+		{"openai", false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.model, func(t *testing.T) {
+			if got := isOSeriesModel(tt.model); got != tt.expected {
+				t.Errorf("isOSeriesModel(%q) = %v, want %v", tt.model, got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestChatCompletion_ReasoningModel_AdaptsParameters(t *testing.T) {
+	maxTokens := 1000
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, err := io.ReadAll(r.Body)
+		if err != nil {
+			t.Fatalf("failed to read request body: %v", err)
+		}
+
+		var raw map[string]interface{}
+		if err := json.Unmarshal(body, &raw); err != nil {
+			t.Fatalf("failed to unmarshal request: %v", err)
+		}
+
+		// max_tokens must NOT be present
+		if _, ok := raw["max_tokens"]; ok {
+			t.Error("reasoning model request should not contain max_tokens")
+		}
+
+		// max_completion_tokens must be present with the right value
+		mct, ok := raw["max_completion_tokens"]
+		if !ok {
+			t.Fatal("reasoning model request should contain max_completion_tokens")
+		}
+		if int(mct.(float64)) != maxTokens {
+			t.Errorf("max_completion_tokens = %v, want %d", mct, maxTokens)
+		}
+
+		// temperature must NOT be present
+		if _, ok := raw["temperature"]; ok {
+			t.Error("reasoning model request should not contain temperature")
+		}
+
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{
+			"id": "chatcmpl-123",
+			"object": "chat.completion",
+			"model": "o3-mini",
+			"choices": [{"index": 0, "message": {"role": "assistant", "content": "Hi"}, "finish_reason": "stop"}],
+			"usage": {"prompt_tokens": 5, "completion_tokens": 10, "total_tokens": 15}
+		}`))
+	}))
+	defer server.Close()
+
+	provider := NewWithHTTPClient("test-api-key", nil, llmclient.Hooks{})
+	provider.SetBaseURL(server.URL)
+
+	temp := 0.7
+	req := &core.ChatRequest{
+		Model:       "o3-mini",
+		Messages:    []core.Message{{Role: "user", Content: "Hello"}},
+		MaxTokens:   &maxTokens,
+		Temperature: &temp,
+	}
+
+	resp, err := provider.ChatCompletion(context.Background(), req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.Model != "o3-mini" {
+		t.Errorf("Model = %q, want %q", resp.Model, "o3-mini")
+	}
+}
+
+func TestChatCompletion_NonReasoningModel_PassesMaxTokens(t *testing.T) {
+	maxTokens := 1000
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, err := io.ReadAll(r.Body)
+		if err != nil {
+			t.Fatalf("failed to read request body: %v", err)
+		}
+
+		var raw map[string]interface{}
+		if err := json.Unmarshal(body, &raw); err != nil {
+			t.Fatalf("failed to unmarshal request: %v", err)
+		}
+
+		// max_tokens must be present
+		mt, ok := raw["max_tokens"]
+		if !ok {
+			t.Fatal("non-reasoning model request should contain max_tokens")
+		}
+		if int(mt.(float64)) != maxTokens {
+			t.Errorf("max_tokens = %v, want %d", mt, maxTokens)
+		}
+
+		// max_completion_tokens must NOT be present
+		if _, ok := raw["max_completion_tokens"]; ok {
+			t.Error("non-reasoning model request should not contain max_completion_tokens")
+		}
+
+		// temperature must be present
+		if _, ok := raw["temperature"]; !ok {
+			t.Error("non-reasoning model request should contain temperature")
+		}
+
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{
+			"id": "chatcmpl-456",
+			"object": "chat.completion",
+			"model": "gpt-4o",
+			"choices": [{"index": 0, "message": {"role": "assistant", "content": "Hi"}, "finish_reason": "stop"}],
+			"usage": {"prompt_tokens": 5, "completion_tokens": 10, "total_tokens": 15}
+		}`))
+	}))
+	defer server.Close()
+
+	provider := NewWithHTTPClient("test-api-key", nil, llmclient.Hooks{})
+	provider.SetBaseURL(server.URL)
+
+	temp := 0.7
+	req := &core.ChatRequest{
+		Model:       "gpt-4o",
+		Messages:    []core.Message{{Role: "user", Content: "Hello"}},
+		MaxTokens:   &maxTokens,
+		Temperature: &temp,
+	}
+
+	resp, err := provider.ChatCompletion(context.Background(), req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.Model != "gpt-4o" {
+		t.Errorf("Model = %q, want %q", resp.Model, "gpt-4o")
+	}
+}
+
+func TestStreamChatCompletion_ReasoningModel_AdaptsParameters(t *testing.T) {
+	maxTokens := 2000
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, err := io.ReadAll(r.Body)
+		if err != nil {
+			t.Fatalf("failed to read request body: %v", err)
+		}
+
+		var raw map[string]interface{}
+		if err := json.Unmarshal(body, &raw); err != nil {
+			t.Fatalf("failed to unmarshal request: %v", err)
+		}
+
+		// Must use max_completion_tokens, not max_tokens
+		if _, ok := raw["max_tokens"]; ok {
+			t.Error("streaming reasoning model request should not contain max_tokens")
+		}
+		mct, ok := raw["max_completion_tokens"]
+		if !ok {
+			t.Fatal("streaming reasoning model request should contain max_completion_tokens")
+		}
+		if int(mct.(float64)) != maxTokens {
+			t.Errorf("max_completion_tokens = %v, want %d", mct, maxTokens)
+		}
+
+		// stream must be true
+		if stream, ok := raw["stream"].(bool); !ok || !stream {
+			t.Error("stream should be true")
+		}
+
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`data: {"id":"chatcmpl-123","object":"chat.completion.chunk","model":"o4-mini","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}
+
+data: [DONE]
+`))
+	}))
+	defer server.Close()
+
+	provider := NewWithHTTPClient("test-api-key", nil, llmclient.Hooks{})
+	provider.SetBaseURL(server.URL)
+
+	req := &core.ChatRequest{
+		Model:     "o4-mini",
+		Messages:  []core.Message{{Role: "user", Content: "Hello"}},
+		MaxTokens: &maxTokens,
+	}
+
+	body, err := provider.StreamChatCompletion(context.Background(), req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	defer func() { _ = body.Close() }()
+
+	respBody, err := io.ReadAll(body)
+	if err != nil {
+		t.Fatalf("failed to read response body: %v", err)
+	}
+	if !strings.Contains(string(respBody), "o4-mini") {
+		t.Error("response should contain o4-mini model")
+	}
+}
+
 func TestIsValidClientRequestID(t *testing.T) {
 	tests := []struct {
 		name string
