Skip to content

Commit 5ca98aa

Browse files
committed
Implement anthropic count tokens api
1 parent ff4d1c2 commit 5ca98aa

4 files changed

Lines changed: 128 additions & 3 deletions

File tree

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
using Chats.BE.Controllers.Api.AnthropicCompatible.Dtos;
2+
using Chats.BE.DB;
3+
using Chats.BE.DB.Enums;
4+
using Chats.BE.Services;
5+
using Chats.BE.Services.Models;
6+
using Chats.BE.Services.Models.ChatServices;
7+
using Chats.BE.Services.OpenAIApiKeySession;
8+
using Microsoft.AspNetCore.Authorization;
9+
using Microsoft.AspNetCore.Mvc;
10+
using System.Text.Json.Nodes;
11+
12+
namespace Chats.BE.Controllers.Api.AnthropicCompatible;
13+
14+
/// <summary>
/// Anthropic-compatible token-counting endpoint (POST /v1/messages/count_tokens).
/// Validates the payload, resolves the caller's model access, and delegates the
/// actual counting to the model's <see cref="ChatService"/>.
/// </summary>
[Authorize(AuthenticationSchemes = "OpenAIApiKey")]
public class AnthropicCountTokenController(
    CurrentApiKey currentApiKey,
    ChatFactory cf,
    UserModelManager userModelManager,
    ILogger<AnthropicCountTokenController> logger) : ControllerBase
{
    // Token counting is only supported for chat-style API types.
    private static readonly DBApiType[] AllowedApiTypes = [DBApiType.OpenAIChatCompletion, DBApiType.OpenAIResponse, DBApiType.AnthropicMessages];

    /// <summary>
    /// Counts the input tokens of an Anthropic messages payload.
    /// Returns <c>{"input_tokens": n}</c> on success, or an Anthropic-style error body otherwise.
    /// </summary>
    [HttpPost("v1/messages/count_tokens")]
    public async Task<ActionResult> CountTokens([FromBody] JsonObject json, CancellationToken cancellationToken)
    {
        AnthropicCountTokenRequestWrapper request = new(json);

        if (!request.SeemsValid())
        {
            return ErrorMessage(AnthropicErrorTypes.InvalidRequestError, "Invalid request: model and messages are required.");
        }

        // SeemsValid only checks presence; an empty/whitespace model string is still invalid.
        if (string.IsNullOrWhiteSpace(request.Model))
        {
            return ErrorMessage(AnthropicErrorTypes.InvalidRequestError, "model is required.");
        }

        UserModel? userModel = await userModelManager.GetUserModel(currentApiKey.ApiKey, request.Model, cancellationToken);
        if (userModel == null)
        {
            return ErrorMessage(AnthropicErrorTypes.NotFoundError, $"The model `{request.Model}` does not exist or you do not have access to it.");
        }

        if (!AllowedApiTypes.Contains(userModel.Model.ApiType))
        {
            return ErrorMessage(AnthropicErrorTypes.InvalidRequestError, $"The model `{request.Model}` does not support messages API.");
        }

        try
        {
            Model cm = userModel.Model;
            using ChatService s = cf.CreateChatService(cm);
            ChatRequest chatRequest = request.ToChatRequest(currentApiKey.User.Id.ToString(), cm);
            int inputTokens = await s.CountTokenAsync(chatRequest, cancellationToken);

            return Ok(new AnthropicCountTokenResponse { InputTokens = inputTokens });
        }
        // Let cancellation propagate (client disconnect is not a server error);
        // everything else becomes an Anthropic api_error.
        catch (Exception e) when (e is not OperationCanceledException)
        {
            logger.LogError(e, "Error counting tokens");
            return ErrorMessage(AnthropicErrorTypes.ApiError, "Internal server error");
        }
    }

    /// <summary>
    /// Builds an Anthropic-style error response. The HTTP status code follows the
    /// Anthropic API convention per error type (400 invalid_request_error,
    /// 404 not_found_error, 500 api_error) instead of always returning 400.
    /// </summary>
    private ObjectResult ErrorMessage(string errorType, string message)
    {
        int statusCode =
            errorType == AnthropicErrorTypes.NotFoundError ? StatusCodes.Status404NotFound :
            errorType == AnthropicErrorTypes.ApiError ? StatusCodes.Status500InternalServerError :
            StatusCodes.Status400BadRequest;

        return StatusCode(statusCode, new AnthropicErrorResponse
        {
            Error = new AnthropicErrorDetail
            {
                Type = errorType,
                Message = message
            }
        });
    }
}
77+
78+
/// <summary>
/// Thin wrapper over a raw count_tokens JSON payload that adapts it to the
/// shared Anthropic messages conversion pipeline.
/// </summary>
public class AnthropicCountTokenRequestWrapper(JsonObject json)
{
    /// <summary>The requested model name, or null when the "model" field is absent.</summary>
    public string? Model => (string?)json["model"];

    /// <summary>Minimal structural check: both "model" and "messages" must be present.</summary>
    public bool SeemsValid() => Model is not null && json["messages"] is not null;

    /// <summary>
    /// Converts the payload to a <see cref="ChatRequest"/> by delegating to
    /// <see cref="AnthropicRequestWrapper"/>. Fields required by the messages API but
    /// optional for count_tokens are defaulted in place (mutates the wrapped JsonObject).
    /// </summary>
    public ChatRequest ToChatRequest(string userId, Model model)
    {
        // count_tokens payloads may omit max_tokens/stream; the shared wrapper expects them.
        if (json["max_tokens"] is null)
        {
            json["max_tokens"] = model.MaxResponseTokens;
        }
        if (json["stream"] is null)
        {
            json["stream"] = false;
        }

        return new AnthropicRequestWrapper(json).ToChatRequest(userId, model);
    }
}

src/BE/Controllers/Api/AnthropicCompatible/Dtos/AnthropicResponse.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,9 @@ public record AnthropicUsage
103103
[JsonPropertyName("cache_read_input_tokens"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
104104
public int? CacheReadInputTokens { get; init; }
105105
}
106+
107+
/// <summary>
/// Response body for the Anthropic-compatible count_tokens endpoint.
/// </summary>
public record AnthropicCountTokenResponse
{
    /// <summary>Number of tokens in the provided prompt; serialized as "input_tokens".</summary>
    [JsonPropertyName("input_tokens")]
    public required int InputTokens { get; init; }
}

src/BE/Services/Models/ChatServices/Anthropic/AnthropicChatService.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,26 @@ public override async Task<string[]> ListModels(ModelKey modelKey, CancellationT
179179
return [.. result.Data.Select(x => x.ID)];
180180
}
181181

182+
public override async Task<int> CountTokenAsync(ChatRequest request, CancellationToken cancellationToken)
183+
{
184+
AnthropicClient anthropicClient = CreateAnthropicClient(request.ChatConfig.Model.ModelKey);
185+
MessageCreateParams messageParams = ConvertOptions(request);
186+
187+
MessageCountTokensParams countParams = new()
188+
{
189+
Messages = messageParams.Messages,
190+
Model = messageParams.Model,
191+
System = request.ChatConfig.SystemPrompt is { } systemPrompt ? systemPrompt : null!,
192+
Thinking = messageParams.Thinking,
193+
Tools = messageParams.Tools != null
194+
? [.. messageParams.Tools.Select(x => new MessageCountTokensTool(x.Json))]
195+
: null,
196+
};
197+
198+
MessageTokensCount result = await anthropicClient.Messages.CountTokens(countParams, cancellationToken);
199+
return (int)result.InputTokens;
200+
}
201+
182202
static MessageCreateParams ConvertOptions(ChatRequest request)
183203
{
184204
// Anthropic has a very strict policy on thinking blocks - they need pass back thinking AND signature together

src/BE/Services/Models/ChatServices/ChatService.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,9 @@
33
using Chats.BE.DB.Enums;
44
using Chats.BE.Services.FileServices;
55
using Chats.BE.Services.Models.ChatServices;
6-
using Chats.BE.Services.Models.ChatServices.OpenAI;
76
using Chats.BE.Services.Models.Dtos;
87
using Microsoft.ML.Tokenizers;
98
using OpenAI;
10-
using OpenAI.Models;
11-
using System.ClientModel;
129
using System.Runtime.CompilerServices;
1310
using Tokenizer = Microsoft.ML.Tokenizers.Tokenizer;
1411

@@ -24,6 +21,11 @@ public abstract partial class ChatService : IDisposable
2421

2522
public virtual Task<string[]> ListModels(ModelKey modelKey, CancellationToken cancellationToken) => Task.FromResult(Array.Empty<string>());
2623

24+
public virtual Task<int> CountTokenAsync(ChatRequest request, CancellationToken cancellationToken)
25+
{
26+
return Task.FromResult(request.EstimatePromptTokens(Tokenizer));
27+
}
28+
2729
public virtual async Task<ChatSegment> Chat(ChatRequest request, CancellationToken cancellationToken)
2830
{
2931
List<ChatSegmentItem> segments = [];

0 commit comments

Comments
 (0)