Skip to content

Commit df04b81

Browse files
fix(gateway): preserve utf8-heavy chat history text
Address the non-ASCII chat.history regression by adding a byte-aware fallback to text truncation. Keep long UTF-8 replies readable instead of replacing them with the oversized placeholder, and cover the behavior with a regression test for emoji-heavy assistant text.
1 parent 6a974aa commit df04b81

2 files changed

Lines changed: 73 additions & 2 deletions

File tree

src/gateway/server-methods/chat.ts

Lines changed: 38 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,9 @@ type ChatAbortRequester = {
9696

9797
// Upper bound on `text.length` (UTF-16 code units, not bytes) that
// truncateChatHistoryText lets through untouched.
const CHAT_HISTORY_TEXT_MAX_CHARS = 64_000;
9898
// 128 KiB. In this view it is only used to derive CHAT_HISTORY_TEXT_MAX_BYTES.
// NOTE(review): presumably the per-message serialized size cap — confirm at its other use sites.
const CHAT_HISTORY_MAX_SINGLE_MESSAGE_BYTES = 128 * 1024;
99+
// UTF-8 byte budget for one text value: the 128 KiB figure above minus 8 KiB.
// NOTE(review): the 8 KiB looks like headroom for the rest of the message envelope — confirm.
const CHAT_HISTORY_TEXT_MAX_BYTES = CHAT_HISTORY_MAX_SINGLE_MESSAGE_BYTES - 8 * 1024;
99100
// Placeholder string for a message that is too large; its use site is outside this view.
const CHAT_HISTORY_OVERSIZED_PLACEHOLDER = "[chat.history omitted: message too large]";
101+
// Marker appended by truncateChatHistoryText whenever it shortens a text value.
const CHAT_HISTORY_TRUNCATED_SUFFIX = "\n...(truncated)...";
100102
// Mutable module-level counter, starts at 0; where it is incremented is outside this view.
let chatHistoryPlaceholderEmitCount = 0;
101103
const CHANNEL_AGNOSTIC_SESSION_SCOPES = new Set([
102104
"main",
@@ -405,11 +407,45 @@ async function rewriteChatSendUserTurnMediaPaths(params: {
405407
}
406408

407409
/**
 * Shortens a chat.history text value so it respects both the UTF-16 length
 * limit (`CHAT_HISTORY_TEXT_MAX_CHARS`) and the UTF-8 byte limit
 * (`CHAT_HISTORY_TEXT_MAX_BYTES`).
 *
 * Text that already satisfies both limits is returned unchanged. Otherwise the
 * longest prefix that fits is kept and `CHAT_HISTORY_TRUNCATED_SUFFIX` is
 * appended. The suffix bytes are reserved up front, so a truncated result
 * never exceeds `CHAT_HISTORY_TEXT_MAX_BYTES` as long as the suffix itself
 * fits in that budget. The cut never lands between the two halves of a
 * surrogate pair, so emoji and other astral characters at the boundary are
 * dropped whole instead of being left as a lone surrogate.
 *
 * @param text - The raw text value from a history message.
 * @returns The (possibly shortened) text and whether anything was cut.
 */
function truncateChatHistoryText(text: string): { text: string; truncated: boolean } {
  if (
    text.length <= CHAT_HISTORY_TEXT_MAX_CHARS &&
    Buffer.byteLength(text, "utf8") <= CHAT_HISTORY_TEXT_MAX_BYTES
  ) {
    return { text, truncated: false };
  }

  // At least one limit is exceeded from here on, so the suffix is always
  // appended; budget for it before choosing the cut point.
  const suffixBytes = Buffer.byteLength(CHAT_HISTORY_TRUNCATED_SUFFIX, "utf8");
  const maxPrefixBytes = Math.max(0, CHAT_HISTORY_TEXT_MAX_BYTES - suffixBytes);

  // Character limit first (cheap), nudged back if it would split a pair.
  let end = alignChatHistoryCutToCodePoint(
    text,
    Math.min(text.length, CHAT_HISTORY_TEXT_MAX_CHARS),
  );

  if (Buffer.byteLength(text.slice(0, end), "utf8") > maxPrefixBytes) {
    // UTF-8 size is non-decreasing in prefix length, so binary-search the
    // largest prefix length whose encoding fits the byte budget.
    let low = 0;
    let high = end;
    while (low < high) {
      const mid = Math.ceil((low + high) / 2);
      if (Buffer.byteLength(text.slice(0, mid), "utf8") <= maxPrefixBytes) {
        low = mid;
      } else {
        high = mid - 1;
      }
    }
    // The search works in code units and may stop on a high surrogate.
    end = alignChatHistoryCutToCodePoint(text, low);
  }

  return {
    text: `${text.slice(0, end)}${CHAT_HISTORY_TRUNCATED_SUFFIX}`,
    truncated: true,
  };
}

/**
 * Returns `end`, or `end - 1` when cutting `text` at `end` would separate a
 * high surrogate from the low surrogate that follows it.
 */
function alignChatHistoryCutToCodePoint(text: string, end: number): number {
  if (end <= 0 || end >= text.length) {
    return end;
  }
  const before = text.charCodeAt(end - 1);
  const after = text.charCodeAt(end);
  const splitsPair =
    before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  return splitsPair ? end - 1 : end;
}

src/gateway/server.chat.gateway-server-chat-b.test.ts

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -321,6 +321,41 @@ describe("gateway server chat", () => {
321321
});
322322
});
323323

324+
test("chat.history truncates UTF-8-heavy assistant text before oversized placeholder fallback", async () => {
  // Regression guard: a long emoji-only assistant reply must come back
  // truncated but readable, not swapped for the oversized placeholder.
  await withGatewayChatHarness(async ({ ws, createSessionDir }) => {
    const historyMaxBytes = 128 * 1024;
    const sessionDir = await prepareMainHistoryHarness({ ws, createSessionDir, historyMaxBytes });

    // 40,000 emoji = 80,000 UTF-16 code units and 160,000 UTF-8 bytes, which
    // is over the 128 KiB history budget configured above.
    const emojiReply = "😀".repeat(40_000);
    const assistantEntry = {
      message: {
        role: "assistant",
        timestamp: Date.now(),
        content: [{ type: "text", text: emojiReply }],
      },
    };
    await writeMainSessionTranscript(sessionDir, [JSON.stringify(assistantEntry)]);

    const messages = await fetchHistoryMessages(ws);
    expect(messages).toHaveLength(1);

    // The text part itself carries the truncation marker, not the placeholder.
    const head = messages[0] as { content?: Array<{ text?: string }> };
    const renderedText = head.content?.[0]?.text ?? "";
    expect(renderedText).toContain("...(truncated)...");
    expect(renderedText).not.toContain("[chat.history omitted: message too large]");

    // The whole serialized payload stays within the configured byte budget.
    const payload = JSON.stringify(messages);
    const payloadBytes = Buffer.byteLength(payload, "utf8");
    expect(payloadBytes).toBeLessThanOrEqual(historyMaxBytes);
    expect(payload).not.toContain("[chat.history omitted: message too large]");
  });
});
358+
324359
test("chat.history preserves usage and cost metadata for assistant messages", async () => {
325360
await withGatewayChatHarness(async ({ ws, createSessionDir }) => {
326361
await connectOk(ws);

0 commit comments

Comments
 (0)