Skip to content

Commit be13928

Browse files
fix(telegram): suppress internal formatting artefacts from leaking into user messages (#88128)
1 parent 440e737 commit be13928

4 files changed

Lines changed: 118 additions & 2 deletions

File tree

src/auto-reply/reply/normalize-reply.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { stripHeartbeatToken } from "../heartbeat.js";
55
import { copyReplyPayloadMetadata } from "../reply-payload.js";
66
import {
77
HEARTBEAT_TOKEN,
8+
isInternalFormattingArtifact,
89
isSilentReplyPayloadText,
910
isSilentReplyText,
1011
SILENT_REPLY_TOKEN,
@@ -18,7 +19,7 @@ import {
1819
type ResponsePrefixContext,
1920
} from "./response-prefix-template.js";
2021

21-
export type NormalizeReplySkipReason = "empty" | "silent" | "heartbeat";
22+
export type NormalizeReplySkipReason = "empty" | "silent" | "heartbeat" | "internalArtifact";
2223

2324
export type NormalizeReplyOptions = {
2425
responsePrefix?: string;
@@ -96,6 +97,14 @@ export function normalizeReplyPayload(
9697
text = stripped.text;
9798
}
9899

100+
// Suppress internal/runtime formatting artefacts (e.g. <channel|>, set-thought,
101+
// ─── separators) that LLM providers emit during streaming but must never
102+
// reach user-facing messaging channels. See issue #88128.
103+
if (text && isInternalFormattingArtifact(text) && !hasContent("")) {
104+
opts.onSkip?.("internalArtifact");
105+
return null;
106+
}
107+
99108
if (text) {
100109
text = sanitizeUserFacingText(text, { errorContext: Boolean(payload.isError) });
101110
}

src/auto-reply/reply/reply-utils.test.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ describe("normalizeReplyPayload", () => {
149149
expect(reply.channelData).toEqual(payload.channelData);
150150
});
151151

152-
it("records skip reasons for silent/empty payloads", () => {
152+
it("records skip reasons for silent/empty/internal-artifact payloads", () => {
153153
const cases = [
154154
{ name: "silent", payload: { text: SILENT_REPLY_TOKEN }, reason: "silent" },
155155
{
@@ -158,6 +158,21 @@ describe("normalizeReplyPayload", () => {
158158
reason: "silent",
159159
},
160160
{ name: "empty", payload: { text: " " }, reason: "empty" },
161+
{
162+
name: "internalArtifact <channel|>",
163+
payload: { text: "<channel|>" },
164+
reason: "internalArtifact",
165+
},
166+
{
167+
name: "internalArtifact set-thought",
168+
payload: { text: "set-thought <channel|>" },
169+
reason: "internalArtifact",
170+
},
171+
{
172+
name: "internalArtifact separator",
173+
payload: { text: "───" },
174+
reason: "internalArtifact",
175+
},
161176
] as const;
162177
for (const testCase of cases) {
163178
const reasons: string[] = [];

src/auto-reply/tokens.test.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { describe, it, expect } from "vitest";
22
import {
3+
isInternalFormattingArtifact,
34
isSilentReplyPrefixText,
45
isSilentReplyPayloadText,
56
isSilentReplyText,
@@ -8,6 +9,70 @@ import {
89
stripSilentToken,
910
} from "./tokens.js";
1011

12+
describe("isInternalFormattingArtifact", () => {
13+
it("matches <channel|> markers (#88128)", () => {
14+
expect(isInternalFormattingArtifact("<channel|>")).toBe(true);
15+
expect(isInternalFormattingArtifact(" <channel|> ")).toBe(true);
16+
expect(isInternalFormattingArtifact("\n<channel|>\n")).toBe(true);
17+
});
18+
19+
it("matches set-thought directives with channel markers (#88128)", () => {
20+
expect(isInternalFormattingArtifact("set-thought <channel|>")).toBe(true);
21+
expect(isInternalFormattingArtifact(" set-thought <channel|> ")).toBe(true);
22+
});
23+
24+
it("matches em-dash / horizontal-rule separators (#88128)", () => {
25+
expect(isInternalFormattingArtifact("───")).toBe(true);
26+
expect(isInternalFormattingArtifact("---")).toBe(true);
27+
expect(isInternalFormattingArtifact("___")).toBe(true);
28+
expect(isInternalFormattingArtifact("***")).toBe(true);
29+
expect(isInternalFormattingArtifact("****")).toBe(true);
30+
expect(isInternalFormattingArtifact(" ─── ")).toBe(true);
31+
});
32+
33+
it("matches lone XML-like tags", () => {
34+
expect(isInternalFormattingArtifact("<tag>")).toBe(true);
35+
expect(isInternalFormattingArtifact("</tag>")).toBe(true);
36+
expect(isInternalFormattingArtifact("<br/>")).toBe(true);
37+
});
38+
39+
it("matches <word|value> channel-style markup", () => {
40+
expect(isInternalFormattingArtifact("<channel|answer>")).toBe(true);
41+
expect(isInternalFormattingArtifact("<lane|reasoning>")).toBe(true);
42+
});
43+
44+
it("returns false for undefined/empty", () => {
45+
expect(isInternalFormattingArtifact(undefined)).toBe(false);
46+
expect(isInternalFormattingArtifact("")).toBe(false);
47+
});
48+
49+
it("returns false for normal user-facing text", () => {
50+
expect(isInternalFormattingArtifact("Hello! How can I help?")).toBe(false);
51+
expect(isInternalFormattingArtifact("The answer is 42.")).toBe(false);
52+
expect(isInternalFormattingArtifact("Here's your code:")).toBe(false);
53+
});
54+
55+
it("returns false for text that merely contains an artifact pattern", () => {
56+
// Real answer text that happens to include a dash separator or tag-like content
57+
// must not be suppressed.
58+
expect(
59+
isInternalFormattingArtifact(
60+
"Here are the options:\n───\n1. Option A\n2. Option B",
61+
),
62+
).toBe(false);
63+
expect(isInternalFormattingArtifact("See <https://example.com> for details.")).toBe(false);
64+
expect(
65+
isInternalFormattingArtifact("Use <channel|> syntax in your config."),
66+
).toBe(false);
67+
});
68+
69+
it("returns false for code blocks and markdown with substance", () => {
70+
expect(isInternalFormattingArtifact("```js\nconsole.log('hi')\n```")).toBe(false);
71+
expect(isInternalFormattingArtifact("**bold** and *italic* text")).toBe(false);
72+
expect(isInternalFormattingArtifact("# Heading")).toBe(false);
73+
});
74+
});
75+
1176
describe("isSilentReplyText", () => {
1277
it("returns true for exact token", () => {
1378
expect(isSilentReplyText("NO_REPLY")).toBe(true);

src/auto-reply/tokens.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,33 @@ import { escapeRegExp } from "../shared/regexp.js";
33
export const HEARTBEAT_TOKEN = "HEARTBEAT_OK";
44
export const SILENT_REPLY_TOKEN = "NO_REPLY";
55

6+
/**
7+
* Detects text that consists solely of internal/runtime formatting artefacts
8+
* produced by LLM providers (e.g. Codex) during streaming responses.
9+
*
10+
* These are NOT meaningful user-facing output and must never be sent to
11+
* messaging channels as visible messages.
12+
*
13+
* Matched patterns:
14+
* - `<channel|>`, `<word|>`, `<word|value>` — incomplete/empty angle-bracket
15+
* markup used by providers for internal channel/routing markers.
16+
* - `set-thought <...>` — reasoning directive prefixes emitted by Codex-style
17+
* providers to signal thought block transitions.
18+
* - `───`, `---`, `___`, `***`, `****` — markdown horizontal-rule separators
19+
* when they are the only non-whitespace content.
20+
* - `<tag>` / `</tag>` / `<tag/>` — lone unclosed, closing, or self-closing XML-like tags that are not
21+
* valid user-facing content.
22+
*/
23+
const INTERNAL_ARTEFACT_RE =
24+
/^\s*(?:(?:set-thought\s+)?<[\w]*\|[^>]*>|─+|-{3,}|_{3,}|\*{3,4}|<[\w]+\/?>|<\/[\w]+>)\s*$/;
25+
26+
export function isInternalFormattingArtifact(text: string | undefined): boolean {
27+
if (!text) {
28+
return false;
29+
}
30+
return INTERNAL_ARTEFACT_RE.test(text);
31+
}
32+
633
const silentExactRegexByToken = new Map<string, RegExp>();
734
const silentTrailingRegexByToken = new Map<string, RegExp>();
835
const silentLeadingAttachedRegexByToken = new Map<string, RegExp>();

0 commit comments

Comments
 (0)