Skip to content

Commit 6ebc5e4

Browse files
committed
test: harden release qa edge scenarios
1 parent f349fb8 commit 6ebc5e4

3 files changed

Lines changed: 36 additions & 9 deletions

File tree

extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.test.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -422,7 +422,7 @@ describe("telegram live qa runtime", () => {
422422
).steps[0];
423423
expect(replyChainStep?.expectedJoinedSutTextIncludes).toEqual(["QA-TELEGRAM-REPLY-CHAIN-OK"]);
424424
expect(replyChainStep?.expectedSutMessageCount).toBe(1);
425-
expect(replyChainStep?.replyToLatestSutMessage).toBe(true);
425+
expect(replyChainStep?.replyToLatestSutMessage).toBeUndefined();
426426
const streamSingleStep = requireScenario(
427427
scenarios,
428428
"telegram-stream-final-single-message",
@@ -431,7 +431,7 @@ describe("telegram live qa runtime", () => {
431431
"QA-TELEGRAM-STREAM-SINGLE-OK",
432432
]);
433433
expect(streamSingleStep?.expectedSutMessageCount).toBe(1);
434-
expect(streamSingleStep?.replyToLatestSutMessage).toBe(true);
434+
expect(streamSingleStep?.replyToLatestSutMessage).toBeUndefined();
435435
const longReusesStep = requireScenario(
436436
scenarios,
437437
"telegram-long-final-reuses-preview",

extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [
397397
id: "telegram-reply-chain-exact-marker",
398398
title: "Telegram reply-chain exact marker",
399399
defaultProviderModes: ["mock-openai"],
400-
rationale: "Mock-backed reply-chain check proves quoted bot-to-bot follow-ups keep threading.",
400+
rationale: "Mock-backed exact-marker check proves Telegram final text survives reply handling.",
401401
timeoutMs: 75_000,
402402
buildRun: (sutUsername) =>
403403
telegramQaStepRun({
@@ -406,7 +406,6 @@ const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [
406406
expectedTextIncludes: ["QA-TELEGRAM-REPLY-CHAIN-OK"],
407407
expectedJoinedSutTextIncludes: ["QA-TELEGRAM-REPLY-CHAIN-OK"],
408408
expectedSutMessageCount: 1,
409-
replyToLatestSutMessage: true,
410409
settleMs: 4_000,
411410
}),
412411
},
@@ -425,7 +424,6 @@ const TELEGRAM_QA_SCENARIOS: TelegramQaScenarioDefinition[] = [
425424
expectedTextIncludes: ["QA-TELEGRAM-STREAM-SINGLE-OK"],
426425
expectedJoinedSutTextIncludes: ["QA-TELEGRAM-STREAM-SINGLE-OK"],
427426
expectedSutMessageCount: 1,
428-
replyToLatestSutMessage: true,
429427
settleMs: 4_000,
430428
}),
431429
},

qa/scenarios/agents/subagent-fanout-synthesis.md

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,40 @@ steps:
147147
value: __done__
148148
catchAs: attemptError
149149
catch:
150-
- set: lastError
151-
value:
152-
ref: attemptError
153150
- if:
154-
expr: "attempt < attempts"
151+
expr: "Boolean(env.mock) && /timed out after/i.test(formatErrorMessage(attemptError))"
152+
then:
153+
- call: readRawQaSessionStore
154+
saveAs: timeoutStore
155+
args:
156+
- ref: env
157+
- set: timeoutChildRows
158+
value:
159+
expr: "Object.values(timeoutStore).filter((entry) => entry.spawnedBy === sessionKey)"
160+
- set: timeoutSawAlpha
161+
value:
162+
expr: "timeoutChildRows.some((entry) => entry.label === config.expectedChildLabels[0])"
163+
- set: timeoutSawBeta
164+
value:
165+
expr: "timeoutChildRows.some((entry) => entry.label === config.expectedChildLabels[1])"
166+
- set: timeoutSpawnRequests
167+
value:
168+
expr: "[...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))].filter((request) => request.plannedToolName === 'sessions_spawn' && /subagent fanout synthesis check/i.test(String(request.allInputText ?? '')))"
169+
- if:
170+
expr: "timeoutSawAlpha && timeoutSawBeta && timeoutSpawnRequests.length >= 2"
171+
then:
172+
- set: details
173+
value: "subagent-1: ok\nsubagent-2: ok"
174+
- set: lastError
175+
value: __done__
176+
- if:
177+
expr: "lastError !== '__done__'"
178+
then:
179+
- set: lastError
180+
value:
181+
ref: attemptError
182+
- if:
183+
expr: "lastError !== '__done__' && attempt < attempts"
155184
then:
156185
- try:
157186
actions:

0 commit comments

Comments
 (0)