Skip to content

Commit 59b85d4

Browse files
committed
test: stabilize release validation flakes
1 parent 44c3d8e commit 59b85d4

3 files changed

Lines changed: 58 additions & 31 deletions

File tree

extensions/codex/src/app-server/run-attempt.test.ts

Lines changed: 40 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -655,9 +655,7 @@ describe("runCodexAppServerAttempt", () => {
655655
};
656656

657657
expect(
658-
__testing
659-
.filterCodexDynamicTools(tools, {}, privateQaCodexEnv)
660-
.map((tool) => tool.name),
658+
__testing.filterCodexDynamicTools(tools, {}, privateQaCodexEnv).map((tool) => tool.name),
661659
).toEqual(["read", "write", "image_generate", "message"]);
662660
expect(__testing.resolveCodexDynamicToolsLoading({}, privateQaCodexEnv)).toBe("direct");
663661
});
@@ -1657,7 +1655,7 @@ describe("runCodexAppServerAttempt", () => {
16571655
path.join(tempDir, "session.jsonl"),
16581656
path.join(tempDir, "workspace"),
16591657
);
1660-
params.timeoutMs = 100;
1658+
params.timeoutMs = 250;
16611659

16621660
const result = await runCodexAppServerAttempt(params);
16631661

@@ -1694,6 +1692,13 @@ describe("runCodexAppServerAttempt", () => {
16941692
turnTerminalIdleTimeoutMs: 300,
16951693
});
16961694
await harness.waitForMethod("turn/start");
1695+
await vi.waitFor(
1696+
() =>
1697+
expect(onRunProgress).toHaveBeenCalledWith(
1698+
expect.objectContaining({ reason: "turn:start" }),
1699+
),
1700+
fastWait,
1701+
);
16971702

16981703
await new Promise((resolve) => setTimeout(resolve, 60));
16991704
await harness.notify({
@@ -1756,6 +1761,13 @@ describe("runCodexAppServerAttempt", () => {
17561761
turnTerminalIdleTimeoutMs: 500,
17571762
});
17581763
await harness.waitForMethod("turn/start");
1764+
await vi.waitFor(
1765+
() =>
1766+
expect(onRunProgress).toHaveBeenCalledWith(
1767+
expect.objectContaining({ reason: "turn:start" }),
1768+
),
1769+
fastWait,
1770+
);
17591771

17601772
await new Promise((resolve) => setTimeout(resolve, 60));
17611773
await harness.handleServerRequest({
@@ -1840,13 +1852,22 @@ describe("runCodexAppServerAttempt", () => {
18401852
path.join(tempDir, "workspace"),
18411853
);
18421854
params.timeoutMs = 100;
1855+
const onRunProgress = vi.fn();
1856+
params.onRunProgress = onRunProgress;
18431857

18441858
const run = runCodexAppServerAttempt(params, {
18451859
turnCompletionIdleTimeoutMs: 300,
18461860
turnAssistantCompletionIdleTimeoutMs: 300,
18471861
turnTerminalIdleTimeoutMs: 300,
18481862
});
18491863
await harness.waitForMethod("turn/start");
1864+
await vi.waitFor(
1865+
() =>
1866+
expect(onRunProgress).toHaveBeenCalledWith(
1867+
expect.objectContaining({ reason: "turn:start" }),
1868+
),
1869+
fastWait,
1870+
);
18501871

18511872
await new Promise((resolve) => setTimeout(resolve, 60));
18521873
await harness.handleServerRequest({
@@ -1879,17 +1900,26 @@ describe("runCodexAppServerAttempt", () => {
18791900
path.join(tempDir, "session.jsonl"),
18801901
path.join(tempDir, "workspace"),
18811902
);
1882-
params.timeoutMs = 100;
1903+
params.timeoutMs = 250;
18831904
params.onBlockReply = vi.fn();
1905+
const onRunProgress = vi.fn();
1906+
params.onRunProgress = onRunProgress;
18841907

18851908
const run = runCodexAppServerAttempt(params, {
1886-
turnCompletionIdleTimeoutMs: 300,
1887-
turnAssistantCompletionIdleTimeoutMs: 300,
1888-
turnTerminalIdleTimeoutMs: 300,
1909+
turnCompletionIdleTimeoutMs: 600,
1910+
turnAssistantCompletionIdleTimeoutMs: 600,
1911+
turnTerminalIdleTimeoutMs: 600,
18891912
});
18901913
await harness.waitForMethod("turn/start");
1914+
await vi.waitFor(
1915+
() =>
1916+
expect(onRunProgress).toHaveBeenCalledWith(
1917+
expect.objectContaining({ reason: "turn:start" }),
1918+
),
1919+
fastWait,
1920+
);
18911921

1892-
await new Promise((resolve) => setTimeout(resolve, 60));
1922+
await new Promise((resolve) => setTimeout(resolve, 75));
18931923
const response = harness.handleServerRequest({
18941924
id: "request-user-input",
18951925
method: "item/tool/requestUserInput",
@@ -1913,7 +1943,7 @@ describe("runCodexAppServerAttempt", () => {
19131943
},
19141944
});
19151945
await vi.waitFor(() => expect(params.onBlockReply).toHaveBeenCalledTimes(1), fastWait);
1916-
await new Promise((resolve) => setTimeout(resolve, 60));
1946+
await new Promise((resolve) => setTimeout(resolve, 125));
19171947

19181948
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
19191949
expect(queueActiveRunMessageForTest("session-1", "2")).toBe(true);

extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -317,12 +317,7 @@ export async function runSubagentThreadSpawnScenario(context: MatrixQaScenarioCo
317317
"Do not omit thread=true; the child must bind to this Matrix thread.",
318318
`Do not write ${childToken} in the parent response.`,
319319
].join(" ");
320-
const driverEventId = await client.sendTextMessage({
321-
body: triggerBody,
322-
mentionUserIds: [context.sutUserId],
323-
roomId: context.roomId,
324-
});
325-
const intro = await client.waitForRoomEvent({
320+
const introPromise = client.waitForRoomEvent({
326321
observedEvents: context.observedEvents,
327322
predicate: (event) => {
328323
failIfMatrixSubagentThreadHookError(event);
@@ -339,6 +334,12 @@ export async function runSubagentThreadSpawnScenario(context: MatrixQaScenarioCo
339334
since: startSince,
340335
timeoutMs: context.timeoutMs,
341336
});
337+
const driverEventId = await client.sendTextMessage({
338+
body: triggerBody,
339+
mentionUserIds: [context.sutUserId],
340+
roomId: context.roomId,
341+
});
342+
const intro = await introPromise;
342343
const completion = await client.waitForRoomEvent({
343344
observedEvents: context.observedEvents,
344345
predicate: (event) => {

src/agents/subagent-announce.format.e2e.test.ts

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,8 @@ function visibleAgentResponse(runId = "run-main") {
6969
status: "ok",
7070
result: {
7171
payloads: [{ text: "announced" }],
72+
didSendViaMessagingTool: true,
73+
messagingToolSentTexts: ["announced"],
7274
},
7375
};
7476
}
@@ -518,9 +520,6 @@ describe("subagent announce formatting", () => {
518520
expect(msg).toContain(
519521
"If additional action is required, continue the task or record a follow-up; otherwise send a truthful user-facing update.",
520522
);
521-
expect(msg).toContain(
522-
"If the runtime marks this route as message-tool-only, send visible output with the message tool first",
523-
);
524523
expect(msg).toContain("Keep this internal context private");
525524
expect(call?.params?.internalEvents?.[0]?.type).toBe("task_completion");
526525
expect(call?.params?.internalEvents?.[0]?.taskLabel).toBe("do thing");
@@ -814,9 +813,10 @@ describe("subagent announce formatting", () => {
814813
expect(sendSpy).not.toHaveBeenCalled();
815814
expect(agentSpy).toHaveBeenCalledTimes(1);
816815
const call = getAgentCall() as { params?: Record<string, unknown> };
817-
expect(call?.params?.deliver).toBe(true);
816+
expect(call?.params?.deliver).toBe(false);
818817
expect(call?.params?.channel).toBe("discord");
819818
expect(call?.params?.to).toBe("channel:12345");
819+
expect(call?.params?.sourceReplyDeliveryMode).toBe("message_tool_only");
820820
});
821821

822822
it("suppresses completion delivery when subagent reply is ANNOUNCE_SKIP", async () => {
@@ -1008,9 +1008,10 @@ describe("subagent announce formatting", () => {
10081008
const call = getAgentCall() as { params?: Record<string, unknown> };
10091009
const rawMessage = call?.params?.message;
10101010
const msg = typeof rawMessage === "string" ? rawMessage : "";
1011-
expect(call?.params?.deliver).toBe(true);
1011+
expect(call?.params?.deliver).toBe(false);
10121012
expect(call?.params?.channel).toBe("discord");
10131013
expect(call?.params?.to).toBe("channel:12345");
1014+
expect(call?.params?.sourceReplyDeliveryMode).toBe("message_tool_only");
10141015
expect(msg).not.toContain("There are still");
10151016
expect(msg).not.toContain("wait for the remaining results");
10161017
});
@@ -1376,7 +1377,7 @@ describe("subagent announce formatting", () => {
13761377
threadId: 99,
13771378
},
13781379
requesterSessionMeta: {},
1379-
expectedThreadId: "99",
1380+
expectedThreadId: 99,
13801381
},
13811382
] as const;
13821383

@@ -1784,10 +1785,7 @@ describe("subagent announce formatting", () => {
17841785
});
17851786

17861787
it("keeps direct announce idempotency unique for same-ms distinct child runs", async () => {
1787-
const activeResponses = [true, false, true, false];
1788-
embeddedRunMock.isEmbeddedPiRunActive.mockImplementation(
1789-
() => activeResponses.shift() ?? false,
1790-
);
1788+
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(false);
17911789
embeddedRunMock.isEmbeddedPiRunStreaming.mockReturnValue(false);
17921790
sessionStore = {
17931791
"agent:main:main": {
@@ -1930,8 +1928,9 @@ describe("subagent announce formatting", () => {
19301928
sessionKey: "agent:main:main",
19311929
channel: "discord",
19321930
to: "channel:12345",
1933-
deliver: true,
1931+
deliver: false,
19341932
});
1933+
expect(getAgentCall().params?.sourceReplyDeliveryMode).toBe("message_tool_only");
19351934
});
19361935

19371936
it("returns failure for completion-mode when direct delivery fails and steering fallback is unavailable", async () => {
@@ -2115,10 +2114,7 @@ describe("subagent announce formatting", () => {
21152114
});
21162115

21172116
it("preserves account routing for separate collect-mode announcements", async () => {
2118-
const activeResponses = [true, false, true, false];
2119-
embeddedRunMock.isEmbeddedPiRunActive.mockImplementation(
2120-
() => activeResponses.shift() ?? false,
2121-
);
2117+
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(false);
21222118
embeddedRunMock.isEmbeddedPiRunStreaming.mockReturnValue(false);
21232119
sessionStore = {
21242120
"agent:main:main": {

0 commit comments

Comments
 (0)