Skip to content

Commit 8a1e220

Browse files
committed
test(qa): relax Matrix tool progress matching
1 parent 250be27 commit 8a1e220

2 files changed

Lines changed: 203 additions & 15 deletions

File tree

extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts

Lines changed: 98 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -690,12 +690,77 @@ function assertMatrixQaToolProgressMentionsInert(event: MatrixQaObservedEvent) {
690690
}
691691
}
692692

693+
/**
 * Matches a single bullet line: optional leading whitespace, a `-` or `*`
 * marker, at least one whitespace character, then non-empty text that may be
 * wrapped in backticks but contains no backticks in its interior.
 *
 * Hoisted to module scope so the pattern is built once instead of once per
 * inspected line. It has no `g`/`y` flag, so `test()` keeps no state between
 * calls and sharing the instance is safe.
 */
const MATRIX_QA_TOOL_PROGRESS_PREVIEW_LINE_PATTERN = /^\s*[-*]\s+`?[^`\s][^`]*`?\s*$/u;

/**
 * Reports whether any line of `body` looks like a bullet-style progress line.
 *
 * @param body - Message body to inspect; lines are split on `\n` or `\r\n`.
 * @returns `true` when at least one line matches the bullet pattern, `false`
 *   otherwise (including when `body` is missing or empty).
 */
function hasMatrixQaToolProgressPreviewLine(body: string | undefined): boolean {
  if (body == null) {
    return false;
  }
  return body
    .split(/\r?\n/)
    .some((line) => MATRIX_QA_TOOL_PROGRESS_PREVIEW_LINE_PATTERN.test(line));
}
698+
699+
/**
 * Shortens a message body for inclusion in diagnostic output.
 *
 * @param body - Message body; may be missing.
 * @returns `"<none>"` when `body` is missing or empty, the body unchanged when
 *   it is at most 240 UTF-16 code units long, and otherwise a prefix of at
 *   most 237 code units followed by `"..."`.
 */
function truncateMatrixQaToolProgressBody(body: string | undefined): string {
  const maxLength = 240;
  const ellipsis = "...";

  if (!body) {
    return "<none>";
  }
  if (body.length <= maxLength) {
    return body;
  }

  let end = maxLength - ellipsis.length;
  // `slice` counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). Drop a trailing high surrogate
  // instead of emitting a lone, ill-formed code unit before the ellipsis.
  const lastKeptUnit = body.charCodeAt(end - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    end -= 1;
  }
  return `${body.slice(0, end)}${ellipsis}`;
}
705+
706+
/**
 * Renders one observed event as a single space-separated diagnostic line of
 * the form `<eventId> kind=<kind> relation=<relation> body=<json string>`.
 *
 * The relation is `<relType>:<eventId>` when the event carries a truthy
 * relation type (with `<none>` standing in for a missing related event id),
 * and `<none>` otherwise. The body is truncated and then JSON-encoded so that
 * newlines and quotes stay on one line.
 */
function describeMatrixQaToolProgressCandidate(event: MatrixQaObservedEvent) {
  const relType = event.relatesTo?.relType;
  let relation = "<none>";
  if (relType) {
    relation = `${relType}:${event.relatesTo?.eventId ?? "<none>"}`;
  }

  const encodedBody = JSON.stringify(truncateMatrixQaToolProgressBody(event.body));
  return `${event.eventId} kind=${event.kind} relation=${relation} body=${encodedBody}`;
}
716+
717+
/**
 * Builds the multi-line error text used when waiting for a tool-progress
 * preview update fails.
 *
 * The message consists of, in order:
 *  1. the cause's message (or a generic line with the stringified cause when
 *     it is not an `Error`),
 *  2. the id of the preview event that was being tracked,
 *  3. up to the last eight candidate events, one per line, or a `<none>` line.
 *
 * A candidate is an `m.room.message` event at or after `startIndex` in
 * `events`, in the given room, sent by the given user, with the expected kind,
 * that either is the preview event, relates to it, or mentions "Working".
 */
function buildMatrixQaToolProgressTimeoutMessage(params: {
  cause: unknown;
  events: MatrixQaObservedEvent[];
  expectedPreviewKind: MatrixQaObservedEvent["kind"];
  previewEventId: string;
  roomId: string;
  startIndex: number;
  sutUserId: string;
}) {
  const { cause, events, expectedPreviewKind, previewEventId, roomId, startIndex, sutUserId } =
    params;

  const isPreviewCandidate = (event: MatrixQaObservedEvent) => {
    const isExpectedSutMessage =
      event.roomId === roomId &&
      event.sender === sutUserId &&
      event.type === "m.room.message" &&
      event.kind === expectedPreviewKind;
    if (!isExpectedSutMessage) {
      return false;
    }
    if (event.eventId === previewEventId) {
      return true;
    }
    if (event.relatesTo?.eventId === previewEventId) {
      return true;
    }
    return /\bWorking\b/i.test(event.body ?? "");
  };

  // Only the most recent matches are reported to keep the message readable.
  const recentCandidates = events.slice(startIndex).filter(isPreviewCandidate).slice(-8);

  const candidateLines: string[] = [];
  if (recentCandidates.length === 0) {
    candidateLines.push("observed preview candidates: <none>");
  } else {
    candidateLines.push("observed preview candidates:");
    for (const candidate of recentCandidates) {
      candidateLines.push(describeMatrixQaToolProgressCandidate(candidate));
    }
  }

  let headline: string;
  if (cause instanceof Error) {
    headline = cause.message;
  } else {
    headline = `Matrix tool progress wait failed: ${String(cause)}`;
  }

  return [headline, `preview event: ${previewEventId}`, ...candidateLines].join("\n");
}
756+
693757
async function runMatrixToolProgressScenario(
694758
context: MatrixQaScenarioContext,
695759
params: {
696760
expectedPreviewKind: MatrixQaObservedEvent["kind"];
697761
finalText: string;
698762
label: string;
763+
allowGenericProgressLine?: boolean;
699764
mentionSafety?: boolean;
700765
progressPattern: RegExp;
701766
triggerBodyBuilder: (sutUserId: string, finalText: string) => string;
@@ -721,22 +786,39 @@ async function runMatrixToolProgressScenario(
721786
since: startSince,
722787
timeoutMs: context.timeoutMs,
723788
});
724-
const progress = params.progressPattern.test(preview.event.body ?? "")
789+
const matchesExpectedProgress = (body: string | undefined) =>
790+
params.progressPattern.test(body ?? "") ||
791+
(params.allowGenericProgressLine === true && hasMatrixQaToolProgressPreviewLine(body));
792+
const progress = matchesExpectedProgress(preview.event.body)
725793
? preview
726-
: await client.waitForRoomEvent({
727-
observedEvents: context.observedEvents,
728-
predicate: (event) =>
729-
event.roomId === context.roomId &&
730-
event.sender === context.sutUserId &&
731-
event.kind === params.expectedPreviewKind &&
732-
event.relatesTo?.relType === "m.replace" &&
733-
event.relatesTo.eventId === preview.event.eventId &&
734-
/\bWorking\b/i.test(event.body ?? "") &&
735-
params.progressPattern.test(event.body ?? ""),
736-
roomId: context.roomId,
737-
since: preview.since,
738-
timeoutMs: context.timeoutMs,
739-
});
794+
: await client
795+
.waitForRoomEvent({
796+
observedEvents: context.observedEvents,
797+
predicate: (event) =>
798+
event.roomId === context.roomId &&
799+
event.sender === context.sutUserId &&
800+
event.kind === params.expectedPreviewKind &&
801+
event.relatesTo?.relType === "m.replace" &&
802+
event.relatesTo.eventId === preview.event.eventId &&
803+
/\bWorking\b/i.test(event.body ?? "") &&
804+
matchesExpectedProgress(event.body),
805+
roomId: context.roomId,
806+
since: preview.since,
807+
timeoutMs: context.timeoutMs,
808+
})
809+
.catch((err: unknown) => {
810+
throw new Error(
811+
buildMatrixQaToolProgressTimeoutMessage({
812+
cause: err,
813+
events: context.observedEvents,
814+
expectedPreviewKind: params.expectedPreviewKind,
815+
previewEventId: preview.event.eventId,
816+
roomId: context.roomId,
817+
startIndex: startObservedIndex,
818+
sutUserId: context.sutUserId,
819+
}),
820+
);
821+
});
740822

741823
if (params.mentionSafety) {
742824
assertMatrixQaToolProgressMentionsInert(progress.event);
@@ -804,6 +886,7 @@ export async function runToolProgressPreviewScenario(context: MatrixQaScenarioCo
804886
expectedPreviewKind: "notice",
805887
finalText: buildMatrixQaToken("MATRIX_QA_TOOL_PROGRESS"),
806888
label: "tool progress preview",
889+
allowGenericProgressLine: true,
807890
progressPattern: /\btool:\s*read\b/i,
808891
triggerBodyBuilder: buildMatrixToolProgressPrompt,
809892
});

extensions/qa-matrix/src/runners/contract/scenarios.test.ts

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2554,6 +2554,111 @@ describe("matrix live qa scenarios", () => {
25542554
});
25552555
});
25562556

2557+
// A bare "Working..." preview followed by an edit whose bullet line names a
// tool other than `read` must still count as tool progress for this scenario.
it("accepts non-read Matrix tool progress lines in quiet previews", async () => {
  const previewEventId = "$tool-progress-generic-preview";

  // Initial preview: no tool line yet, so the scenario has to wait for an edit.
  const initialPreview = matrixQaMessageEvent({
    kind: "notice",
    eventId: previewEventId,
    body: "Working...",
  });
  // Edit of the preview carrying a generic (non-read) bullet progress line.
  const genericProgressUpdate = matrixQaMessageEvent({
    kind: "notice",
    eventId: "$tool-progress-generic-update",
    body: "Working...\n- `tool: exec_command`",
    relatesTo: {
      relType: "m.replace",
      eventId: previewEventId,
    },
  });

  mockMatrixQaRoomClient({
    driverEventId: "$tool-progress-generic-trigger",
    events: [
      { event: initialPreview, since: "driver-sync-preview" },
      { event: genericProgressUpdate, since: "driver-sync-progress" },
      {
        // The final edit is built lazily from the body of the first sent message.
        event: ({ sendTextMessage }) =>
          matrixQaMessageEvent({
            kind: "notice",
            eventId: "$tool-progress-generic-final",
            body: readMatrixQaReplyDirective(
              sendTextMessage.mock.calls[0]?.[0]?.body,
              "MATRIX_QA_TOOL_PROGRESS_FIXED",
            ),
            relatesTo: {
              relType: "m.replace",
              eventId: previewEventId,
            },
          }),
        since: "driver-sync-next",
      },
    ],
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    (entry) => entry.id === "matrix-room-tool-progress-preview",
  );
  expect(scenario).toBeDefined();

  const run = runMatrixQaScenario(scenario!, matrixQaScenarioContext());
  await expect(run).resolves.toMatchObject({
    artifacts: {
      driverEventId: "$tool-progress-generic-trigger",
      previewBodyPreview: "Working...\n- `tool: exec_command`",
      previewEventId: "$tool-progress-generic-preview",
      reply: {
        eventId: "$tool-progress-generic-final",
      },
    },
  });
});
2617+
2618+
// When the wait for a progress edit fails, the rejection message must list the
// preview-related events that were observed, so the failure is diagnosable.
it("reports Matrix tool progress preview candidates when the progress wait times out", async () => {
  const previewEvent = matrixQaMessageEvent({
    kind: "notice",
    eventId: "$tool-progress-timeout-preview",
    body: "Working...",
  });
  // An edit of the preview that has no tool/bullet line.
  const updateEvent = matrixQaMessageEvent({
    kind: "notice",
    eventId: "$tool-progress-timeout-update",
    body: "Working...\nstill deciding",
    relatesTo: {
      relType: "m.replace",
      eventId: previewEvent.eventId,
    },
  });
  const context = matrixQaScenarioContext();

  const waitForRoomEvent = vi.fn();
  // First wait: record and return the initial preview.
  waitForRoomEvent.mockImplementationOnce(async () => {
    context.observedEvents.push(previewEvent);
    return { event: previewEvent, since: "driver-sync-preview" };
  });
  // Second wait: record the edit as observed, then fail like a timeout.
  waitForRoomEvent.mockImplementationOnce(async () => {
    context.observedEvents.push(updateEvent);
    throw new Error("timed out after 8000ms waiting for Matrix room event");
  });

  createMatrixQaClient.mockReturnValue({
    primeRoom: vi.fn().mockResolvedValue("driver-sync-start"),
    sendTextMessage: vi.fn().mockResolvedValue("$tool-progress-timeout-trigger"),
    waitForRoomEvent,
  });

  const scenario = MATRIX_QA_SCENARIOS.find(
    (entry) => entry.id === "matrix-room-tool-progress-preview",
  );
  expect(scenario).toBeDefined();

  const run = runMatrixQaScenario(scenario!, context);
  await expect(run).rejects.toThrow(
    /observed preview candidates:[\s\S]*\$tool-progress-timeout-update/,
  );
});
2661+
25572662
it("keeps Matrix tool progress opt-out from creating Working previews", async () => {
25582663
const { waitForRoomEvent } = mockMatrixQaRoomClient({
25592664
driverEventId: "$tool-progress-optout-trigger",

0 commit comments

Comments
 (0)