Skip to content

Commit a9f099d

Browse files
committed
test(qa): require channel scenario markers
1 parent 2fa60af commit a9f099d

5 files changed

Lines changed: 198 additions & 11 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai
6767
- Release/CI/E2E: require the Kitchen Sink RPC walk to prove every expected plugin tool is cataloged and effective before invoking tool fixtures.
6868
- Release/CI/E2E: stop tracked Docker build commands when centralized build wrappers receive shutdown signals.
6969
- Release/CI/E2E: cover MCP channel pairing reconnects by asserting the same temporary client state is reused across reconnects.
70+
- Release/CI/E2E: require QA channel baseline and reconnect scenarios to assert their scenario markers instead of accepting any outbound reply.
7071
- Release/CI/E2E: fail secret-provider proof runs when temporary state cleanup still fails after retries instead of hiding the cleanup error.
7172
- Release/CI/E2E: fail package-candidate ref proofs when temporary source worktree cleanup fails instead of leaving stale worktrees behind.
7273
- Release/CI/E2E: remove package tarball extract directories when tar extraction fails before validation can continue.

extensions/qa-lab/src/scenario-flow-runner.test.ts

Lines changed: 165 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,98 @@
11
import { describe, expect, it } from "vitest";
22
import { createQaBusState } from "./bus-state.js";
3+
import { readQaScenarioById } from "./scenario-catalog.js";
34
import { runScenarioFlow } from "./scenario-flow-runner.js";
45

6+
/** A single flow step as passed to the stubbed `runScenario`: a display name plus an async body whose optional string result is recorded as the step's details. */
type QaFlowStep = {
7+
name: string;
8+
run: () => Promise<string | void>;
9+
};
10+
11+
/**
 * Renders every message in the bus snapshot as one `direction:conversationId:text` line,
 * joined with newlines. Wired in below as the `formatTransportTranscript` stub; it accepts
 * only the state, so any additional arguments a flow passes are ignored.
 */
function formatTestTranscript(state: ReturnType<typeof createQaBusState>) {
12+
return state
13+
.getSnapshot()
14+
.messages.map((message) => `${message.direction}:${message.conversation.id}:${message.text}`)
15+
.join("\n");
16+
}
17+
18+
/**
 * Loads a scenario by id and runs its flow through `runScenarioFlow` against a stubbed API
 * backed by a fresh QA bus state.
 *
 * @param scenarioId Id passed to `readQaScenarioById`.
 * @param params Optional hooks. `onWaitForOutboundMessage` is invoked at the start of every
 *   `waitForOutboundMessage` call with the 1-based call count and the state handed to that
 *   call, so a test can inject outbound messages before the predicate is evaluated.
 * @returns Whatever `runScenarioFlow` resolves to for the loaded flow.
 * @throws Error when the scenario defines no flow; errors raised inside the flow (including
 *   the stubbed wait "timeout") are not caught here.
 */
async function runLoadedScenarioFlow(
19+
scenarioId: string,
20+
params: {
21+
onWaitForOutboundMessage?: (params: {
22+
waitCount: number;
23+
state: ReturnType<typeof createQaBusState>;
24+
}) => void;
25+
} = {},
26+
) {
27+
const scenario = readQaScenarioById(scenarioId);
28+
const flow = scenario.execution.flow;
29+
if (!flow) {
30+
throw new Error(`scenario has no flow: ${scenarioId}`);
31+
}
32+
33+
const state = createQaBusState();
34+
// Counts waitForOutboundMessage invocations so hooks can tell the first wait from follow-ups.
let waitCount = 0;
35+
// Stubbed flow API: gateway/channel waits, sleeps and agent prompts resolve immediately
// without doing anything; only the bus state and the outbound wait carry real behavior.
const api = {
36+
env: {},
37+
state,
38+
scenario,
39+
config: scenario.execution.config ?? {},
40+
// Fixed UUID keeps any generated identifiers the same on every run.
randomUUID: () => "00000000-0000-4000-8000-000000000000",
41+
// Returns the requested timeout unchanged; the env argument is ignored.
liveTurnTimeoutMs: (_env: unknown, timeoutMs: number) => timeoutMs,
42+
waitForGatewayHealthy: async () => undefined,
43+
waitForQaChannelReady: async () => undefined,
44+
waitForNoOutbound: async () => undefined,
45+
sleep: async () => undefined,
46+
reset: async () => {
47+
state.reset();
48+
},
49+
resetBus: async () => {
50+
state.reset();
51+
},
52+
runAgentPrompt: async () => undefined,
53+
formatTransportTranscript: formatTestTranscript,
54+
// Single-pass stand-in for the real wait: runs the test hook, then checks the snapshot
// once (from sinceIndex, default 0) and throws a timeout-style error if nothing matches.
// It never actually waits for timeoutMs.
waitForOutboundMessage: async (
55+
stateLocal: ReturnType<typeof createQaBusState>,
56+
predicate: (candidate: unknown) => boolean,
57+
timeoutMs: number,
58+
options?: { sinceIndex?: number },
59+
) => {
60+
waitCount += 1;
61+
params.onWaitForOutboundMessage?.({ waitCount, state: stateLocal });
62+
const match = stateLocal
63+
.getSnapshot()
64+
.messages.slice(options?.sinceIndex ?? 0)
65+
.find((candidate) => predicate(candidate));
66+
if (match) {
67+
return match;
68+
}
69+
throw new Error(`timed out after ${timeoutMs}ms waiting for outbound marker`);
70+
},
71+
// Runs the steps sequentially and reports each as "pass"; a step that throws aborts the
// whole run because the error is not caught here.
runScenario: async (_name: string, steps: QaFlowStep[]) => {
72+
const stepResults = [];
73+
for (const step of steps) {
74+
const details = await step.run();
75+
stepResults.push({
76+
name: step.name,
77+
status: "pass" as const,
78+
// Only attach details when the step returned something.
...(details !== undefined ? { details } : {}),
79+
});
80+
}
81+
return {
82+
name: scenario.title,
83+
status: "pass" as const,
84+
steps: stepResults,
85+
};
86+
},
87+
};
88+
89+
return await runScenarioFlow({
90+
api,
91+
scenarioTitle: scenario.title,
92+
flow,
93+
});
94+
}
95+
596
describe("scenario-flow-runner", () => {
697
it("supports qaImport inside flow expressions", async () => {
798
const result = await runScenarioFlow({
@@ -221,4 +312,78 @@ describe("scenario-flow-runner", () => {
221312
expect(result.status).toBe("pass");
222313
expect(result.steps[0]?.details).toBe("QA_CODEX_PLUGIN_TURN_OK");
223314
});
315+
316+
// Both baseline scenarios must fail when the only outbound reply lacks the scenario marker.
// The hook injects the unmarked reply on every wait, so the flow's marker predicate never
// matches and the stubbed wait throws its timeout-style error.
it.each([
317+
{
318+
scenarioId: "channel-chat-baseline",
319+
to: "channel:qa-room",
320+
text: "generic shared-channel reply without the required marker",
321+
},
322+
{
323+
scenarioId: "dm-chat-baseline",
324+
to: "dm:alice",
325+
text: "generic DM reply without the required marker",
326+
},
327+
])("rejects unmarked outbound replies for $scenarioId", async ({ scenarioId, to, text }) => {
328+
await expect(
329+
runLoadedScenarioFlow(scenarioId, {
330+
onWaitForOutboundMessage: ({ state }) => {
331+
state.addOutboundMessage({
332+
accountId: "qa-channel",
333+
to,
334+
text,
335+
});
336+
},
337+
}),
338+
).rejects.toThrow("waiting for outbound marker");
339+
});
340+
341+
// The first wait receives the first-turn reply; every later wait receives that same text
// again instead of a second-turn reply. The follow-up wait is expected to reject with the
// stubbed timeout error (presumably because the scenario's second marker never appears —
// the marker values live in the scenario config, not in this file).
it("rejects reconnect follow-up replies that replay the first marker", async () => {
342+
await expect(
343+
runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
344+
onWaitForOutboundMessage: ({ waitCount, state }) => {
345+
if (waitCount === 1) {
346+
state.addOutboundMessage({
347+
accountId: "qa-channel",
348+
to: "channel:qa-room",
349+
text: "RECONNECT-FIRST-OK",
350+
});
351+
return;
352+
}
353+
// Follow-up waits deliberately replay the first reply's text.
state.addOutboundMessage({
354+
accountId: "qa-channel",
355+
to: "channel:qa-room",
356+
text: "RECONNECT-FIRST-OK",
357+
});
358+
},
359+
}),
360+
).rejects.toThrow("waiting for outbound marker");
361+
});
362+
363+
// The first wait receives the first-turn reply; later waits receive a second-turn reply plus
// one extra reply with no marker. The expected rejection text comes from the scenario's
// post-restart assertion message rather than from the stubbed wait timeout.
it("rejects reconnect follow-up turns with extra unmarked outbound replies", async () => {
364+
await expect(
365+
runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
366+
onWaitForOutboundMessage: ({ waitCount, state }) => {
367+
if (waitCount === 1) {
368+
state.addOutboundMessage({
369+
accountId: "qa-channel",
370+
to: "channel:qa-room",
371+
text: "RECONNECT-FIRST-OK",
372+
});
373+
return;
374+
}
375+
state.addOutboundMessage({
376+
accountId: "qa-channel",
377+
to: "channel:qa-room",
378+
text: "RECONNECT-SECOND-OK",
379+
});
380+
// Extra delivery without any marker: simulates a duplicate/unexpected post-restart reply.
state.addOutboundMessage({
381+
accountId: "qa-channel",
382+
to: "channel:qa-room",
383+
text: "unmarked duplicate delivery",
384+
});
385+
},
386+
}),
387+
).rejects.toThrow("exactly one marked post-restart reply");
388+
});
224389
});

qa/scenarios/channels/channel-chat-baseline.md

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ coverage:
1212
objective: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
1313
successCriteria:
1414
- Agent replies in the shared channel transcript.
15+
- Agent visible reply contains the scenario marker.
1516
- Agent keeps the conversation scoped to the channel.
1617
- Agent respects mention-driven group routing semantics.
1718
docsRefs:
@@ -24,7 +25,8 @@ execution:
2425
kind: flow
2526
summary: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
2627
config:
27-
mentionPrompt: "@openclaw explain the QA lab"
28+
expectedMarker: QA-CHANNEL-BASELINE-OK
29+
mentionPrompt: "@openclaw qa channel baseline marker check. Reply exactly: QA-CHANNEL-BASELINE-OK"
2830
```
2931
3032
```yaml qa-flow
@@ -78,7 +80,14 @@ steps:
7880
- ref: state
7981
- lambda:
8082
params: [candidate]
81-
expr: "candidate.conversation.id === 'qa-room' && !candidate.threadId"
83+
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && !candidate.threadId && String(candidate.text ?? '').includes(config.expectedMarker)"
8284
- expr: liveTurnTimeoutMs(env, 180000)
85+
- set: matchingOutbound
86+
value:
87+
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && String(candidate.text ?? '').includes(config.expectedMarker))"
88+
- assert:
89+
expr: matchingOutbound.length === 1
90+
message:
91+
expr: "`expected exactly one channel baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
8392
detailsExpr: message.text
8493
```

qa/scenarios/channels/dm-chat-baseline.md

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ coverage:
1212
objective: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
1313
successCriteria:
1414
- Agent replies in DM without channel routing mistakes.
15+
- Agent visible reply contains the scenario marker.
1516
- Agent explains the QA lab and message bus correctly.
1617
- Agent keeps the dev C-3PO personality.
1718
docsRefs:
@@ -24,7 +25,8 @@ execution:
2425
kind: flow
2526
summary: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
2627
config:
27-
prompt: "Hello there, who are you?"
28+
expectedMarker: QA-DM-BASELINE-OK
29+
prompt: "DM baseline marker check. Include exact marker: `QA-DM-BASELINE-OK` and briefly identify the QA lab message bus."
2830
```
2931
3032
```yaml qa-flow
@@ -47,7 +49,14 @@ steps:
4749
- ref: state
4850
- lambda:
4951
params: [candidate]
50-
expr: "candidate.conversation.id === 'alice'"
52+
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker)"
5153
- expr: liveTurnTimeoutMs(env, 45000)
54+
- set: matchingOutbound
55+
value:
56+
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker))"
57+
- assert:
58+
expr: matchingOutbound.length === 1
59+
message:
60+
expr: "`expected exactly one DM baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'alice' })}`"
5261
detailsExpr: outbound.text
5362
```

qa/scenarios/channels/qa-channel-reconnect-dedupe.md

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ steps:
6464
- ref: state
6565
- lambda:
6666
params: [candidate]
67-
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
67+
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.firstMarker)"
6868
- expr: liveTurnTimeoutMs(env, 60000)
6969
- set: beforeRestartCursor
7070
value:
@@ -80,9 +80,9 @@ steps:
8080
value:
8181
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
8282
- assert:
83-
expr: "firstMatchesBeforeFollowup.length === 1"
83+
expr: "firstMatchesBeforeFollowup.length === 1 && String(firstMatchesBeforeFollowup[0]?.text ?? '').includes(config.firstMarker)"
8484
message:
85-
expr: "`readiness cycle replayed first reply ${firstMatchesBeforeFollowup.length} times; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
85+
expr: "`readiness cycle should preserve exactly one marked first reply, saw ${firstMatchesBeforeFollowup.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
8686
- call: runAgentPrompt
8787
args:
8888
- ref: env
@@ -99,7 +99,7 @@ steps:
9999
- ref: state
100100
- lambda:
101101
params: [candidate]
102-
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
102+
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.secondMarker)"
103103
- expr: liveTurnTimeoutMs(env, 60000)
104104
- sinceIndex:
105105
ref: beforeRestartCursor
@@ -108,13 +108,16 @@ steps:
108108
expr: state.getSnapshot()
109109
- set: firstMatches
110110
value:
111-
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
111+
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.firstMarker))"
112112
- set: secondMatches
113+
value:
114+
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.secondMarker))"
115+
- set: postRestartOutbounds
113116
value:
114117
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
115118
- assert:
116-
expr: "firstMatches.length === 1 && secondMatches.length === 1"
119+
expr: "firstMatches.length === 1 && secondMatches.length === 1 && postRestartOutbounds.length === 1 && !postRestartOutbounds.some((candidate) => String(candidate.text ?? '').includes(config.firstMarker))"
117120
message:
118-
expr: "`expected one pre-restart and one post-restart reply; first=${firstMatches.length} second=${secondMatches.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
121+
expr: "`expected one marked pre-restart reply and exactly one marked post-restart reply without replaying the first marker; first=${firstMatches.length} second=${secondMatches.length} post=${postRestartOutbounds.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
119122
detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`"
120123
```

0 commit comments

Comments
 (0)