Skip to content

Commit 1ad980a

Browse files
committed
fix(ci): honor exact-head proof verdicts
1 parent 38f11a0 commit 1ad980a

6 files changed

Lines changed: 170 additions & 8 deletions

File tree

.github/workflows/real-behavior-proof.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ jobs:
1818
name: Real behavior proof
1919
permissions:
2020
contents: read
21+
issues: read
2122
pull-requests: read
2223
runs-on: ubuntu-24.04
2324
steps:

scripts/github/barnacle-auto-response.mjs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
PROOF_SUFFICIENT_LABEL,
88
PROOF_SUPPLIED_LABEL,
99
evaluateRealBehaviorProof,
10+
hasClawSweeperExactHeadProof,
1011
labelsForRealBehaviorProof,
1112
} from "./real-behavior-proof-policy.mjs";
1213

@@ -767,6 +768,15 @@ async function listPullRequestFiles(github, context, pullRequest) {
767768
});
768769
}
769770

771+
/**
 * Lists the comments on an issue or pull request in the current repository.
 *
 * Hands `github.rest.issues.listComments` to `github.paginate`, using the
 * owner and repo from `context.repo` and a page size of 100, and returns
 * whatever `github.paginate` resolves to.
 */
async function listIssueComments(github, context, issueNumber) {
772+
return github.paginate(github.rest.issues.listComments, {
773+
owner: context.repo.owner,
774+
repo: context.repo.repo,
775+
issue_number: issueNumber,
776+
per_page: 100,
777+
});
778+
}
779+
770780
async function addMissingLabels(github, context, core, issueNumber, labels, labelSet) {
771781
const missingLabels = labels.filter((label) => !labelSet.has(label));
772782
if (missingLabels.length === 0) {
@@ -784,7 +794,10 @@ async function addMissingLabels(github, context, core, issueNumber, labels, labe
784794
core.info(`Added candidate labels to #${issueNumber}: ${missingLabels.join(", ")}`);
785795
}
786796

787-
function shouldRemoveProofSufficientLabel(context, proofEvaluation) {
797+
function shouldRemoveProofSufficientLabel(context, proofEvaluation, hasExactHeadClawSweeperProof) {
798+
if (hasExactHeadClawSweeperProof) {
799+
return false;
800+
}
788801
if (proofEvaluation.status !== "passed") {
789802
return true;
790803
}
@@ -793,6 +806,12 @@ function shouldRemoveProofSufficientLabel(context, proofEvaluation) {
793806

794807
async function applyPullRequestCandidateLabels(github, context, core, pullRequest, labelSet) {
795808
const files = await listPullRequestFiles(github, context, pullRequest);
809+
const hasExactHeadClawSweeperProof =
810+
labelSet.has(PROOF_SUFFICIENT_LABEL) &&
811+
hasClawSweeperExactHeadProof({
812+
pullRequest,
813+
comments: await listIssueComments(github, context, pullRequest.number),
814+
});
796815
const proofEvaluation = evaluateRealBehaviorProof({
797816
pullRequest: {
798817
...pullRequest,
@@ -811,7 +830,7 @@ async function applyPullRequestCandidateLabels(github, context, core, pullReques
811830
);
812831
if (
813832
labelSet.has(PROOF_SUFFICIENT_LABEL) &&
814-
shouldRemoveProofSufficientLabel(context, proofEvaluation)
833+
shouldRemoveProofSufficientLabel(context, proofEvaluation, hasExactHeadClawSweeperProof)
815834
) {
816835
staleProofLabels.push(PROOF_SUFFICIENT_LABEL);
817836
}

scripts/github/real-behavior-proof-check.mjs

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env node
22
import { readFileSync } from "node:fs";
33
import {
4+
evaluateClawSweeperExactHeadProof,
45
evaluateRealBehaviorProof,
56
isMaintainerTeamMember,
67
} from "./real-behavior-proof-policy.mjs";
@@ -26,12 +27,12 @@ if (!pullRequest) {
2627
process.exit(0);
2728
}
2829

29-
const token = process.env.GH_APP_TOKEN;
30+
const appToken = process.env.GH_APP_TOKEN;
3031
const org = event.repository?.owner?.login;
3132
const authorLogin = pullRequest.user?.login;
32-
if (token && org && authorLogin) {
33+
if (appToken && org && authorLogin) {
3334
try {
34-
if (await isMaintainerTeamMember({ token, org, login: authorLogin })) {
35+
if (await isMaintainerTeamMember({ token: appToken, org, login: authorLogin })) {
3536
console.log(
3637
`PR author @${authorLogin} is an active member of the ${org}/maintainer team; skipping real behavior proof gate.`,
3738
);
@@ -50,6 +51,44 @@ if (evaluation.passed) {
5051
process.exit(0);
5152
}
5253

54+
// Fallback path: look for a ClawSweeper exact-head pass verdict in the PR's
// comments. Prefer the app token and fall back to GITHUB_TOKEN when it is unset.
const token = appToken || process.env.GITHUB_TOKEN;
55+
const repository = process.env.GITHUB_REPOSITORY;
56+
// Skipped entirely when there is no token, repository slug, or PR number.
if (token && repository && pullRequest.number) {
57+
const [owner, repo] = repository.split("/");
58+
const comments = [];
59+
// Fetch at most 10 pages of 100 comments each.
// NOTE(review): comments beyond the first 1000 returned are never inspected —
// confirm that cap is acceptable for long-running PRs.
for (let page = 1; page <= 10; page += 1) {
60+
const url = new URL(
61+
`https://api.github.com/repos/${owner}/${repo}/issues/${pullRequest.number}/comments`,
62+
);
63+
url.searchParams.set("per_page", "100");
64+
url.searchParams.set("page", String(page));
65+
const response = await fetch(url, {
66+
headers: {
67+
Accept: "application/vnd.github+json",
68+
Authorization: `Bearer ${token}`,
69+
"X-GitHub-Api-Version": "2022-11-28",
70+
},
71+
});
72+
// A non-OK response throws here rather than falling through to the
// "proof required" message at the end of the script.
if (!response.ok) {
73+
throw new Error(`Failed to fetch PR comments for proof verdicts: ${response.status}`);
74+
}
75+
const pageComments = await response.json();
76+
comments.push(...pageComments);
77+
// A short page means there are no further pages to request.
if (pageComments.length < 100) {
78+
break;
79+
}
80+
}
81+
82+
const clawSweeperEvaluation = evaluateClawSweeperExactHeadProof({
83+
pullRequest,
84+
comments,
85+
});
86+
// A matching verdict passes the check: log the reason and exit successfully.
if (clawSweeperEvaluation.passed) {
87+
console.log(clawSweeperEvaluation.reason);
88+
process.exit(0);
89+
}
90+
}
91+
5392
const message = `${evaluation.reason} Add after-fix evidence from a real OpenClaw setup in the PR body. Screenshots, recordings, terminal screenshots, console output, redacted runtime logs, linked artifacts, or copied live output count. Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental only. A maintainer can apply proof: override when appropriate.`;
5493
console.error(`::error title=Real behavior proof required::${escapeCommandValue(message)}`);
5594
process.exit(1);

scripts/github/real-behavior-proof-policy.mjs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ export const NEEDS_REAL_BEHAVIOR_PROOF_LABEL = "triage: needs-real-behavior-proo
55
export const MOCK_ONLY_PROOF_LABEL = "triage: mock-only-proof";
66
export const MAINTAINER_TEAM_SLUG = "maintainer";
77

8+
export const CLAWSWEEPER_PROOF_VERDICT_STATUS = "clawsweeper_exact_head_pass";
9+
810
const privilegedAuthorAssociations = new Set(["OWNER", "MEMBER", "COLLABORATOR"]);
911

1012
const requiredProofFields = [
@@ -230,11 +232,47 @@ function result(status, reason, details = {}) {
230232
status,
231233
reason,
232234
applies: ["passed", "missing", "mock_only", "insufficient", "override"].includes(status),
233-
passed: ["passed", "skipped", "override"].includes(status),
235+
passed: ["passed", "skipped", "override", CLAWSWEEPER_PROOF_VERDICT_STATUS].includes(status),
234236
...details,
235237
};
236238
}
237239

240+
/**
 * Reads a `name=value` field out of a marker string.
 *
 * The field name is matched case-insensitively at a word boundary, and the
 * value is the run of characters after `name=` up to the next whitespace or
 * `>`. Returns "" when the field is not present.
 *
 * NOTE(review): only whitespace and `>` terminate the value, so a field
 * written directly against the closing `-->` (no space in between) is captured
 * with the trailing `--` attached — confirm the marker producer always emits
 * whitespace before `-->`.
 */
function extractMarkerField(marker, name) {
241+
const match = marker.match(new RegExp(`\\b${escapeRegex(name)}=([^\\s>]+)`, "i"));
242+
return match?.[1] ?? "";
243+
}
244+
245+
/**
 * Reports whether any comment carries a ClawSweeper pass verdict marker for
 * this pull request at its current head commit.
 *
 * A marker is an HTML comment starting with `clawsweeper-verdict:pass`. It
 * counts only when its `item` field equals the PR number and its `sha` field
 * equals the head SHA (compared in lowercase). The head SHA is read from
 * `pullRequest.head.sha`, falling back to `pullRequest.head_sha`.
 *
 * Returns false when the PR number is missing or the head SHA is not exactly
 * 40 hexadecimal characters.
 *
 * NOTE(review): only `comment.body` is inspected; the comment author is not
 * checked in this function. Confirm callers only pass comments from a trusted
 * source, otherwise any commenter could supply the marker text.
 */
export function hasClawSweeperExactHeadProof({ pullRequest, comments = [] } = {}) {
246+
const pullNumber = String(pullRequest?.number ?? "");
247+
const headSha = String(pullRequest?.head?.sha ?? pullRequest?.head_sha ?? "").toLowerCase();
248+
// Without a PR number and a full 40-hex head SHA there is nothing to match.
if (!pullNumber || !/^[0-9a-f]{40}$/i.test(headSha)) {
249+
return false;
250+
}
251+
252+
for (const comment of comments) {
253+
const body = String(comment?.body ?? "");
254+
// Collect every pass-verdict marker in the comment body.
const markers = body.match(/<!--\s*clawsweeper-verdict:pass\b[\s\S]*?-->/gi) ?? [];
255+
for (const marker of markers) {
256+
const item = extractMarkerField(marker, "item");
257+
const sha = extractMarkerField(marker, "sha").toLowerCase();
258+
// Both the PR number and the exact head SHA must match.
if (item === pullNumber && sha === headSha) {
259+
return true;
260+
}
261+
}
262+
}
263+
return false;
264+
}
265+
266+
/**
 * Wraps `hasClawSweeperExactHeadProof` in a `result(...)` evaluation object.
 *
 * When an exact-head verdict is found, the result has status
 * `CLAWSWEEPER_PROOF_VERDICT_STATUS`; otherwise it has status "insufficient".
 * Each result carries a human-readable reason string.
 */
export function evaluateClawSweeperExactHeadProof({ pullRequest, comments = [] } = {}) {
267+
if (hasClawSweeperExactHeadProof({ pullRequest, comments })) {
268+
return result(
269+
CLAWSWEEPER_PROOF_VERDICT_STATUS,
270+
"ClawSweeper accepted real behavior proof for the exact PR head.",
271+
);
272+
}
273+
return result("insufficient", "No exact-head ClawSweeper proof verdict was found.");
274+
}
275+
238276
export function evaluateRealBehaviorProof({ pullRequest, labels } = {}) {
239277
const currentLabels = labels ?? pullRequest?.labels ?? [];
240278
if (hasProofOverride(currentLabels)) {

test/scripts/barnacle-auto-response.test.ts

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ function barnacleGithub(
135135
maintainerLogins?: string[];
136136
removeLabelNotFound?: string[];
137137
repositoryRoles?: Record<string, string>;
138+
comments?: Array<{ body: string }>;
138139
} = {},
139140
) {
140141
const maintainerLogins = new Set(
@@ -154,8 +155,10 @@ function barnacleGithub(
154155
removeLabel: [] as Array<{ issue_number: number; name: string }>,
155156
update: [] as Array<{ issue_number: number; state?: string }>,
156157
};
158+
const listFiles = async () => files;
159+
const listComments = async () => options.comments ?? [];
157160
const github = {
158-
paginate: async () => files,
161+
paginate: async (fn: unknown) => (fn === listComments ? (options.comments ?? []) : files),
159162
rest: {
160163
issues: {
161164
addLabels: async (params: { issue_number: number; labels: string[] }) => {
@@ -173,6 +176,7 @@ function barnacleGithub(
173176
managedLabelSpecs[params.name as keyof typeof managedLabelSpecs]?.description ?? "",
174177
},
175178
}),
179+
listComments,
176180
lock: async (params: { issue_number: number; lock_reason?: string }) => {
177181
calls.lock.push(params);
178182
},
@@ -190,7 +194,7 @@ function barnacleGithub(
190194
updateLabel: async () => undefined,
191195
},
192196
pulls: {
193-
listFiles: async () => files,
197+
listFiles,
194198
},
195199
repos: {
196200
getCollaboratorPermissionLevel: async ({ username }: { username: string }) => {
@@ -784,6 +788,36 @@ describe("barnacle-auto-response", () => {
784788
},
785789
);
786790

791+
// On a "synchronize" event, a PR that already has PROOF_SUFFICIENT_LABEL and a
// pass verdict comment for its exact head SHA must not have that label removed.
it("preserves sufficient proof on synchronize when ClawSweeper passed the exact head", async () => {
792+
const headSha = "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f";
793+
// The mocked comment list holds one pass marker for item 123 at headSha.
const { calls, github } = barnacleGithub([file("src/gateway/server.ts")], {
794+
comments: [
795+
{
796+
body: `<!-- clawsweeper-verdict:pass item=123 sha=${headSha} confidence=high -->`,
797+
},
798+
],
799+
});
800+
801+
await runBarnacleAutoResponse({
802+
github,
803+
context: barnacleContext(
804+
{
805+
body: blankTemplateBody,
806+
head: { sha: headSha },
807+
},
808+
[PROOF_SUFFICIENT_LABEL],
809+
{ action: "synchronize" },
810+
),
811+
core: {
812+
info: () => undefined,
813+
},
814+
});
815+
816+
// No removeLabel call may target the sufficient-proof label.
expect(calls.removeLabel).not.toContainEqual(
817+
expect.objectContaining({ name: PROOF_SUFFICIENT_LABEL }),
818+
);
819+
});
820+
787821
it("preserves ClawSweeper's sufficient proof label on ordinary label events", async () => {
788822
const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]);
789823

test/scripts/real-behavior-proof-policy.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ import {
44
NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
55
PROOF_OVERRIDE_LABEL,
66
PROOF_SUPPLIED_LABEL,
7+
evaluateClawSweeperExactHeadProof,
78
evaluateRealBehaviorProof,
9+
hasClawSweeperExactHeadProof,
810
isMaintainerTeamMember,
911
labelsForRealBehaviorProof,
1012
} from "../../scripts/github/real-behavior-proof-policy.mjs";
@@ -174,6 +176,35 @@ describe("real-behavior-proof-policy", () => {
174176
}).status,
175177
).toBe("override");
176178
});
179+
180+
// A pass marker is accepted when its item and sha match the PR, and rejected
// when the same comments are checked against a different head SHA.
it("accepts ClawSweeper pass verdict comments only for the exact PR head", () => {
181+
const pullRequest = {
182+
number: 83581,
183+
head: {
184+
sha: "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f",
185+
},
186+
};
187+
// The marker sits on the second line of the comment body, after visible text.
const comments = [
188+
{
189+
body: [
190+
"Codex review: passed.",
191+
"<!-- clawsweeper-verdict:pass item=83581 sha=06ee95df6608d29a395c52ba8ab53fdd93a9dc4f confidence=high -->",
192+
].join("\n"),
193+
},
194+
];
195+
196+
expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true);
197+
expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true);
198+
// Same comments, different head SHA: the verdict no longer applies.
expect(
199+
hasClawSweeperExactHeadProof({
200+
pullRequest: {
201+
...pullRequest,
202+
head: { sha: "d0215b2d67a45a783277fc7d2949ac4a30f63ec6" },
203+
},
204+
comments,
205+
}),
206+
).toBe(false);
207+
});
177208
});
178209

179210
describe("isMaintainerTeamMember", () => {

0 commit comments

Comments
 (0)