Skip to content

Commit 4757757

Browse files
Repair shell command explainer automerge blockers
1 parent 1024901 commit 4757757

2 files changed

Lines changed: 238 additions & 5 deletions

File tree

src/infra/command-explainer/extract.test.ts

Lines changed: 143 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { afterEach, describe, expect, it, vi } from "vitest";
2-
import type { Parser } from "web-tree-sitter";
2+
import type { Node as TreeSitterNode, Parser, Tree } from "web-tree-sitter";
33
import { explainShellCommand } from "./extract.js";
44
import {
55
getBashParserForCommandExplanation,
@@ -15,6 +15,119 @@ function setParserLoaderForTest(loader: () => Promise<Parser>): void {
1515
setBashParserLoaderForCommandExplanationForTest(loader);
1616
}
1717

18+
type FakeNodeInit = {
19+
type: string;
20+
text: string;
21+
startIndex: number;
22+
endIndex: number;
23+
startPosition: TreeSitterNode["startPosition"];
24+
endPosition: TreeSitterNode["endPosition"];
25+
namedChildren?: TreeSitterNode[];
26+
fieldChildren?: Record<string, TreeSitterNode>;
27+
hasError?: boolean;
28+
};
29+
30+
function fakeNode(init: FakeNodeInit): TreeSitterNode {
31+
const named = init.namedChildren ?? [];
32+
const children = named;
33+
return {
34+
type: init.type,
35+
text: init.text,
36+
startIndex: init.startIndex,
37+
endIndex: init.endIndex,
38+
startPosition: init.startPosition,
39+
endPosition: init.endPosition,
40+
childCount: children.length,
41+
namedChildCount: named.length,
42+
hasError: init.hasError ?? false,
43+
child(index: number): TreeSitterNode | null {
44+
return children[index] ?? null;
45+
},
46+
namedChild(index: number): TreeSitterNode | null {
47+
return named[index] ?? null;
48+
},
49+
childForFieldName(name: string): TreeSitterNode | null {
50+
return init.fieldChildren?.[name] ?? null;
51+
},
52+
} as unknown as TreeSitterNode;
53+
}
54+
55+
function createByteIndexedUnicodeCommandTree(source: string): Tree {
56+
const firstCommand = "echo café";
57+
const separator = " && ";
58+
const secondCommand = "echo ok";
59+
const firstCommandEnd = Buffer.byteLength(firstCommand, "utf8");
60+
const secondCommandStart = Buffer.byteLength(firstCommand + separator, "utf8");
61+
const sourceEnd = Buffer.byteLength(source, "utf8");
62+
63+
const firstName = fakeNode({
64+
type: "command_name",
65+
text: "echo",
66+
startIndex: 0,
67+
endIndex: 4,
68+
startPosition: { row: 0, column: 0 },
69+
endPosition: { row: 0, column: 4 },
70+
});
71+
const firstArgument = fakeNode({
72+
type: "word",
73+
text: "café",
74+
startIndex: 5,
75+
endIndex: firstCommandEnd,
76+
startPosition: { row: 0, column: 5 },
77+
endPosition: { row: 0, column: firstCommandEnd },
78+
});
79+
const first = fakeNode({
80+
type: "command",
81+
text: firstCommand,
82+
startIndex: 0,
83+
endIndex: firstCommandEnd,
84+
startPosition: { row: 0, column: 0 },
85+
endPosition: { row: 0, column: firstCommandEnd },
86+
namedChildren: [firstName, firstArgument],
87+
fieldChildren: { name: firstName },
88+
});
89+
90+
const secondName = fakeNode({
91+
type: "command_name",
92+
text: "echo",
93+
startIndex: secondCommandStart,
94+
endIndex: secondCommandStart + 4,
95+
startPosition: { row: 0, column: secondCommandStart },
96+
endPosition: { row: 0, column: secondCommandStart + 4 },
97+
});
98+
const secondArgument = fakeNode({
99+
type: "word",
100+
text: "ok",
101+
startIndex: secondCommandStart + 5,
102+
endIndex: sourceEnd,
103+
startPosition: { row: 0, column: secondCommandStart + 5 },
104+
endPosition: { row: 0, column: sourceEnd },
105+
});
106+
const second = fakeNode({
107+
type: "command",
108+
text: secondCommand,
109+
startIndex: secondCommandStart,
110+
endIndex: sourceEnd,
111+
startPosition: { row: 0, column: secondCommandStart },
112+
endPosition: { row: 0, column: sourceEnd },
113+
namedChildren: [secondName, secondArgument],
114+
fieldChildren: { name: secondName },
115+
});
116+
117+
return {
118+
rootNode: fakeNode({
119+
type: "program",
120+
text: source,
121+
startIndex: 0,
122+
endIndex: sourceEnd,
123+
startPosition: { row: 0, column: 0 },
124+
endPosition: { row: 0, column: sourceEnd },
125+
namedChildren: [first, second],
126+
}),
127+
delete: vi.fn(),
128+
} as unknown as Tree;
129+
}
130+
18131
afterEach(() => {
19132
if (parserLoaderOverridden) {
20133
setBashParserLoaderForCommandExplanationForTest();
@@ -94,6 +207,34 @@ describe("command explainer tree-sitter runtime", () => {
94207
expect(reset).toHaveBeenCalledOnce();
95208
});
96209

210+
it("maps parser byte offsets to JavaScript string spans for Unicode source", async () => {
211+
const source = "echo café && echo ok";
212+
const parser = {
213+
parse: vi.fn(() => createByteIndexedUnicodeCommandTree(source)),
214+
reset: vi.fn(),
215+
};
216+
setParserLoaderForTest(async () => parser as unknown as Parser);
217+
218+
const explanation = await explainShellCommand(source);
219+
220+
expect(explanation.topLevelCommands).toEqual([
221+
expect.objectContaining({
222+
executable: "echo",
223+
argv: ["echo", "café"],
224+
span: expect.objectContaining({ startIndex: 0, endIndex: 9 }),
225+
}),
226+
expect.objectContaining({
227+
executable: "echo",
228+
argv: ["echo", "ok"],
229+
span: expect.objectContaining({ startIndex: 13, endIndex: 20 }),
230+
}),
231+
]);
232+
for (const command of explanation.topLevelCommands) {
233+
expect(source.slice(command.span.startIndex, command.span.endIndex)).toBe(command.text);
234+
expect(command.span.endPosition.column).toBe(command.span.endIndex);
235+
}
236+
});
237+
97238
it("explains a pipeline with python inline eval", async () => {
98239
const explanation = await explainShellCommand('ls | grep "stuff" | python -c \'print("hi")\'');
99240

@@ -566,7 +707,7 @@ describe("command explainer tree-sitter runtime", () => {
566707
'find . -name "*.ts" -exec grep -n TODO {} +',
567708
'bash -lc "echo hi | wc -c"',
568709
];
569-
const iterations = 10;
710+
const iterations = 3;
570711
for (let index = 0; index < iterations; index += 1) {
571712
for (const command of corpus) {
572713
const explanation = await explainShellCommand(command);

src/infra/command-explainer/extract.ts

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,92 @@ function advancePosition(
149149
return { row, column };
150150
}
151151

152+
function utf8ByteLengthForCodePoint(codePoint: number): number {
153+
if (codePoint <= 0x7f) {
154+
return 1;
155+
}
156+
if (codePoint <= 0x7ff) {
157+
return 2;
158+
}
159+
if (codePoint <= 0xffff) {
160+
return 3;
161+
}
162+
return 4;
163+
}
164+
165+
function utf8ByteLength(text: string): number {
166+
let length = 0;
167+
for (let index = 0; index < text.length; index += 1) {
168+
const codePoint = text.codePointAt(index);
169+
if (codePoint === undefined) {
170+
continue;
171+
}
172+
length += utf8ByteLengthForCodePoint(codePoint);
173+
if (codePoint > 0xffff) {
174+
index += 1;
175+
}
176+
}
177+
return length;
178+
}
179+
180+
function utf8ByteOffsetToStringIndex(text: string, byteOffset: number): number {
181+
if (byteOffset <= 0) {
182+
return 0;
183+
}
184+
let currentByteOffset = 0;
185+
for (let index = 0; index < text.length; index += 1) {
186+
const codePoint = text.codePointAt(index);
187+
if (codePoint === undefined) {
188+
return text.length;
189+
}
190+
const codePointLength = utf8ByteLengthForCodePoint(codePoint);
191+
if (currentByteOffset + codePointLength > byteOffset) {
192+
return index;
193+
}
194+
currentByteOffset += codePointLength;
195+
if (currentByteOffset === byteOffset) {
196+
return codePoint > 0xffff ? index + 2 : index + 1;
197+
}
198+
if (codePoint > 0xffff) {
199+
index += 1;
200+
}
201+
}
202+
return text.length;
203+
}
204+
205+
function parserOffsetToStringIndex(
206+
source: string,
207+
rootNode: TreeSitterNode,
208+
): (offset: number) => number {
209+
const utf8Length = utf8ByteLength(source);
210+
if (utf8Length !== source.length && rootNode.endIndex === utf8Length) {
211+
return (offset) => utf8ByteOffsetToStringIndex(source, offset);
212+
}
213+
return (offset) => offset;
214+
}
215+
216+
function spanBaseForParserSource(
217+
source: string,
218+
rootNode: TreeSitterNode,
219+
base: SpanBase,
220+
): SpanBase {
221+
const offsetToStringIndex = parserOffsetToStringIndex(source, rootNode);
222+
return {
223+
startIndex: base.startIndex,
224+
startPosition: base.startPosition,
225+
mapOffset(offset) {
226+
const sourceIndex = offsetToStringIndex(offset);
227+
if (base.mapOffset) {
228+
return base.mapOffset(sourceIndex);
229+
}
230+
return {
231+
index: base.startIndex + sourceIndex,
232+
position: advancePosition(base.startPosition, source.slice(0, sourceIndex)),
233+
};
234+
},
235+
};
236+
}
237+
152238
function valuePrefixLength(node: TreeSitterNode): number {
153239
if (node.type === "string" || node.type === "raw_string") {
154240
return 1;
@@ -1051,18 +1137,23 @@ async function walk(
10511137
);
10521138
if (wrapperPayload && state.wrapperPayloadDepth < MAX_WRAPPER_PAYLOAD_DEPTH) {
10531139
const wrapperTree = await parseBashForCommandExplanation(wrapperPayload.command);
1140+
const wrapperSpanBase = spanBaseForParserSource(
1141+
wrapperPayload.command,
1142+
wrapperTree.rootNode,
1143+
wrapperPayload.spanBase,
1144+
);
10541145
try {
10551146
if (wrapperTree.rootNode.hasError) {
10561147
output.hasParseError = true;
10571148
output.risks.push({
10581149
kind: "syntax-error",
10591150
text: wrapperPayload.command,
1060-
span: spanFromNode(wrapperTree.rootNode, wrapperPayload.spanBase),
1151+
span: spanFromNode(wrapperTree.rootNode, wrapperSpanBase),
10611152
});
10621153
}
10631154
await walk(wrapperTree.rootNode, output, "wrapper-payload", {
10641155
wrapperPayloadDepth: state.wrapperPayloadDepth + 1,
1065-
spanBase: wrapperPayload.spanBase,
1156+
spanBase: wrapperSpanBase,
10661157
});
10671158
} finally {
10681159
wrapperTree.delete();
@@ -1079,6 +1170,7 @@ async function walk(
10791170
export async function explainShellCommand(source: string): Promise<CommandExplanation> {
10801171
const tree = await parseBashForCommandExplanation(source);
10811172
try {
1173+
const spanBase = spanBaseForParserSource(source, tree.rootNode, ROOT_SPAN_BASE);
10821174
const output: MutableExplanation = {
10831175
shapes: new Set(),
10841176
commands: [],
@@ -1087,7 +1179,7 @@ export async function explainShellCommand(source: string): Promise<CommandExplan
10871179
};
10881180
await walk(tree.rootNode, output, "top-level", {
10891181
wrapperPayloadDepth: 0,
1090-
spanBase: ROOT_SPAN_BASE,
1182+
spanBase,
10911183
});
10921184
const topLevelCommands = output.commands.filter((command) => command.context === "top-level");
10931185
return {

0 commit comments

Comments
 (0)