Skip to content

Commit 46030f5

Browse files
TurboTheTurtle, clawsweeper[bot], Takhoffman
authored
Skip empty sherpa structured transcripts (#84667)
Summary: - The PR changes sherpa-onnx CLI audio parsing so structured JSON with an empty `text` field becomes no transcript, while preserving non-empty JSON extraction and adding direct plus auto-detect regression coverage. - Reproducibility: yes. Source inspection on current main shows empty sherpa structured JSON misses extraction ... Discord voice can skip empty transcripts; I did not run a live Discord reproduction in this read-only review. Automerge notes: - PR branch already contained follow-up commit before automerge: Fix stale CI guardrails for sherpa transcript PR - PR branch already contained follow-up commit before automerge: Skip empty sherpa structured transcripts Validation: - ClawSweeper review passed for head ac03171. - Required merge gates passed before the squash merge. Prepared head SHA: ac03171 Review: #84667 (comment) Co-authored-by: Andy Ye <35905412+TurboTheTurtle@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
1 parent c031274 commit 46030f5

5 files changed

Lines changed: 107 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
1010

1111
### Fixes
1212

13+
- Media/audio: skip empty structured sherpa-onnx transcripts instead of treating the raw JSON payload as spoken text. (#84667) Thanks @TurboTheTurtle.
1314
- CLI/perf: keep `setup --help`, `onboard --help`, and `configure --help` out of the full wizard runtime while preserving the existing help output. (#84488) Thanks @frankekn.
1415

1516
## 2026.5.20

src/media-understanding/apply.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,37 @@ describe("applyMediaUnderstanding", () => {
771771
expectCliRunOptions(options);
772772
});
773773

774+
it("skips auto-detected sherpa audio when structured output has empty text", async () => {
775+
clearMediaUnderstandingBinaryCacheForTests();
776+
const binDir = await createTempMediaDir();
777+
const modelDir = await createTempMediaDir();
778+
await createMockExecutable(binDir, "sherpa-onnx-offline");
779+
await fs.writeFile(path.join(modelDir, "tokens.txt"), "a");
780+
await fs.writeFile(path.join(modelDir, "encoder.onnx"), "a");
781+
await fs.writeFile(path.join(modelDir, "decoder.onnx"), "a");
782+
await fs.writeFile(path.join(modelDir, "joiner.onnx"), "a");
783+
784+
const emptySherpaJson =
785+
'{"lang":"","emotion":"","event":"","text":"","timestamps":[],"durations":[],"tokens":[],"ys_log_probs":[],"words":[]}';
786+
const { ctx, cfg } = await setupAudioAutoDetectCase(emptySherpaJson);
787+
788+
await withMediaAutoDetectEnv(
789+
{
790+
PATH: binDir,
791+
SHERPA_ONNX_MODEL_DIR: modelDir,
792+
},
793+
async () => {
794+
const result = await applyMediaUnderstanding({ ctx, cfg });
795+
expect(result.appliedAudio).toBe(false);
796+
},
797+
);
798+
799+
expect(ctx.Transcript).toBeUndefined();
800+
expect(ctx.Body).toBe("<media:audio>");
801+
const [command] = getRunExecCall();
802+
expect(command).toBe("sherpa-onnx-offline");
803+
});
804+
774805
it("auto-detects whisper-cli when sherpa is unavailable", async () => {
775806
clearMediaUnderstandingBinaryCacheForTests();
776807
const binDir = await createTempMediaDir();

src/media-understanding/runner.cli-audio.test.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,4 +81,55 @@ describe("media-understanding CLI audio entry", () => {
8181
maxBuffer: CLI_OUTPUT_MAX_BUFFER,
8282
});
8383
});
84+
85+
it("treats sherpa structured JSON with empty text as empty output", async () => {
86+
runExecMock.mockResolvedValueOnce({
87+
stdout:
88+
'{"lang":"","emotion":"","event":"","text":"","timestamps":[],"durations":[],"tokens":[],"ys_log_probs":[],"words":[]}',
89+
stderr: "",
90+
});
91+
92+
await withAudioFixture("openclaw-cli-audio-empty-sherpa", async ({ ctx, cache }) => {
93+
const result = await runCliEntry({
94+
capability: "audio",
95+
entry: {
96+
type: "cli",
97+
command: "sherpa-onnx-offline",
98+
args: ["{{MediaPath}}"],
99+
},
100+
cfg: { tools: { media: { audio: {} } } } as OpenClawConfig,
101+
ctx,
102+
attachmentIndex: 0,
103+
cache,
104+
config: {} as never,
105+
});
106+
107+
expect(result).toBeNull();
108+
});
109+
});
110+
111+
it("extracts sherpa text from the final structured output line", async () => {
112+
runExecMock.mockResolvedValueOnce({
113+
stdout: 'loading model\n{"text":"sherpa transcript","tokens":["sherpa","transcript"]}\n',
114+
stderr: "",
115+
});
116+
117+
await withAudioFixture("openclaw-cli-audio-sherpa-json", async ({ ctx, cache }) => {
118+
const result = await runCliEntry({
119+
capability: "audio",
120+
entry: {
121+
type: "cli",
122+
command: "sherpa-onnx-offline",
123+
args: ["{{MediaPath}}"],
124+
},
125+
cfg: { tools: { media: { audio: {} } } } as OpenClawConfig,
126+
ctx,
127+
attachmentIndex: 0,
128+
cache,
129+
config: {} as never,
130+
});
131+
132+
expect(result?.text).toBe("sherpa transcript");
133+
});
134+
});
84135
});

src/media-understanding/runner.entries.ts

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,16 @@ function trimOutput(text: string, maxChars?: number): string {
9696
return trimmed.slice(0, maxChars).trim();
9797
}
9898

99-
function extractSherpaOnnxText(raw: string): string | null {
100-
const tryParse = (value: string): string | null => {
99+
function extractSherpaOnnxText(raw: string): { matched: boolean; text: string } {
100+
const noMatch = { matched: false, text: "" };
101+
const tryParse = (value: string): { matched: boolean; text: string } => {
101102
const trimmed = value.trim();
102103
if (!trimmed) {
103-
return null;
104+
return noMatch;
104105
}
105106
const head = trimmed[0];
106107
if (head !== "{" && head !== '"') {
107-
return null;
108+
return noMatch;
108109
}
109110
try {
110111
const parsed = JSON.parse(trimmed) as unknown;
@@ -113,16 +114,16 @@ function extractSherpaOnnxText(raw: string): string | null {
113114
}
114115
if (parsed && typeof parsed === "object") {
115116
const text = (parsed as { text?: unknown }).text;
116-
if (typeof text === "string" && text.trim()) {
117-
return text.trim();
117+
if (typeof text === "string") {
118+
return { matched: true, text: text.trim() };
118119
}
119120
}
120121
} catch {}
121-
return null;
122+
return noMatch;
122123
};
123124

124125
const direct = tryParse(raw);
125-
if (direct) {
126+
if (direct.matched) {
126127
return direct;
127128
}
128129

@@ -132,11 +133,11 @@ function extractSherpaOnnxText(raw: string): string | null {
132133
.filter(Boolean);
133134
for (let i = lines.length - 1; i >= 0; i -= 1) {
134135
const parsed = tryParse(lines[i] ?? "");
135-
if (parsed) {
136+
if (parsed.matched) {
136137
return parsed;
137138
}
138139
}
139-
return null;
140+
return noMatch;
140141
}
141142

142143
function commandBase(command: string): string {
@@ -230,8 +231,8 @@ async function resolveCliOutput(params: {
230231

231232
if (commandId === "sherpa-onnx-offline") {
232233
const response = extractSherpaOnnxText(params.stdout);
233-
if (response) {
234-
return response;
234+
if (response.matched) {
235+
return response.text;
235236
}
236237
}
237238

test/scripts/root-package-overrides.test.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,26 @@ function readPnpmWorkspaceConfig(): PnpmWorkspaceConfig {
2222
return YAML.parse(fs.readFileSync(workspacePath, "utf8")) as PnpmWorkspaceConfig;
2323
}
2424

25+
function readPackageManifest(packagePath: string): RootPackageManifest {
26+
return JSON.parse(fs.readFileSync(packagePath, "utf8")) as RootPackageManifest;
27+
}
28+
2529
describe("root package override guardrails", () => {
2630
it("keeps Bedrock runtime ownership in the Amazon provider plugin", () => {
2731
const manifest = readRootManifest();
2832
const pnpmWorkspace = readPnpmWorkspaceConfig();
2933
const packageName = "@aws-sdk/client-bedrock-runtime";
34+
const bedrockManifest = readPackageManifest(
35+
path.resolve(process.cwd(), "extensions", "amazon-bedrock", "package.json"),
36+
);
37+
const bedrockRuntimeDependency = bedrockManifest.dependencies?.[packageName];
3038
const npmOverride = manifest.overrides?.[packageName];
31-
const pnpmOverride = pnpmWorkspace.overrides?.["@aws-sdk/client-bedrock-runtime"];
39+
const pnpmOverride = pnpmWorkspace.overrides?.[packageName];
3240

41+
expect(bedrockRuntimeDependency).toBeDefined();
3342
expect(manifest.dependencies).not.toHaveProperty(packageName);
3443
expect(npmOverride).toBeUndefined();
35-
expect(pnpmOverride).toBe("3.1048.0");
44+
expect(pnpmOverride).toBe(bedrockRuntimeDependency);
3645
});
3746

3847
it("pins the node-domexception alias exactly in npm and pnpm overrides", () => {

0 commit comments

Comments
 (0)