Skip to content

Commit d3e3fa5

Browse files
neeravmakwana authored and clawsweeper[bot] committed
fix(minimax): stream music generation responses
1 parent 1ac8c71 commit d3e3fa5

2 files changed

Lines changed: 161 additions & 25 deletions

File tree

extensions/minimax/music-generation-provider.test.ts

Lines changed: 78 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,11 @@ beforeAll(async () => {
2828
installMinimaxProviderHttpMockCleanup();
2929

3030
function mockMusicGenerationResponse(json: Record<string, unknown>): void {
31+
const response = new Response(JSON.stringify(json), {
32+
headers: { "content-type": "application/json" },
33+
});
3134
postJsonRequestMock.mockResolvedValue({
32-
response: {
33-
json: async () => json,
34-
},
35+
response,
3536
release: vi.fn(async () => {}),
3637
});
3738
fetchWithTimeoutMock.mockResolvedValue({
@@ -53,12 +54,22 @@ describe("minimax music generation provider", () => {
5354
expectExplicitMusicGenerationCapabilities(buildMinimaxMusicGenerationProvider());
5455
});
5556

56-
it("creates music and downloads the generated track", async () => {
57-
mockMusicGenerationResponse({
58-
task_id: "task-123",
59-
audio_url: "https://example.com/out.mp3",
60-
lyrics: "our city wakes",
61-
base_resp: { status_code: 0 },
57+
it("streams generated music chunks from MiniMax", async () => {
58+
const chunkA = Buffer.from("ID3\x04\x00mp3-a");
59+
const chunkB = Buffer.from("mp3-b");
60+
postJsonRequestMock.mockResolvedValue({
61+
response: new Response(
62+
[
63+
`data: ${JSON.stringify({ data: { status: 1, audio: chunkA.toString("hex") }, base_resp: { status_code: 0 } })}`,
64+
`data: ${JSON.stringify({ data: { status: 1, audio: chunkB.toString("hex") }, base_resp: { status_code: 0 } })}`,
65+
`data: ${JSON.stringify({ data: { status: 2, audio: Buffer.concat([chunkA, chunkB]).toString("hex") }, base_resp: { status_code: 0 } })}`,
66+
"",
67+
].join("\n\n"),
68+
{
69+
headers: { "content-type": "text/event-stream" },
70+
},
71+
),
72+
release: vi.fn(async () => {}),
6273
});
6374

6475
const provider = buildMinimaxMusicGenerationProvider();
@@ -79,22 +90,76 @@ describe("minimax music generation provider", () => {
7990
expect(body.prompt).not.toContain("Target duration");
8091
expect(body).not.toHaveProperty("duration");
8192
expect(body.lyrics).toBe("our city wakes");
82-
expect(body.output_format).toBe("url");
93+
expect(body.stream).toBe(true);
94+
expect(body.output_format).toBe("hex");
8395
expect(body.audio_setting).toEqual({
8496
sample_rate: 44100,
8597
bitrate: 256000,
8698
format: "mp3",
8799
});
100+
expect(request.timeoutMs).toBe(300000);
88101
expect(request?.headers).toBeInstanceOf(Headers);
89102
const headers = request?.headers as Headers | undefined;
90103
expect(headers?.get("content-type")).toBe("application/json");
91104
expect(result.tracks).toHaveLength(1);
92-
expect(result.lyrics).toEqual(["our city wakes"]);
93-
expect(result.metadata?.taskId).toBe("task-123");
94-
expect(result.metadata?.audioUrl).toBe("https://example.com/out.mp3");
105+
expect(result.tracks[0]?.buffer).toEqual(Buffer.concat([chunkA, chunkB]));
106+
expect(result.tracks[0]?.mimeType).toBe("audio/mpeg");
95107
expect(result.metadata).not.toHaveProperty("requestedDurationSeconds");
96108
});
97109

110+
it("reports streaming music task failures", async () => {
  // Two SSE frames: a healthy one, then one whose base_resp carries a provider error.
  const okFrame = JSON.stringify({
    base_resp: { status_code: 0 },
  });
  const failedFrame = JSON.stringify({
    base_resp: { status_code: 2013, status_msg: "render rejected" },
  });
  const sseBody = [`data: ${okFrame}`, `data: ${failedFrame}`].join("\n\n");

  postJsonRequestMock.mockResolvedValue({
    response: new Response(sseBody, {
      headers: { "content-type": "text/event-stream" },
    }),
    release: vi.fn(async () => {}),
  });

  const musicProvider = buildMinimaxMusicGenerationProvider();
  const pendingResult = musicProvider.generateMusic({
    provider: "minimax",
    model: "music-2.6",
    prompt: "upbeat dance-pop with female vocals",
    cfg: {},
  });

  // The error frame must abort generation with the provider's code and message.
  await expect(pendingResult).rejects.toThrow(
    "MiniMax music generation failed (2013): render rejected",
  );
});
136+
137+
it("keeps terminal streaming audio when no progressive chunks were sent", async () => {
  // A stream consisting of a single terminal (status 2) frame that carries all of the audio.
  const terminalAudio = Buffer.from("terminal-mp3");
  const terminalFrame = JSON.stringify({
    data: { status: 2, audio: terminalAudio.toString("hex") },
    base_resp: { status_code: 0 },
  });

  postJsonRequestMock.mockResolvedValue({
    response: new Response(`data: ${terminalFrame}`, {
      headers: { "content-type": "text/event-stream" },
    }),
    release: vi.fn(async () => {}),
  });

  const musicProvider = buildMinimaxMusicGenerationProvider();
  const generated = await musicProvider.generateMusic({
    provider: "minimax",
    model: "music-2.6",
    prompt: "upbeat dance-pop with female vocals",
    cfg: {},
  });

  // With no earlier chunks to prefer, the terminal payload is the track.
  const [firstTrack] = generated.tracks;
  expect(firstTrack?.buffer).toEqual(terminalAudio);
});
162+
98163
it("downloads tracks when url output is returned in data.audio", async () => {
99164
mockMusicGenerationResponse({
100165
data: {

extensions/minimax/music-generation-provider.ts

Lines changed: 83 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,18 @@ import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
77
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
88
import {
99
assertOkOrThrowHttpError,
10+
createProviderOperationDeadline,
1011
fetchProviderDownloadResponse,
1112
postJsonRequest,
13+
resolveProviderOperationTimeoutMs,
1214
resolveProviderHttpRequestConfig,
1315
} from "openclaw/plugin-sdk/provider-http";
1416
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
1517

1618
// MiniMax API origin. NOTE(review): presumably the fallback when no base URL is
// configured — confirm in resolveMinimaxMusicBaseUrl.
const DEFAULT_MINIMAX_MUSIC_BASE_URL = "https://api.minimax.io";
1719
// Music model identifier. NOTE(review): presumably used when the request names no
// model — confirm in resolveMinimaxMusicModel.
const DEFAULT_MINIMAX_MUSIC_MODEL = "music-2.6";
1820
// 120 s: default timeout handed to the follow-up download of a generated track by URL.
const DEFAULT_TIMEOUT_MS = 120_000;
21+
// 300 s: budget for the whole generation operation. It seeds the operation deadline
// when the caller supplies no timeout, and is the default timeout for the
// generation POST itself.
const DEFAULT_OPERATION_TIMEOUT_MS = 300_000;
1922

2023
type MinimaxBaseResp = {
2124
status_code?: number;
@@ -35,6 +38,14 @@ type MinimaxMusicCreateResponse = {
3538
base_resp?: MinimaxBaseResp;
3639
};
3740

41+
/** One JSON frame taken from a `data:` line of the streamed music generation response. */
type MinimaxMusicStreamFrame = {
  /** Provider status envelope attached to the frame. */
  base_resp?: MinimaxBaseResp;
  /** Audio payload section; may be absent from a frame. */
  data?: {
    /** Progress marker; may arrive as a number or a string. */
    status?: string | number;
    /** Encoded audio carried by this frame. */
    audio?: string;
  };
};
48+
3849
function resolveMinimaxMusicBaseUrl(
3950
cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"],
4051
providerId: string,
@@ -105,6 +116,48 @@ async function downloadTrackFromUrl(params: {
105116
};
106117
}
107118

119+
/**
 * Builds a single audio track from the body of a MiniMax music generation response.
 *
 * Two body shapes are handled:
 * - `audio/*` content type: the body bytes are returned unchanged and the file
 *   extension is derived from the content type (falling back to `mp3`).
 * - anything else: the body is read as server-sent events. Every `data:` line is
 *   parsed as a JSON frame, checked with `assertMinimaxBaseResp`, and its audio
 *   payload is decoded and appended to the track.
 *
 * The body is read to completion before any frame is parsed.
 *
 * @param response - HTTP response whose body has not been consumed yet.
 * @returns The assembled track; event-stream bodies are always labelled `audio/mpeg`.
 * @throws Error when a `data:` line is not a JSON object, when
 *   `assertMinimaxBaseResp` rejects a frame, or when no audio bytes were decoded.
 */
async function readStreamingTrack(response: Response): Promise<GeneratedMusicAsset> {
  const contentType = normalizeOptionalString(response.headers.get("content-type")) ?? "";
  if (contentType.toLowerCase().startsWith("audio/")) {
    const ext = extensionForMime(contentType)?.replace(/^\./u, "") || "mp3";
    return {
      buffer: Buffer.from(await response.arrayBuffer()),
      mimeType: contentType,
      fileName: `track-1.${ext}`,
    };
  }

  const chunks: Buffer[] = [];
  const text = await response.text();
  // The event-stream format allows CRLF, LF, or a lone CR as the line terminator.
  for (const rawLine of text.split(/\r\n|\r|\n/u)) {
    const line = rawLine.trim();
    if (!line.startsWith("data:")) {
      // Blank separators, comments, and other SSE fields carry no frame.
      continue;
    }
    const json = line.slice("data:".length).trim();
    if (!json || json === "[DONE]") {
      continue;
    }

    // Fail with provider context instead of a bare SyntaxError/TypeError when the
    // payload is not a JSON object.
    let parsed: unknown;
    try {
      parsed = JSON.parse(json);
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`MiniMax music generation returned a malformed stream frame: ${reason}`);
    }
    if (typeof parsed !== "object" || parsed === null) {
      throw new Error(
        "MiniMax music generation returned a malformed stream frame: expected a JSON object",
      );
    }
    const frame = parsed as MinimaxMusicStreamFrame;

    assertMinimaxBaseResp(frame.base_resp, "MiniMax music generation failed");
    const audio = normalizeOptionalString(frame.data?.audio);
    if (!audio) {
      continue;
    }
    // A terminal (status 2) frame repeats the audio already delivered progressively,
    // so it is used only when no earlier chunk was collected.
    const isTerminalFrame = String(frame.data?.status ?? "") === "2";
    if (isTerminalFrame && chunks.length > 0) {
      continue;
    }
    chunks.push(decodePossibleBinary(audio));
  }

  const buffer = Buffer.concat(chunks);
  if (buffer.byteLength === 0) {
    throw new Error("MiniMax music generation response missing audio output");
  }
  return {
    buffer,
    mimeType: "audio/mpeg",
    fileName: "track-1.mp3",
  };
}
160+
108161
function resolveMinimaxMusicModel(model: string | undefined): string {
109162
const trimmed = normalizeOptionalString(model);
110163
if (!trimmed) {
@@ -158,6 +211,10 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
158211
}
159212

160213
const fetchFn = fetch;
214+
const deadline = createProviderOperationDeadline({
215+
timeoutMs: req.timeoutMs ?? DEFAULT_OPERATION_TIMEOUT_MS,
216+
label: "MiniMax music generation",
217+
});
161218
const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
162219
resolveProviderHttpRequestConfig({
163220
baseUrl: resolveMinimaxMusicBaseUrl(req.cfg, providerId),
@@ -180,7 +237,8 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
180237
prompt: req.prompt.trim(),
181238
...(req.instrumental === true ? { is_instrumental: true } : {}),
182239
...(lyrics ? { lyrics } : req.instrumental === true ? {} : { lyrics_optimizer: true }),
183-
output_format: "url",
240+
stream: true,
241+
output_format: "hex",
184242
audio_setting: {
185243
sample_rate: 44_100,
186244
bitrate: 256_000,
@@ -192,7 +250,10 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
192250
url: `${baseUrl}/v1/music_generation`,
193251
headers: jsonHeaders,
194252
body,
195-
timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
253+
timeoutMs: resolveProviderOperationTimeoutMs({
254+
deadline,
255+
defaultTimeoutMs: DEFAULT_OPERATION_TIMEOUT_MS,
256+
}),
196257
fetchFn,
197258
pinDns: false,
198259
allowPrivateNetwork,
@@ -201,22 +262,32 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
201262

202263
try {
203264
await assertOkOrThrowHttpError(res, "MiniMax music generation failed");
204-
const payload = (await res.json()) as MinimaxMusicCreateResponse;
205-
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
265+
const contentType = normalizeOptionalString(res.headers.get("content-type")) ?? "";
266+
const lowerContentType = contentType.toLowerCase();
267+
const payload =
268+
lowerContentType.includes("text/event-stream") || lowerContentType.startsWith("audio/")
269+
? null
270+
: ((await res.clone().json()) as MinimaxMusicCreateResponse);
271+
if (payload) {
272+
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
273+
}
206274

207275
const audioCandidate =
208-
normalizeOptionalString(payload.audio) ?? normalizeOptionalString(payload.data?.audio);
276+
normalizeOptionalString(payload?.audio) ?? normalizeOptionalString(payload?.data?.audio);
209277
const audioUrl =
210-
normalizeOptionalString(payload.audio_url) ||
211-
normalizeOptionalString(payload.data?.audio_url) ||
278+
normalizeOptionalString(payload?.audio_url) ||
279+
normalizeOptionalString(payload?.data?.audio_url) ||
212280
(isLikelyRemoteUrl(audioCandidate) ? audioCandidate : undefined);
213281
const inlineAudio = isLikelyRemoteUrl(audioCandidate) ? undefined : audioCandidate;
214-
const lyrics = decodePossibleText(payload.lyrics ?? payload.data?.lyrics ?? "");
282+
const lyrics = decodePossibleText(payload?.lyrics ?? payload?.data?.lyrics ?? "");
215283

216284
const track = audioUrl
217285
? await downloadTrackFromUrl({
218286
url: audioUrl,
219-
timeoutMs: req.timeoutMs,
287+
timeoutMs: resolveProviderOperationTimeoutMs({
288+
deadline,
289+
defaultTimeoutMs: DEFAULT_TIMEOUT_MS,
290+
}),
220291
fetchFn,
221292
})
222293
: inlineAudio
@@ -225,7 +296,7 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
225296
mimeType: "audio/mpeg",
226297
fileName: "track-1.mp3",
227298
}
228-
: null;
299+
: await readStreamingTrack(res);
229300
if (!track) {
230301
throw new Error("MiniMax music generation response missing audio output");
231302
}
@@ -235,8 +306,8 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
235306
...(lyrics ? { lyrics: [lyrics] } : {}),
236307
model,
237308
metadata: {
238-
...(normalizeOptionalString(payload.task_id)
239-
? { taskId: normalizeOptionalString(payload.task_id) }
309+
...(normalizeOptionalString(payload?.task_id)
310+
? { taskId: normalizeOptionalString(payload?.task_id) }
240311
: {}),
241312
...(audioUrl ? { audioUrl } : {}),
242313
instrumental: req.instrumental === true,

0 commit comments

Comments
 (0)