Skip to content

Commit e4e51bc

Browse files
committed
fix(minimax): stream music generation responses
1 parent 167e73c commit e4e51bc

2 files changed

Lines changed: 162 additions & 25 deletions

File tree

extensions/minimax/music-generation-provider.test.ts

Lines changed: 79 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ beforeAll(async () => {
2828
installMinimaxProviderHttpMockCleanup();
2929

3030
function mockMusicGenerationResponse(json: Record<string, unknown>): void {
31+
const response = new Response(JSON.stringify(json), {
32+
headers: { "content-type": "application/json" },
33+
});
3134
postJsonRequestMock.mockResolvedValue({
32-
response: {
33-
json: async () => json,
34-
},
35+
response,
3536
release: vi.fn(async () => {}),
3637
});
3738
fetchWithTimeoutMock.mockResolvedValue({
@@ -53,12 +54,22 @@ describe("minimax music generation provider", () => {
5354
expectExplicitMusicGenerationCapabilities(buildMinimaxMusicGenerationProvider());
5455
});
5556

56-
it("creates music and downloads the generated track", async () => {
57-
mockMusicGenerationResponse({
58-
task_id: "task-123",
59-
audio_url: "https://example.com/out.mp3",
60-
lyrics: "our city wakes",
61-
base_resp: { status_code: 0 },
57+
it("streams generated music chunks from MiniMax", async () => {
58+
const chunkA = Buffer.from("ID3\x04\x00mp3-a");
59+
const chunkB = Buffer.from("mp3-b");
60+
postJsonRequestMock.mockResolvedValue({
61+
response: new Response(
62+
[
63+
`data: ${JSON.stringify({ data: { status: 1, audio: chunkA.toString("hex") }, base_resp: { status_code: 0 } })}`,
64+
`data: ${JSON.stringify({ data: { status: 1, audio: chunkB.toString("hex") }, base_resp: { status_code: 0 } })}`,
65+
`data: ${JSON.stringify({ data: { status: 2, audio: Buffer.concat([chunkA, chunkB]).toString("hex") }, base_resp: { status_code: 0 } })}`,
66+
"",
67+
].join("\n\n"),
68+
{
69+
headers: { "content-type": "text/event-stream" },
70+
},
71+
),
72+
release: vi.fn(async () => {}),
6273
});
6374

6475
const provider = buildMinimaxMusicGenerationProvider();
@@ -76,19 +87,74 @@ describe("minimax music generation provider", () => {
7687
const body = request.body as Record<string, unknown>;
7788
expect(body.model).toBe("music-2.6");
7889
expect(body.lyrics).toBe("our city wakes");
79-
expect(body.output_format).toBe("url");
90+
expect(body.stream).toBe(true);
91+
expect(body.output_format).toBe("hex");
8092
expect(body.audio_setting).toEqual({
8193
sample_rate: 44100,
8294
bitrate: 256000,
8395
format: "mp3",
8496
});
97+
expect(request.timeoutMs).toBe(300000);
8598
expect(request?.headers).toBeInstanceOf(Headers);
8699
const headers = request?.headers as Headers | undefined;
87100
expect(headers?.get("content-type")).toBe("application/json");
88101
expect(result.tracks).toHaveLength(1);
89-
expect(result.lyrics).toEqual(["our city wakes"]);
90-
expect(result.metadata?.taskId).toBe("task-123");
91-
expect(result.metadata?.audioUrl).toBe("https://example.com/out.mp3");
102+
expect(result.tracks[0]?.buffer).toEqual(Buffer.concat([chunkA, chunkB]));
103+
expect(result.tracks[0]?.mimeType).toBe("audio/mpeg");
104+
expect(result.metadata?.requestedDurationSeconds).toBe(45);
105+
});
106+
107+
// A later stream frame carrying a non-zero base_resp status must reject the whole
// generation, even though an earlier frame reported success.
it("reports streaming music task failures", async () => {
  const acceptedFrame = JSON.stringify({
    base_resp: { status_code: 0 },
  });
  const rejectedFrame = JSON.stringify({
    base_resp: { status_code: 2013, status_msg: "render rejected" },
  });
  const streamBody = `data: ${acceptedFrame}\n\ndata: ${rejectedFrame}`;

  postJsonRequestMock.mockResolvedValue({
    response: new Response(streamBody, {
      headers: { "content-type": "text/event-stream" },
    }),
    release: vi.fn(async () => {}),
  });

  const generation = buildMinimaxMusicGenerationProvider().generateMusic({
    provider: "minimax",
    model: "music-2.6",
    prompt: "upbeat dance-pop with female vocals",
    cfg: {},
  });

  await expect(generation).rejects.toThrow(
    "MiniMax music generation failed (2013): render rejected",
  );
});
133+
134+
// When the stream consists of a single status-2 frame, its audio is the only audio
// available and must be returned rather than dropped.
it("keeps terminal streaming audio when no progressive chunks were sent", async () => {
  const terminalAudio = Buffer.from("terminal-mp3");
  const terminalFrame = JSON.stringify({
    data: { status: 2, audio: terminalAudio.toString("hex") },
    base_resp: { status_code: 0 },
  });

  postJsonRequestMock.mockResolvedValue({
    response: new Response(`data: ${terminalFrame}`, {
      headers: { "content-type": "text/event-stream" },
    }),
    release: vi.fn(async () => {}),
  });

  const { tracks } = await buildMinimaxMusicGenerationProvider().generateMusic({
    provider: "minimax",
    model: "music-2.6",
    prompt: "upbeat dance-pop with female vocals",
    cfg: {},
  });

  expect(tracks[0]?.buffer).toEqual(terminalAudio);
});
93159

94160
it("downloads tracks when url output is returned in data.audio", async () => {

extensions/minimax/music-generation-provider.ts

Lines changed: 83 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,18 @@ import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
88
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
99
import {
1010
assertOkOrThrowHttpError,
11+
createProviderOperationDeadline,
1112
fetchProviderDownloadResponse,
1213
postJsonRequest,
14+
resolveProviderOperationTimeoutMs,
1315
resolveProviderHttpRequestConfig,
1416
} from "openclaw/plugin-sdk/provider-http";
1517
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
1618

1719
const DEFAULT_MINIMAX_MUSIC_BASE_URL = "https://api.minimax.io";
1820
const DEFAULT_MINIMAX_MUSIC_MODEL = "music-2.6";
1921
const DEFAULT_TIMEOUT_MS = 120_000;
22+
const DEFAULT_OPERATION_TIMEOUT_MS = 300_000;
2023

2124
type MinimaxBaseResp = {
2225
status_code?: number;
@@ -36,6 +39,14 @@ type MinimaxMusicCreateResponse = {
3639
base_resp?: MinimaxBaseResp;
3740
};
3841

42+
/** Payload portion of a single streamed music generation frame. */
type MinimaxMusicStreamFrameData = {
  /** Encoded audio for this frame; the stream reader decodes it with `decodePossibleBinary`. */
  audio?: string;
  /** Frame status; the stream reader compares it against `"2"` after string coercion. */
  status?: number | string;
};

/** One JSON frame carried on a `data:` line of the music generation stream. */
type MinimaxMusicStreamFrame = {
  data?: MinimaxMusicStreamFrameData;
  /** Per-frame API status, checked with `assertMinimaxBaseResp`. */
  base_resp?: MinimaxBaseResp;
};
49+
3950
function resolveMinimaxMusicBaseUrl(
4051
cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"],
4152
providerId: string,
@@ -106,6 +117,48 @@ async function downloadTrackFromUrl(params: {
106117
};
107118
}
108119

120+
/**
 * Parses the JSON payload of one `data:` line into a stream frame.
 *
 * Returns `undefined` for JSON values that are not plain objects (`null`, primitives,
 * arrays) so the caller can skip them instead of dereferencing a non-frame value.
 *
 * @throws Error when the payload is not valid JSON.
 */
function parseMinimaxMusicStreamFrame(json: string): MinimaxMusicStreamFrame | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(json);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`MiniMax music generation returned a malformed stream frame: ${reason}`);
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return undefined;
  }
  return parsed as MinimaxMusicStreamFrame;
}

/**
 * Collects the audio of a MiniMax music generation response into a single track.
 *
 * Two response shapes are handled:
 * - a body whose content type starts with `audio/` is returned unchanged, with the file
 *   extension taken from `extensionForMime` (falling back to `mp3`);
 * - any other body is read fully as text and scanned line by line for `data:` entries,
 *   each carrying a JSON frame whose `data.audio` is decoded and appended in order.
 *
 * Note that the body is buffered with `response.text()` before parsing, so frames are
 * not processed incrementally as they arrive.
 *
 * @param response HTTP response returned by the music generation request.
 * @returns The assembled track; frame-based audio is always labelled `audio/mpeg`.
 * @throws Error when a frame is not valid JSON, when `assertMinimaxBaseResp` rejects a
 *   frame's `base_resp`, or when no audio bytes were collected.
 */
async function readStreamingTrack(response: Response): Promise<GeneratedMusicAsset> {
  const contentType = normalizeOptionalString(response.headers.get("content-type")) ?? "";
  if (contentType.toLowerCase().startsWith("audio/")) {
    const ext = extensionForMime(contentType)?.replace(/^\./u, "") || "mp3";
    return {
      buffer: Buffer.from(await response.arrayBuffer()),
      mimeType: contentType,
      fileName: `track-1.${ext}`,
    };
  }

  const chunks: Buffer[] = [];
  const text = await response.text();
  for (const rawLine of text.split(/\r?\n/u)) {
    const line = rawLine.trim();
    if (!line.startsWith("data:")) {
      continue;
    }
    const json = line.slice("data:".length).trim();
    if (!json || json === "[DONE]") {
      continue;
    }
    const frame = parseMinimaxMusicStreamFrame(json);
    if (!frame) {
      // Not a frame object (e.g. `data: null`); nothing to validate or collect.
      continue;
    }
    assertMinimaxBaseResp(frame.base_resp, "MiniMax music generation failed");
    const audio = normalizeOptionalString(frame.data?.audio);
    if (!audio) {
      continue;
    }
    // A status-2 frame is skipped once progressive audio has been collected, so its
    // payload is not appended on top of the chunks already gathered; it is kept only
    // when it is the sole source of audio.
    const isTerminalFrame = String(frame.data?.status ?? "") === "2";
    if (isTerminalFrame && chunks.length > 0) {
      continue;
    }
    chunks.push(decodePossibleBinary(audio));
  }

  const buffer = Buffer.concat(chunks);
  if (buffer.byteLength === 0) {
    throw new Error("MiniMax music generation response missing audio output");
  }
  return {
    buffer,
    mimeType: "audio/mpeg",
    fileName: "track-1.mp3",
  };
}
161+
109162
function buildPrompt(req: MusicGenerationRequest): string {
110163
const parts = [req.prompt.trim()];
111164
if (typeof req.durationSeconds === "number" && Number.isFinite(req.durationSeconds)) {
@@ -168,6 +221,10 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
168221
}
169222

170223
const fetchFn = fetch;
224+
const deadline = createProviderOperationDeadline({
225+
timeoutMs: req.timeoutMs ?? DEFAULT_OPERATION_TIMEOUT_MS,
226+
label: "MiniMax music generation",
227+
});
171228
const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
172229
resolveProviderHttpRequestConfig({
173230
baseUrl: resolveMinimaxMusicBaseUrl(req.cfg, providerId),
@@ -190,7 +247,8 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
190247
prompt: buildPrompt(req),
191248
...(req.instrumental === true ? { is_instrumental: true } : {}),
192249
...(lyrics ? { lyrics } : req.instrumental === true ? {} : { lyrics_optimizer: true }),
193-
output_format: "url",
250+
stream: true,
251+
output_format: "hex",
194252
audio_setting: {
195253
sample_rate: 44_100,
196254
bitrate: 256_000,
@@ -202,7 +260,10 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
202260
url: `${baseUrl}/v1/music_generation`,
203261
headers: jsonHeaders,
204262
body,
205-
timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
263+
timeoutMs: resolveProviderOperationTimeoutMs({
264+
deadline,
265+
defaultTimeoutMs: DEFAULT_OPERATION_TIMEOUT_MS,
266+
}),
206267
fetchFn,
207268
pinDns: false,
208269
allowPrivateNetwork,
@@ -211,22 +272,32 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
211272

212273
try {
213274
await assertOkOrThrowHttpError(res, "MiniMax music generation failed");
214-
const payload = (await res.json()) as MinimaxMusicCreateResponse;
215-
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
275+
const contentType = normalizeOptionalString(res.headers.get("content-type")) ?? "";
276+
const lowerContentType = contentType.toLowerCase();
277+
const payload =
278+
lowerContentType.includes("text/event-stream") || lowerContentType.startsWith("audio/")
279+
? null
280+
: ((await res.clone().json()) as MinimaxMusicCreateResponse);
281+
if (payload) {
282+
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
283+
}
216284

217285
const audioCandidate =
218-
normalizeOptionalString(payload.audio) ?? normalizeOptionalString(payload.data?.audio);
286+
normalizeOptionalString(payload?.audio) ?? normalizeOptionalString(payload?.data?.audio);
219287
const audioUrl =
220-
normalizeOptionalString(payload.audio_url) ||
221-
normalizeOptionalString(payload.data?.audio_url) ||
288+
normalizeOptionalString(payload?.audio_url) ||
289+
normalizeOptionalString(payload?.data?.audio_url) ||
222290
(isLikelyRemoteUrl(audioCandidate) ? audioCandidate : undefined);
223291
const inlineAudio = isLikelyRemoteUrl(audioCandidate) ? undefined : audioCandidate;
224-
const lyrics = decodePossibleText(payload.lyrics ?? payload.data?.lyrics ?? "");
292+
const lyrics = decodePossibleText(payload?.lyrics ?? payload?.data?.lyrics ?? "");
225293

226294
const track = audioUrl
227295
? await downloadTrackFromUrl({
228296
url: audioUrl,
229-
timeoutMs: req.timeoutMs,
297+
timeoutMs: resolveProviderOperationTimeoutMs({
298+
deadline,
299+
defaultTimeoutMs: DEFAULT_TIMEOUT_MS,
300+
}),
230301
fetchFn,
231302
})
232303
: inlineAudio
@@ -235,7 +306,7 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
235306
mimeType: "audio/mpeg",
236307
fileName: "track-1.mp3",
237308
}
238-
: null;
309+
: await readStreamingTrack(res);
239310
if (!track) {
240311
throw new Error("MiniMax music generation response missing audio output");
241312
}
@@ -245,8 +316,8 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
245316
...(lyrics ? { lyrics: [lyrics] } : {}),
246317
model,
247318
metadata: {
248-
...(normalizeOptionalString(payload.task_id)
249-
? { taskId: normalizeOptionalString(payload.task_id) }
319+
...(normalizeOptionalString(payload?.task_id)
320+
? { taskId: normalizeOptionalString(payload?.task_id) }
250321
: {}),
251322
...(audioUrl ? { audioUrl } : {}),
252323
instrumental: req.instrumental === true,

0 commit comments

Comments
 (0)