Skip to content

Commit dc0ee2e

Browse files
committed
feat: add music generation tooling
1 parent 3de91d9 commit dc0ee2e

79 files changed

Lines changed: 3538 additions & 620 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

apps/shared/OpenClawKit/Sources/OpenClawKit/Resources/tool-display.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,29 @@
10301030
}
10311031
}
10321032
},
1033+
"music_generate": {
1034+
"emoji": "🎵",
1035+
"title": "Music Generation",
1036+
"actions": {
1037+
"generate": {
1038+
"label": "generate",
1039+
"detailKeys": [
1040+
"prompt",
1041+
"model",
1042+
"durationSeconds",
1043+
"format",
1044+
"instrumental"
1045+
]
1046+
},
1047+
"list": {
1048+
"label": "list",
1049+
"detailKeys": [
1050+
"provider",
1051+
"model"
1052+
]
1053+
}
1054+
}
1055+
},
10331056
"video_generate": {
10341057
"emoji": "🎬",
10351058
"title": "Video Generation",
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
73fbcd00d17685b462dfb11aff74baae99265ae5671db28893d8608456daa44e config-baseline.json
2-
effaf240920c16fce2c78af52dec15aa9ceb049e34f703c568669cb6beef3f91 config-baseline.core.json
3-
3c999707b167138de34f6255e3488b99e404c5132d3fc5879a1fa12d815c31f5 config-baseline.channel.json
4-
031b237717ca108ea2cd314413db4c91edfdfea55f808179e3066331f41af134 config-baseline.plugin.json
1+
fb2c88ef41657f1aa7237dcce655d16313dc849fd03991b221346367c569a482 config-baseline.json
2+
ff8f64e1866748644776b229bdf334762875e3139b717a3adb8e5c587286ada3 config-baseline.core.json
3+
ba5f7e89aad95d3eae0bc4e3b590c8dbb87bd921bba0d8f12fe67545af5887c6 config-baseline.channel.json
4+
dc19ac1c60544d87fe08944d1184e0ade7b469367cdf8d6ce61452f64f9e0a47 config-baseline.plugin.json
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
97509287d728c8f5d1736f7ea07521451ada4b9d7ef56555dbe860a89e1b6e08 plugin-sdk-api-baseline.json
2-
a22b3d427953cc8394b28c87ef7a992d2eb4f2c9f6a76fa58b33079e2306661b plugin-sdk-api-baseline.jsonl
1+
4e024092a28987e1a826b0c731e9ee5adb9d28e73b5cac51ca055c46d9067258 plugin-sdk-api-baseline.json
2+
9e3279a3e78e24b72952ab0f1707dcf465f8c283acf568f043e9b232fd0ae5dd plugin-sdk-api-baseline.jsonl

extensions/google/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
resolveGoogleGenerativeAiTransport,
1313
} from "./api.js";
1414
import { registerGoogleGeminiCliProvider } from "./gemini-cli-provider.js";
15+
import { buildGoogleMusicGenerationProvider } from "./music-generation-provider.js";
1516
import { isModernGoogleModel, resolveGoogleGeminiForwardCompatModel } from "./provider-models.js";
1617
import { createGeminiWebSearchProvider } from "./src/gemini-web-search-provider.js";
1718
import { buildGoogleVideoGenerationProvider } from "./video-generation-provider.js";
@@ -166,6 +167,7 @@ export default definePluginEntry({
166167
});
167168
api.registerImageGenerationProvider(createLazyGoogleImageGenerationProvider());
168169
api.registerMediaUnderstandingProvider(createLazyGoogleMediaUnderstandingProvider());
170+
api.registerMusicGenerationProvider(buildGoogleMusicGenerationProvider());
169171
api.registerVideoGenerationProvider(buildGoogleVideoGenerationProvider());
170172
api.registerWebSearchProvider(createGeminiWebSearchProvider());
171173
},
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import { afterEach, describe, expect, it, vi } from "vitest";
2+
3+
// Mocks for the "@google/genai" SDK. They are created through vi.hoisted so
// that the vi.mock factory further down can reference them.
const { GoogleGenAIMock, generateContentMock } = vi.hoisted(() => {
4+
// Stands in for client.models.generateContent; each test sets its resolved value.
const generateContentMock = vi.fn();
5+
// Written as a named function expression rather than an arrow function.
// NOTE(review): presumably because the provider invokes it with `new` — confirm.
const GoogleGenAIMock = vi.fn(function GoogleGenAI() {
6+
return {
7+
models: {
8+
generateContent: generateContentMock,
9+
},
10+
};
11+
});
12+
return { GoogleGenAIMock, generateContentMock };
13+
});
14+
15+
// Replace the SDK module so the provider under test receives the mock client.
vi.mock("@google/genai", () => ({
16+
GoogleGenAI: GoogleGenAIMock,
17+
}));
18+
19+
import * as providerAuthRuntime from "openclaw/plugin-sdk/provider-auth-runtime";
20+
import { buildGoogleMusicGenerationProvider } from "./music-generation-provider.js";
21+
22+
describe("google music generation provider", () => {
  // Stubs API-key resolution so every test sees the same fake credential.
  const stubGoogleApiKey = () =>
    vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "google-key",
      source: "env",
      mode: "api-key",
    });

  afterEach(() => {
    vi.restoreAllMocks();
    generateContentMock.mockReset();
    GoogleGenAIMock.mockClear();
  });

  it("submits generation and returns inline audio bytes plus lyrics", async () => {
    stubGoogleApiKey();

    // One text part (lyrics) followed by one inline audio part.
    const lyricsPart = { text: "wake the city up" };
    const audioPart = {
      inlineData: {
        data: Buffer.from("mp3-bytes").toString("base64"),
        mimeType: "audio/mpeg",
      },
    };
    generateContentMock.mockResolvedValue({
      candidates: [{ content: { parts: [lyricsPart, audioPart] } }],
    });

    const provider = buildGoogleMusicGenerationProvider();
    const result = await provider.generateMusic({
      provider: "google",
      model: "lyria-3-clip-preview",
      prompt: "upbeat synthpop anthem",
      cfg: {},
      instrumental: true,
    });

    const expectedRequest = expect.objectContaining({
      model: "lyria-3-clip-preview",
      config: {
        responseModalities: ["AUDIO", "TEXT"],
      },
    });
    expect(generateContentMock).toHaveBeenCalledWith(expectedRequest);

    expect(result.tracks).toHaveLength(1);
    expect(result.tracks[0]?.mimeType).toBe("audio/mpeg");
    expect(result.lyrics).toEqual(["wake the city up"]);

    const expectedClientOptions = expect.objectContaining({ apiKey: "google-key" });
    expect(GoogleGenAIMock).toHaveBeenCalledWith(expectedClientOptions);
  });

  it("rejects unsupported wav output on clip model", async () => {
    stubGoogleApiKey();
    const provider = buildGoogleMusicGenerationProvider();

    const pending = provider.generateMusic({
      provider: "google",
      model: "lyria-3-clip-preview",
      prompt: "ambient ocean",
      cfg: {},
      format: "wav",
    });

    await expect(pending).rejects.toThrow("supports mp3 output");
  });
});
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
import { GoogleGenAI } from "@google/genai";
2+
import { extensionForMime } from "openclaw/plugin-sdk/msteams";
3+
import type {
4+
GeneratedMusicAsset,
5+
MusicGenerationProvider,
6+
MusicGenerationRequest,
7+
} from "openclaw/plugin-sdk/music-generation";
8+
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
9+
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
10+
import { normalizeGoogleApiBaseUrl } from "./api.js";
11+
12+
// Model used when a request does not name one; also advertised as the provider default.
const DEFAULT_GOOGLE_MUSIC_MODEL = "lyria-3-clip-preview";
13+
// The only model for which this provider accepts "wav" in addition to "mp3".
const GOOGLE_PRO_MUSIC_MODEL = "lyria-3-pro-preview";
14+
// Fallback passed as the SDK httpOptions.timeout when the request sets no timeoutMs.
const DEFAULT_TIMEOUT_MS = 180_000;
15+
// Maximum number of reference images per request; checked at the start of generateMusic.
const GOOGLE_MAX_INPUT_IMAGES = 10;
16+
17+
// Inline binary payload carried by a response part. The MIME type is declared
// under both camelCase and snake_case keys because extractTracks reads either.
type GoogleInlineDataPart = {
18+
mimeType?: string;
19+
mime_type?: string;
20+
// Base64 text; extractTracks decodes it with Buffer.from(data, "base64").
data?: string;
21+
};
22+
23+
// The subset of the generateContent response that this module reads.
// Every level is optional, so consumers must tolerate missing fields.
type GoogleGenerateMusicResponse = {
24+
candidates?: Array<{
25+
content?: {
26+
parts?: Array<{
27+
// Text parts are collected as lyrics by extractTracks.
text?: string;
28+
// Audio payload; both key spellings are accepted by extractTracks.
inlineData?: GoogleInlineDataPart;
29+
inline_data?: GoogleInlineDataPart;
30+
}>;
31+
};
32+
}>;
33+
};
34+
35+
/**
 * Reads the optional Google base-URL override from the request configuration.
 *
 * @param req - Music generation request; `cfg.models.providers.google.baseUrl` is consulted.
 * @returns The trimmed override passed through `normalizeGoogleApiBaseUrl`, or
 *   `undefined` when no override is set or it is blank.
 */
function resolveConfiguredGoogleMusicBaseUrl(req: MusicGenerationRequest): string | undefined {
  const rawBaseUrl = req.cfg?.models?.providers?.google?.baseUrl?.trim();
  if (!rawBaseUrl) {
    return undefined;
  }
  return normalizeGoogleApiBaseUrl(rawBaseUrl);
}
39+
40+
/**
 * Assembles the text prompt sent to the model.
 *
 * Sections are emitted in a fixed order — user prompt, instrumental directive,
 * lyrics — and joined with a blank line. Sections that are empty after
 * trimming are skipped, so a blank prompt never yields text that starts with
 * a stray separator.
 *
 * @param req - Music generation request; reads `prompt`, `instrumental` and `lyrics`.
 * @returns The combined prompt, or an empty string when every section is empty.
 */
function buildMusicPrompt(req: MusicGenerationRequest): string {
  const sections: string[] = [];

  const prompt = req.prompt.trim();
  if (prompt) {
    sections.push(prompt);
  }

  // Only an explicit `true` adds the directive; undefined/false leave vocals allowed.
  if (req.instrumental === true) {
    sections.push("Instrumental only. No vocals, no sung lyrics, no spoken word.");
  }

  const lyrics = req.lyrics?.trim();
  if (lyrics) {
    sections.push(`Lyrics:\n${lyrics}`);
  }

  return sections.join("\n\n");
}
51+
52+
/**
 * Lists the output formats this provider accepts for a model.
 *
 * @param model - Model identifier, compared exactly against the pro model name.
 * @returns `["mp3", "wav"]` for the pro model, `["mp3"]` for any other value.
 */
function resolveSupportedFormats(model: string): readonly string[] {
  if (model === GOOGLE_PRO_MUSIC_MODEL) {
    return ["mp3", "wav"];
  }
  return ["mp3"];
}
55+
56+
/**
 * Builds the file name for a generated track.
 *
 * @param params.index - Zero-based track position; the name uses `index + 1`.
 * @param params.mimeType - MIME type handed to `extensionForMime`.
 * @param params.model - Model identifier, used only to pick the fallback extension.
 * @returns `track-<n>.<ext>`, where `<ext>` is the MIME-derived extension with
 *   its leading dot removed, or "wav" (pro model) / "mp3" (otherwise) when the
 *   lookup yields nothing.
 */
function resolveTrackFileName(params: { index: number; mimeType: string; model: string }): string {
  const { index, mimeType, model } = params;
  const extensionFromMime = extensionForMime(mimeType)?.replace(/^\./u, "");
  const fallbackExtension = model === GOOGLE_PRO_MUSIC_MODEL ? "wav" : "mp3";
  return `track-${index + 1}.${extensionFromMime || fallbackExtension}`;
}
62+
63+
/**
 * Splits the first candidate of a response into audio tracks and lyric text.
 *
 * A part with non-blank text is recorded as lyrics and not inspected further.
 * Any other part contributes a track when it carries non-blank inline data
 * under `inlineData` or `inline_data`; parts with neither are ignored.
 *
 * @param params.payload - Response from the generateContent call.
 * @param params.model - Model identifier, forwarded to the file-name helper.
 * @returns The decoded tracks and trimmed lyric strings, each in part order.
 */
function extractTracks(params: { payload: GoogleGenerateMusicResponse; model: string }): {
  tracks: GeneratedMusicAsset[];
  lyrics: string[];
} {
  const { payload, model } = params;
  const responseParts = payload.candidates?.[0]?.content?.parts ?? [];
  const collectedLyrics: string[] = [];
  const collectedTracks: GeneratedMusicAsset[] = [];

  responseParts.forEach((part) => {
    const text = part.text?.trim();
    if (text) {
      collectedLyrics.push(text);
      return;
    }

    const inlinePayload = part.inlineData ?? part.inline_data;
    const encodedAudio = inlinePayload?.data?.trim();
    if (!encodedAudio) {
      return;
    }

    // Prefer the camelCase key, then snake_case, then assume MP3.
    const mimeType =
      inlinePayload?.mimeType?.trim() || inlinePayload?.mime_type?.trim() || "audio/mpeg";
    const fileName = resolveTrackFileName({
      index: collectedTracks.length,
      mimeType,
      model,
    });
    collectedTracks.push({
      buffer: Buffer.from(encodedAudio, "base64"),
      mimeType,
      fileName,
    });
  });

  return { tracks: collectedTracks, lyrics: collectedLyrics };
}
92+
93+
/**
 * Creates the Google music generation provider.
 *
 * The returned provider advertises the clip (default) and pro models and
 * implements `generateMusic` with a single `generateContent` call that asks
 * for both audio and text modalities.
 *
 * `generateMusic` rejects with an `Error` when:
 * - more than `GOOGLE_MAX_INPUT_IMAGES` reference images are supplied,
 * - no API key can be resolved for the "google" provider,
 * - `req.format` is not supported by the selected model,
 * - the response contains no audio data.
 *
 * @returns A `MusicGenerationProvider` with id "google".
 */
export function buildGoogleMusicGenerationProvider(): MusicGenerationProvider {
  return {
    id: "google",
    label: "Google",
    defaultModel: DEFAULT_GOOGLE_MUSIC_MODEL,
    models: [DEFAULT_GOOGLE_MUSIC_MODEL, GOOGLE_PRO_MUSIC_MODEL],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "google",
        agentDir,
      }),
    capabilities: {
      maxTracks: 1,
      maxInputImages: GOOGLE_MAX_INPUT_IMAGES,
      supportsLyrics: true,
      supportsInstrumental: true,
      supportsFormat: true,
      supportedFormatsByModel: {
        [DEFAULT_GOOGLE_MUSIC_MODEL]: ["mp3"],
        [GOOGLE_PRO_MUSIC_MODEL]: ["mp3", "wav"],
      },
    },
    async generateMusic(req) {
      // Default the optional image list once; it is used for validation,
      // request building and metadata below.
      const inputImages = req.inputImages ?? [];
      if (inputImages.length > GOOGLE_MAX_INPUT_IMAGES) {
        throw new Error(
          `Google music generation supports at most ${GOOGLE_MAX_INPUT_IMAGES} reference images.`,
        );
      }

      const auth = await resolveApiKeyForProvider({
        provider: "google",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("Google API key missing");
      }

      const model = req.model?.trim() || DEFAULT_GOOGLE_MUSIC_MODEL;
      // NOTE(review): req.format is only validated here; it is not forwarded
      // in the generateContent request below.
      if (req.format) {
        const supportedFormats = resolveSupportedFormats(model);
        if (!supportedFormats.includes(req.format)) {
          throw new Error(
            `Google music generation model ${model} supports ${supportedFormats.join(", ")} output.`,
          );
        }
      }

      // Resolve the override once instead of once for the check and again for the value.
      const baseUrl = resolveConfiguredGoogleMusicBaseUrl(req);
      const client = new GoogleGenAI({
        apiKey: auth.apiKey,
        httpOptions: {
          // Omit the key entirely when there is no override.
          ...(baseUrl ? { baseUrl } : {}),
          timeout: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        },
      });

      const response = (await client.models.generateContent({
        model,
        contents: [
          { text: buildMusicPrompt(req) },
          ...inputImages.map((image) => ({
            inlineData: {
              mimeType: image.mimeType?.trim() || "image/png",
              data: image.buffer?.toString("base64") ?? "",
            },
          })),
        ],
        config: {
          responseModalities: ["AUDIO", "TEXT"],
        },
      })) as GoogleGenerateMusicResponse;

      const { tracks, lyrics } = extractTracks({
        payload: response,
        model,
      });
      if (tracks.length === 0) {
        throw new Error("Google music generation response missing audio data");
      }

      return {
        tracks,
        // `lyrics` is only present when the model returned text parts.
        ...(lyrics.length > 0 ? { lyrics } : {}),
        model,
        metadata: {
          inputImageCount: inputImages.length,
          instrumental: req.instrumental === true,
          ...(req.lyrics?.trim() ? { requestedLyrics: true } : {}),
          ...(req.format ? { requestedFormat: req.format } : {}),
        },
      };
    },
  };
}

extensions/google/openclaw.plugin.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
"contracts": {
4747
"mediaUnderstandingProviders": ["google"],
4848
"imageGenerationProviders": ["google"],
49+
"musicGenerationProviders": ["google"],
4950
"videoGenerationProviders": ["google"],
5051
"webSearchProviders": ["gemini"]
5152
},

0 commit comments

Comments
 (0)