Skip to content

Commit 56fe2aa

Browse files
committed
fix: attach Google Meet realtime bridge
1 parent b5e5f2c commit 56fe2aa

10 files changed

Lines changed: 145 additions & 13 deletions

File tree

docs/plugins/google-meet.md

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,8 @@ Defaults:
355355
- `realtime.toolPolicy: "safe-read-only"`
356356
- `realtime.instructions`: brief spoken replies, with
357357
`openclaw_agent_consult` for deeper answers
358+
- `realtime.introMessage`: short spoken readiness check when the realtime bridge
359+
connects; set it to `""` to join silently
358360

359361
Optional overrides:
360362

@@ -371,6 +373,7 @@ Optional overrides:
371373
},
372374
realtime: {
373375
toolPolicy: "owner",
376+
introMessage: "Say exactly: I'm here.",
374377
},
375378
}
376379
```
@@ -409,7 +412,16 @@ VM. In both cases the realtime model and `openclaw_agent_consult` run on the
409412
Gateway host, so model credentials stay there.
410413

411414
Use `action: "status"` to list active sessions or inspect a session ID. Use
412-
`action: "leave"` to mark a session ended.
415+
`action: "speak"` with `sessionId` and `message` to make the realtime agent
416+
speak immediately. Use `action: "leave"` to mark a session ended.
417+
418+
```json
419+
{
420+
"action": "speak",
421+
"sessionId": "meet_...",
422+
"message": "Say exactly: I'm here and listening."
423+
}
424+
```
413425

414426
## Realtime agent consult
415427

@@ -434,6 +446,12 @@ voice session. The voice model can then speak that answer back into the meeting.
434446
The consult session key is scoped per Meet session, so follow-up consult calls
435447
can reuse prior consult context during the same meeting.
436448

449+
To force a spoken readiness check after Chrome has fully joined the call:
450+
451+
```bash
452+
openclaw googlemeet speak meet_... "Say exactly: I'm here and listening."
453+
```
454+
437455
## Notes
438456

439457
Google Meet's official media API is receive-oriented, so speaking into a Meet
@@ -453,9 +471,9 @@ For clean duplex audio, route Meet output and Meet microphone through separate
453471
virtual devices or a Loopback-style virtual device graph. A single shared
454472
BlackHole device can echo other participants back into the call.
455473

456-
`googlemeet leave` stops the command-pair realtime audio bridge for Chrome
457-
sessions. For Twilio sessions delegated through the Voice Call plugin, it also
458-
hangs up the underlying voice call.
474+
`googlemeet speak` triggers the active realtime audio bridge for a Chrome
475+
session. `googlemeet leave` stops that bridge. For Twilio sessions delegated
476+
through the Voice Call plugin, `leave` also hangs up the underlying voice call.
459477

460478
## Related
461479

extensions/google-meet/index.test.ts

Lines changed: 24 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,7 @@ describe("google-meet plugin", () => {
205205
voiceCall: { enabled: true, requestTimeoutMs: 30000, dtmfDelayMs: 2500 },
206206
realtime: {
207207
provider: "openai",
208+
introMessage: "Say exactly: I'm here and listening.",
208209
toolPolicy: "safe-read-only",
209210
},
210211
oauth: {},
@@ -284,7 +285,7 @@ describe("google-meet plugin", () => {
284285
properties: {
285286
action: {
286287
type: "string",
287-
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"],
288+
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"],
288289
},
289290
transport: { type: "string", enum: ["chrome", "chrome-node", "twilio"] },
290291
mode: { type: "string", enum: ["realtime", "transcribe"] },
@@ -520,11 +521,16 @@ describe("google-meet plugin", () => {
520521
});
521522

522523
it("joins Chrome on a paired node without local Chrome or BlackHole", async () => {
523-
const { methods, nodesList, nodesInvoke } = setup({
524-
defaultTransport: "chrome-node",
525-
defaultMode: "transcribe",
526-
chromeNode: { node: "parallels-macos" },
527-
});
524+
const { methods, nodesList, nodesInvoke } = setup(
525+
{
526+
defaultTransport: "chrome-node",
527+
defaultMode: "transcribe",
528+
chromeNode: { node: "parallels-macos" },
529+
},
530+
{
531+
nodesInvokeResult: { payload: { launched: true } },
532+
},
533+
);
528534
const handler = methods.get("googlemeet.join") as
529535
| ((ctx: {
530536
params: Record<string, unknown>;
@@ -669,6 +675,7 @@ describe("google-meet plugin", () => {
669675
name: string;
670676
args: unknown;
671677
}) => void;
678+
onReady?: () => void;
672679
tools?: unknown[];
673680
}
674681
| undefined;
@@ -680,6 +687,7 @@ describe("google-meet plugin", () => {
680687
submitToolResult: vi.fn(),
681688
acknowledgeMark: vi.fn(),
682689
close: vi.fn(),
690+
triggerGreeting: vi.fn(),
683691
isConnected: vi.fn(() => true),
684692
};
685693
const provider: RealtimeVoiceProviderPlugin = {
@@ -756,6 +764,7 @@ describe("google-meet plugin", () => {
756764
inputStdout.write(Buffer.from([1, 2, 3]));
757765
callbacks?.onAudio(Buffer.from([4, 5]));
758766
callbacks?.onMark?.("mark-1");
767+
callbacks?.onReady?.();
759768
callbacks?.onToolCall?.({
760769
itemId: "item-1",
761770
callId: "tool-call-1",
@@ -772,6 +781,9 @@ describe("google-meet plugin", () => {
772781
expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3]));
773782
expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]);
774783
expect(bridge.acknowledgeMark).toHaveBeenCalled();
784+
expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and listening.");
785+
handle.speak("Say exactly: hello from the meeting.");
786+
expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say exactly: hello from the meeting.");
775787
expect(callbacks).toMatchObject({
776788
tools: [
777789
expect.objectContaining({
@@ -808,6 +820,7 @@ describe("google-meet plugin", () => {
808820
name: string;
809821
args: unknown;
810822
}) => void;
823+
onReady?: () => void;
811824
tools?: unknown[];
812825
}
813826
| undefined;
@@ -819,6 +832,7 @@ describe("google-meet plugin", () => {
819832
submitToolResult: vi.fn(),
820833
acknowledgeMark: vi.fn(),
821834
close: vi.fn(),
835+
triggerGreeting: vi.fn(),
822836
isConnected: vi.fn(() => true),
823837
};
824838
const provider: RealtimeVoiceProviderPlugin = {
@@ -879,6 +893,7 @@ describe("google-meet plugin", () => {
879893
});
880894

881895
callbacks?.onAudio(Buffer.from([1, 2, 3]));
896+
callbacks?.onReady?.();
882897
callbacks?.onToolCall?.({
883898
itemId: "item-1",
884899
callId: "tool-call-1",
@@ -907,6 +922,9 @@ describe("google-meet plugin", () => {
907922
text: "Use the launch update.",
908923
});
909924
});
925+
expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and listening.");
926+
handle.speak("Say exactly: hello from the node.");
927+
expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say exactly: hello from the node.");
910928
expect(callbacks).toMatchObject({
911929
tools: [
912930
expect.objectContaining({

extensions/google-meet/index.ts

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,10 @@ const googleMeetConfigSchema = {
8888
},
8989
"realtime.model": { label: "Realtime Model", advanced: true },
9090
"realtime.instructions": { label: "Realtime Instructions", advanced: true },
91+
"realtime.introMessage": {
92+
label: "Realtime Intro Message",
93+
help: "Spoken once when the realtime bridge is ready. Set to an empty string to join silently.",
94+
},
9195
"realtime.toolPolicy": {
9296
label: "Realtime Tool Policy",
9397
help: "Safe read-only tools are available by default; owner requests can unlock broader tools.",
@@ -111,7 +115,7 @@ const googleMeetConfigSchema = {
111115

112116
const GoogleMeetToolSchema = Type.Object({
113117
action: Type.String({
114-
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"],
118+
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"],
115119
description: "Google Meet action to run",
116120
}),
117121
url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })),
@@ -123,6 +127,7 @@ const GoogleMeetToolSchema = Type.Object({
123127
pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })),
124128
dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })),
125129
sessionId: Type.Optional(Type.String({ description: "Meet session ID" })),
130+
message: Type.Optional(Type.String({ description: "Realtime instructions to speak now" })),
126131
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
127132
accessToken: Type.Optional(Type.String({ description: "Access token override" })),
128133
refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })),
@@ -265,6 +270,23 @@ export default definePluginEntry({
265270
},
266271
);
267272

273+
api.registerGatewayMethod(
274+
"googlemeet.speak",
275+
async ({ params, respond }: GatewayRequestHandlerOptions) => {
276+
try {
277+
const sessionId = normalizeOptionalString(params?.sessionId);
278+
if (!sessionId) {
279+
respond(false, { error: "sessionId required" });
280+
return;
281+
}
282+
const rt = await ensureRuntime();
283+
respond(true, rt.speak(sessionId, normalizeOptionalString(params?.message)));
284+
} catch (err) {
285+
sendError(respond, err);
286+
}
287+
},
288+
);
289+
268290
api.registerTool({
269291
name: "google_meet",
270292
label: "Google Meet",
@@ -318,6 +340,14 @@ export default definePluginEntry({
318340
}
319341
return json(await rt.leave(sessionId));
320342
}
343+
case "speak": {
344+
const rt = await ensureRuntime();
345+
const sessionId = normalizeOptionalString(raw.sessionId);
346+
if (!sessionId) {
347+
throw new Error("sessionId required");
348+
}
349+
return json(rt.speak(sessionId, normalizeOptionalString(raw.message)));
350+
}
321351
default:
322352
throw new Error("unknown google_meet action");
323353
}

extensions/google-meet/openclaw.plugin.json

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,10 @@
108108
"label": "Realtime Instructions",
109109
"advanced": true
110110
},
111+
"realtime.introMessage": {
112+
"label": "Realtime Intro Message",
113+
"help": "Spoken once when the realtime bridge is ready. Set to an empty string to join silently."
114+
},
111115
"realtime.toolPolicy": {
112116
"label": "Realtime Tool Policy",
113117
"help": "Safe read-only tools are available by default; owner requests can unlock broader tools.",
@@ -312,6 +316,10 @@
312316
"type": "string",
313317
"default": "You are joining a private Google Meet as an OpenClaw agent. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call openclaw_agent_consult before answering."
314318
},
319+
"introMessage": {
320+
"type": "string",
321+
"default": "Say exactly: I'm here and listening."
322+
},
315323
"toolPolicy": {
316324
"type": "string",
317325
"enum": ["safe-read-only", "owner", "none"],

extensions/google-meet/src/cli.ts

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,4 +304,20 @@ export function registerGoogleMeetCli(params: {
304304
}
305305
writeStdoutLine("left %s", sessionId);
306306
});
307+
308+
root
309+
.command("speak")
310+
.argument("<session-id>", "Meet session ID")
311+
.argument("[message]", "Realtime instructions to speak now")
312+
.action(async (sessionId: string, message?: string) => {
313+
const rt = await params.ensureRuntime();
314+
const result = rt.speak(sessionId, message);
315+
if (!result.found) {
316+
throw new Error("session not found");
317+
}
318+
if (!result.spoken) {
319+
throw new Error("session has no active realtime audio bridge");
320+
}
321+
writeStdoutLine("speaking on %s", sessionId);
322+
});
307323
}

extensions/google-meet/src/config.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ export type GoogleMeetConfig = {
4848
provider?: string;
4949
model?: string;
5050
instructions?: string;
51+
introMessage?: string;
5152
toolPolicy: GoogleMeetToolPolicy;
5253
providers: Record<string, Record<string, unknown>>;
5354
};
@@ -99,6 +100,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
99100
] as const;
100101

101102
export const DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS = `You are joining a private Google Meet as an OpenClaw agent. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;
103+
export const DEFAULT_GOOGLE_MEET_REALTIME_INTRO_MESSAGE = "Say exactly: I'm here and listening.";
102104

103105
export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
104106
enabled: true,
@@ -125,6 +127,7 @@ export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
125127
realtime: {
126128
provider: "openai",
127129
instructions: DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS,
130+
introMessage: DEFAULT_GOOGLE_MEET_REALTIME_INTRO_MESSAGE,
128131
toolPolicy: "safe-read-only",
129132
providers: {},
130133
},
@@ -339,6 +342,9 @@ export function resolveGoogleMeetConfigWithEnv(
339342
instructions:
340343
normalizeOptionalString(realtime.instructions) ??
341344
DEFAULT_GOOGLE_MEET_CONFIG.realtime.instructions,
345+
introMessage:
346+
normalizeOptionalString(realtime.introMessage) ??
347+
DEFAULT_GOOGLE_MEET_CONFIG.realtime.introMessage,
342348
toolPolicy: resolveToolPolicy(
343349
realtime.toolPolicy,
344350
DEFAULT_GOOGLE_MEET_CONFIG.realtime.toolPolicy,

extensions/google-meet/src/realtime-node.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ export type ChromeNodeRealtimeAudioBridgeHandle = {
1919
providerId: string;
2020
nodeId: string;
2121
bridgeId: string;
22+
speak: (instructions?: string) => void;
2223
stop: () => Promise<void>;
2324
};
2425

@@ -81,6 +82,8 @@ export async function startNodeRealtimeAudioBridge(params: {
8182
provider: resolved.provider,
8283
providerConfig: resolved.providerConfig,
8384
instructions: params.config.realtime.instructions,
85+
initialGreetingInstructions: params.config.realtime.introMessage,
86+
triggerGreetingOnReady: Boolean(params.config.realtime.introMessage),
8487
markStrategy: "ack-immediately",
8588
tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy),
8689
audioSink: {
@@ -188,6 +191,9 @@ export async function startNodeRealtimeAudioBridge(params: {
188191
providerId: resolved.provider.id,
189192
nodeId: params.nodeId,
190193
bridgeId: params.bridgeId,
194+
speak: (instructions) => {
195+
bridge?.triggerGreeting(instructions);
196+
},
191197
stop,
192198
};
193199
}

extensions/google-meet/src/realtime.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export type ChromeRealtimeAudioBridgeHandle = {
4141
providerId: string;
4242
inputCommand: string[];
4343
outputCommand: string[];
44+
speak: (instructions?: string) => void;
4445
stop: () => Promise<void>;
4546
};
4647

@@ -148,6 +149,8 @@ export async function startCommandRealtimeAudioBridge(params: {
148149
provider: resolved.provider,
149150
providerConfig: resolved.providerConfig,
150151
instructions: params.config.realtime.instructions,
152+
initialGreetingInstructions: params.config.realtime.introMessage,
153+
triggerGreetingOnReady: Boolean(params.config.realtime.introMessage),
151154
markStrategy: "ack-immediately",
152155
tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy),
153156
audioSink: {
@@ -210,6 +213,9 @@ export async function startCommandRealtimeAudioBridge(params: {
210213
providerId: resolved.provider.id,
211214
inputCommand: params.inputCommand,
212215
outputCommand: params.outputCommand,
216+
speak: (instructions) => {
217+
bridge?.triggerGreeting(instructions);
218+
},
213219
stop,
214220
};
215221
}

0 commit comments

Comments
 (0)