Skip to content

Commit 466f718

Browse files
committed
feat: wire talk handoff into native nodes
1 parent c434d77 commit 466f718

34 files changed

Lines changed: 2474 additions & 89 deletions

apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import ai.openclaw.app.node.Quad
3636
import ai.openclaw.app.node.SmsHandler
3737
import ai.openclaw.app.node.SmsManager
3838
import ai.openclaw.app.node.SystemHandler
39+
import ai.openclaw.app.node.TalkHandler
3940
import ai.openclaw.app.node.asObjectOrNull
4041
import ai.openclaw.app.node.asStringOrNull
4142
import ai.openclaw.app.node.invokeErrorFromThrowable
@@ -205,6 +206,16 @@ class NodeRuntime(
205206
deviceHandler = deviceHandler,
206207
notificationsHandler = notificationsHandler,
207208
systemHandler = systemHandler,
209+
// Adapter that routes each TalkHandler callback to the matching private handleTalkPtt* function.
// paramsJson is accepted to satisfy the interface but is not forwarded: the handlers take no arguments.
talkHandler =
210+
object : TalkHandler {
211+
override suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttStart()
212+
213+
override suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttStop()
214+
215+
override suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttCancel()
216+
217+
override suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttOnce()
218+
},
208219
photosHandler = photosHandler,
209220
contactsHandler = contactsHandler,
210221
calendarHandler = calendarHandler,
@@ -881,6 +892,80 @@ class NodeRuntime(
881892
setVoiceCaptureMode(if (value) VoiceCaptureMode.TalkMode else VoiceCaptureMode.Off)
882893
}
883894

895+
/**
 * Starts a push-to-talk capture.
 *
 * Talk capture is prepared first (see [runPreparedTalkPttCommand]); the payload returned by
 * `talkMode.beginPushToTalk()` is then serialized with `toJson()` and wrapped in an ok result.
 * Exceptions are handled by [runPreparedTalkPttCommand].
 */
private suspend fun handleTalkPttStart(): GatewaySession.InvokeResult {
    return runPreparedTalkPttCommand {
        val started = talkMode.beginPushToTalk()
        GatewaySession.InvokeResult.ok(started.toJson())
    }
}
900+
901+
/**
 * Ends the current push-to-talk capture.
 *
 * Calls `talkMode.endPushToTalk()`, releases talk-capture state if talk mode is idle
 * ([finishTalkCaptureIfIdle]), and returns the payload as JSON in an ok result.
 * Exceptions are handled by [runTalkPttCommand].
 */
private suspend fun handleTalkPttStop(): GatewaySession.InvokeResult {
    return runTalkPttCommand {
        // Order matters: end the capture, release state, and only then serialize the payload.
        val stopped = talkMode.endPushToTalk().also { finishTalkCaptureIfIdle() }
        GatewaySession.InvokeResult.ok(stopped.toJson())
    }
}
907+
908+
/**
 * Cancels the current push-to-talk capture.
 *
 * Calls `talkMode.cancelPushToTalk()`, releases talk-capture state if talk mode is idle
 * ([finishTalkCaptureIfIdle]), and returns the payload as JSON in an ok result.
 * Exceptions are handled by [runTalkPttCommand].
 */
private suspend fun handleTalkPttCancel(): GatewaySession.InvokeResult {
    return runTalkPttCommand {
        // Order matters: cancel the capture, release state, and only then serialize the payload.
        val cancelled = talkMode.cancelPushToTalk().also { finishTalkCaptureIfIdle() }
        GatewaySession.InvokeResult.ok(cancelled.toJson())
    }
}
914+
915+
/**
 * Runs a single push-to-talk cycle.
 *
 * Talk capture is prepared first (see [runPreparedTalkPttCommand]); then
 * `talkMode.runPushToTalkOnce()` is invoked, talk-capture state is released if talk mode is idle
 * ([finishTalkCaptureIfIdle]), and the payload is returned as JSON in an ok result.
 */
private suspend fun handleTalkPttOnce(): GatewaySession.InvokeResult {
    return runPreparedTalkPttCommand {
        // Order matters: run the cycle, release state, and only then serialize the payload.
        val outcome = talkMode.runPushToTalkOnce().also { finishTalkCaptureIfIdle() }
        GatewaySession.InvokeResult.ok(outcome.toJson())
    }
}
921+
922+
/**
 * Runs [block] after preparing talk capture, undoing the preparation if [block] fails.
 *
 * [prepareTalkCapture] runs outside the guarded section, so a failure while preparing does not
 * trigger [cleanupFailedTalkCapture]. Any throwable raised by [block] triggers the cleanup and is
 * then rethrown to [runTalkPttCommand], which wraps this whole sequence.
 *
 * @param block the command body to run once capture is prepared.
 * @return the result of [block], or whatever [runTalkPttCommand] produces for a failure.
 */
private suspend fun runPreparedTalkPttCommand(block: suspend () -> GatewaySession.InvokeResult): GatewaySession.InvokeResult {
    return runTalkPttCommand {
        prepareTalkCapture()
        val result =
            try {
                block()
            } catch (failure: Throwable) {
                cleanupFailedTalkCapture()
                throw failure
            }
        result
    }
}
932+
933+
/**
 * Runs [block] and converts a failure into an error [GatewaySession.InvokeResult].
 *
 * The error code and message are derived from the throwable by `invokeErrorFromThrowable`.
 *
 * Cancellation is not treated as a command failure: a `CancellationException` is rethrown so the
 * calling coroutine is cancelled normally instead of being reported as an invoke error.
 * NOTE(review): timeouts raised through `withTimeout` inside [block] are also
 * `CancellationException`s and will now propagate; confirm no talk-mode call relies on such a
 * timeout being reported as an error result.
 *
 * @param block the command body to execute.
 * @return the result of [block], or an error result built from the thrown exception.
 */
private suspend fun runTalkPttCommand(block: suspend () -> GatewaySession.InvokeResult): GatewaySession.InvokeResult =
    try {
        block()
    } catch (err: kotlin.coroutines.cancellation.CancellationException) {
        // Swallowing this would break structured concurrency; let cancellation propagate.
        throw err
    } catch (err: Throwable) {
        val (code, message) = invokeErrorFromThrowable(err)
        GatewaySession.InvokeResult.error(code = code, message = message)
    }
940+
941+
/**
 * Switches the node into talk capture before a push-to-talk command runs.
 *
 * Steps, in order: verify the microphone permission, disable mic capture, stop voice playback,
 * set the foreground service capture mode to [VoiceCaptureMode.TalkMode], enable TTS on all
 * responses, apply the current speaker preference to playback, ensure the chat subscription,
 * and mark external audio capture as active.
 *
 * @throws IllegalStateException with a `MIC_PERMISSION_REQUIRED:` message when the record-audio
 *   permission has not been granted.
 */
private suspend fun prepareTalkCapture() {
    check(hasRecordAudioPermission()) { "MIC_PERMISSION_REQUIRED: grant Microphone permission" }

    // Silence the other audio paths before talk mode takes over.
    micCapture.setMicEnabled(false)
    stopVoicePlayback()
    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode)

    // Configure talk mode itself.
    talkMode.ttsOnAllResponses = true
    talkMode.setPlaybackEnabled(speakerEnabled.value)
    talkMode.ensureChatSubscribed()

    externalAudioCaptureActive.value = true
}
953+
954+
/**
 * Rolls back talk-capture state after a prepared push-to-talk command failed.
 *
 * Cancelling push-to-talk is best effort: any throwable it raises is ignored so the remaining
 * state is always reset (TTS-on-all-responses off, capture mode [VoiceCaptureMode.Off],
 * external audio capture inactive).
 */
private suspend fun cleanupFailedTalkCapture() {
    try {
        talkMode.cancelPushToTalk()
    } catch (_: Throwable) {
        // Deliberately ignored: the state reset below must run regardless.
    }
    talkMode.ttsOnAllResponses = false
    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
    externalAudioCaptureActive.value = false
}
960+
961+
/**
 * Releases talk-capture state once talk mode is idle.
 *
 * Does nothing while talk mode is enabled, listening, or speaking. Otherwise it turns
 * TTS-on-all-responses off, sets the foreground service capture mode to
 * [VoiceCaptureMode.Off], and marks external audio capture as inactive.
 */
private fun finishTalkCaptureIfIdle() {
    val stillBusy = talkMode.isEnabled.value || talkMode.isListening.value || talkMode.isSpeaking.value
    if (stillBusy) return

    talkMode.ttsOnAllResponses = false
    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
    externalAudioCaptureActive.value = false
}
968+
884969
val speakerEnabled: StateFlow<Boolean>
885970
get() = prefs.speakerEnabled
886971

apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewayDiscovery.kt

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -278,14 +278,13 @@ class GatewayDiscovery(
278278
return legacyHostAddress(resolved)
279279
}
280280

281-
private fun legacyHostAddress(resolved: NsdServiceInfo): String? {
282-
return try {
281+
private fun legacyHostAddress(resolved: NsdServiceInfo): String? =
282+
try {
283283
val host = NsdServiceInfo::class.java.getMethod("getHost").invoke(resolved) as? InetAddress
284284
host?.hostAddress
285285
} catch (_: Throwable) {
286286
null
287287
}
288-
}
289288

290289
private fun publish() {
291290
_gateways.value =
@@ -529,20 +528,20 @@ class GatewayDiscovery(
529528
val cm = connectivity ?: return null
530529

531530
// Prefer VPN (Tailscale) when present; otherwise use the active network.
532-
trackedNetworks(cm).firstOrNull { n ->
533-
val caps = cm.getNetworkCapabilities(n) ?: return@firstOrNull false
534-
caps.hasTransport(NetworkCapabilities.TRANSPORT_VPN)
535-
}?.let { return it }
531+
trackedNetworks(cm)
532+
.firstOrNull { n ->
533+
val caps = cm.getNetworkCapabilities(n) ?: return@firstOrNull false
534+
caps.hasTransport(NetworkCapabilities.TRANSPORT_VPN)
535+
}?.let { return it }
536536

537537
return cm.activeNetwork
538538
}
539539

540-
private fun trackedNetworks(cm: ConnectivityManager): List<Network> {
541-
return buildList {
540+
private fun trackedNetworks(cm: ConnectivityManager): List<Network> =
541+
buildList {
542542
cm.activeNetwork?.let(::add)
543543
addAll(availableNetworks)
544544
}.distinct()
545-
}
546545

547546
private fun createDirectResolver(): Resolver? {
548547
val cm = connectivity ?: return null

apps/android/app/src/main/java/ai/openclaw/app/node/InvokeCommandRegistry.kt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import ai.openclaw.app.protocol.OpenClawNotificationsCommand
1414
import ai.openclaw.app.protocol.OpenClawPhotosCommand
1515
import ai.openclaw.app.protocol.OpenClawSmsCommand
1616
import ai.openclaw.app.protocol.OpenClawSystemCommand
17+
import ai.openclaw.app.protocol.OpenClawTalkCommand
1718

1819
data class NodeRuntimeFlags(
1920
val cameraEnabled: Boolean,
@@ -81,6 +82,7 @@ object InvokeCommandRegistry {
8182
name = OpenClawCapability.VoiceWake.rawValue,
8283
availability = NodeCapabilityAvailability.VoiceWakeEnabled,
8384
),
85+
NodeCapabilitySpec(name = OpenClawCapability.Talk.rawValue),
8486
NodeCapabilitySpec(
8587
name = OpenClawCapability.Location.rawValue,
8688
availability = NodeCapabilityAvailability.LocationEnabled,
@@ -135,6 +137,18 @@ object InvokeCommandRegistry {
135137
InvokeCommandSpec(
136138
name = OpenClawSystemCommand.Notify.rawValue,
137139
),
140+
InvokeCommandSpec(
141+
name = OpenClawTalkCommand.PttStart.rawValue,
142+
),
143+
InvokeCommandSpec(
144+
name = OpenClawTalkCommand.PttStop.rawValue,
145+
),
146+
InvokeCommandSpec(
147+
name = OpenClawTalkCommand.PttCancel.rawValue,
148+
),
149+
InvokeCommandSpec(
150+
name = OpenClawTalkCommand.PttOnce.rawValue,
151+
),
138152
InvokeCommandSpec(
139153
name = OpenClawCameraCommand.List.rawValue,
140154
requiresForeground = true,

apps/android/app/src/main/java/ai/openclaw/app/node/InvokeDispatcher.kt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import ai.openclaw.app.protocol.OpenClawMotionCommand
1313
import ai.openclaw.app.protocol.OpenClawNotificationsCommand
1414
import ai.openclaw.app.protocol.OpenClawSmsCommand
1515
import ai.openclaw.app.protocol.OpenClawSystemCommand
16+
import ai.openclaw.app.protocol.OpenClawTalkCommand
1617

1718
internal enum class SmsSearchAvailabilityReason {
1819
Available,
@@ -59,6 +60,7 @@ class InvokeDispatcher(
5960
private val deviceHandler: DeviceHandler,
6061
private val notificationsHandler: NotificationsHandler,
6162
private val systemHandler: SystemHandler,
63+
private val talkHandler: TalkHandler,
6264
private val photosHandler: PhotosHandler,
6365
private val contactsHandler: ContactsHandler,
6466
private val calendarHandler: CalendarHandler,
@@ -188,6 +190,12 @@ class InvokeDispatcher(
188190
// System command
189191
OpenClawSystemCommand.Notify.rawValue -> systemHandler.handleSystemNotify(paramsJson)
190192

193+
// Talk commands
194+
OpenClawTalkCommand.PttStart.rawValue -> talkHandler.handlePttStart(paramsJson)
195+
OpenClawTalkCommand.PttStop.rawValue -> talkHandler.handlePttStop(paramsJson)
196+
OpenClawTalkCommand.PttCancel.rawValue -> talkHandler.handlePttCancel(paramsJson)
197+
OpenClawTalkCommand.PttOnce.rawValue -> talkHandler.handlePttOnce(paramsJson)
198+
191199
// Photos command
192200
ai.openclaw.app.protocol.OpenClawPhotosCommand.Latest.rawValue ->
193201
photosHandler.handlePhotosLatest(
@@ -336,3 +344,13 @@ class InvokeDispatcher(
336344
}
337345
}
338346
}
347+
348+
/**
 * Handlers for the talk push-to-talk invoke commands.
 *
 * InvokeDispatcher routes each `OpenClawTalkCommand` raw value to the matching method here and
 * passes along the `paramsJson` it received (which may be null).
 */
interface TalkHandler {
349+
/** Handles `OpenClawTalkCommand.PttStart`. */
suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult
350+
351+
/** Handles `OpenClawTalkCommand.PttStop`. */
suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult
352+
353+
/** Handles `OpenClawTalkCommand.PttCancel`. */
suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult
354+
355+
/** Handles `OpenClawTalkCommand.PttOnce`. */
suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult
356+
}

apps/android/app/src/main/java/ai/openclaw/app/protocol/OpenClawProtocolConstants.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ enum class OpenClawCapability(
77
Camera("camera"),
88
Sms("sms"),
99
VoiceWake("voiceWake"),
10+
Talk("talk"),
1011
Location("location"),
1112
Device("device"),
1213
Notifications("notifications"),
@@ -71,6 +72,20 @@ enum class OpenClawSmsCommand(
7172
}
7273
}
7374

75+
/**
 * Invoke command names in the talk namespace.
 *
 * [rawValue] is the command string the dispatcher matches incoming invokes against.
 */
enum class OpenClawTalkCommand(
76+
val rawValue: String,
77+
) {
78+
PttStart("talk.ptt.start"),
79+
PttStop("talk.ptt.stop"),
80+
PttCancel("talk.ptt.cancel"),
81+
PttOnce("talk.ptt.once"),
82+
;
83+
84+
companion object {
85+
// Prefix shared by every raw value above.
const val NamespacePrefix: String = "talk."
86+
}
87+
}
88+
7489
enum class OpenClawLocationCommand(
7590
val rawValue: String,
7691
) {
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package ai.openclaw.app.voice
2+
3+
import kotlinx.serialization.json.JsonArray
4+
import kotlinx.serialization.json.JsonElement
5+
import kotlinx.serialization.json.JsonObject
6+
import kotlinx.serialization.json.JsonPrimitive
7+
8+
/**
 * Extracts the assistant's plain text from chat event JSON.
 *
 * A message is accepted when its `role` is `"assistant"` or is absent or not a string. Its
 * `content` may be a JSON string or an array whose items are either strings or objects with a
 * `text` field (and a `type` that is `"text"` or absent).
 */
internal object ChatEventText {
    /** Reads the `message` member of [payload] and returns its assistant text, or null. */
    fun assistantTextFromPayload(payload: JsonObject): String? = assistantTextFromMessage(payload["message"])

    /**
     * Returns the trimmed, non-empty text of [messageEl], or null when it is not an object,
     * carries a role other than `"assistant"`, or has no usable text content.
     */
    fun assistantTextFromMessage(messageEl: JsonElement?): String? {
        val message = messageEl.asObjectOrNull() ?: return null
        return when (message["role"].asStringOrNull()) {
            null, "assistant" -> textFromContent(message["content"])
            else -> null
        }
    }

    /** Flattens a `content` element into text; array parts are joined with newlines. */
    private fun textFromContent(content: JsonElement?): String? {
        if (content is JsonArray) {
            // Each extracted part is already trimmed and non-empty, so no further filtering is needed.
            val joined = content.mapNotNull(::textFromContentPart).joinToString("\n")
            return joined.takeIf { it.isNotBlank() }
        }
        if (content is JsonPrimitive) {
            return content.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
        }
        return null
    }

    /** Extracts the text of one content part: a bare string, or an object's `text` field. */
    private fun textFromContentPart(part: JsonElement): String? {
        val literal = part.asStringOrNull()?.trim()
        if (!literal.isNullOrEmpty()) return literal

        val obj = part.asObjectOrNull() ?: return null
        return when (obj["type"].asStringOrNull()) {
            null, "text" -> obj["text"].asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
            else -> null
        }
    }
}
42+
43+
/** Returns the receiver as a [JsonObject], or null when it is null or another kind of element. */
private fun JsonElement?.asObjectOrNull(): JsonObject? = if (this is JsonObject) this else null
44+
45+
/** Returns the content of a JSON string primitive, or null for anything else (including non-string primitives). */
private fun JsonElement?.asStringOrNull(): String? {
    val primitive = this as? JsonPrimitive ?: return null
    return if (primitive.isString) primitive.content else null
}

apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import kotlinx.coroutines.flow.StateFlow
2121
import kotlinx.coroutines.launch
2222
import kotlinx.coroutines.withContext
2323
import kotlinx.serialization.json.Json
24-
import kotlinx.serialization.json.JsonArray
2524
import kotlinx.serialization.json.JsonObject
2625
import kotlinx.serialization.json.JsonPrimitive
2726
import java.util.UUID
@@ -596,20 +595,7 @@ class MicCaptureManager(
596595
PackageManager.PERMISSION_GRANTED
597596
)
598597

599-
private fun parseAssistantText(payload: JsonObject): String? {
600-
val message = payload["message"].asObjectOrNull() ?: return null
601-
if (message["role"].asStringOrNull() != "assistant") return null
602-
val content = message["content"] as? JsonArray ?: return null
603-
604-
val parts =
605-
content.mapNotNull { item ->
606-
val obj = item.asObjectOrNull() ?: return@mapNotNull null
607-
if (obj["type"].asStringOrNull() != "text") return@mapNotNull null
608-
obj["text"].asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
609-
}
610-
if (parts.isEmpty()) return null
611-
return parts.joinToString("\n")
612-
}
598+
/** Extracts the assistant text from a chat event payload by delegating to [ChatEventText]. */
private fun parseAssistantText(payload: JsonObject): String? {
    return ChatEventText.assistantTextFromPayload(payload)
}
613599

614600
private val listener =
615601
object : RecognitionListener {

apps/android/app/src/main/java/ai/openclaw/app/voice/TalkAudioPlayer.kt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,26 @@ import kotlinx.coroutines.delay
1212
import kotlinx.coroutines.withContext
1313
import java.io.File
1414

15+
/** Audio playback surface for talk mode; implemented by TalkAudioPlayer. */
internal interface TalkAudioPlaying {
16+
/** Plays [audio]. */
suspend fun play(audio: TalkSpeakAudio)
17+
18+
/** Stops playback; in TalkAudioPlayer this cancels the active playback, if any. */
fun stop()
19+
}
20+
1521
internal class TalkAudioPlayer(
1622
private val context: Context,
17-
) {
23+
) : TalkAudioPlaying {
1824
private val lock = Any()
1925
private var active: ActivePlayback? = null
2026

21-
suspend fun play(audio: TalkSpeakAudio) {
27+
override suspend fun play(audio: TalkSpeakAudio) {
2228
when (val mode = resolvePlaybackMode(audio)) {
2329
is TalkPlaybackMode.Pcm -> playPcm(audio.bytes, mode.sampleRate)
2430
is TalkPlaybackMode.Compressed -> playCompressed(audio.bytes, mode.fileExtension)
2531
}
2632
}
2733

28-
fun stop() {
34+
override fun stop() {
2935
synchronized(lock) {
3036
active?.cancel()
3137
active = null

0 commit comments

Comments
 (0)