Skip to content

Commit 9ca52ce

Browse files
authored
[codex] improve iOS realtime talk mode (#86355)
Merged via squash. Prepared head SHA: 3f5aedb Co-authored-by: ngutman <1540134+ngutman@users.noreply.github.com> Co-authored-by: ngutman <1540134+ngutman@users.noreply.github.com> Reviewed-by: @ngutman
1 parent 5e94469 commit 9ca52ce

15 files changed

Lines changed: 2301 additions & 247 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
1010
- Control UI: add an ephemeral Activity tab for sanitized live tool activity summaries without persisting raw telemetry. Fixes #12831. Thanks @BunsDev.
1111
- Build: include `ui:build` in the `full` and `ciArtifacts` profiles of `scripts/build-all.mjs` so `pnpm build` always rebuilds `dist/control-ui` after `tsdown` cleans `dist`, removing the second-command requirement and the missing-asset failure mode for source/runtime installs and CI artifact uploads. (#85206)
1212
- Migrate: import supported Hermes, OpenCode, and Codex auth credentials into OpenClaw auth profiles when credential migration is selected, with explicit opt-out and non-interactive controls. (#85667) Thanks @fuller-stack-dev.
13+
- iOS: improve Talk mode with direct realtime voice sessions, compact toolbar status, and responsive voice waveform feedback. (#86355) Thanks @ngutman.
1314

1415
### Fixes
1516

apps/ios/Sources/HomeToolbar.swift

Lines changed: 357 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,363 @@ struct HomeToolbar: View {
7575
}
7676
}
7777

78+
/// Talk-mode tray for the home toolbar.
///
/// Lays out, left to right: a tinted status badge, a title with an optional
/// progress spinner above a waveform and subtitle, and a trailing action
/// button. Every derived presentation value (title, icon, waveform mode,
/// action) comes from `TalkToolbarTrayState`.
struct TalkToolbarTray: View {
    /// Selects the lighter of two opacities for the stop-button fill, the top
    /// hairline, and the bottom gradient.
    var brighten: Bool
    /// Accent color for the badge, the waveform, and the bottom gradient.
    var tint: Color
    /// Raw Talk status string; interpreted by `TalkToolbarTrayState`.
    var statusText: String
    /// Agent name shown as the subtitle when no permission copy is preferred.
    var agentName: String
    /// Microphone level forwarded to the waveform mode.
    var micLevel: Double
    var isListening: Bool
    var isSpeaking: Bool
    var isUserSpeechDetected: Bool
    var permissionState: TalkGatewayPermissionState
    /// Invoked by the "Enable Talk" button.
    var onEnableTalk: () -> Void
    /// Invoked by the "Stop Talk" button.
    var onStopTalk: () -> Void

    @Environment(\.colorSchemeContrast) private var contrast

    /// Presentation state rebuilt from the current inputs.
    private var state: TalkToolbarTrayState {
        TalkToolbarTrayState(
            statusText: self.statusText,
            isListening: self.isListening,
            isSpeaking: self.isSpeaking,
            isUserSpeechDetected: self.isUserSpeechDetected,
            permissionState: self.permissionState)
    }

    /// Whether the environment reports increased color-scheme contrast.
    private var usesIncreasedContrast: Bool {
        self.contrast == .increased
    }

    var body: some View {
        HStack(spacing: 12) {
            self.statusBadge
            self.statusLabels
            Spacer(minLength: 0)
            self.trailingAction
        }
        .padding(.horizontal, 14)
        .padding(.vertical, 12)
        .frame(maxWidth: .infinity)
        .background(.ultraThinMaterial)
        .overlay(alignment: .top) { self.topHairline }
        .overlay(alignment: .bottom) { self.bottomGlow }
        .accessibilityElement(children: .combine)
        .accessibilityLabel("Talk Mode")
        .accessibilityValue("\(self.state.title), \(self.subtitle)")
    }

    /// Circular badge with the state's SF Symbol drawn on top.
    private var statusBadge: some View {
        ZStack {
            Circle()
                .fill(self.tint.opacity(self.state.iconFillOpacity))
                .frame(width: 36, height: 36)
            Image(systemName: self.state.systemImage)
                .font(.system(size: 15, weight: .semibold))
                .foregroundStyle(self.state.iconColor(tint: self.tint))
        }
    }

    /// Title row (with optional spinner) stacked above the waveform row.
    private var statusLabels: some View {
        VStack(alignment: .leading, spacing: 5) {
            HStack(spacing: 8) {
                Text(self.state.title)
                    .font(.subheadline.weight(.semibold))
                    .foregroundStyle(.primary)
                    .lineLimit(1)

                if self.state.showsProgress {
                    ProgressView()
                        .controlSize(.mini)
                }
            }

            HStack(spacing: 8) {
                // The waveform is hidden from accessibility; the tray exposes
                // its own label and value instead.
                TalkWaveformView(
                    mode: self.state.waveformMode(micLevel: self.micLevel),
                    tint: self.state.waveformTint(tint: self.tint))
                    .frame(width: 84, height: 18)
                    .accessibilityHidden(true)

                Text(self.subtitle)
                    .font(.caption.weight(.medium))
                    .foregroundStyle(.secondary)
                    .lineLimit(1)
            }
        }
    }

    /// Trailing control selected by `TalkToolbarTrayState.action`.
    @ViewBuilder
    private var trailingAction: some View {
        switch self.state.action {
        case .enable:
            Button(action: self.onEnableTalk) {
                Label("Enable Talk", systemImage: "key.fill")
                    .labelStyle(.titleAndIcon)
            }
            .font(.caption.weight(.semibold))
            .buttonStyle(.borderedProminent)
            .controlSize(.small)
        case .stop:
            Button(action: self.onStopTalk) {
                Image(systemName: "xmark")
                    .font(.system(size: 13, weight: .bold))
                    .frame(width: 28, height: 28)
            }
            .buttonStyle(.plain)
            .background { self.stopButtonBackdrop }
            .accessibilityLabel("Stop Talk")
        case .none:
            EmptyView()
        }
    }

    /// Dark disc with a thin white ring drawn behind the stop glyph.
    private var stopButtonBackdrop: some View {
        let fillAlpha = self.brighten ? 0.10 : 0.18
        let ringAlpha = self.usesIncreasedContrast ? 0.42 : 0.16
        let ringWidth: CGFloat = self.usesIncreasedContrast ? 1.0 : 0.6
        return Circle()
            .fill(Color.black.opacity(fillAlpha))
            .overlay {
                Circle()
                    .strokeBorder(.white.opacity(ringAlpha), lineWidth: ringWidth)
            }
    }

    /// White hairline along the top edge; thicker and more opaque under
    /// increased contrast.
    private var topHairline: some View {
        let alpha = self.usesIncreasedContrast ? 0.46 : (self.brighten ? 0.18 : 0.12)
        let thickness: CGFloat = self.usesIncreasedContrast ? 1.0 : 0.6
        return Rectangle()
            .fill(.white.opacity(alpha))
            .frame(height: thickness)
            .allowsHitTesting(false)
    }

    /// One-point tinted gradient along the bottom edge, fading to clear
    /// toward the trailing side.
    private var bottomGlow: some View {
        LinearGradient(
            colors: [
                self.tint.opacity(self.brighten ? 0.12 : 0.16),
                .clear,
            ],
            startPoint: .leading,
            endPoint: .trailing)
            .frame(height: 1)
            .allowsHitTesting(false)
    }

    /// Secondary line: permission copy when the state prefers it, otherwise
    /// the trimmed agent name, falling back to "OpenClaw" when that is empty.
    private var subtitle: String {
        guard !self.state.prefersPermissionCopy else {
            return "Gateway approval needed"
        }
        let agent = self.agentName.trimmingCharacters(in: .whitespacesAndNewlines)
        return agent.isEmpty ? "OpenClaw" : agent
    }
}
210+
211+
/// Trailing control the Talk toolbar tray shows for the current state.
///
/// - `none`: no control is shown.
/// - `enable`: the "Enable Talk" button.
/// - `stop`: the "Stop Talk" button.
private enum TalkToolbarTrayAction {
    case none, enable, stop
}
216+
217+
/// Rendering mode for the Talk waveform bars.
private enum TalkWaveformMode: Equatable {
    /// Bars follow the supplied level; the renderer clamps it to 0...1.
    case level(Double)
    /// Synthetic patterns that do not depend on a measured level.
    case inputSpeech, speaking, indeterminate
    /// Constant bar heights with no time-based motion.
    case still
}
224+
225+
/// Presentation state for `TalkToolbarTray`.
///
/// Derives the title, SF Symbol, trailing action, tints, and waveform mode
/// from the raw status string, the listening/speaking flags, and the gateway
/// permission state. Permission states handled explicitly below always take
/// precedence over the speaking/listening flags and the status text.
private struct TalkToolbarTrayState: Equatable {
    let statusText: String
    let isListening: Bool
    let isSpeaking: Bool
    let isUserSpeechDetected: Bool
    let permissionState: TalkGatewayPermissionState

    /// Status text with surrounding whitespace and newlines removed.
    private var trimmedStatus: String {
        self.statusText.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    /// Trimmed, lowercased status used for keyword matching.
    private var normalizedStatus: String {
        self.trimmedStatus.lowercased()
    }

    /// Background activity recognised in the status text.
    private enum StatusActivity {
        case connecting
        case thinking
    }

    /// Activity named by the status text, if any.
    ///
    /// "connecting" is checked first so that `title` and `systemImage` agree
    /// when a status string happens to contain both keywords.
    private var statusActivity: StatusActivity? {
        let status = self.normalizedStatus
        if status.contains("connecting") { return .connecting }
        if status.contains("thinking") { return .thinking }
        return nil
    }

    /// Primary line of the tray.
    var title: String {
        switch self.permissionState {
        case .missingScope, .requestFailed:
            return "Gateway permission required"
        case .requestingUpgrade:
            return "Requesting approval"
        case .upgradeRequested:
            return "Approval requested"
        default:
            break
        }

        if self.isSpeaking { return "Speaking" }
        if self.isListening { return "Listening" }

        switch self.statusActivity {
        case .connecting?:
            return "Connecting"
        case .thinking?:
            return "Asking OpenClaw"
        case nil:
            break
        }

        let status = self.normalizedStatus
        if status == "ready" { return "Ready to talk" }
        if status.isEmpty || status == "off" { return "Talk" }
        // Unrecognised status: show it as given, minus stray whitespace, so a
        // trailing newline cannot leak into the single-line title.
        return self.trimmedStatus
    }

    /// SF Symbol name for the status badge.
    var systemImage: String {
        switch self.permissionState {
        case .missingScope, .requestFailed:
            return "key.fill"
        case .requestingUpgrade:
            return "paperplane.fill"
        case .upgradeRequested:
            return "hourglass"
        default:
            break
        }

        if self.isSpeaking { return "speaker.wave.2.fill" }
        if self.isListening { return "mic.fill" }

        switch self.statusActivity {
        case .connecting?:
            return "dot.radiowaves.left.and.right"
        case .thinking?:
            return "sparkles"
        case nil:
            return "waveform"
        }
    }

    /// Trailing control to offer for the current permission state.
    var action: TalkToolbarTrayAction {
        switch self.permissionState {
        case .missingScope, .requestFailed:
            .enable
        case .requestingUpgrade, .upgradeRequested:
            .none
        default:
            .stop
        }
    }

    /// Whether a spinner accompanies the title.
    var showsProgress: Bool {
        switch self.permissionState {
        case .requestingUpgrade, .upgradeRequested:
            true
        default:
            self.statusActivity != nil
        }
    }

    /// Whether permission-related copy and styling replace the normal ones.
    var prefersPermissionCopy: Bool {
        switch self.permissionState {
        case .missingScope, .requestingUpgrade, .upgradeRequested, .requestFailed:
            true
        default:
            false
        }
    }

    /// Opacity applied to the tint behind the status icon.
    var iconFillOpacity: Double {
        self.prefersPermissionCopy ? 0.18 : 0.24
    }

    /// Color of the status icon: red after a failed request, orange for the
    /// other permission states, otherwise `tint`.
    func iconColor(tint: Color) -> Color {
        self.permissionColor(fallback: tint)
    }

    /// Color of the waveform bars; uses the same mapping as `iconColor(tint:)`.
    func waveformTint(tint: Color) -> Color {
        self.permissionColor(fallback: tint)
    }

    /// Waveform mode for the current state, given the live microphone level.
    func waveformMode(micLevel: Double) -> TalkWaveformMode {
        switch self.permissionState {
        case .requestingUpgrade, .upgradeRequested:
            return .indeterminate
        case .missingScope, .requestFailed:
            return .still
        default:
            break
        }

        if self.isSpeaking {
            return .speaking
        }
        if self.isListening {
            return self.isUserSpeechDetected ? .inputSpeech : .level(micLevel)
        }
        return self.statusActivity != nil ? .indeterminate : .still
    }

    /// Shared permission-to-color mapping behind the icon and waveform tints.
    private func permissionColor(fallback: Color) -> Color {
        switch self.permissionState {
        case .requestFailed:
            .red
        case .missingScope, .requestingUpgrade, .upgradeRequested:
            .orange
        default:
            fallback
        }
    }
}
356+
357+
/// Row of capsule bars that visualises a `TalkWaveformMode`.
///
/// Bar heights are recomputed from the timeline date at up to 24 updates per
/// second. When the heights cannot change with time (the `.still` mode, or
/// Reduce Motion is enabled) the timeline is paused so the view is not
/// re-rendered needlessly.
private struct TalkWaveformView: View {
    var mode: TalkWaveformMode
    var tint: Color

    @Environment(\.accessibilityReduceMotion) private var reduceMotion

    private let barCount = 14

    /// True when `amplitude(for:date:)` ignores its `date` argument.
    private var isTimeInvariant: Bool {
        self.reduceMotion || self.mode == .still
    }

    var body: some View {
        // A paused schedule stops the periodic ticks but still lets the
        // content refresh when `mode` or `tint` changes.
        TimelineView(.animation(minimumInterval: 1.0 / 24.0, paused: self.isTimeInvariant)) { timeline in
            HStack(alignment: .center, spacing: 3) {
                ForEach(0..<self.barCount, id: \.self) { index in
                    Capsule(style: .continuous)
                        .fill(self.tint.opacity(self.opacity(for: index)))
                        .frame(width: 3, height: self.height(for: index, date: timeline.date))
                }
            }
            .frame(maxHeight: .infinity)
        }
    }

    /// Maps the 0...1 amplitude of bar `index` onto a height of 4...18 points.
    private func height(for index: Int, date: Date) -> CGFloat {
        let minimum: Double = 4
        let maximum: Double = 18
        let amplitude = self.amplitude(for: index, date: date)
        return CGFloat(minimum + ((maximum - minimum) * amplitude))
    }

    /// Bar opacity: in `.still` mode the centre bar is emphasised and the
    /// rest dimmed; every other mode uses one fixed opacity.
    private func opacity(for index: Int) -> Double {
        switch self.mode {
        case .still:
            index == self.barCount / 2 ? 0.64 : 0.32
        default:
            0.78
        }
    }

    /// Clamps `level` to `floor...1`. NaN maps to `floor`, because it would
    /// otherwise pass through `min`/`max` and yield a non-finite bar height.
    private func clampedLevel(_ level: Double, floor: Double) -> Double {
        guard !level.isNaN else { return floor }
        return min(max(level, floor), 1.0)
    }

    /// Normalised amplitude (0...1) of bar `index` at `date`.
    ///
    /// With Reduce Motion enabled every mode returns a constant that does not
    /// depend on `date` or `index`. Otherwise each mode combines sine waves
    /// whose phase advances with the bar index.
    private func amplitude(for index: Int, date: Date) -> Double {
        if self.reduceMotion {
            switch self.mode {
            case let .level(level):
                return self.clampedLevel(level, floor: 0.10)
            case .inputSpeech:
                return 0.72
            case .speaking:
                return 0.62
            case .indeterminate:
                return 0.34
            case .still:
                return 0.18
            }
        }

        let t = date.timeIntervalSinceReferenceDate
        let phase = Double(index) * 0.52
        switch self.mode {
        case let .level(level):
            let clamped = self.clampedLevel(level, floor: 0)
            // Keep a small baseline so the bars stay visible at zero level.
            let shaped = 0.12 + (0.88 * clamped)
            let variation = 0.72 + (0.28 * sin((t * 12.0) + phase))
            return min(max(shaped * variation, 0.10), 1.0)
        case .inputSpeech:
            let primary = 0.5 + (0.5 * sin((t * 14.0) + phase))
            let secondary = 0.5 + (0.5 * sin((t * 5.0) + (phase * 1.35)))
            return min(max(0.16 + (0.60 * primary) + (0.24 * secondary), 0.14), 1.0)
        case .speaking:
            let wave = 0.5 + (0.5 * sin((t * 7.5) + phase))
            let secondary = 0.5 + (0.5 * sin((t * 3.0) + (phase * 0.7)))
            return min(max(0.18 + (0.58 * wave) + (0.24 * secondary), 0.12), 1.0)
        case .indeterminate:
            let center = (sin((t * 3.2) + phase) + 1) / 2
            return 0.16 + (0.42 * center)
        case .still:
            return index == self.barCount / 2 ? 0.32 : 0.16
        }
    }
}
434+
78435
private struct HomeToolbarStatusButton: View {
79436
@Environment(\.scenePhase) private var scenePhase
80437
@Environment(\.accessibilityReduceMotion) private var reduceMotion

0 commit comments

Comments
 (0)