Skip to content

Commit fa91639

Browse files
committed
feat(app): voice conversation
1 parent a61a7ca commit fa91639

File tree

1 file changed

+70
-2
lines changed

1 file changed

+70
-2
lines changed
Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,78 @@
1-
import { useMicVAD } from '@ricky0123/vad-react'
1+
import type { Message } from '@xsai/shared-chat'
2+
3+
import { useMicVAD, utils } from '@ricky0123/vad-react'
4+
import { generateSpeech } from '@xsai/generate-speech'
5+
import { generateText } from '@xsai/generate-text'
6+
import { generateTranscription } from '@xsai/generate-transcription'
7+
import { useDebouncedState } from 'foxact/use-debounced-state'
8+
import { useEffect, useRef } from 'react'
9+
10+
import { useSetAudioBuffer } from '~/context/audio-buffer'
11+
import { useAudioContext } from '~/context/audio-context'
12+
import { useMessages } from '~/hooks/use-messages'
13+
import { useLLMProvider, useSTTProvider, useTTSProvider } from '~/hooks/use-providers'
214

315
/**
 * Headless component that runs the voice-conversation loop.
 *
 * Flow: microphone VAD reports the end of a speech segment -> the segment is
 * encoded as a WAV blob and stored in debounced state -> once that state
 * settles the blob is transcribed, the transcription is appended to the chat
 * history and sent to the LLM, and the reply is synthesised to speech, decoded
 * and handed to the shared audio-buffer context.
 *
 * Renders nothing; it exists only for its side effects.
 */
export const GalateaVAD = () => {
  const [llmProvider] = useLLMProvider()
  const [sttProvider] = useSTTProvider()
  const [ttsProvider] = useTTSProvider()

  const [msg, setMsg] = useMessages()
  // Mirror of `msg` so the async pipeline can read the current history
  // without `msg` being a dependency of the processing effect (which would
  // otherwise re-run every time the history changes).
  const msgRef = useRef<Message[]>(msg)

  const setAudioBuffer = useSetAudioBuffer()
  const audioContext = useAudioContext()

  // Latest recorded utterance. NOTE(review): the second argument is presumably
  // the debounce delay in milliseconds - confirm against foxact's docs.
  const [file, setFile] = useDebouncedState<Blob | undefined>(undefined, 1000)
  // Last blob that was already handed to the pipeline; prevents the same
  // recording from being processed again when only other dependencies change.
  const prevFile = useRef<Blob | undefined>(undefined)

  useMicVAD({
    model: 'v5',
    onSpeechEnd: (audio) => {
      if (import.meta.env.DEV)
        console.warn('onSpeechEnd')

      setFile(new Blob([utils.encodeWAV(audio)], { type: 'audio/wav' }))
    },
    onSpeechStart: () => import.meta.env.DEV && console.warn('onSpeechStart'),
    startOnLoad: true,
  })

  useEffect(() => {
    msgRef.current = msg
  }, [msg])

  useEffect(() => {
    if (!file || file === prevFile.current)
      return

    prevFile.current = file

    const processAudio = async () => {
      try {
        const { text: content } = await generateTranscription({ ...sttProvider, file })
        if (import.meta.env.DEV)
          console.warn('Transcription:', content)

        // Nothing usable was transcribed: do not add an empty user turn to
        // the history or spend an LLM request on it.
        if (typeof content !== 'string' || content.trim() === '')
          return

        const { messages, text: input } = await generateText({
          ...llmProvider,
          messages: [
            ...msgRef.current,
            { content, role: 'user' },
          ],
        })
        if (import.meta.env.DEV)
          console.warn('Response:', input)

        if (input != null) {
          setMsg(messages)
          const arrayBuffer = await generateSpeech({ ...ttsProvider, input })
          const audioBuffer = await audioContext.decodeAudioData(arrayBuffer)
          setAudioBuffer(audioBuffer)
        }
      }
      catch (error) {
        // The pipeline is fire-and-forget (`void processAudio()`), so without
        // this handler any provider or decoding failure would surface as an
        // unhandled promise rejection.
        console.error('Voice conversation pipeline failed:', error)
      }
    }

    void processAudio()
  }, [file, llmProvider, sttProvider, ttsProvider, audioContext, setMsg, setAudioBuffer])

  return null
}

0 commit comments

Comments
 (0)