|
1 | | -import { useMicVAD } from '@ricky0123/vad-react' |
| 1 | +import type { Message } from '@xsai/shared-chat' |
| 2 | + |
| 3 | +import { useMicVAD, utils } from '@ricky0123/vad-react' |
| 4 | +import { generateSpeech } from '@xsai/generate-speech' |
| 5 | +import { generateText } from '@xsai/generate-text' |
| 6 | +import { generateTranscription } from '@xsai/generate-transcription' |
| 7 | +import { useDebouncedState } from 'foxact/use-debounced-state' |
| 8 | +import { useEffect, useRef } from 'react' |
| 9 | + |
| 10 | +import { useSetAudioBuffer } from '~/context/audio-buffer' |
| 11 | +import { useAudioContext } from '~/context/audio-context' |
| 12 | +import { useMessages } from '~/hooks/use-messages' |
| 13 | +import { useLLMProvider, useSTTProvider, useTTSProvider } from '~/hooks/use-providers' |
2 | 14 |
|
/**
 * Headless voice-chat component: renders nothing and drives a
 * speech -> transcription -> LLM reply -> synthesized speech pipeline.
 *
 * Flow, as wired below:
 * 1. `useMicVAD` reports the end of a speech segment; the audio is WAV-encoded
 *    and stored in a debounced state slot (`file`).
 * 2. When `file` settles on a new Blob, it is transcribed with the STT provider.
 * 3. The transcription is appended to the current message history as a `user`
 *    message and sent to the LLM provider.
 * 4. If the LLM returned text, the message history is updated, the text is
 *    synthesized with the TTS provider, decoded via the shared audio context,
 *    and published through `setAudioBuffer`.
 *
 * Failures anywhere in steps 2-4 are caught and logged rather than surfacing
 * as unhandled promise rejections.
 *
 * @returns Always `null`; the component exists only for its side effects.
 */
export const GalateaVAD = () => {
  const [llmProvider] = useLLMProvider()
  const [sttProvider] = useSTTProvider()
  const [ttsProvider] = useTTSProvider()

  const [msg, setMsg] = useMessages()
  // Mirror of `msg` so the async pipeline can read the latest history without
  // making `msg` a dependency of the processing effect.
  const msgRef = useRef<Message[]>(msg)

  const setAudioBuffer = useSetAudioBuffer()
  const audioContext = useAudioContext()

  // Debounced slot for the most recent recording (1000 is the delay argument).
  const [file, setFile] = useDebouncedState<Blob | undefined>(undefined, 1000)
  // Last Blob that was handed to the pipeline; guards against re-processing the
  // same recording when the effect re-runs because another dependency changed.
  const prevFile = useRef<Blob | undefined>(undefined)

  useMicVAD({
    model: 'v5',
    onSpeechEnd: (audio) => {
      if (import.meta.env.DEV)
        console.warn('onSpeechEnd')

      setFile(new Blob([utils.encodeWAV(audio)], { type: 'audio/wav' }))
    },
    onSpeechStart: () => {
      if (import.meta.env.DEV)
        console.warn('onSpeechStart')
    },
    startOnLoad: true,
  })

  useEffect(() => {
    msgRef.current = msg
  }, [msg])

  useEffect(() => {
    if (!file || file === prevFile.current)
      return

    prevFile.current = file

    const processAudio = async (): Promise<void> => {
      try {
        const { text: content } = await generateTranscription({ ...sttProvider, file })
        if (import.meta.env.DEV)
          console.warn('Transcription:', content)

        // Nothing intelligible was recognized: do not send an empty user
        // message to the LLM or record it in the history.
        if (!content?.trim())
          return

        const { messages, text: input } = await generateText({
          ...llmProvider,
          messages: [
            ...msgRef.current,
            { content, role: 'user' },
          ],
        })
        if (import.meta.env.DEV)
          console.warn('Response:', input)

        if (input != null) {
          setMsg(messages)
          const arrayBuffer = await generateSpeech({ ...ttsProvider, input })
          const audioBuffer = await audioContext.decodeAudioData(arrayBuffer)
          setAudioBuffer(audioBuffer)
        }
      }
      catch (error) {
        // The pipeline is fire-and-forget; without this handler a failed
        // request or decode would become an unhandled promise rejection.
        console.error('Failed to process speech:', error)
      }
    }

    void processAudio()
  }, [file, llmProvider, sttProvider, ttsProvider, audioContext, setMsg, setAudioBuffer])

  return null
}
0 commit comments