Skip to content

Commit cca672b

Browse files
ayangweb, medcl, RainyNight9
authored
feat: text to speech now powered by LLM (#750)
* feat: support text to speech * chore: receive bytes stream * chore: update testing code * feat: mp3 play * update * docs: update changelog * update * update * update --------- Co-authored-by: medcl <m@medcl.net> Co-authored-by: rain9 <15911122312@163.com>
1 parent 5b27488 commit cca672b

11 files changed

Lines changed: 352 additions & 99 deletions

File tree

docs/content.en/docs/release-notes/_index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Information about release notes of Coco Server is provided here.
1515

1616
- feat: file search using spotlight #705
1717
- feat: voice input support in both search and chat modes #732
18+
- feat: text to speech now powered by LLM #750
1819

1920
### 🐛 Bug fix
2021

src-tauri/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ pub fn run() {
169169
extension::built_in::file_search::get_file_system_config,
170170
#[cfg(target_os = "macos")]
171171
extension::built_in::file_search::set_file_system_config,
172+
server::synthesize::synthesize,
172173
])
173174
.setup(|app| {
174175
#[cfg(target_os = "macos")]

src-tauri/src/server/http_client.rs

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub(crate) fn new_reqwest_http_client(accept_invalid_certs: bool) -> Client {
1010
Client::builder()
1111
.read_timeout(Duration::from_secs(3)) // Set a timeout of 3 seconds
1212
.connect_timeout(Duration::from_secs(3)) // Set a timeout of 3 seconds
13-
.timeout(Duration::from_secs(10)) // Set a timeout of 10 seconds
13+
.timeout(Duration::from_secs(5 * 60)) // Set a timeout of 5 minutes
1414
.danger_accept_invalid_certs(accept_invalid_certs) // allow self-signed certificates
1515
.build()
1616
.expect("Failed to build client")
@@ -106,10 +106,8 @@ impl HttpClient {
106106
}
107107

108108
if let Some(params) = query_params {
109-
let query: Vec<(&str, &str)> = params
110-
.iter()
111-
.filter_map(|s| s.split_once('='))
112-
.collect();
109+
let query: Vec<(&str, &str)> =
110+
params.iter().filter_map(|s| s.split_once('=')).collect();
113111
request_builder = request_builder.query(&query);
114112
}
115113

@@ -121,7 +119,6 @@ impl HttpClient {
121119
request_builder
122120
}
123121

124-
125122
pub async fn send_request(
126123
server_id: &str,
127124
method: Method,
@@ -171,8 +168,7 @@ impl HttpClient {
171168
path: &str,
172169
query_params: Option<Vec<String>>,
173170
) -> Result<reqwest::Response, String> {
174-
HttpClient::send_request(server_id, Method::GET, path, None, query_params,
175-
None).await
171+
HttpClient::send_request(server_id, Method::GET, path, None, query_params, None).await
176172
}
177173

178174
// Convenience method for POST requests
@@ -200,7 +196,7 @@ impl HttpClient {
200196
query_params,
201197
body,
202198
)
203-
.await
199+
.await
204200
}
205201

206202
// Convenience method for PUT requests
@@ -220,7 +216,7 @@ impl HttpClient {
220216
query_params,
221217
body,
222218
)
223-
.await
219+
.await
224220
}
225221

226222
// Convenience method for DELETE requests
@@ -239,6 +235,6 @@ impl HttpClient {
239235
query_params,
240236
None,
241237
)
242-
.await
238+
.await
243239
}
244240
}

src-tauri/src/server/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub mod http_client;
88
pub mod profile;
99
pub mod search;
1010
pub mod servers;
11+
pub mod synthesize;
1112
pub mod system_settings;
1213
pub mod transcription;
1314
pub mod websocket;

src-tauri/src/server/synthesize.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use crate::server::http_client::HttpClient;
2+
use futures_util::StreamExt;
3+
use http::Method;
4+
use serde_json::json;
5+
use tauri::{command, AppHandle, Emitter, Runtime};
6+
7+
/// Tauri command that asks a server to synthesize speech and forwards the
/// audio bytes to the frontend as they arrive.
///
/// A JSON body containing `voice` and `content` is POSTed to
/// `/services/audio/synthesize` on the server identified by `server_id`.
/// Every chunk of the response body is emitted through `app_handle` as an
/// event whose name is `client_id` and whose payload is the chunk's bytes.
///
/// # Errors
///
/// Returns `Err` when `HttpClient::send_request` fails, or when the response
/// status is not a success status (HTTP 429 excepted, see the note below).
/// Failures while emitting an event or while reading the stream are only
/// logged; the function still returns `Ok(())` in those cases.
#[command]
8+
pub async fn synthesize<R: Runtime>(
9+
app_handle: AppHandle<R>,
10+
client_id: String,
11+
server_id: String,
12+
voice: String,
13+
content: String,
14+
) -> Result<(), String> {
15+
// Serialize the request payload to a JSON string.
let body = json!({
16+
"voice": voice,
17+
"content": content,
18+
})
19+
.to_string();
20+
21+
let response = HttpClient::send_request(
22+
server_id.as_str(),
23+
Method::POST,
24+
"/services/audio/synthesize",
25+
None,
26+
None,
27+
// NOTE(review): `body` was already produced by `.to_string()` above, so this
// second `to_string()` makes another copy — looks redundant, confirm.
Some(reqwest::Body::from(body.to_string())),
28+
)
29+
.await?;
30+
31+
log::info!("Synthesize response status: {}", response.status());
32+
33+
// NOTE(review): HTTP 429 is reported to the caller as success and nothing is
// emitted — presumably deliberate so rate limiting is not surfaced as an
// error; confirm the frontend copes with receiving no audio events.
if response.status() == 429 {
34+
return Ok(());
35+
}
36+
37+
if !response.status().is_success() {
38+
return Err(format!("Request Failed: {}", response.status()));
39+
}
40+
41+
// Forward the response body chunk by chunk instead of buffering all of it.
let mut stream = response.bytes_stream();
42+
while let Some(chunk) = stream.next().await {
43+
match chunk {
44+
Ok(bytes) => {
45+
// An emit failure is logged and the loop keeps reading further chunks.
if let Err(err) = app_handle.emit(&client_id, bytes.to_vec()) {
46+
log::error!("Emit error: {:?}", err);
47+
}
48+
}
49+
Err(e) => {
50+
// NOTE(review): a stream error stops forwarding, but the function still
// falls through to `Ok(())` below, so the caller cannot tell the audio
// was cut short — confirm this is intended.
log::error!("Stream error: {:?}", e);
51+
break;
52+
}
53+
}
54+
}
55+
56+
Ok(())
57+
}

src/components/Assistant/Chat.tsx

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ import type { Chat, StartPage } from "@/types/chat";
2222
import PrevSuggestion from "@/components/ChatMessage/PrevSuggestion";
2323
import { useAppStore } from "@/stores/appStore";
2424
import { useSearchStore } from "@/stores/searchStore";
25-
// import ReadAloud from "./ReadAloud";
2625
import { useAuthStore } from "@/stores/authStore";
2726
import Splash from "./Splash";
27+
import Synthesize from "./Synthesize";
2828

2929
interface ChatAIProps {
3030
isSearchActive?: boolean;
@@ -74,8 +74,7 @@ const ChatAI = memo(
7474
clearChat: clearChat,
7575
}));
7676

77-
const { curChatEnd, setCurChatEnd } =
78-
useChatStore();
77+
const { curChatEnd, setCurChatEnd } = useChatStore();
7978

8079
const isTauri = useAppStore((state) => state.isTauri);
8180

@@ -84,9 +83,8 @@ const ChatAI = memo(
8483
return state.setIsCurrentLogin;
8584
});
8685

87-
const visibleStartPage = useConnectStore((state) => {
88-
return state.visibleStartPage;
89-
});
86+
const { currentService, visibleStartPage } = useConnectStore();
87+
const { synthesizeItem } = useChatStore();
9088

9189
const addError = useAppStore.getState().addError;
9290

@@ -104,9 +102,6 @@ const ChatAI = memo(
104102
const askAiServerId = useSearchStore((state) => {
105103
return state.askAiServerId;
106104
});
107-
const currentService = useConnectStore((state) => {
108-
return state.currentService;
109-
});
110105

111106
useEffect(() => {
112107
activeChatProp && setActiveChat(activeChatProp);
@@ -186,7 +181,7 @@ const ChatAI = memo(
186181
isDeepThinkActive,
187182
isMCPActive,
188183
changeInput,
189-
showChatHistory,
184+
showChatHistory
190185
);
191186

192187
const { dealMsg } = useMessageHandler(
@@ -407,7 +402,7 @@ const ChatAI = memo(
407402
<PrevSuggestion sendMessage={init} />
408403
)}
409404

410-
{/* <ReadAloud /> */}
405+
{synthesizeItem && <Synthesize />}
411406
</div>
412407
</>
413408
);
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import { useEffect, useMemo, useRef } from "react";
2-
import { useReactive } from "ahooks";
1+
import { useEffect, useRef } from "react";
32
import dayjs from "dayjs";
43
import durationPlugin from "dayjs/plugin/duration";
54

@@ -16,77 +15,83 @@ import forwardLight from "@/assets/images/ReadAloud/forward-light.png";
1615
import forwardDark from "@/assets/images/ReadAloud/forward-dark.png";
1716
import closeLight from "@/assets/images/ReadAloud/close-light.png";
1817
import closeDark from "@/assets/images/ReadAloud/close-dark.png";
18+
import { useConnectStore } from "@/stores/connectStore";
19+
import platformAdapter from "@/utils/platformAdapter";
20+
import { useStreamAudio } from "@/hooks/useStreamAudio";
21+
import { nanoid } from "nanoid";
22+
import { useChatStore } from "@/stores/chatStore";
1923

2024
dayjs.extend(durationPlugin);
2125

22-
interface State {
23-
loading: boolean;
24-
playing: boolean;
25-
totalDuration: number;
26-
currentDuration: number;
27-
}
28-
29-
const ReadAloud = () => {
30-
const isDark = useThemeStore((state) => state.isDark);
31-
const state = useReactive<State>({
32-
loading: false,
33-
playing: true,
34-
totalDuration: 300,
35-
currentDuration: 0,
26+
const Synthesize = () => {
27+
const { isDark } = useThemeStore();
28+
const { currentService } = useConnectStore();
29+
const { synthesizeItem, setSynthesizeItem } = useChatStore();
30+
const clientIdRef = useRef(nanoid());
31+
32+
const {
33+
loading,
34+
playing,
35+
currentTime,
36+
totalTime,
37+
audioRef,
38+
audioUrl,
39+
initMediaSource,
40+
toggle,
41+
seek,
42+
appendBuffer,
43+
onCanplay,
44+
onTimeupdate,
45+
onEnded,
46+
} = useStreamAudio({
47+
onSourceopen() {
48+
return platformAdapter.invokeBackend("synthesize", {
49+
clientId: clientIdRef.current,
50+
serverId: currentService.id,
51+
content: synthesizeItem?.content,
52+
voice: "longwan_v2",
53+
});
54+
},
3655
});
37-
const timerRef = useRef<ReturnType<typeof setTimeout>>();
38-
39-
const formatTime = useMemo(() => {
40-
return dayjs.duration(state.currentDuration * 1000).format("mm:ss");
41-
}, [state.currentDuration]);
4256

4357
useEffect(() => {
44-
if (state.playing && state.currentDuration >= state.totalDuration) {
45-
state.currentDuration = 0;
46-
}
47-
48-
changeCurrentDuration();
49-
}, [state.playing]);
50-
51-
const changeCurrentDuration = (duration = state.currentDuration) => {
52-
clearTimeout(timerRef.current);
53-
54-
let nextDuration = duration;
58+
const id = nanoid();
5559

56-
if (duration < 0) {
57-
nextDuration = 0;
58-
}
60+
clientIdRef.current = `synthesize-${id}`;
5961

60-
if (duration >= state.totalDuration) {
61-
state.currentDuration = state.totalDuration;
62+
initMediaSource();
6263

63-
state.playing = false;
64-
}
64+
const unlisten = platformAdapter.listenEvent(
65+
`synthesize-${id}`,
66+
({ payload }) => {
67+
appendBuffer(new Uint8Array(payload));
68+
}
69+
);
6570

66-
if (!state.playing) return;
67-
68-
state.currentDuration = nextDuration;
69-
70-
timerRef.current = setTimeout(() => {
71-
changeCurrentDuration(duration + 1);
72-
}, 1000);
73-
};
71+
return () => {
72+
unlisten.then((unmount) => unmount());
73+
};
74+
}, [synthesizeItem?.id]);
7475

7576
return (
7677
<div className="fixed top-[60px] left-1/2 z-1000 w-[200px] h-12 px-4 flex items-center justify-between -translate-x-1/2 border rounded-lg text-[#333] dark:text-[#D8D8D8] bg-white dark:bg-black dark:border-[#272828] shadow-[0_4px_8px_rgba(0,0,0,0.2)] dark:shadow-[0_4px_8px_rgba(255,255,255,0.15)]">
78+
<audio
79+
ref={audioRef}
80+
src={audioUrl}
81+
onCanPlay={onCanplay}
82+
onTimeUpdate={onTimeupdate}
83+
onEnded={onEnded}
84+
/>
85+
7786
<div className="flex items-center gap-2">
78-
{state.loading ? (
87+
{loading ? (
7988
<img
8089
src={isDark ? loadingDark : loadingLight}
8190
className="size-4 animate-spin"
8291
/>
8392
) : (
84-
<div
85-
onClick={() => {
86-
state.playing = !state.playing;
87-
}}
88-
>
89-
{state.playing ? (
93+
<div onClick={toggle}>
94+
{playing ? (
9095
<img
9196
src={isDark ? playDark : playLight}
9297
className="size-4 cursor-pointer"
@@ -100,24 +105,28 @@ const ReadAloud = () => {
100105
</div>
101106
)}
102107

103-
<span className="text-sm">{formatTime}</span>
108+
{!loading && (
109+
<span className="text-sm">
110+
{dayjs.duration(currentTime * 1000).format("mm:ss")}
111+
</span>
112+
)}
104113
</div>
105114
<div className="flex gap-3">
106-
{!state.loading && (
115+
{!loading && totalTime !== Infinity && (
107116
<>
108117
<img
109118
src={isDark ? backDark : backLight}
110119
className="size-4 cursor-pointer"
111120
onClick={() => {
112-
changeCurrentDuration(state.currentDuration - 15);
121+
seek(currentTime - 15);
113122
}}
114123
/>
115124

116125
<img
117126
src={isDark ? forwardDark : forwardLight}
118127
className="size-4 cursor-pointer"
119128
onClick={() => {
120-
changeCurrentDuration(state.currentDuration + 15);
129+
seek(currentTime + 15);
121130
}}
122131
/>
123132
</>
@@ -126,10 +135,13 @@ const ReadAloud = () => {
126135
<img
127136
src={isDark ? closeDark : closeLight}
128137
className="size-4 cursor-pointer"
138+
onClick={() => {
139+
setSynthesizeItem(void 0);
140+
}}
129141
/>
130142
</div>
131143
</div>
132144
);
133145
};
134146

135-
export default ReadAloud;
147+
export default Synthesize;

0 commit comments

Comments
 (0)