Skip to content

Commit 4b1193a

Browse files
authored
Merge c5106ed into 20d5946
2 parents 20d5946 + c5106ed commit 4b1193a

4 files changed

Lines changed: 255 additions & 57 deletions

File tree

nvdaHelper/localWin10/oneCoreSpeech.cpp

Lines changed: 196 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,25 @@
22
Code for C dll bridge to Windows OneCore voices.
33
This file is a part of the NVDA project.
44
URL: http://www.nvaccess.org/
5-
Copyright 2016-2020 Tyler Spivey, NV Access Limited, Leonard de Ruijter.
6-
This program is free software: you can redistribute it and/or modify
7-
it under the terms of the GNU General Public License version 2.0, as published by
8-
the Free Software Foundation.
9-
This program is distributed in the hope that it will be useful,
10-
but WITHOUT ANY WARRANTY; without even the implied warranty of
11-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5+
Copyright 2016-2022 Tyler Spivey, NV Access Limited, Leonard de Ruijter.
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU General Public License version 2.0, as published by
8+
the Free Software Foundation.
9+
This program is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
1212
This license can be found at:
1313
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
1414
*/
1515

1616
#include <string>
17+
#include <iostream>
18+
#include <vector>
19+
#include <algorithm>
20+
#include <atomic>
21+
#include <thread>
22+
#include <mutex>
23+
#include <condition_variable>
1724
#include <winrt/Windows.Media.SpeechSynthesis.h>
1825
#include <winrt/Windows.Storage.Streams.h>
1926
#include <winrt/Windows.Foundation.h>
@@ -30,6 +37,130 @@ using namespace winrt::Windows::Media;
3037
using namespace winrt::Windows::Foundation::Collections;
3138
using winrt::Windows::Foundation::Metadata::ApiInformation;
3239

40+
std::recursive_mutex InstanceManager::_instanceStateMutex;
41+
std::vector<OcSpeech*> InstanceManager::_terminatedInstances;
42+
std::unique_ptr<OcSpeech> InstanceManager::_instance;
43+
std::atomic<InstanceState> InstanceManager::_instanceState = InstanceState::notInitialized;
44+
std::condition_variable_any InstanceManager::_readyForInitialization;
45+
std::atomic_int SpeakThreadGuard::_speechThreads = 0;
46+
47+
/*
Registers one more active speech thread by atomically incrementing
the shared counter of speech threads.
*/
SpeakThreadGuard::SpeakThreadGuard() {
	_speechThreads.fetch_add(1);
}
57+
58+
bool SpeakThreadGuard::areCallbacksPending() {
59+
/*
60+
Checks if any speak threads are active, i.e. with callbacks pending.
61+
*/
62+
return _speechThreads != 0;
63+
}
64+
65+
/*
Decreases the count of active speech threads, then asks InstanceManager
to delete the OneCore instance if it has been terminated
and no speech callbacks are waiting any more.
*/
SpeakThreadGuard::~SpeakThreadGuard() {
	// Use the value produced by the atomic decrement itself.
	// Re-reading the counter afterwards could observe a change made by another thread.
	const int remainingThreads = --_speechThreads;
	if (remainingThreads < 0) {
		// Report rather than throw: destructors are noexcept unless declared otherwise,
		// so an exception escaping from here would call std::terminate.
		LOG_ERROR(
			L"Negative speech thread count. "
			L"SpeakThreadGuard has been destroyed more often than it was constructed."
		);
	}
	InstanceManager::deleteInstanceIfTerminatedAndReady();
}
80+
81+
void InstanceManager::deleteInstanceIfTerminatedAndReady() {
82+
/*
83+
If the instance is terminated and no speak callbacks are pending,
84+
delete the instance and notify any waiting threads,
85+
such as the instance initializer.
86+
*/
87+
std::lock_guard g(_instanceStateMutex);
88+
if (
89+
_instanceState == InstanceState::terminated
90+
&& !SpeakThreadGuard::areCallbacksPending()
91+
) {
92+
_instance.reset();
93+
_instanceState = InstanceState::notInitialized;
94+
_readyForInitialization.notify_all();
95+
}
96+
}
97+
98+
void InstanceManager::waitUntilReadyForInitialization() {
99+
/* Wait for a signal that there is no active or terminated instance. */
100+
std::unique_lock lock(_instanceStateMutex);
101+
_readyForInitialization.wait(lock, []{
102+
return !_instance && _instanceState == InstanceState::notInitialized;
103+
});
104+
}
105+
106+
/*
Returns the managed instance for the supplied token.
Throws a runtime_error if no instance is active
or if the token does not match the managed instance.
*/
OcSpeech* InstanceManager::getActiveInstance(OcSpeech* token) {
	_assertInstanceActive(token);
	OcSpeech* const activeInstance = _instance.get();
	return activeInstance;
}
111+
112+
/*
Validates a token supplied by a caller.
Throws a runtime_error when the instance state is not active,
or when the token is not the pointer of the managed instance.
*/
void InstanceManager::_assertInstanceActive(OcSpeech* token) {
	const bool isActive = _instanceState == InstanceState::active;
	if (!isActive) {
		throw runtime_error("Supplied OneCore token is not active");
	}
	const OcSpeech* const currentInstance = _instance.get();
	if (currentInstance != token) {
		throw runtime_error("Supplied OneCore instance token does not match initialized instance");
	}
}
120+
121+
/*
Initializes a new OneCore instance and returns its pointer as the token.
If a previous instance has been terminated but not yet deleted,
waits until it has been deleted (once speech callbacks have finished).
Throws a runtime_error if an instance is still active.
*/
OcSpeech* InstanceManager::initializeNewInstance() {
	std::unique_lock stateLock(_instanceStateMutex);
	if (_instanceState == InstanceState::active) {
		throw runtime_error(
			"OneCore token still active. "
			"Terminate token before calling initialize"
		);
	}
	// Wait on the lock taken above rather than locking the recursive mutex a second time.
	// The condition variable releases only one lock level while waiting,
	// so a nested lock would keep the mutex owned by this thread,
	// and the thread that deletes the terminated instance could never acquire it.
	_readyForInitialization.wait(stateLock, [] {
		return !_instance && _instanceState == InstanceState::notInitialized;
	});
	_instance = std::make_unique<OcSpeech>();
	_instanceState = InstanceState::active;
	// Remove instance from terminated instances if we get the same pointer again
	_terminatedInstances.erase(
		std::remove(
			_terminatedInstances.begin(),
			_terminatedInstances.end(),
			_instance.get()
		),
		_terminatedInstances.end()
	);
	return _instance.get();
}
148+
149+
/*
Marks the instance identified by token as terminated.
If no callbacks are pending, the instance is deleted straight away;
otherwise deletion is left until the last speak thread has finished.
Throws a runtime_error if the token is not the active instance.
*/
void InstanceManager::terminateInstance(OcSpeech* token) {
	// Take the lock before validating the token,
	// so the state cannot change between the check and the update.
	std::lock_guard stateLock(_instanceStateMutex);
	_assertInstanceActive(token);
	_instanceState = InstanceState::terminated;
	_terminatedInstances.emplace_back(_instance.get());
	// Deletes the instance and notifies waiting threads when no callbacks are pending.
	// The state mutex is recursive, so locking it again inside this call is safe.
	deleteInstanceIfTerminatedAndReady();
}
163+
33164
// Reports whether version 5.0 of the Windows.Foundation.UniversalApiContract is present.
bool __stdcall ocSpeech_supportsProsodyOptions() {
	const hstring contractName{L"Windows.Foundation.UniversalApiContract"};
	return ApiInformation::IsApiContractPresent(contractName, 5, 0);
}
@@ -45,23 +176,44 @@ OcSpeech::OcSpeech() : synth(SpeechSynthesizer{}) {
45176
}
46177

47178
// Creates a new OneCore instance through InstanceManager
// and returns its pointer, which callers pass back as the token.
OcSpeech* __stdcall ocSpeech_initialize() {
	OcSpeech* const token = InstanceManager::initializeNewInstance();
	return token;
}
51181

52-
// Asks InstanceManager to terminate the instance identified by token.
void __stdcall ocSpeech_terminate(OcSpeech* token) {
	InstanceManager::terminateInstance(token);
}
55185

56-
// Sets the speech callback on the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
void __stdcall ocSpeech_setCallback(OcSpeech* token, ocSpeech_Callback fn) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	speech->setCallback(fn);
}
59189

60190
// Stores the function that performCallback invokes.
void OcSpeech::setCallback(ocSpeech_Callback fn) {
	this->callback = fn;
}
63193

194+
/*
Performs the callback only on the instance that is currently active.
InstanceManager throws a runtime_error if token is not the active instance,
in which case the callback is not performed.
*/
void protectedCallback(
	OcSpeech* token,
	BYTE* data,
	int length,
	const wchar_t* markers
) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	speech->performCallback(data, length, markers);
}
202+
203+
// Invokes the stored callback, forwarding the data pointer, its length and the markers unchanged.
void OcSpeech::performCallback(
	BYTE* data,
	int length,
	const wchar_t* markers
) {
	this->callback(data, length, markers);
}
210+
64211
fire_and_forget OcSpeech::speak(hstring text) {
212+
/*
213+
Send speech to OneCore.
214+
Will block OneCore from being re-initialized until speech callbacks have completed.
215+
*/
216+
SpeakThreadGuard();
65217
// Ensure we catch all exceptions in this method,
66218

67219
// as an unhandled exception causes std::terminate to get called, resulting in a crash.
@@ -76,7 +228,7 @@ fire_and_forget OcSpeech::speak(hstring text) {
76228
speechStream = co_await synth.SynthesizeSsmlToStreamAsync(text);
77229
} catch (hresult_error const& e) {
78230
LOG_ERROR(L"Error " << e.code() << L": " << e.message().c_str());
79-
callback(nullptr, 0, nullptr);
231+
protectedCallback(this, nullptr, 0, nullptr);
80232
co_return;
81233
}
82234
// speechStream.Size() is 64 bit, but Buffer can only take 32 bit.
@@ -98,18 +250,22 @@ fire_and_forget OcSpeech::speak(hstring text) {
98250
// Data has been read from the speech stream.
99251
// Pass it to the callback.
100252
BYTE* bytes = buffer.data();
101-
callback(bytes, buffer.Length(), markersStr.c_str());
253+
protectedCallback(this, bytes, buffer.Length(), markersStr.c_str());
102254
} catch (hresult_error const& e) {
103255
LOG_ERROR(L"Error " << e.code() << L": " << e.message().c_str());
104-
callback(nullptr, 0, nullptr);
256+
protectedCallback(this, nullptr, 0, nullptr);
105257
}
106258
} catch (...) {
107259
LOG_ERROR(L"Unexpected error in OcSpeech::speak");
108260
}
109261
}
110262

111-
/*
Send speech to OneCore via the active instance.
InstanceManager throws a runtime_error if token is not the active instance.
*/
void __stdcall ocSpeech_speak(OcSpeech* token, wchar_t* text) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	speech->speak(text);
}
114270

115271
wstring OcSpeech::getVoices() {
@@ -133,78 +289,80 @@ wstring OcSpeech::getVoices() {
133289
// but the caller then needs to free it.
134290
// We can't just use malloc because the caller might be using a different CRT
135291
// and calling malloc and free from different CRTs isn't safe.
136-
// Returns the voices string of the active instance as a BSTR allocated with SysAllocString.
// InstanceManager throws a runtime_error if token is not the active instance.
BSTR __stdcall ocSpeech_getVoices(OcSpeech* token) {
	const wstring voices = InstanceManager::getActiveInstance(token)->getVoices();
	return SysAllocString(voices.c_str());
}
139297

140298
// Returns the Id of the synthesizer's current voice.
hstring OcSpeech::getCurrentVoiceId() {
	const auto currentVoice = synth.Voice();
	return currentVoice.Id();
}
143301

144-
/*
Returns the Id of the active instance's current voice.
The returned pointer stays valid until the next call of this function on the same thread.
InstanceManager throws a runtime_error if token is not the active instance.
*/
const wchar_t* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* token) {
	// getCurrentVoiceId returns the hstring by value.
	// Taking c_str() of that temporary would return a pointer to a string
	// that is destroyed before the caller can read it, so keep the string alive here.
	thread_local hstring lastVoiceId;
	lastVoiceId = InstanceManager::getActiveInstance(token)->getCurrentVoiceId();
	return lastVoiceId.c_str();
}
147305

148306
void OcSpeech::setVoice(int index) {
149307
synth.Voice(synth.AllVoices().GetAt(index));
150308
}
151309

152-
// Selects the voice with the given index on the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
void __stdcall ocSpeech_setVoice(OcSpeech* token, int index) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	speech->setVoice(index);
}
155313

156314
// Returns the Language of the synthesizer's current voice.
hstring OcSpeech::getCurrentVoiceLanguage() {
	const auto currentVoice = synth.Voice();
	return currentVoice.Language();
}
159317

160-
/*
Returns the language of the active instance's current voice.
The returned pointer stays valid until the next call of this function on the same thread.
InstanceManager throws a runtime_error if token is not the active instance.
*/
const wchar_t* __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* token) {
	// getCurrentVoiceLanguage returns the hstring by value.
	// Taking c_str() of that temporary would return a pointer to a string
	// that is destroyed before the caller can read it, so keep the string alive here.
	thread_local hstring lastVoiceLanguage;
	lastVoiceLanguage = InstanceManager::getActiveInstance(token)->getCurrentVoiceLanguage();
	return lastVoiceLanguage.c_str();
}
163321

164322
double OcSpeech::getPitch() {
165323
return synth.Options().AudioPitch();
166324
}
167325

168-
// Returns the pitch of the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
double __stdcall ocSpeech_getPitch(OcSpeech* token) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	return speech->getPitch();
}
171329

172330
void OcSpeech::setPitch(double pitch) {
173331
synth.Options().AudioPitch(pitch);
174332
}
175333

176-
// Sets the pitch of the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
void __stdcall ocSpeech_setPitch(OcSpeech* token, double pitch) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	speech->setPitch(pitch);
}
179337

180338
double OcSpeech::getVolume() {
181339
return synth.Options().AudioVolume();
182340
}
183341

184-
// Returns the volume of the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
double __stdcall ocSpeech_getVolume(OcSpeech* token) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	return speech->getVolume();
}
187345

188346
void OcSpeech::setVolume(double volume) {
189347
synth.Options().AudioVolume(volume);
190348
}
191349

192-
// Sets the volume of the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
void __stdcall ocSpeech_setVolume(OcSpeech* token, double volume) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	speech->setVolume(volume);
}
195353

196354
double OcSpeech::getRate() {
197355
return synth.Options().SpeakingRate();
198356
}
199357

200-
// Returns the speaking rate of the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
double __stdcall ocSpeech_getRate(OcSpeech* token) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	return speech->getRate();
}
203361

204362
void OcSpeech::setRate(double rate) {
205363
synth.Options().SpeakingRate(rate);
206364
}
207365

208-
// Sets the speaking rate of the active instance.
// InstanceManager throws a runtime_error if token is not the active instance.
void __stdcall ocSpeech_setRate(OcSpeech* token, double rate) {
	OcSpeech* const speech = InstanceManager::getActiveInstance(token);
	speech->setRate(rate);
}

0 commit comments

Comments
 (0)