Skip to content

Commit 3f58ed2

Browse files
authored
Merge 3626314 into 2627312
2 parents 2627312 + 3626314 commit 3f58ed2

8 files changed

Lines changed: 254 additions & 243 deletions

File tree

nvdaHelper/archBuild_sconscript

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -146,8 +146,9 @@ if 'RTC' in debug:
146146
env.Append(PDB='${TARGET}.pdb')
147147
env.Append(LINKFLAGS='/OPT:REF') #having symbols usually turns this off but we have no need for unused symbols
148148

149-
win10env=env.Clone()
150-
win10env.Append(CXXFLAGS='/ZW')
149+
win10env = env.Clone()
150+
# Add C++ coroutine support
151+
win10env.Append(CXXFLAGS='/await')
151152
Export('win10env')
152153

153154
if 'debugCRT' in debug:

nvdaHelper/localWin10/oneCoreSpeech.cpp

Lines changed: 123 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
Code for C dll bridge to Windows OneCore voices.
33
This file is a part of the NVDA project.
44
URL: http://www.nvaccess.org/
5-
Copyright 2016-2017 Tyler Spivey, NV Access Limited.
5+
Copyright 2016-2020 Tyler Spivey, NV Access Limited, Leonard de Ruijter.
66
This program is free software: you can redistribute it and/or modify
77
it under the terms of the GNU General Public License version 2.0, as published by
88
the Free Software Foundation.
@@ -14,39 +14,38 @@ This license can be found at:
1414
*/
1515

1616
#include <string>
17-
#include <collection.h>
18-
#include <ppltasks.h>
19-
#include <wrl.h>
20-
#include <robuffer.h>
17+
#include <winrt/Windows.Media.SpeechSynthesis.h>
18+
#include <winrt/Windows.Storage.Streams.h>
19+
#include <winrt/Windows.Foundation.h>
20+
#include <winrt/Windows.Foundation.Collections.h>
21+
#include <winrt/Windows.Foundation.Metadata.h>
2122
#include <common/log.h>
22-
#include "utils.h"
2323
#include "oneCoreSpeech.h"
2424

2525
using namespace std;
26-
using namespace Platform;
27-
using namespace Windows::Media::SpeechSynthesis;
28-
using namespace concurrency;
29-
using namespace Windows::Storage::Streams;
30-
using namespace Microsoft::WRL;
31-
using namespace Windows::Media;
32-
using namespace Windows::Foundation::Collections;
33-
using Windows::Foundation::Metadata::ApiInformation;
26+
using namespace winrt;
27+
using namespace winrt::Windows::Media::SpeechSynthesis;
28+
using namespace winrt::Windows::Storage::Streams;
29+
using namespace winrt::Windows::Media;
30+
using namespace winrt::Windows::Foundation::Collections;
31+
using winrt::Windows::Foundation::Metadata::ApiInformation;
3432

3533
bool __stdcall ocSpeech_supportsProsodyOptions() {
36-
return ApiInformation::IsApiContractPresent("Windows.Foundation.UniversalApiContract", 5, 0);
34+
return ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 5, 0);
3735
}
3836

39-
OcSpeech* __stdcall ocSpeech_initialize() {
40-
auto instance = new OcSpeech;
41-
instance->synth = ref new SpeechSynthesizer();
37+
OcSpeech::OcSpeech() : synth(SpeechSynthesizer{}) {
4238
// By default, OneCore speech appends a large annoying chunk of silence at the end of every utterance.
4339
// Newer versions of OneCore speech allow disabling this feature, so turn it off where possible.
44-
if (ApiInformation::IsApiContractPresent("Windows.Foundation.UniversalApiContract", 6, 0)) {
45-
auto options = instance->synth->Options;
46-
options->AppendedSilence = SpeechAppendedSilence::Min;
40+
if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 6, 0)) {
41+
synth.Options().AppendedSilence(SpeechAppendedSilence::Min);
4742
} else {
4843
LOG_DEBUGWARNING(L"AppendedSilence not supported");
4944
}
45+
}
46+
47+
OcSpeech* __stdcall ocSpeech_initialize() {
48+
auto instance = new OcSpeech;
5049
return instance;
5150
}
5251

@@ -55,104 +54,148 @@ void __stdcall ocSpeech_terminate(OcSpeech* instance) {
5554
}
5655

5756
void __stdcall ocSpeech_setCallback(OcSpeech* instance, ocSpeech_Callback fn) {
58-
instance->callback = fn;
57+
instance->setCallback(fn);
5958
}
6059

61-
void __stdcall ocSpeech_speak(OcSpeech* instance, char16 *text) {
62-
String^ textStr = ref new String(text);
60+
void OcSpeech::setCallback(ocSpeech_Callback fn) {
61+
callback = fn;
62+
}
63+
64+
fire_and_forget OcSpeech::speak(hstring text) {
65+
// Ensure that work is performed on a background thread.
66+
co_await resume_background();
67+
6368
auto markersStr = make_shared<wstring>();
64-
task<SpeechSynthesisStream ^> speakTask;
69+
SpeechSynthesisStream speechStream{ nullptr };
6570
try {
66-
speakTask = create_task(instance->synth->SynthesizeSsmlToStreamAsync(textStr));
67-
} catch (Platform::Exception ^e) {
68-
LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
69-
instance->callback(NULL, 0, NULL);
70-
return;
71+
speechStream = co_await synth.SynthesizeSsmlToStreamAsync(text);
72+
} catch (hresult_error const& e) {
73+
LOG_ERROR(L"Error " << e.code() << L": " << e.message().c_str());
74+
callback(NULL, 0, NULL);
75+
co_return;
7176
}
72-
speakTask.then([markersStr] (SpeechSynthesisStream^ speechStream) {
73-
// speechStream->Size is 64 bit, but Buffer can only take 32 bit.
74-
// We shouldn't get values above 32 bit in reality.
75-
const unsigned int size = static_cast<unsigned int>(speechStream->Size);
76-
Buffer^ buffer = ref new Buffer(size);
77-
IVectorView<IMediaMarker^>^ markers = speechStream->Markers;
78-
for (auto&& marker : markers) {
79-
if (markersStr->length() > 0) {
80-
*markersStr += L"|";
81-
}
82-
*markersStr += marker->Text->Data();
83-
*markersStr += L":";
84-
*markersStr += to_wstring(marker->Time.Duration);
77+
// speechStream.Size() is 64 bit, but Buffer can only take 32 bit.
78+
// We shouldn't get values above 32 bit in reality.
79+
const unsigned int size = static_cast<unsigned int>(speechStream.Size());
80+
Buffer buffer = Buffer{ size };
81+
IVectorView<IMediaMarker> markers = speechStream.Markers();
82+
for (auto const& marker : markers) {
83+
if (markersStr->length() > 0) {
84+
*markersStr += L"|";
8585
}
86-
auto t = create_task(speechStream->ReadAsync(buffer, size, Windows::Storage::Streams::InputStreamOptions::None));
87-
return t;
88-
}).then([instance, markersStr] (IBuffer^ buffer) {
86+
*markersStr += marker.Text();
87+
*markersStr += L":";
88+
*markersStr += to_wstring(marker.Time().count());
89+
}
90+
try {
91+
co_await speechStream.ReadAsync(buffer, size, InputStreamOptions::None);
8992
// Data has been read from the speech stream.
9093
// Pass it to the callback.
91-
BYTE* bytes = getBytes(buffer);
92-
instance->callback(bytes, buffer->Length, markersStr->c_str());
93-
}).then([instance] (task<void> previous) {
94-
// Catch any unhandled exceptions that occurred during these tasks.
95-
try {
96-
previous.get();
97-
} catch (Platform::Exception^ e) {
98-
LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
99-
instance->callback(NULL, 0, NULL);
100-
}
101-
});
94+
BYTE* bytes = buffer.data();
95+
callback(bytes, buffer.Length(), markersStr->c_str());
96+
} catch (hresult_error const& e) {
97+
LOG_ERROR(L"Error " << e.code() << L": " << e.message().c_str());
98+
callback(NULL, 0, NULL);
99+
}
102100
}
103101

104-
// We use BSTR because we need the string to stay around until the caller is done with it
105-
// but the caller then needs to free it.
106-
// We can't just use malloc because the caller might be using a different CRT
107-
// and calling malloc and free from different CRTs isn't safe.
108-
BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance) {
102+
void __stdcall ocSpeech_speak(OcSpeech* instance, wchar_t* text) {
103+
instance->speak(text);
104+
}
105+
106+
wstring OcSpeech::getVoices() {
109107
wstring voices;
110-
for (unsigned int i = 0; i < instance->synth->AllVoices->Size; ++i) {
111-
VoiceInformation^ info = instance->synth->AllVoices->GetAt(i);
112-
voices += info->Id->Data();
108+
auto const& allVoices = synth.AllVoices();
109+
for (unsigned int i = 0; i < allVoices.Size(); ++i) {
110+
VoiceInformation const& voiceInfo = allVoices.GetAt(i);
111+
voices += voiceInfo.Id();
113112
voices += L":";
114-
voices += info->Language->Data();
113+
voices += voiceInfo.Language();
115114
voices += L":";
116-
voices += info->DisplayName->Data();
117-
if (i != instance->synth->AllVoices->Size - 1) {
115+
voices += voiceInfo.DisplayName();
116+
if (i != allVoices.Size() - 1) {
118117
voices += L"|";
119118
}
120119
}
121-
return SysAllocString(voices.c_str());
120+
return voices;
122121
}
123122

124-
const char16* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance) {
125-
return instance->synth->Voice->Id->Data();
123+
// We use BSTR because we need the string to stay around until the caller is done with it
124+
// but the caller then needs to free it.
125+
// We can't just use malloc because the caller might be using a different CRT
126+
// and calling malloc and free from different CRTs isn't safe.
127+
BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance) {
128+
return SysAllocString(instance->getVoices().c_str());
129+
}
130+
131+
hstring OcSpeech::getCurrentVoiceId() {
132+
return synth.Voice().Id();
133+
}
134+
135+
const wchar_t* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance) {
136+
return instance->getCurrentVoiceId().c_str();
137+
}
138+
139+
void OcSpeech::setVoice(int index) {
140+
synth.Voice(synth.AllVoices().GetAt(index));
126141
}
127142

128143
void __stdcall ocSpeech_setVoice(OcSpeech* instance, int index) {
129-
instance->synth->Voice = instance->synth->AllVoices->GetAt(index);
144+
instance->setVoice(index);
145+
}
146+
147+
hstring OcSpeech::getCurrentVoiceLanguage() {
148+
return synth.Voice().Language();
149+
}
150+
151+
const wchar_t* __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance) {
152+
return instance->getCurrentVoiceLanguage().c_str();
130153
}
131154

132-
const char16 * __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance) {
133-
return instance->synth->Voice->Language->Data();
155+
double OcSpeech::getPitch() {
156+
return synth.Options().AudioPitch();
134157
}
135158

136159
double __stdcall ocSpeech_getPitch(OcSpeech* instance) {
137-
return instance->synth->Options->AudioPitch;
160+
return instance->getPitch();
161+
}
162+
163+
void OcSpeech::setPitch(double pitch) {
164+
synth.Options().AudioPitch(pitch);
138165
}
139166

140167
void __stdcall ocSpeech_setPitch(OcSpeech* instance, double pitch) {
141-
instance->synth->Options->AudioPitch = pitch;
168+
instance->setPitch(pitch);
169+
}
170+
171+
double OcSpeech::getVolume() {
172+
return synth.Options().AudioVolume();
142173
}
143174

144175
double __stdcall ocSpeech_getVolume(OcSpeech* instance) {
145-
return instance->synth->Options->AudioVolume;
176+
return instance->getVolume();
177+
}
178+
179+
void OcSpeech::setVolume(double volume) {
180+
synth.Options().AudioVolume(volume);
146181
}
147182

148183
void __stdcall ocSpeech_setVolume(OcSpeech* instance, double volume) {
149-
instance->synth->Options->AudioVolume = volume;
184+
instance->setVolume(volume);
185+
}
186+
187+
double OcSpeech::getRate() {
188+
return synth.Options().SpeakingRate();
150189
}
151190

152191
double __stdcall ocSpeech_getRate(OcSpeech* instance) {
153-
return instance->synth->Options->SpeakingRate;
192+
return instance->getRate();
193+
}
194+
195+
void OcSpeech::setRate(double rate) {
196+
synth.Options().SpeakingRate(rate);
154197
}
155198

156199
void __stdcall ocSpeech_setRate(OcSpeech* instance, double rate) {
157-
instance->synth->Options->SpeakingRate = rate;
200+
instance->setRate(rate);
158201
}

nvdaHelper/localWin10/oneCoreSpeech.h

Lines changed: 37 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -14,28 +14,45 @@ This license can be found at:
1414
*/
1515

1616
#pragma once
17-
#define export __declspec(dllexport)
17+
#define export __declspec(dllexport)
1818

19-
typedef void (*ocSpeech_Callback)(byte* data, int length, const char16* markers);
20-
typedef struct {
21-
Windows::Media::SpeechSynthesis::SpeechSynthesizer ^synth;
19+
typedef void (*ocSpeech_Callback)(byte* data, int length, const wchar_t* markers);
20+
21+
class OcSpeech {
22+
private:
23+
winrt::Windows::Media::SpeechSynthesis::SpeechSynthesizer synth{ nullptr };
2224
ocSpeech_Callback callback;
23-
} OcSpeech;
25+
26+
public:
27+
OcSpeech();
28+
winrt::fire_and_forget speak(winrt::hstring text);
29+
void setCallback(ocSpeech_Callback fn);
30+
std::wstring getVoices();
31+
winrt::hstring getCurrentVoiceId();
32+
void setVoice(int index);
33+
winrt::hstring getCurrentVoiceLanguage();
34+
double getPitch();
35+
void setPitch(double pitch);
36+
double getVolume();
37+
void setVolume(double volume);
38+
double getRate();
39+
void setRate(double rate);
40+
};
2441

2542
extern "C" {
26-
export bool __stdcall ocSpeech_supportsProsodyOptions();
27-
export OcSpeech* __stdcall ocSpeech_initialize();
28-
export void __stdcall ocSpeech_terminate(OcSpeech* instance);
29-
export void __stdcall ocSpeech_setCallback(OcSpeech* instance, ocSpeech_Callback fn);
30-
export void __stdcall ocSpeech_speak(OcSpeech* instance, char16 *text);
31-
export BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance);
32-
export const char16* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance);
33-
export void __stdcall ocSpeech_setVoice(OcSpeech* instance, int index);
34-
export const char16* __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance);
35-
export double __stdcall ocSpeech_getPitch(OcSpeech* instance);
36-
export void __stdcall ocSpeech_setPitch(OcSpeech* instance, double pitch);
37-
export double __stdcall ocSpeech_getVolume(OcSpeech* instance);
38-
export void __stdcall ocSpeech_setVolume(OcSpeech* instance, double volume);
39-
export double __stdcall ocSpeech_getRate(OcSpeech* instance);
40-
export void __stdcall ocSpeech_setRate(OcSpeech* instance, double rate);
43+
export bool __stdcall ocSpeech_supportsProsodyOptions();
44+
export OcSpeech* __stdcall ocSpeech_initialize();
45+
export void __stdcall ocSpeech_terminate(OcSpeech* instance);
46+
export void __stdcall ocSpeech_setCallback(OcSpeech* instance, ocSpeech_Callback fn);
47+
export void __stdcall ocSpeech_speak(OcSpeech* instance, wchar_t* text);
48+
export BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance);
49+
export const wchar_t* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance);
50+
export void __stdcall ocSpeech_setVoice(OcSpeech* instance, int index);
51+
export const wchar_t* __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance);
52+
export double __stdcall ocSpeech_getPitch(OcSpeech* instance);
53+
export void __stdcall ocSpeech_setPitch(OcSpeech* instance, double pitch);
54+
export double __stdcall ocSpeech_getVolume(OcSpeech* instance);
55+
export void __stdcall ocSpeech_setVolume(OcSpeech* instance, double volume);
56+
export double __stdcall ocSpeech_getRate(OcSpeech* instance);
57+
export void __stdcall ocSpeech_setRate(OcSpeech* instance, double rate);
4158
}

0 commit comments

Comments
 (0)