nvaccess
diff --git a/‎nvdaHelper/archBuild_sconscript‎
Lines changed: 3 additions & 2 deletions b/‎nvdaHelper/archBuild_sconscript‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎nvdaHelper/localWin10/oneCoreSpeech.cpp‎
Lines changed: 123 additions & 80 deletions b/‎nvdaHelper/localWin10/oneCoreSpeech.cpp‎
Lines changed: 123 additions & 80 deletions
diff --git a/‎nvdaHelper/localWin10/oneCoreSpeech.h‎
Lines changed: 37 additions & 20 deletions b/‎nvdaHelper/localWin10/oneCoreSpeech.h‎
Lines changed: 37 additions & 20 deletions
@@ -146,8 +146,9 @@ if 'RTC' in debug:
 env.Append(PDB='${TARGET}.pdb')
 env.Append(LINKFLAGS='/OPT:REF') #having symbols usually turns this off but we have no need for unused symbols
 
-win10env=env.Clone()
-win10env.Append(CXXFLAGS='/ZW')
+win10env = env.Clone()
+# Enable C++ coroutine support (/await), which the co_await based OneCore speech code requires.
+win10env.Append(CXXFLAGS='/await')
 Export('win10env')
 
 if 'debugCRT' in debug:
 
@@ -2,7 +2,7 @@
 Code for C dll bridge to Windows OneCore voices.
 This file is a part of the NVDA project.
 URL: http://www.nvaccess.org/
-Copyright 2016-2017 Tyler Spivey, NV Access Limited.
+Copyright 2016-2020 Tyler Spivey, NV Access Limited, Leonard de Ruijter.
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License version 2.0, as published by
     the Free Software Foundation.
@@ -14,39 +14,38 @@ This license can be found at:
 */
 
 #include <string>
-#include <collection.h>
-#include <ppltasks.h>
-#include <wrl.h>
-#include <robuffer.h>
+#include <winrt/Windows.Media.SpeechSynthesis.h>
+#include <winrt/Windows.Storage.Streams.h>
+#include <winrt/Windows.Foundation.h>
+#include <winrt/Windows.Foundation.Collections.h>
+#include <winrt/Windows.Foundation.Metadata.h>
 #include <common/log.h>
-#include "utils.h"
 #include "oneCoreSpeech.h"
 
 using namespace std;
-using namespace Platform;
-using namespace Windows::Media::SpeechSynthesis;
-using namespace concurrency;
-using namespace Windows::Storage::Streams;
-using namespace Microsoft::WRL;
-using namespace Windows::Media;
-using namespace Windows::Foundation::Collections;
-using Windows::Foundation::Metadata::ApiInformation;
+using namespace winrt;
+using namespace winrt::Windows::Media::SpeechSynthesis;
+using namespace winrt::Windows::Storage::Streams;
+using namespace winrt::Windows::Media;
+using namespace winrt::Windows::Foundation::Collections;
+using winrt::Windows::Foundation::Metadata::ApiInformation;
 
+// Returns whether version 5.0 of the Windows.Foundation.UniversalApiContract API contract is present.
+// As the function name indicates, this is the check used to decide whether prosody options are supported.
 bool __stdcall ocSpeech_supportsProsodyOptions() {
-	return ApiInformation::IsApiContractPresent("Windows.Foundation.UniversalApiContract", 5, 0);
+	return ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 5, 0);
 }
 
-OcSpeech* __stdcall ocSpeech_initialize() {
-	auto instance = new OcSpeech;
-	instance->synth = ref new SpeechSynthesizer();
+// Constructs the wrapper with a freshly created SpeechSynthesizer and, where the platform
+// supports it, minimises the silence appended to each utterance.
+// The callback member is not set here; it is only assigned through setCallback().
+OcSpeech::OcSpeech() : synth(SpeechSynthesizer{}) {
 	// By default, OneCore speech appends a  large annoying chunk of silence at the end of every utterance.
 	// Newer versions of OneCore speech allow disabling this feature, so turn it off where possible.
-	if (ApiInformation::IsApiContractPresent("Windows.Foundation.UniversalApiContract", 6, 0)) {
-		auto options = instance->synth->Options;
-		options->AppendedSilence = SpeechAppendedSilence::Min;
+	if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 6, 0)) {
+		synth.Options().AppendedSilence(SpeechAppendedSilence::Min);
 	} else {
 		LOG_DEBUGWARNING(L"AppendedSilence not supported");
 	}
+}
+
+// C entry point: allocates a new OcSpeech with a raw `new` and returns the pointer to the caller.
+// NOTE(review): presumably ocSpeech_terminate() is what releases the instance - confirm.
+OcSpeech* __stdcall ocSpeech_initialize() {
+	auto instance = new OcSpeech;
 	return instance;
 }
 
@@ -55,104 +54,148 @@ void __stdcall ocSpeech_terminate(OcSpeech* instance) {
 }
 
+// C entry point: forwards the callback function pointer to OcSpeech::setCallback on the given instance.
 void __stdcall ocSpeech_setCallback(OcSpeech* instance, ocSpeech_Callback fn) {
-	instance->callback = fn;
+	instance->setCallback(fn);
 }
 
-void __stdcall ocSpeech_speak(OcSpeech* instance, char16 *text) {
-	String^ textStr = ref new String(text);
+// Stores the function pointer that speak() later invokes with the synthesized audio
+// (or with null arguments when synthesis fails).
+void OcSpeech::setCallback(ocSpeech_Callback fn) {
+	callback = fn;
+}
+
+// Asynchronously synthesizes the given SSML text and delivers the result through the callback.
+// The work is moved to a background thread. Once synthesis completes, the stream's markers are
+// serialised as "text:time" entries separated by "|" and passed to the callback together with
+// the audio bytes and their length.
+// If synthesis or reading fails with an hresult_error, the error is logged and the callback is
+// invoked with (nullptr, 0, nullptr) instead.
+// NOTE(review): this coroutine uses the members synth and callback after suspension points, so the
+// OcSpeech instance has to outlive the whole operation - confirm that callers guarantee this.
+fire_and_forget OcSpeech::speak(hstring text) {
+	// Ensure that work is performed on a background thread.
+	co_await resume_background();
+
-	auto markersStr = make_shared<wstring>();
-	task<SpeechSynthesisStream ^>  speakTask;
+	// Locals live in the coroutine frame, so a plain wstring survives the suspension points below;
+	// the shared_ptr was only needed to share the string between the old task lambdas.
+	wstring markersStr;
+	SpeechSynthesisStream speechStream{ nullptr };
 	try {
-		speakTask = create_task(instance->synth->SynthesizeSsmlToStreamAsync(textStr));
-	} catch (Platform::Exception ^e) {
-		LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
-		instance->callback(NULL, 0, NULL);
-		return;
+		speechStream = co_await synth.SynthesizeSsmlToStreamAsync(text);
+	} catch (hresult_error const& e) {
+		LOG_ERROR(L"Error " << e.code() << L": " << e.message().c_str());
+		callback(nullptr, 0, nullptr);
+		co_return;
 	}
-	speakTask.then([markersStr] (SpeechSynthesisStream^ speechStream) {
-		// speechStream->Size is 64 bit, but Buffer can only take 32 bit.
-		// We shouldn't get values above 32 bit in reality.
-		const unsigned int size = static_cast<unsigned int>(speechStream->Size);
-		Buffer^ buffer = ref new Buffer(size);
-		IVectorView<IMediaMarker^>^ markers = speechStream->Markers;
-		for (auto&& marker : markers) {
-			if (markersStr->length() > 0) {
-				*markersStr += L"|";
-			}
-			*markersStr += marker->Text->Data();
-			*markersStr += L":";
-			*markersStr += to_wstring(marker->Time.Duration);
+	// speechStream.Size() is 64 bit, but Buffer can only take 32 bit.
+	// We shouldn't get values above 32 bit in reality.
+	const unsigned int size = static_cast<unsigned int>(speechStream.Size());
+	Buffer buffer = Buffer{ size };
+	IVectorView<IMediaMarker> markers = speechStream.Markers();
+	for (auto const& marker : markers) {
+		// Entries are separated by "|"; each entry is "<marker text>:<marker time count>".
+		if (!markersStr.empty()) {
+			markersStr += L"|";
 		}
-		auto t = create_task(speechStream->ReadAsync(buffer, size, Windows::Storage::Streams::InputStreamOptions::None));
-		return t;
-	}).then([instance, markersStr] (IBuffer^ buffer) {
+		markersStr += marker.Text();
+		markersStr += L":";
+		markersStr += to_wstring(marker.Time().count());
+	}
+	try {
+		// ReadAsync may return the data in a different buffer than the one passed in,
+		// so always consume the buffer it returns rather than the input buffer.
+		IBuffer readBuffer = co_await speechStream.ReadAsync(buffer, size, InputStreamOptions::None);
 		// Data has been read from the speech stream.
 		// Pass it to the callback.
-		BYTE* bytes = getBytes(buffer);
-		instance->callback(bytes, buffer->Length, markersStr->c_str());
-	}).then([instance] (task<void> previous) {
-		// Catch any unhandled exceptions that occurred during these tasks.
-		try {
-			previous.get();
-		} catch (Platform::Exception^ e) {
-			LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
-			instance->callback(NULL, 0, NULL);
-		}
-	});
+		BYTE* bytes = readBuffer.data();
+		callback(bytes, readBuffer.Length(), markersStr.c_str());
+	} catch (hresult_error const& e) {
+		LOG_ERROR(L"Error " << e.code() << L": " << e.message().c_str());
+		callback(nullptr, 0, nullptr);
+	}
 }
 
-// We use BSTR because we need the string to stay around until the caller is done with it
-// but the caller then needs to free it.
-// We can't just use malloc because the caller might be using a different CRT
-// and calling malloc and free from different CRTs isn't safe.
-BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance) {
+// C entry point: starts speaking the given text on the instance.
+// The text is passed as the hstring argument of OcSpeech::speak, which is a fire_and_forget
+// coroutine, so this call does not wait for synthesis to finish.
+void __stdcall ocSpeech_speak(OcSpeech* instance, wchar_t* text) {
+	instance->speak(text);
+}
+
+// Builds a single string describing every voice returned by AllVoices().
+// Each voice is written as "<id>:<language>:<display name>" and voices are separated by "|"
+// (no trailing separator after the last voice).
+wstring OcSpeech::getVoices() {
 	wstring voices;
-	for (unsigned int i = 0; i < instance->synth->AllVoices->Size; ++i) {
-		VoiceInformation^ info = instance->synth->AllVoices->GetAt(i);
-		voices += info->Id->Data();
+	auto const& allVoices = synth.AllVoices();
+	for (unsigned int i = 0; i < allVoices.Size(); ++i) {
+		VoiceInformation const& voiceInfo = allVoices.GetAt(i);
+		voices += voiceInfo.Id();
 		voices += L":";
-		voices += info->Language->Data();
+		voices += voiceInfo.Language();
 		voices += L":";
-		voices += info->DisplayName->Data();
-		if (i != instance->synth->AllVoices->Size - 1) {
+		voices += voiceInfo.DisplayName();
+		if (i != allVoices.Size() - 1) {
 			voices += L"|";
 		}
 	}
-	return SysAllocString(voices.c_str());
+	return voices;
 }
 
-const char16* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance) {
-	return instance->synth->Voice->Id->Data();
+// C entry point: returns the voice list produced by OcSpeech::getVoices as a newly allocated BSTR.
+// We use BSTR because we need the string to stay around until the caller is done with it
+// but the caller then needs to free it.
+// We can't just use malloc because the caller might be using a different CRT
+// and calling malloc and free from different CRTs isn't safe.
+BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance) {
+	return SysAllocString(instance->getVoices().c_str());
+}
+
+// Returns the Id() of the voice currently selected on the synthesizer.
+hstring OcSpeech::getCurrentVoiceId() {
+	return synth.Voice().Id();
+}
+
+// C entry point: returns the ID of the synthesizer's current voice.
+// getCurrentVoiceId() returns an hstring by value; returning c_str() of that temporary directly
+// would hand the caller a pointer whose backing string may already have been released.
+// The value is therefore kept in a thread-local holder, so the returned pointer stays valid
+// until the next call to this function on the same thread.
+const wchar_t* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance) {
+	thread_local hstring currentVoiceId;
+	currentVoiceId = instance->getCurrentVoiceId();
+	return currentVoiceId.c_str();
+}
+
+// Selects the voice at the given position in the AllVoices() list as the synthesizer's voice.
+// NOTE(review): index is handed straight to GetAt() with no range check here.
+void OcSpeech::setVoice(int index) {
+	synth.Voice(synth.AllVoices().GetAt(index));
 }
 
+// C entry point: forwards to OcSpeech::setVoice on the given instance.
 void __stdcall ocSpeech_setVoice(OcSpeech* instance, int index) {
-	instance->synth->Voice = instance->synth->AllVoices->GetAt(index);
+	instance->setVoice(index);
+}
+
+// Returns the Language() of the voice currently selected on the synthesizer.
+hstring OcSpeech::getCurrentVoiceLanguage() {
+	return synth.Voice().Language();
+}
+
+// C entry point: returns the language of the synthesizer's current voice.
+// getCurrentVoiceLanguage() returns an hstring by value; returning c_str() of that temporary
+// directly would hand the caller a pointer whose backing string may already have been released.
+// The value is therefore kept in a thread-local holder, so the returned pointer stays valid
+// until the next call to this function on the same thread.
+const wchar_t* __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance) {
+	thread_local hstring currentVoiceLanguage;
+	currentVoiceLanguage = instance->getCurrentVoiceLanguage();
+	return currentVoiceLanguage.c_str();
 }
 
-const char16 * __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance) {
-	return instance->synth->Voice->Language->Data();
+// Returns the AudioPitch value from the synthesizer's options.
+double OcSpeech::getPitch() {
+	return synth.Options().AudioPitch();
 }
 
+// C entry point: forwards to OcSpeech::getPitch on the given instance.
 double __stdcall ocSpeech_getPitch(OcSpeech* instance) {
-	return instance->synth->Options->AudioPitch;
+	return instance->getPitch();
+}
+
+// Writes the given value to AudioPitch on the synthesizer's options; the value is passed through unchanged.
+void OcSpeech::setPitch(double pitch) {
+	synth.Options().AudioPitch(pitch);
 }
 
+// C entry point: forwards to OcSpeech::setPitch on the given instance.
 void __stdcall ocSpeech_setPitch(OcSpeech* instance, double pitch) {
-	instance->synth->Options->AudioPitch = pitch;
+	instance->setPitch(pitch);
+}
+
+// Returns the AudioVolume value from the synthesizer's options.
+double OcSpeech::getVolume() {
+	return synth.Options().AudioVolume();
 }
 
+// C entry point: forwards to OcSpeech::getVolume on the given instance.
 double __stdcall ocSpeech_getVolume(OcSpeech* instance) {
-	return instance->synth->Options->AudioVolume;
+	return instance->getVolume();
+}
+
+// Writes the given value to AudioVolume on the synthesizer's options; the value is passed through unchanged.
+void OcSpeech::setVolume(double volume) {
+	synth.Options().AudioVolume(volume);
 }
 
+// C entry point: forwards to OcSpeech::setVolume on the given instance.
 void __stdcall ocSpeech_setVolume(OcSpeech* instance, double volume) {
-	instance->synth->Options->AudioVolume = volume;
+	instance->setVolume(volume);
+}
+
+// Returns the SpeakingRate value from the synthesizer's options.
+double OcSpeech::getRate() {
+	return synth.Options().SpeakingRate();
 }
 
+// C entry point: forwards to OcSpeech::getRate on the given instance.
 double __stdcall ocSpeech_getRate(OcSpeech* instance) {
-	return instance->synth->Options->SpeakingRate;
+	return instance->getRate();
+}
+
+// Writes the given value to SpeakingRate on the synthesizer's options; the value is passed through unchanged.
+void OcSpeech::setRate(double rate) {
+	synth.Options().SpeakingRate(rate);
 }
 
+// C entry point: forwards to OcSpeech::setRate on the given instance.
 void __stdcall ocSpeech_setRate(OcSpeech* instance, double rate) {
-	instance->synth->Options->SpeakingRate = rate;
+	instance->setRate(rate);
 }
@@ -14,28 +14,45 @@ This license can be found at:
 */
 
 #pragma once
-#define export __declspec(dllexport) 
+#define export __declspec(dllexport)
 
-typedef void (*ocSpeech_Callback)(byte* data, int length, const char16* markers);
-typedef struct {
-	Windows::Media::SpeechSynthesis::SpeechSynthesizer ^synth;
+// Signature of the function that receives synthesized speech:
+// a pointer to the audio bytes, their length, and the serialised marker string.
+// OcSpeech::speak passes null pointers and a length of 0 when synthesis fails.
+typedef void (*ocSpeech_Callback)(byte* data, int length, const wchar_t* markers);
+
+// Wraps a SpeechSynthesizer together with the callback that receives the synthesized audio.
+// Instances are created by ocSpeech_initialize() and driven through the ocSpeech_* C exports.
+class OcSpeech {
+private:
+	// Declared empty here; the constructor replaces it with a real synthesizer.
+	winrt::Windows::Media::SpeechSynthesis::SpeechSynthesizer synth{ nullptr };
-	ocSpeech_Callback callback;
-} OcSpeech;
+	// Initialised to nullptr so the member never holds an indeterminate value.
+	// It must be set via setCallback() before speak() is used, as speak() invokes it unconditionally.
+	ocSpeech_Callback callback{ nullptr };
+
+public:
+	OcSpeech();
+	// Starts asynchronous synthesis of the given text; the result is delivered through the callback.
+	winrt::fire_and_forget speak(winrt::hstring text);
+	void setCallback(ocSpeech_Callback fn);
+	// Returns all voices as "<id>:<language>:<display name>" entries separated by "|".
+	std::wstring getVoices();
+	winrt::hstring getCurrentVoiceId();
+	// Selects the voice at the given position in the list of all voices.
+	void setVoice(int index);
+	winrt::hstring getCurrentVoiceLanguage();
+	// Accessors for the pitch, volume and speaking rate options of the synthesizer.
+	double getPitch();
+	void setPitch(double pitch);
+	double getVolume();
+	void setVolume(double volume);
+	double getRate();
+	void setRate(double rate);
+};
 
 extern "C" {
-export bool __stdcall ocSpeech_supportsProsodyOptions();
-export OcSpeech* __stdcall ocSpeech_initialize();
-export void __stdcall ocSpeech_terminate(OcSpeech* instance);
-export void __stdcall ocSpeech_setCallback(OcSpeech* instance, ocSpeech_Callback fn);
-export void __stdcall ocSpeech_speak(OcSpeech* instance, char16 *text);
-export BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance);
-export const char16* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance);
-export void __stdcall ocSpeech_setVoice(OcSpeech* instance, int index);
-export const char16* __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance);
-export double __stdcall ocSpeech_getPitch(OcSpeech* instance);
-export void __stdcall ocSpeech_setPitch(OcSpeech* instance, double pitch);
-export double __stdcall ocSpeech_getVolume(OcSpeech* instance);
-export void __stdcall ocSpeech_setVolume(OcSpeech* instance, double volume);
-export double __stdcall ocSpeech_getRate(OcSpeech* instance);
-export void __stdcall ocSpeech_setRate(OcSpeech* instance, double rate);
+	// Functions exported from the DLL with C linkage (`export` expands to __declspec(dllexport)).
+	// Apart from ocSpeech_supportsProsodyOptions and ocSpeech_initialize, each takes the
+	// OcSpeech pointer returned by ocSpeech_initialize as its first argument.
+	export bool __stdcall ocSpeech_supportsProsodyOptions();
+	export OcSpeech* __stdcall ocSpeech_initialize();
+	export void __stdcall ocSpeech_terminate(OcSpeech* instance);
+	export void __stdcall ocSpeech_setCallback(OcSpeech* instance, ocSpeech_Callback fn);
+	export void __stdcall ocSpeech_speak(OcSpeech* instance, wchar_t* text);
+	// The returned BSTR is allocated for the caller, who is responsible for freeing it.
+	export BSTR __stdcall ocSpeech_getVoices(OcSpeech* instance);
+	export const wchar_t* __stdcall ocSpeech_getCurrentVoiceId(OcSpeech* instance);
+	export void __stdcall ocSpeech_setVoice(OcSpeech* instance, int index);
+	export const wchar_t* __stdcall ocSpeech_getCurrentVoiceLanguage(OcSpeech* instance);
+	export double __stdcall ocSpeech_getPitch(OcSpeech* instance);
+	export void __stdcall ocSpeech_setPitch(OcSpeech* instance, double pitch);
+	export double __stdcall ocSpeech_getVolume(OcSpeech* instance);
+	export void __stdcall ocSpeech_setVolume(OcSpeech* instance, double volume);
+	export double __stdcall ocSpeech_getRate(OcSpeech* instance);
+	export void __stdcall ocSpeech_setRate(OcSpeech* instance, double rate);
 }