Skip to content

Commit 20d5a25

Browse files
support audio ducking for SAPI5 on Windows 11 (#13118)
Fixes #12913.

Summary of the issue: NVDA's SAPI5 synthDriver instructed Windows to duck and unduck background audio via hooked winmm waveOut functions. However, on Windows 11, it seems that these functions are no longer used by SAPI5, and therefore audio ducking no longer worked for SAPI5.

Description of how this pull request fixes the issue: Rather than hooking winmm functions, make use of SAPI5's own events and other SynthDriver methods to enable and disable ducking. Specifically:
* On SapiSink.StartStream: enable ducking.
* On SapiSink.EndStream: disable ducking.
* SynthDriver.cancel: disable ducking.
* SynthDriver.pause: disable ducking if pausing and enable ducking if unpausing.
* SynthDriver.speak: temporarily enable audio ducking around the call to speak so that audio ducking can enforce its initial delay before speaking (as StartStream and EndStream are asynchronous).
1 parent aa351c5 commit 20d5a25

2 files changed

Lines changed: 81 additions & 80 deletions

File tree

source/synthDrivers/sapi5.py

Lines changed: 80 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# This file is covered by the GNU General Public License.
55
# See the file COPYING for more details.
66

7+
from typing import Optional
78
from enum import IntEnum
89
import locale
910
from collections import OrderedDict
@@ -13,7 +14,6 @@
1314
from comtypes import COMError
1415
import winreg
1516
import audioDucking
16-
import NVDAHelper
1717
from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking
1818
import config
1919
import nvwave
@@ -50,85 +50,11 @@ class SpeechVoiceSpeakFlags(IntEnum):
5050

5151
class SpeechVoiceEvents(IntEnum):
5252
# https://msdn.microsoft.com/en-us/previous-versions/windows/desktop/ms720886(v=vs.85)
53+
StartInputStream = 2
5354
EndInputStream = 4
5455
Bookmark = 16
5556

5657

57-
class FunctionHooker(object):
58-
def __init__(
59-
self,
60-
targetDll: str,
61-
importDll: str,
62-
funcName: str,
63-
newFunction # result of ctypes.WINFUNCTYPE
64-
):
65-
# dllImportTableHooks_hookSingle expects byte strings.
66-
try:
67-
self._hook=NVDAHelper.localLib.dllImportTableHooks_hookSingle(
68-
targetDll.encode("mbcs"),
69-
importDll.encode("mbcs"),
70-
funcName.encode("mbcs"),
71-
newFunction
72-
)
73-
except UnicodeEncodeError:
74-
log.error("Error encoding FunctionHooker input parameters", exc_info=True)
75-
self._hook = None
76-
if self._hook:
77-
log.debug(f"Hooked {funcName}")
78-
else:
79-
log.error(f"Could not hook {funcName}")
80-
raise RuntimeError(f"Could not hook {funcName}")
81-
82-
def __del__(self):
83-
if self._hook:
84-
NVDAHelper.localLib.dllImportTableHooks_unhookSingle(self._hook)
85-
86-
87-
_duckersByHandle={}
88-
89-
90-
@WINFUNCTYPE(windll.winmm.waveOutOpen.restype,*windll.winmm.waveOutOpen.argtypes,use_errno=False,use_last_error=False)
91-
def waveOutOpen(pWaveOutHandle,deviceID,wfx,callback,callbackInstance,flags):
92-
if audioDucking._isDebug():
93-
log.debugWarning("Ducking audio requested for SAPI5 synthdriver")
94-
try:
95-
res=windll.winmm.waveOutOpen(pWaveOutHandle,deviceID,wfx,callback,callbackInstance,flags) or 0
96-
except WindowsError as e:
97-
res=e.winerror
98-
if res==0 and pWaveOutHandle:
99-
h=pWaveOutHandle.contents.value
100-
d=audioDucking.AudioDucker()
101-
if not d.enable():
102-
log.warning("Ducking audio failed for SAPI5 synthdriver")
103-
_duckersByHandle[h]=d
104-
else:
105-
log.warning("Opening wave out failed for SAPI5 synthdriver")
106-
log.debugWarning(f"Win Error: {res}\n WaveOutHandle: {pWaveOutHandle}")
107-
return res
108-
109-
@WINFUNCTYPE(c_long,c_long)
110-
def waveOutClose(waveOutHandle):
111-
if audioDucking._isDebug():
112-
log.debugWarning("End ducking audio requested for SAPI5 synthdriver")
113-
try:
114-
res=windll.winmm.waveOutClose(waveOutHandle) or 0
115-
except WindowsError as e:
116-
res=e.winerror
117-
if res==0 and waveOutHandle:
118-
_duckersByHandle.pop(waveOutHandle,None)
119-
else:
120-
log.warning("Closing wave out failed for SAPI5 synthdriver")
121-
log.debugWarning(f"Res: {res}\n waveOutHandle: {waveOutHandle}")
122-
return res
123-
124-
_waveOutHooks=[]
125-
def ensureWaveOutHooks():
126-
if not _waveOutHooks and audioDucking.isAudioDuckingSupported():
127-
sapiPath=os.path.join(os.path.expandvars("$SYSTEMROOT"),"system32","speech","common","sapi.dll")
128-
_waveOutHooks.append(FunctionHooker(sapiPath,"WINMM.dll","waveOutOpen",waveOutOpen))
129-
_waveOutHooks.append(FunctionHooker(sapiPath,"WINMM.dll","waveOutClose",waveOutClose))
130-
131-
13258
class SapiSink(object):
13359
"""Handles SAPI event notifications.
13460
See https://msdn.microsoft.com/en-us/library/ms723587(v=vs.85).aspx
@@ -137,6 +63,16 @@ class SapiSink(object):
13763
def __init__(self, synthRef: weakref.ReferenceType):
13864
self.synthRef = synthRef
13965

66+
def StartStream(self, streamNum, pos):
67+
synth = self.synthRef()
68+
if synth is None:
69+
log.debugWarning("Called StartStream method on SapiSink while driver is dead")
70+
return
71+
if synth._audioDucker:
72+
if audioDucking._isDebug():
73+
log.debug("Enabling audio ducking due to starting speech stream")
74+
synth._audioDucker.enable()
75+
14076
def Bookmark(self, streamNum, pos, bookmark, bookmarkId):
14177
synth = self.synthRef()
14278
if synth is None:
@@ -150,6 +86,10 @@ def EndStream(self, streamNum, pos):
15086
log.debugWarning("Called Bookmark method on EndStream while driver is dead")
15187
return
15288
synthDoneSpeaking.notify(synth=synth)
89+
if synth._audioDucker:
90+
if audioDucking._isDebug():
91+
log.debug("Disabling audio ducking due to speech stream end")
92+
synth._audioDucker.disable()
15393

15494

15595
class SynthDriver(SynthDriver):
@@ -181,13 +121,15 @@ def check(cls):
181121
return False
182122

183123
ttsAudioStream=None #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio
124+
_audioDucker: Optional[audioDucking.AudioDucker] = None
184125

185126
def __init__(self,_defaultVoiceToken=None):
186127
"""
187128
@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
188129
@type _defaultVoiceToken: ISpeechObjectToken
189130
"""
190-
ensureWaveOutHooks()
131+
if audioDucking.isAudioDuckingSupported():
132+
self._audioDucker = audioDucking.AudioDucker()
191133
self._pitch=50
192134
self._initTts(_defaultVoiceToken)
193135

@@ -261,7 +203,9 @@ def _initTts(self, voice=None):
261203
if outputDeviceID>=0:
262204
self.tts.audioOutput=self.tts.getAudioOutputs()[outputDeviceID]
263205
self._eventsConnection = comtypes.client.GetEvents(self.tts, SapiSink(weakref.ref(self)))
264-
self.tts.EventInterests = SpeechVoiceEvents.Bookmark | SpeechVoiceEvents.EndInputStream
206+
self.tts.EventInterests = (
207+
SpeechVoiceEvents.StartInputStream | SpeechVoiceEvents.Bookmark | SpeechVoiceEvents.EndInputStream
208+
)
265209
from comInterfaces.SpeechLib import ISpAudio
266210
try:
267211
self.ttsAudioStream=self.tts.audioOutputStream.QueryInterface(ISpAudio)
@@ -396,18 +340,74 @@ def outputTags():
396340

397341
text = "".join(textList)
398342
flags = SpeechVoiceSpeakFlags.IsXML | SpeechVoiceSpeakFlags.Async
399-
self.tts.Speak(text, flags)
343+
# Ducking should be complete before the synth starts producing audio.
344+
# For this to happen, the speech method must block until ducking is complete.
345+
# Ducking should be disabled when the synth is finished producing audio.
346+
# Note that there may be calls to speak with a string that results in no audio,
347+
# it is important that in this case the audio does not get stuck ducked.
348+
# When there is no audio produced the startStream and endStream handlers are not called.
349+
# To prevent audio getting stuck ducked, it is unducked at the end of speech.
350+
# There are some known issues:
351+
# - When there is no audio produced by the synth, a user may notice volume lowering (ducking) temporarily.
352+
# - If the call to startStream handler is delayed significantly, users may notice a variation in volume
353+
# (as ducking is disabled at the end of speak, and re-enabled when the startStream handler is called)
354+
355+
# A note on the synchronicity of components of this approach:
356+
# SAPISink.StartStream event handler (callback):
357+
# the synth speech is not blocked by this event callback.
358+
# SAPISink.EndStream event handler (callback):
359+
# assumed also to be async but not confirmed. Synchronicity is irrelevant to the current approach.
360+
# AudioDucker.disable returns before the audio is completely unducked.
361+
# AudioDucker.enable() ducking will complete before the function returns.
362+
# It is not possible to "double duck the audio", calling twice yields the same result as calling once.
363+
# AudioDucker class instances count the number of enables/disables,
364+
# in order to unduck there must be no remaining enabled audio ducker instances.
365+
# Due to this a temporary audio ducker is used around the call to speak.
366+
# SAPISink.StartStream: Ducking here may allow the early speech to start before ducking is completed.
367+
if audioDucking.isAudioDuckingSupported():
368+
tempAudioDucker = audioDucking.AudioDucker()
369+
else:
370+
tempAudioDucker = None
371+
if tempAudioDucker:
372+
if audioDucking._isDebug():
373+
log.debug("Enabling audio ducking due to speak call")
374+
tempAudioDucker.enable()
375+
try:
376+
self.tts.Speak(text, flags)
377+
finally:
378+
if tempAudioDucker:
379+
if audioDucking._isDebug():
380+
log.debug("Disabling audio ducking after speak call")
381+
tempAudioDucker.disable()
400382

401383
def cancel(self):
402384
# SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.
403385
# Therefore instruct the underlying audio interface to stop first, before interrupting and purging any remaining speech.
404386
if self.ttsAudioStream:
405387
self.ttsAudioStream.setState(SPAudioState.STOP, 0)
406388
self.tts.Speak(None, SpeechVoiceSpeakFlags.Async | SpeechVoiceSpeakFlags.PurgeBeforeSpeak)
389+
if self._audioDucker:
390+
if audioDucking._isDebug():
391+
log.debug("Disabling audio ducking due to setting output audio state to stop")
392+
self._audioDucker.disable()
407393

408394
def pause(self, switch: bool):
409395
# SAPI5's default means of pausing in most cases is either extremely slow
410396
# (e.g. takes more than half a second) or does not work at all.
411397
# Therefore instruct the underlying audio interface to pause instead.
412398
if self.ttsAudioStream:
413-
self.ttsAudioStream.setState(SPAudioState.PAUSE if switch else SPAudioState.RUN, 0)
399+
oldState = self.ttsAudioStream.GetStatus().State
400+
if switch and oldState == SPAudioState.RUN:
401+
# pausing
402+
if self._audioDucker:
403+
if audioDucking._isDebug():
404+
log.debug("Disabling audio ducking due to setting output audio state to pause")
405+
self._audioDucker.disable()
406+
self.ttsAudioStream.setState(SPAudioState.PAUSE, 0)
407+
elif not switch and oldState == SPAudioState.PAUSE:
408+
# unpausing
409+
if self._audioDucker:
410+
if audioDucking._isDebug():
411+
log.debug("Enabling audio ducking due to setting output audio state to run")
412+
self._audioDucker.enable()
413+
self.ttsAudioStream.setState(SPAudioState.RUN, 0)

user_docs/en/changes.t2t

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ What's New in NVDA
3939
- MS Word with UIA: heading quick nav in browse mode no longer gets stuck on the final heading of a document, nor is this heading shown twice in the NVDA elements list. (#9540)
4040
- In Windows 8 and later, the File Explorer status bar can now be retrieved using the standard gesture NVDA+end (desktop) / NVDA+shift+end (laptop). (#12845)
4141
- Incoming messages in the chat of Skype for Business are reported again. (#9295)
42+
- NVDA can again duck audio when using the SAPI5 synthesizer on Windows 11. (#12913)
4243
-
4344

4445
== Changes for Developers ==

0 commit comments

Comments
 (0)