44# This file is covered by the GNU General Public License.
55# See the file COPYING for more details.
66
7+ from typing import Optional
78from enum import IntEnum
89import locale
910from collections import OrderedDict
1314from comtypes import COMError
1415import winreg
1516import audioDucking
16- import NVDAHelper
1717from synthDriverHandler import SynthDriver , VoiceInfo , synthIndexReached , synthDoneSpeaking
1818import config
1919import nvwave
@@ -50,85 +50,11 @@ class SpeechVoiceSpeakFlags(IntEnum):
5050
class SpeechVoiceEvents(IntEnum):
    """SAPI 5 voice event flags that this driver registers interest in.

    The members are combined with ``|`` when they are assigned to the voice's
    ``EventInterests`` property.
    """
    # https://msdn.microsoft.com/en-us/previous-versions/windows/desktop/ms720886(v=vs.85)
    StartInputStream = 2
    EndInputStream = 4
    Bookmark = 16
5556
5657
57- class FunctionHooker (object ):
58- def __init__ (
59- self ,
60- targetDll : str ,
61- importDll : str ,
62- funcName : str ,
63- newFunction # result of ctypes.WINFUNCTYPE
64- ):
65- # dllImportTableHooks_hookSingle expects byte strings.
66- try :
67- self ._hook = NVDAHelper .localLib .dllImportTableHooks_hookSingle (
68- targetDll .encode ("mbcs" ),
69- importDll .encode ("mbcs" ),
70- funcName .encode ("mbcs" ),
71- newFunction
72- )
73- except UnicodeEncodeError :
74- log .error ("Error encoding FunctionHooker input parameters" , exc_info = True )
75- self ._hook = None
76- if self ._hook :
77- log .debug (f"Hooked { funcName } " )
78- else :
79- log .error (f"Could not hook { funcName } " )
80- raise RuntimeError (f"Could not hook { funcName } " )
81-
82- def __del__ (self ):
83- if self ._hook :
84- NVDAHelper .localLib .dllImportTableHooks_unhookSingle (self ._hook )
85-
86-
87- _duckersByHandle = {}
88-
89-
90- @WINFUNCTYPE (windll .winmm .waveOutOpen .restype ,* windll .winmm .waveOutOpen .argtypes ,use_errno = False ,use_last_error = False )
91- def waveOutOpen (pWaveOutHandle ,deviceID ,wfx ,callback ,callbackInstance ,flags ):
92- if audioDucking ._isDebug ():
93- log .debugWarning ("Ducking audio requested for SAPI5 synthdriver" )
94- try :
95- res = windll .winmm .waveOutOpen (pWaveOutHandle ,deviceID ,wfx ,callback ,callbackInstance ,flags ) or 0
96- except WindowsError as e :
97- res = e .winerror
98- if res == 0 and pWaveOutHandle :
99- h = pWaveOutHandle .contents .value
100- d = audioDucking .AudioDucker ()
101- if not d .enable ():
102- log .warning ("Ducking audio failed for SAPI5 synthdriver" )
103- _duckersByHandle [h ]= d
104- else :
105- log .warning ("Opening wave out failed for SAPI5 synthdriver" )
106- log .debugWarning (f"Win Error: { res } \n WaveOutHandle: { pWaveOutHandle } " )
107- return res
108-
109- @WINFUNCTYPE (c_long ,c_long )
110- def waveOutClose (waveOutHandle ):
111- if audioDucking ._isDebug ():
112- log .debugWarning ("End ducking audio requested for SAPI5 synthdriver" )
113- try :
114- res = windll .winmm .waveOutClose (waveOutHandle ) or 0
115- except WindowsError as e :
116- res = e .winerror
117- if res == 0 and waveOutHandle :
118- _duckersByHandle .pop (waveOutHandle ,None )
119- else :
120- log .warning ("Closing wave out failed for SAPI5 synthdriver" )
121- log .debugWarning (f"Res: { res } \n waveOutHandle: { waveOutHandle } " )
122- return res
123-
124- _waveOutHooks = []
125- def ensureWaveOutHooks ():
126- if not _waveOutHooks and audioDucking .isAudioDuckingSupported ():
127- sapiPath = os .path .join (os .path .expandvars ("$SYSTEMROOT" ),"system32" ,"speech" ,"common" ,"sapi.dll" )
128- _waveOutHooks .append (FunctionHooker (sapiPath ,"WINMM.dll" ,"waveOutOpen" ,waveOutOpen ))
129- _waveOutHooks .append (FunctionHooker (sapiPath ,"WINMM.dll" ,"waveOutClose" ,waveOutClose ))
130-
131-
13258class SapiSink (object ):
13359 """Handles SAPI event notifications.
13460 See https://msdn.microsoft.com/en-us/library/ms723587(v=vs.85).aspx
@@ -137,6 +63,16 @@ class SapiSink(object):
13763 def __init__ (self , synthRef : weakref .ReferenceType ):
13864 self .synthRef = synthRef
13965
def StartStream(self, streamNum, pos):
    """Enable the driver's audio ducker when SAPI reports that a speech stream has started.

    :param streamNum: Stream number passed with the event; not used here.
    :param pos: Position passed with the event; not used here.
    """
    synth = self.synthRef()
    if synth is None:
        # The weak reference no longer resolves to a driver, so there is nothing to duck for.
        log.debugWarning("Called StartStream method on SapiSink while driver is dead")
        return
    # The driver only holds a ducker when audio ducking is supported.
    if synth._audioDucker:
        if audioDucking._isDebug():
            log.debug("Enabling audio ducking due to starting speech stream")
        synth._audioDucker.enable()
75+
14076 def Bookmark (self , streamNum , pos , bookmark , bookmarkId ):
14177 synth = self .synthRef ()
14278 if synth is None :
@@ -150,6 +86,10 @@ def EndStream(self, streamNum, pos):
15086 log .debugWarning ("Called Bookmark method on EndStream while driver is dead" )
15187 return
15288 synthDoneSpeaking .notify (synth = synth )
89+ if synth ._audioDucker :
90+ if audioDucking ._isDebug ():
91+ log .debug ("Disabling audio ducking due to speech stream end" )
92+ synth ._audioDucker .disable ()
15393
15494
15595class SynthDriver (SynthDriver ):
@@ -181,13 +121,15 @@ def check(cls):
181121 return False
182122
183123 ttsAudioStream = None #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio
124+ _audioDucker : Optional [audioDucking .AudioDucker ] = None
184125
185126 def __init__ (self ,_defaultVoiceToken = None ):
186127 """
187128 @param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
188129 @type _defaultVoiceToken: ISpeechObjectToken
189130 """
190- ensureWaveOutHooks ()
131+ if audioDucking .isAudioDuckingSupported ():
132+ self ._audioDucker = audioDucking .AudioDucker ()
191133 self ._pitch = 50
192134 self ._initTts (_defaultVoiceToken )
193135
@@ -261,7 +203,9 @@ def _initTts(self, voice=None):
261203 if outputDeviceID >= 0 :
262204 self .tts .audioOutput = self .tts .getAudioOutputs ()[outputDeviceID ]
263205 self ._eventsConnection = comtypes .client .GetEvents (self .tts , SapiSink (weakref .ref (self )))
264- self .tts .EventInterests = SpeechVoiceEvents .Bookmark | SpeechVoiceEvents .EndInputStream
206+ self .tts .EventInterests = (
207+ SpeechVoiceEvents .StartInputStream | SpeechVoiceEvents .Bookmark | SpeechVoiceEvents .EndInputStream
208+ )
265209 from comInterfaces .SpeechLib import ISpAudio
266210 try :
267211 self .ttsAudioStream = self .tts .audioOutputStream .QueryInterface (ISpAudio )
@@ -396,18 +340,74 @@ def outputTags():
396340
397341 text = "" .join (textList )
398342 flags = SpeechVoiceSpeakFlags .IsXML | SpeechVoiceSpeakFlags .Async
399- self .tts .Speak (text , flags )
343+ # Ducking should be complete before the synth starts producing audio.
344+ # For this to happen, the speech method must block until ducking is complete.
345+ # Ducking should be disabled when the synth is finished producing audio.
346+ # Note that there may be calls to speak with a string that results in no audio;
347+ # it is important that in this case the audio does not get stuck ducked.
348+ # When there is no audio produced, the StartStream and EndStream handlers are not called.
349+ # To prevent audio getting stuck ducked, it is unducked at the end of the speak call.
350+ # There are some known issues:
351+ # - When there is no audio produced by the synth, a user may notice volume lowering (ducking) temporarily.
352+ # - If the call to the StartStream handler is delayed significantly, users may notice a variation in volume
353+ # (as ducking is disabled at the end of speak, and re-enabled when the StartStream handler is called).
354+
355+ # A note on the synchronicity of components of this approach:
356+ # SapiSink.StartStream event handler (callback):
357+ # the synth speech is not blocked by this event callback.
358+ # SapiSink.EndStream event handler (callback):
359+ # assumed also to be async but not confirmed. Synchronicity is irrelevant to the current approach.
360+ # AudioDucker.disable() returns before the audio is completely unducked.
361+ # AudioDucker.enable() returns only after ducking is complete.
362+ # It is not possible to "double duck the audio"; calling twice yields the same result as calling once.
363+ # AudioDucker class instances count the number of enables/disables;
364+ # in order to unduck there must be no remaining enabled audio ducker instances.
365+ # Due to this, a temporary audio ducker is used around the call to speak.
366+ # SapiSink.StartStream: Ducking here may allow the early speech to start before ducking is completed.
367+ if audioDucking .isAudioDuckingSupported ():
368+ tempAudioDucker = audioDucking .AudioDucker ()
369+ else :
370+ tempAudioDucker = None
371+ if tempAudioDucker :
372+ if audioDucking ._isDebug ():
373+ log .debug ("Enabling audio ducking due to speak call" )
374+ tempAudioDucker .enable ()
375+ try :
376+ self .tts .Speak (text , flags )
377+ finally :
378+ if tempAudioDucker :
379+ if audioDucking ._isDebug ():
380+ log .debug ("Disabling audio ducking after speak call" )
381+ tempAudioDucker .disable ()
400382
def cancel(self):
    """Stop the audio stream, purge any remaining speech, and disable the driver's audio ducker."""
    # SAPI5's default means of stopping speech can sometimes lag at end of speech,
    # especially with Win8 / Win 10 Microsoft Voices.
    # Therefore instruct the underlying audio interface to stop first,
    # before interrupting and purging any remaining speech.
    if self.ttsAudioStream:
        self.ttsAudioStream.setState(SPAudioState.STOP, 0)
    self.tts.Speak(None, SpeechVoiceSpeakFlags.Async | SpeechVoiceSpeakFlags.PurgeBeforeSpeak)
    # The driver only holds a ducker when audio ducking is supported.
    if self._audioDucker:
        if audioDucking._isDebug():
            log.debug("Disabling audio ducking due to setting output audio state to stop")
        self._audioDucker.disable()
407393
def pause(self, switch: bool):
    """Pause or resume speech output, disabling or re-enabling audio ducking to match.

    Nothing happens when there is no audio stream, or when the stream is not in the
    state the request starts from (running for a pause, paused for a resume).

    :param switch: True to pause speech, False to resume it.
    """
    # SAPI5's default means of pausing in most cases is either extremely slow
    # (e.g. takes more than half a second) or does not work at all.
    # Therefore instruct the underlying audio interface to pause instead.
    if self.ttsAudioStream:
        oldState = self.ttsAudioStream.GetStatus().State
        if switch and oldState == SPAudioState.RUN:
            # pausing: ducking is disabled before the stream is paused.
            if self._audioDucker:
                if audioDucking._isDebug():
                    log.debug("Disabling audio ducking due to setting output audio state to pause")
                self._audioDucker.disable()
            self.ttsAudioStream.setState(SPAudioState.PAUSE, 0)
        elif not switch and oldState == SPAudioState.PAUSE:
            # unpausing: ducking is re-enabled before the stream is set running again.
            if self._audioDucker:
                if audioDucking._isDebug():
                    log.debug("Enabling audio ducking due to setting output audio state to run")
                self._audioDucker.enable()
            self.ttsAudioStream.setState(SPAudioState.RUN, 0)
0 commit comments