Skip to content

Commit 3dddd95

Browse files
authored
Merge 79a4ed0 into 20cec0f
2 parents 20cec0f + 79a4ed0 commit 3dddd95

8 files changed

Lines changed: 268 additions & 13 deletions

File tree

source/speech/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# A part of NonVisual Desktop Access (NVDA)
22
# This file is covered by the GNU General Public License.
33
# See the file COPYING for more details.
4-
# Copyright (C) 2006-2021 NV Access Limited, Peter Vágner, Aleksey Sadovoy, Babbage B.V., Bill Dengler,
5-
# Julien Cochuyt
4+
# Copyright (C) 2006-2023 NV Access Limited, Peter Vágner, Aleksey Sadovoy, Babbage B.V., Bill Dengler,
5+
# Julien Cochuyt, Leonard de Ruijter
66

77
from .speech import (
88
_extendSpeechSequence_addMathForTextInfo,
@@ -47,6 +47,7 @@
4747
setSpeechMode,
4848
speak,
4949
speakMessage,
50+
speakSsml,
5051
speakObject,
5152
speakObjectProperties,
5253
speakPreselectedText,
@@ -62,7 +63,6 @@
6263
spellTextInfo,
6364
splitTextIndentation,
6465
)
65-
6666
from .priorities import Spri
6767

6868
from .types import (
@@ -124,6 +124,7 @@
124124
"RE_INDENTATION_SPLIT",
125125
"setSpeechMode",
126126
"speak",
127+
"speakSsml",
127128
"speakMessage",
128129
"speakObject",
129130
"speakObjectProperties",

source/speech/commands.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# A part of NonVisual Desktop Access (NVDA)
33
# This file is covered by the GNU General Public License.
44
# See the file COPYING for more details.
5-
# Copyright (C) 2006-2020 NV Access Limited
5+
# Copyright (C) 2006-2023 NV Access Limited, Leonard de Ruijter
66

77
"""
88
Commands that can be embedded in a speech sequence for changing synth parameters, playing sounds or running
@@ -109,6 +109,14 @@ def __init__(self,index):
109109
def __repr__(self):
110110
return "IndexCommand(%r)" % self.index
111111

112+
def __eq__(self, __o: object) -> bool:
    """Compare this command with another object by its index.

    @param __o: The object to compare against.
    @return: True when C{__o} is this very object, or an instance of exactly the same class
        holding an equal C{index}. For any other type the superclass comparison decides.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is unhashable in Python 3;
    # confirm the enclosing class does not need to be hashed.
    if self is __o:
        return True
    if type(__o) is type(self):
        return self.index == __o.index
    return super().__eq__(__o)
118+
119+
112120
class SynthParamCommand(SynthCommand):
113121
"""A synth command which changes a parameter for subsequent speech.
114122
"""
@@ -133,6 +141,14 @@ def __init__(self,state):
133141
def __repr__(self):
134142
return "CharacterModeCommand(%r)" % self.state
135143

144+
def __eq__(self, __o: object) -> bool:
    """Compare this command with another object by its state.

    @param __o: The object to compare against.
    @return: True when C{__o} is this very object, or an instance of exactly the same class
        holding an equal C{state}. For any other type the superclass comparison decides.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is unhashable in Python 3;
    # confirm the enclosing class does not need to be hashed.
    if self is __o:
        return True
    if type(__o) is type(self):
        return self.state == __o.state
    return super().__eq__(__o)
150+
151+
136152
class LangChangeCommand(SynthParamCommand):
137153
"""A command to switch the language within speech."""
138154

@@ -155,6 +171,7 @@ def __eq__(self, __o: object) -> bool:
155171
return self.lang == __o.lang
156172
return super().__eq__(__o)
157173

174+
158175
class BreakCommand(SynthCommand):
159176
"""Insert a break between words.
160177
"""
@@ -169,6 +186,14 @@ def __init__(self, time: int = 0):
169186
def __repr__(self):
170187
return f"BreakCommand(time={self.time})"
171188

189+
def __eq__(self, __o: object) -> bool:
    """Compare this break command with another object by its time.

    @param __o: The object to compare against.
    @return: True when C{__o} is this very object, or an instance of exactly the same class
        holding an equal C{time}. For any other type the superclass comparison decides.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is unhashable in Python 3;
    # confirm the enclosing class does not need to be hashed.
    if self is __o:
        return True
    if type(__o) is type(self):
        return self.time == __o.time
    return super().__eq__(__o)
195+
196+
172197
class EndUtteranceCommand(SpeechCommand):
173198
"""End the current utterance at this point in the speech.
174199
Any text after this will be sent to the synthesizer as a separate utterance.
@@ -264,6 +289,21 @@ def __repr__(self):
264289
return "{type}({param})".format(
265290
type=type(self).__name__, param=param)
266291

292+
def __eq__(self, __o: object) -> bool:
    """Compare this command with another object by offset and multiplier.

    @param __o: The object to compare against.
    @return: True when C{__o} is this very object, or an instance of exactly the same class
        whose C{_offset} and C{_multiplier} both equal this command's.
        For any other type the superclass comparison decides.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is unhashable in Python 3;
    # confirm the enclosing class does not need to be hashed.
    if self is __o:
        return True
    if type(__o) is type(self):
        return self._offset == __o._offset and self._multiplier == __o._multiplier
    return super().__eq__(__o)
298+
299+
def __ne__(self, __o) -> bool:
    """Report whether this command differs from another object.

    @param __o: The object to compare against.
    @return: False when C{__o} is this very object. For an instance of exactly the same class,
        True when either C{_offset} or C{_multiplier} differs.
        For any other type the superclass comparison decides.
    """
    if self is __o:
        return False
    if type(__o) is type(self):
        return self._offset != __o._offset or self._multiplier != __o._multiplier
    return super().__ne__(__o)
305+
306+
267307
class PitchCommand(BaseProsodyCommand):
268308
"""Change the pitch of the voice.
269309
"""
@@ -303,6 +343,14 @@ def __repr__(self):
303343
out += ", text=%r" % self.text
304344
return out + ")"
305345

346+
def __eq__(self, __o: object) -> bool:
    """Compare this command with another object by IPA and fallback text.

    @param __o: The object to compare against.
    @return: True when C{__o} is this very object, or an instance of exactly the same class
        whose C{ipa} and C{text} both equal this command's.
        For any other type the superclass comparison decides.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is unhashable in Python 3;
    # confirm the enclosing class does not need to be hashed.
    if self is __o:
        return True
    if type(__o) is type(self):
        return self.ipa == __o.ipa and self.text == __o.text
    return super().__eq__(__o)
352+
353+
306354
class BaseCallbackCommand(SpeechCommand, metaclass=ABCMeta):
307355
"""Base class for commands which cause a function to be called when speech reaches them.
308356
This class should not be instantiated directly.

source/speech/speech.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,42 @@ def speakMessage(
194194
speak(seq, symbolLevel=None, priority=priority)
195195

196196

197+
def _getSpeakSsmlSpeech(
        ssml: str,
        _prefixSpeechCommand: Optional[SpeechCommand] = None,
) -> SpeechSequence:
    """Build the speech sequence described by a piece of SSML.

    @param ssml: The SSML data to convert. C{None} results in an empty sequence.
    @param _prefixSpeechCommand: A SpeechCommand to insert at the start of the sequence.
        It is only inserted when the SSML produced a non-empty sequence.
    @return: The speech sequence produced by the SSML parser.
    """
    if ssml is None:
        return []
    # Imported at call time rather than at module level.
    # NOTE(review): presumably because speechXml itself imports from the speech package - confirm.
    from speechXml import SsmlParser
    speechSequence = SsmlParser().convertFromXml(ssml)
    if speechSequence and _prefixSpeechCommand is not None:
        speechSequence.insert(0, _prefixSpeechCommand)
    return speechSequence
214+
215+
216+
def speakSsml(
        ssml: str,
        symbolLevel: Optional[int] = None,
        _prefixSpeechCommand: Optional[SpeechCommand] = None,
        priority: Optional[Spri] = None
) -> None:
    """Speak the speech sequence described by a piece of SSML.

    Nothing is spoken when the SSML yields no speech sequence.
    @param ssml: The SSML data to speak.
    @param symbolLevel: The symbol verbosity level.
    @param _prefixSpeechCommand: A SpeechCommand to insert at the start of the sequence.
    @param priority: The speech priority.
    """
    sequence = _getSpeakSsmlSpeech(ssml, _prefixSpeechCommand)
    if not sequence:
        return
    speak(sequence, symbolLevel=symbolLevel, priority=priority)
231+
232+
197233
def getCurrentLanguage() -> str:
198234
synth = getSynth()
199235
language=None

source/speech/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from .commands import SpeechCommand
2020

2121
SequenceItemT = Union[SpeechCommand, str]
22-
SpeechSequence = List[SequenceItemT]
22+
SpeechSequence = list[SequenceItemT]
2323
SpeechIterable = Iterable[SequenceItemT]
2424

2525
_IndexT = int # Type for indexes.

source/speechXml.py

Lines changed: 136 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,32 @@
11
# A part of NonVisual Desktop Access (NVDA)
2-
# Copyright (C) 2016-2022 NV Access Limited
2+
# Copyright (C) 2016-2023 NV Access Limited, Leonard de Ruijter
33
# This file is covered by the GNU General Public License.
44
# See the file COPYING for more details.
55

6-
"""Utilities for converting NVDA speech sequences to XML.
6+
"""Utilities for converting NVDA speech sequences to XML and vice versa.
77
Several synthesizers accept XML, either SSML or their own schemas.
88
L{SpeechXmlConverter} is the base class for conversion to XML.
99
You can subclass this to support specific XML schemas.
1010
L{SsmlConverter} is an implementation for conversion to SSML.
1111
"""
1212

13-
from collections import namedtuple, OrderedDict
1413
import re
15-
import speech
14+
from collections import OrderedDict, namedtuple
15+
from collections.abc import Callable, Generator
16+
from xml.parsers import expat
17+
1618
import textUtils
17-
from speech.commands import LangChangeCommand, SpeechCommand
1819
from logHandler import log
20+
from speech.commands import (
21+
BreakCommand,
22+
CharacterModeCommand,
23+
LangChangeCommand,
24+
PitchCommand,
25+
RateCommand,
26+
SpeechCommand,
27+
VolumeCommand,
28+
)
29+
from speech.types import SpeechSequence
1930

2031
XML_ESCAPES = {
2132
0x3C: u"&lt;", # <
@@ -47,6 +58,8 @@ def _buildInvalidXmlRegexp():
4758
trailing=trailingSurrogate))
4859

4960
RE_INVALID_XML_CHARS = _buildInvalidXmlRegexp()
61+
RE_TIME_MS = re.compile(r"^(?P<time>\d+)ms$", re.IGNORECASE)
62+
RE_PERCENTAGE = re.compile(r"^(?P<percentage>\d+(\.\d+)?)%$")
5063
REPLACEMENT_CHAR = textUtils.REPLACEMENT_CHAR
5164

5265

@@ -55,6 +68,13 @@ def toXmlLang(nvdaLang: str) -> str:
5568
"""
5669
return nvdaLang.replace("_", "-")
5770

71+
72+
def toNvdaLang(xmlLang: str) -> str:
    """Convert an XML language code to an NVDA language code.

    Every hyphen is replaced by an underscore, e.g. C{"en-US"} becomes C{"en_US"}.
    @param xmlLang: The language code as used in XML.
    @return: The language code with underscores as separators.
    """
    parts = xmlLang.split("-")
    return "_".join(parts)
76+
77+
5878
#: An XMLBalancer command to enclose the entire output in a tag.
5979
#: This must be the first command.
6080
EncloseAllCommand = namedtuple("EncloseAllCommand", ("tag", "attrs"))
@@ -79,7 +99,8 @@ def _escapeXml(text):
7999
text = RE_INVALID_XML_CHARS.sub(REPLACEMENT_CHAR, text)
80100
return text
81101

82-
class XmlBalancer(object):
102+
103+
class XmlBalancer:
83104
"""Generates balanced XML given a set of commands.
84105
NVDA speech sequences are linear, but XML is hierarchical, which makes conversion challenging.
85106
For example, a speech sequence might change the pitch, then change the volume, then reset the pitch to default.
@@ -185,7 +206,8 @@ def generateXml(self, commands) -> str:
185206
self._closeTag(tag)
186207
return u"".join(self._out)
187208

188-
class SpeechXmlConverter(object):
209+
210+
class SpeechXmlConverter:
189211
"""Base class for conversion of NVDA speech sequences to XML.
190212
This class converts an NVDA speech sequence into XmlBalancer commands
191213
which can then be passed to L{XmlBalancer} to produce correct XML.
@@ -232,6 +254,7 @@ def convertToXml(self, speechSequence):
232254
balCommands = self.generateBalancerCommands(speechSequence)
233255
return bal.generateXml(balCommands)
234256

257+
235258
class SsmlConverter(SpeechXmlConverter):
236259
"""Converts an NVDA speech sequence to SSML.
237260
"""
@@ -280,3 +303,109 @@ def convertVolumeCommand(self, command):
280303

281304
def convertPhonemeCommand(self, command):
282305
return StandAloneTagCommand("phoneme", {"alphabet": "ipa", "ph": command.ipa}, command.text)
306+
307+
308+
class SpeechXmlParser:
    """Base class for parsing of NVDA speech sequences from XML.
    This class converts XML to an NVDA speech sequence.

    Callers can call L{convertFromXml} with XML to generate a speech sequence.

    Subclasses implement specific XML schemas by implementing generators which convert each XML tag supported.
    The method for a tag should be named with the prefix "parse" followed by the tag.
    For example, the handler for <volume /> should be named C{parseVolume}.
    Hyphenated tag names are joined with each part capitalized, e.g. <say-as /> is handled by C{parseSayAs}.
    These generators receive an optional dictionary containing the attributes and values.
    When the attributes value is None, it is a closing tag.
    They should yield one or more appropriate SpeechCommand instances.
    """

    # The sequence being built by the current L{convertFromXml} call.
    _speechSequence: SpeechSequence

    def _elementHandler(self, tagName: str, attrs: dict | None = None):
        """Handle the start or end of an XML element.

        This single method serves as both the start and the end element handler of the expat parser.
        The end element handler is called without attributes, therefore C{attrs} is None for a closing tag.
        @param tagName: The name of the tag as found in the XML.
        @param attrs: The attributes of an opening tag, or None for a closing tag.
        """
        processedTagName = "".join(tagName.title().split("-"))
        funcName = f"parse{processedTagName}"
        if (func := getattr(self, funcName, None)) is None:
            log.debugWarning(f"Unsupported tag: {tagName}")
            return
        for command in func(attrs):
            # If the last command in the sequence is of the same type, we can remove it.
            # No text was added after it, so it would be overridden by the new command anyway.
            if self._speechSequence and type(self._speechSequence[-1]) is type(command):
                self._speechSequence.pop()
            # Look up the previous command of the same class, if any.
            # If the last instance of this command in the sequence is equal to this command,
            # we don't have to add it.
            prevCommand = next((c for c in reversed(self._speechSequence) if type(c) is type(command)), None)
            if prevCommand != command:
                self._speechSequence.append(command)

    def convertFromXml(self, xml: str) -> SpeechSequence:
        """Convert XML to a speech sequence.

        @param xml: The complete XML document to convert.
        @return: The speech sequence, containing text and speech commands.
        @raise ValueError: When the XML can't be parsed, e.g. because it is malformed or truncated,
            or when a tag handler raises an exception.
        """
        self._speechSequence = []
        parser = expat.ParserCreate('utf-8')
        # Without buffering, expat reports a single run of text in several pieces,
        # e.g. around entities such as &amp; and around line breaks.
        # Buffering delivers the text between two tags as one string wherever possible.
        parser.buffer_text = True
        parser.StartElementHandler = parser.EndElementHandler = self._elementHandler
        parser.CharacterDataHandler = self._speechSequence.append
        try:
            # The whole document is provided at once, so this is the final call to Parse.
            # Flagging it as such makes expat report truncated documents instead of silently accepting them.
            parser.Parse(xml, True)
        except Exception as e:
            raise ValueError(f"XML: {xml}") from e
        return self._speechSequence
352+
353+
354+
ParseGeneratorT = Generator[SpeechCommand, None, None]
355+
ParseFuncT = Callable[[dict[str, str] | None], ParseGeneratorT]
356+
357+
358+
class SsmlParser(SpeechXmlParser):
359+
"""Parses SSML into an NVDA speech sequence.
360+
"""
361+
362+
def parseSayAs(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
363+
state = attrs is not None and attrs.get("interpret-as") == "characters"
364+
yield CharacterModeCommand(state)
365+
366+
def parseVoice(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
367+
if attrs is None:
368+
return None
369+
if (xmlLang := attrs.get("xml:lang")) is None:
370+
return None
371+
yield LangChangeCommand(toNvdaLang(xmlLang))
372+
373+
def parseBreak(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
374+
if attrs is None or "time" not in attrs:
375+
return None
376+
if (time := RE_TIME_MS.match(attrs["time"])) is None:
377+
log.debugWarning(f"Unknown attributes for break tag: {attrs}")
378+
return None
379+
yield BreakCommand(int(time.group("time")))
380+
381+
_cachedProsodyAttrs: list[dict]
382+
383+
def parseProsody(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
384+
if isOpenTag := attrs is not None:
385+
self._cachedProsodyAttrs.append(attrs)
386+
else: # attrs is None
387+
# Pop the attrs from the cache so we can add commands to reset them.
388+
attrs = self._cachedProsodyAttrs.pop()
389+
for attr, val in attrs.items():
390+
if (percentage := RE_PERCENTAGE.match(val)) is None:
391+
log.debugWarning(f"Attribute {attr!r} for prosody tag has unparseable value: {val!r}")
392+
continue
393+
multiplier = float(percentage.group("percentage")) / 100 if isOpenTag else 1
394+
match attr:
395+
case "pitch":
396+
yield PitchCommand(multiplier=multiplier)
397+
case "volume":
398+
yield VolumeCommand(multiplier=multiplier)
399+
case "rate":
400+
yield RateCommand(multiplier=multiplier)
401+
case _:
402+
log.debugWarning(f"Unknown prosody attribute: {attr!r}")
403+
continue
404+
405+
def parseSpeak(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
406+
return
407+
yield
408+
409+
def convertFromXml(self, xml: str) -> SpeechSequence:
410+
self._cachedProsodyAttrs = []
411+
return super().convertFromXml(xml)

source/winAPI/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ class HResult(enum.IntEnum):
1515

1616
class SystemErrorCodes(enum.IntEnum):
1717
# https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
18+
SUCCESS = 0x0
1819
ACCESS_DENIED = 0x5
20+
INVALID_DATA = 0xD
21+
NOT_READY = 0x15
1922
INVALID_PARAMETER = 0x57
2023
MOD_NOT_FOUND = 0x7E

0 commit comments

Comments
 (0)