Merge 79a4ed0 into 20cec0f

LeonarddeR · web-flow · commit 3dddd95fc430 · 2023-10-28T19:14:01.000+10:00
diff --git a/source/speech/__init__.py b/source/speech/__init__.py
@@ -1,8 +1,8 @@
 # A part of NonVisual Desktop Access (NVDA)
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
-# Copyright (C) 2006-2021 NV Access Limited, Peter Vágner, Aleksey Sadovoy, Babbage B.V., Bill Dengler,
-# Julien Cochuyt
+# Copyright (C) 2006-2023 NV Access Limited, Peter Vágner, Aleksey Sadovoy, Babbage B.V., Bill Dengler,
+# Julien Cochuyt, Leonard de Ruijter
 
 from .speech import (
 	_extendSpeechSequence_addMathForTextInfo,
@@ -47,6 +47,7 @@
 	setSpeechMode,
 	speak,
 	speakMessage,
+	speakSsml,
 	speakObject,
 	speakObjectProperties,
 	speakPreselectedText,
@@ -62,7 +63,6 @@
 	spellTextInfo,
 	splitTextIndentation,
 )
-
 from .priorities import Spri
 
 from .types import (
@@ -124,6 +124,7 @@
 	"RE_INDENTATION_SPLIT",
 	"setSpeechMode",
 	"speak",
+	"speakSsml",
 	"speakMessage",
 	"speakObject",
 	"speakObjectProperties",
diff --git a/source/speech/commands.py b/source/speech/commands.py
@@ -2,7 +2,7 @@
 # A part of NonVisual Desktop Access (NVDA)
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
-# Copyright (C) 2006-2020 NV Access Limited
+# Copyright (C) 2006-2023 NV Access Limited, Leonard de Ruijter
 
 """
 Commands that can be embedded in a speech sequence for changing synth parameters, playing sounds or running
@@ -109,6 +109,14 @@ def __init__(self,index):
 	def __repr__(self):
 		return "IndexCommand(%r)" % self.index
 
+	def __eq__(self, __o: object) -> bool:
+		if __o is self:
+			return True
+		if type(self) is not type(__o):
+			return super().__eq__(__o)
+		return self.index == __o.index
+
+
 class SynthParamCommand(SynthCommand):
 	"""A synth command which changes a parameter for subsequent speech.
 	"""
@@ -133,6 +141,14 @@ def __init__(self,state):
 	def __repr__(self):
 		return "CharacterModeCommand(%r)" % self.state
 
+	def __eq__(self, __o: object) -> bool:
+		if __o is self:
+			return True
+		if type(self) is not type(__o):
+			return super().__eq__(__o)
+		return self.state == __o.state
+
+
 class LangChangeCommand(SynthParamCommand):
 	"""A command to switch the language within speech."""
 
@@ -155,6 +171,7 @@ def __eq__(self, __o: object) -> bool:
 			return self.lang == __o.lang
 		return super().__eq__(__o)
 
+
 class BreakCommand(SynthCommand):
 	"""Insert a break between words.
 	"""
@@ -169,6 +186,14 @@ def __init__(self, time: int = 0):
 	def __repr__(self):
 		return f"BreakCommand(time={self.time})"
 
+	def __eq__(self, __o: object) -> bool:
+		if __o is self:
+			return True
+		if type(self) is not type(__o):
+			return super().__eq__(__o)
+		return self.time == __o.time
+
+
 class EndUtteranceCommand(SpeechCommand):
 	"""End the current utterance at this point in the speech.
 	Any text after this will be sent to the synthesizer as a separate utterance.
@@ -264,6 +289,21 @@ def __repr__(self):
 		return "{type}({param})".format(
 			type=type(self).__name__, param=param)
 
+	def __eq__(self, __o: object) -> bool:
+		if __o is self:
+			return True
+		if type(self) is not type(__o):
+			return super().__eq__(__o)
+		return self._offset == __o._offset and self._multiplier == __o._multiplier
+
+	def __ne__(self, __o) -> bool:
+		if __o is self:
+			return False
+		if type(self) is not type(__o):
+			return super().__ne__(__o)
+		return self._offset != __o._offset or self._multiplier != __o._multiplier
+
+
 class PitchCommand(BaseProsodyCommand):
 	"""Change the pitch of the voice.
 	"""
@@ -303,6 +343,14 @@ def __repr__(self):
 			out += ", text=%r" % self.text
 		return out + ")"
 
+	def __eq__(self, __o: object) -> bool:
+		if __o is self:
+			return True
+		if type(self) is not type(__o):
+			return super().__eq__(__o)
+		return self.ipa == __o.ipa and self.text == __o.text
+
+
 class BaseCallbackCommand(SpeechCommand, metaclass=ABCMeta):
 	"""Base class for commands which cause a function to be called when speech reaches them.
 	This class should not be instantiated directly.
diff --git a/source/speech/speech.py b/source/speech/speech.py
@@ -194,6 +194,42 @@ def speakMessage(
 		speak(seq, symbolLevel=None, priority=priority)
 
 
+def _getSpeakSsmlSpeech(
+		ssml: str,
+		_prefixSpeechCommand: Optional[SpeechCommand] = None,
+) -> SpeechSequence:
+	"""Gets the speech sequence for given SSML.
+	@param ssml: The SSML data to speak
+	@param _prefixSpeechCommand: A SpeechCommand to insert at the start of the sequence.
+	"""
+	if ssml is None:
+		return []
+	from speechXml import SsmlParser
+	parser = SsmlParser()
+	sequence = parser.convertFromXml(ssml)
+	if sequence:
+		if _prefixSpeechCommand is not None:
+			sequence.insert(0, _prefixSpeechCommand)
+	return sequence
+
+
+def speakSsml(
+		ssml: str,
+		symbolLevel: Optional[int] = None,
+		_prefixSpeechCommand: Optional[SpeechCommand] = None,
+		priority: Optional[Spri] = None
+) -> None:
+	"""Speaks a given speech sequence provided as SSML.
+	@param ssml: The SSML data to speak.
+	@param symbolLevel: The symbol verbosity level.
+	@param _prefixSpeechCommand: A SpeechCommand to insert at the start of the sequence.
+	@param priority: The speech priority.
+	"""
+	seq = _getSpeakSsmlSpeech(ssml, _prefixSpeechCommand)
+	if seq:
+		speak(seq, symbolLevel=symbolLevel, priority=priority)
+
+
 def getCurrentLanguage() -> str:
 	synth = getSynth()
 	language=None
diff --git a/source/speech/types.py b/source/speech/types.py
@@ -19,7 +19,7 @@
 from .commands import SpeechCommand
 
 SequenceItemT = Union[SpeechCommand, str]
-SpeechSequence = List[SequenceItemT]
+SpeechSequence = list[SequenceItemT]
 SpeechIterable = Iterable[SequenceItemT]
 
 _IndexT = int  # Type for indexes.
diff --git a/source/speechXml.py b/source/speechXml.py
@@ -1,21 +1,32 @@
 # A part of NonVisual Desktop Access (NVDA)
-# Copyright (C) 2016-2022 NV Access Limited
+# Copyright (C) 2016-2023 NV Access Limited, Leonard de Ruijter
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
 
-"""Utilities for converting NVDA speech sequences to XML.
+"""Utilities for converting NVDA speech sequences to XML and vice versa.
 Several synthesizers accept XML, either SSML or their own schemas.
 L{SpeechXmlConverter} is the base class for conversion to XML.
 You can subclass this to support specific XML schemas.
 L{SsmlConverter} is an implementation for conversion to SSML.
 """
 
-from collections import namedtuple, OrderedDict
 import re
-import speech
+from collections import OrderedDict, namedtuple
+from collections.abc import Callable, Generator
+from xml.parsers import expat
+
 import textUtils
-from speech.commands import LangChangeCommand, SpeechCommand
 from logHandler import log
+from speech.commands import (
+	BreakCommand,
+	CharacterModeCommand,
+	LangChangeCommand,
+	PitchCommand,
+	RateCommand,
+	SpeechCommand,
+	VolumeCommand,
+)
+from speech.types import SpeechSequence
 
 XML_ESCAPES = {
 	0x3C: u"&lt;", # <
@@ -47,6 +58,8 @@ def _buildInvalidXmlRegexp():
 			trailing=trailingSurrogate))
 
 RE_INVALID_XML_CHARS = _buildInvalidXmlRegexp()
+RE_TIME_MS = re.compile(r"^(?P<time>\d+)ms$", re.IGNORECASE)
+RE_PERCENTAGE = re.compile(r"^(?P<percentage>\d+(\.\d+)?)%$")
 REPLACEMENT_CHAR = textUtils.REPLACEMENT_CHAR
 
 
@@ -55,6 +68,13 @@ def toXmlLang(nvdaLang: str) -> str:
 	"""
 	return nvdaLang.replace("_", "-")
 
+
+def toNvdaLang(xmlLang: str) -> str:
+	"""Convert an XML language to an NVDA language.
+	"""
+	return xmlLang.replace("-", "_")
+
+
 #: An XMLBalancer command to enclose the entire output in a tag.
 #: This must be the first command.
 EncloseAllCommand = namedtuple("EncloseAllCommand", ("tag", "attrs"))
@@ -79,7 +99,8 @@ def _escapeXml(text):
 	text = RE_INVALID_XML_CHARS.sub(REPLACEMENT_CHAR, text)
 	return text
 
-class XmlBalancer(object):
+
+class XmlBalancer:
 	"""Generates balanced XML given a set of commands.
 	NVDA speech sequences are linear, but XML is hierarchical, which makes conversion challenging.
 	For example, a speech sequence might change the pitch, then change the volume, then reset the pitch to default.
@@ -185,7 +206,8 @@ def generateXml(self, commands) -> str:
 			self._closeTag(tag)
 		return u"".join(self._out)
 
-class SpeechXmlConverter(object):
+
+class SpeechXmlConverter:
 	"""Base class for conversion of NVDA speech sequences to XML.
 	This class converts an NVDA speech sequence into XmlBalancer commands
 	which can then be passed to L{XmlBalancer} to produce correct XML.
@@ -232,6 +254,7 @@ def convertToXml(self, speechSequence):
 		balCommands = self.generateBalancerCommands(speechSequence)
 		return bal.generateXml(balCommands)
 
+
 class SsmlConverter(SpeechXmlConverter):
 	"""Converts an NVDA speech sequence to SSML.
 	"""
@@ -280,3 +303,109 @@ def convertVolumeCommand(self, command):
 
 	def convertPhonemeCommand(self, command):
 		return StandAloneTagCommand("phoneme", {"alphabet": "ipa", "ph": command.ipa}, command.text)
+
+
+class SpeechXmlParser:
+	"""Base class for parsing of NVDA speech sequences from XML.
+	This class converts XML to an NVDA speech sequence.
+
+	Callers can call L{convertFromXml} with XML to generate a speech sequence.
+
+	Subclasses implement specific XML schemas by implementing generators which convert each XML tag supported.
+	The method for a tag should be named with the prefix "parse" followed by the tag.
+	For example, the handler for <volume /> should be named C{parseVolume}.
+	These generators receive an optional dictionary containing the attributes and values.
+	When the attributes dictionary is None, the generator is being called for a closing tag.
+	They should yield one or more appropriate SpeechCommand instances.
+	"""
+
+	_speechSequence: SpeechSequence
+
+	def _elementHandler(self, tagName: str, attrs: dict | None = None):
+		processedTagName = "".join(tagName.title().split("-"))
+		funcName = f"parse{processedTagName}"
+		if (func := getattr(self, funcName, None)) is None:
+			log.debugWarning(f"Unsupported tag: {tagName}")
+			return
+		for command in func(attrs):
+			# If the last item in the sequence is a command of the same type, nothing follows it, so drop it.
+			if self._speechSequence and type(self._speechSequence[-1]) is type(command):
+				self._speechSequence.pop()
+			# Look up the previous command of the same class, if any.
+			# If the last instance of this command in the sequence is equal to this command, we don't have to add it.
+			prevCommand = next((c for c in reversed(self._speechSequence) if type(c) is type(command)), None)
+			if prevCommand != command:
+				self._speechSequence.append(command)
+
+	def convertFromXml(self, xml: str) -> SpeechSequence:
+		"""Convert XML to a speech sequence.
+		"""
+		self._speechSequence = SpeechSequence()
+		parser = expat.ParserCreate('utf-8')
+		parser.StartElementHandler = parser.EndElementHandler = self._elementHandler
+		parser.CharacterDataHandler = self._speechSequence.append
+		try:
+			parser.Parse(xml)
+		except Exception as e:
+			raise ValueError(f"XML: {xml}") from e
+		return self._speechSequence
+
+
+ParseGeneratorT = Generator[SpeechCommand, None, None]
+ParseFuncT = Callable[[dict[str, str] | None], ParseGeneratorT]
+
+
+class SsmlParser(SpeechXmlParser):
+	"""Parses SSML into an NVDA speech sequence.
+	"""
+
+	def parseSayAs(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
+		state = attrs is not None and attrs.get("interpret-as") == "characters"
+		yield CharacterModeCommand(state)
+
+	def parseVoice(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
+		if attrs is None:
+			return None
+		if (xmlLang := attrs.get("xml:lang")) is None:
+			return None
+		yield LangChangeCommand(toNvdaLang(xmlLang))
+
+	def parseBreak(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
+		if attrs is None or "time" not in attrs:
+			return None
+		if (time := RE_TIME_MS.match(attrs["time"])) is None:
+			log.debugWarning(f"Unknown attributes for break tag: {attrs}")
+			return None
+		yield BreakCommand(int(time.group("time")))
+
+	_cachedProsodyAttrs: list[dict]
+
+	def parseProsody(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
+		if isOpenTag := attrs is not None:
+			self._cachedProsodyAttrs.append(attrs)
+		else:  # attrs is None
+			# Pop the attrs from the cache so we can add commands to reset them.
+			attrs = self._cachedProsodyAttrs.pop()
+		for attr, val in attrs.items():
+			if (percentage := RE_PERCENTAGE.match(val)) is None:
+				log.debugWarning(f"Attribute {attr!r} for prosody tag has unparseable value: {val!r}")
+				continue
+			multiplier = float(percentage.group("percentage")) / 100 if isOpenTag else 1
+			match attr:
+				case "pitch":
+					yield PitchCommand(multiplier=multiplier)
+				case "volume":
+					yield VolumeCommand(multiplier=multiplier)
+				case "rate":
+					yield RateCommand(multiplier=multiplier)
+				case _:
+					log.debugWarning(f"Unknown prosody attribute: {attr!r}")
+					continue
+
+	def parseSpeak(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
+		return
+		yield
+
+	def convertFromXml(self, xml: str) -> SpeechSequence:
+		self._cachedProsodyAttrs = []
+		return super().convertFromXml(xml)
diff --git a/source/winAPI/constants.py b/source/winAPI/constants.py
@@ -15,6 +15,9 @@ class HResult(enum.IntEnum):
 
 class SystemErrorCodes(enum.IntEnum):
 	# https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
+	SUCCESS = 0x0
 	ACCESS_DENIED = 0x5
+	INVALID_DATA = 0xD
+	NOT_READY = 0x15
 	INVALID_PARAMETER = 0x57
 	MOD_NOT_FOUND = 0x7E
diff --git a/tests/unit/test_speechXml.py b/tests/unit/test_speechXml.py
diff --git a/user_docs/en/changes.t2t b/user_docs/en/changes.t2t