11# A part of NonVisual Desktop Access (NVDA)
2- # Copyright (C) 2016-2022 NV Access Limited
2+ # Copyright (C) 2016-2023 NV Access Limited, Leonard de Ruijter
33# This file is covered by the GNU General Public License.
44# See the file COPYING for more details.
55
6- """Utilities for converting NVDA speech sequences to XML.
6+ """Utilities for converting NVDA speech sequences to XML and vice versa .
77Several synthesizers accept XML, either SSML or their own schemas.
88L{SpeechXmlConverter} is the base class for conversion to XML.
99You can subclass this to support specific XML schemas.
1010L{SsmlConverter} is an implementation for conversion to SSML.
1111"""
1212
13- from collections import namedtuple , OrderedDict
1413import re
15- import speech
14+ from collections import OrderedDict , namedtuple
15+ from collections .abc import Callable , Generator
16+ from xml .parsers import expat
17+
1618import textUtils
17- from speech .commands import LangChangeCommand , SpeechCommand
1819from logHandler import log
20+ from speech .commands import (
21+ BreakCommand ,
22+ CharacterModeCommand ,
23+ LangChangeCommand ,
24+ PitchCommand ,
25+ RateCommand ,
26+ SpeechCommand ,
27+ VolumeCommand ,
28+ )
29+ from speech .types import SpeechSequence
1930
2031XML_ESCAPES = {
2132 0x3C : u"<" , # <
@@ -47,6 +58,8 @@ def _buildInvalidXmlRegexp():
4758 trailing = trailingSurrogate ))
4859
4960RE_INVALID_XML_CHARS = _buildInvalidXmlRegexp ()
#: Matches a duration given in whole milliseconds, such as "250ms".
#: The unit is matched case insensitively; the digits are captured in the "time" group.
RE_TIME_MS = re.compile(r"^(?P<time>\d+)ms$", flags=re.IGNORECASE)
#: Matches an unsigned percentage with an optional fractional part, such as "50%" or "12.5%".
#: The number without the percent sign is captured in the "percentage" group.
RE_PERCENTAGE = re.compile(r"^(?P<percentage>\d+(\.\d+)?)%$")
5063REPLACEMENT_CHAR = textUtils .REPLACEMENT_CHAR
5164
5265
@@ -55,6 +68,13 @@ def toXmlLang(nvdaLang: str) -> str:
5568 """
5669 return nvdaLang .replace ("_" , "-" )
5770
71+
def toNvdaLang(xmlLang: str) -> str:
	"""Convert an XML language code to its NVDA form.
	Every hyphen is replaced with an underscore, so C{"en-US"} becomes C{"en_US"}.
	"""
	return "_".join(xmlLang.split("-"))
76+
77+
#: An XmlBalancer command which wraps the entire output in a single tag.
#: It carries the tag name and that tag's attributes.
#: This must be the first command.
EncloseAllCommand = namedtuple("EncloseAllCommand", ["tag", "attrs"])
@@ -79,7 +99,8 @@ def _escapeXml(text):
7999 text = RE_INVALID_XML_CHARS .sub (REPLACEMENT_CHAR , text )
80100 return text
81101
82- class XmlBalancer (object ):
102+
103+ class XmlBalancer :
83104 """Generates balanced XML given a set of commands.
84105 NVDA speech sequences are linear, but XML is hierarchical, which makes conversion challenging.
85106 For example, a speech sequence might change the pitch, then change the volume, then reset the pitch to default.
@@ -185,7 +206,8 @@ def generateXml(self, commands) -> str:
185206 self ._closeTag (tag )
186207 return u"" .join (self ._out )
187208
188- class SpeechXmlConverter (object ):
209+
210+ class SpeechXmlConverter :
189211 """Base class for conversion of NVDA speech sequences to XML.
190212 This class converts an NVDA speech sequence into XmlBalancer commands
191213 which can then be passed to L{XmlBalancer} to produce correct XML.
@@ -232,6 +254,7 @@ def convertToXml(self, speechSequence):
232254 balCommands = self .generateBalancerCommands (speechSequence )
233255 return bal .generateXml (balCommands )
234256
257+
235258class SsmlConverter (SpeechXmlConverter ):
236259 """Converts an NVDA speech sequence to SSML.
237260 """
@@ -280,3 +303,109 @@ def convertVolumeCommand(self, command):
280303
def convertPhonemeCommand(self, command):
	"""Convert a phoneme command into a stand-alone <phoneme> tag.
	The tag declares the IPA alphabet, carries the command's IPA string in its "ph" attribute
	and uses the command's text as its content.
	"""
	phonemeAttrs = {"alphabet": "ipa", "ph": command.ipa}
	return StandAloneTagCommand("phoneme", phonemeAttrs, command.text)
306+
307+
class SpeechXmlParser:
	"""Base class for parsing of NVDA speech sequences from XML.
	This class converts XML to an NVDA speech sequence.

	Callers can call L{convertFromXml} with XML to generate a speech sequence.

	Subclasses implement specific XML schemas by implementing generators which convert each XML tag supported.
	The method for a tag should be named with the prefix "parse" followed by the tag name,
	where each hyphen separated part of the name is capitalized and the hyphens are removed.
	For example, the handler for <volume /> should be named C{parseVolume}
	and the handler for <say-as /> should be named C{parseSayAs}.
	These generators receive an optional dictionary containing the attributes and values.
	When the attributes value is None, it is a closing tag.
	They should yield zero or more appropriate SpeechCommand instances.
	"""

	#: The sequence under construction; it is replaced on every call to L{convertFromXml}.
	_speechSequence: SpeechSequence

	def _elementHandler(self, tagName: str, attrs: dict | None = None):
		"""Handle an opening or closing tag reported by the XML parser.
		The tag is dispatched to the matching C{parse*} generator
		and the commands it yields are merged into the speech sequence.
		@param tagName: The name of the tag as it occurs in the XML.
		@param attrs: The attributes of an opening tag, or C{None} for a closing tag.
		"""
		# "say-as" becomes "SayAs", so the handler looked up is "parseSayAs".
		processedTagName = "".join(tagName.title().split("-"))
		funcName = f"parse{processedTagName}"
		if (func := getattr(self, funcName, None)) is None:
			log.debugWarning(f"Unsupported tag: {tagName}")
			return
		for command in func(attrs):
			# If the last command in the sequence is of the same type, we can remove it.
			# Nothing was added after it, so the new command supersedes it.
			if self._speechSequence and type(self._speechSequence[-1]) is type(command):
				self._speechSequence.pop()
			# Look up the previous command of the same class, if any.
			# If the last instance of this command in the sequence is equal to this command, we don't have to add it.
			prevCommand = next((c for c in reversed(self._speechSequence) if type(c) is type(command)), None)
			if prevCommand != command:
				self._speechSequence.append(command)

	def convertFromXml(self, xml: str) -> SpeechSequence:
		"""Convert XML to a speech sequence.
		@param xml: The complete XML document to convert.
		@returns: The speech sequence containing the text and the commands produced by the tag handlers.
		@raise ValueError: If parsing fails or a tag handler raises.
			The message contains the offending XML and the original exception is chained as the cause.
		"""
		self._speechSequence = SpeechSequence()
		parser = expat.ParserCreate('utf-8')
		# Without buffering, expat may report one run of text in several pieces
		# (for example around entity references and line breaks),
		# which would end up as separate strings in the speech sequence.
		parser.buffer_text = True
		parser.StartElementHandler = parser.EndElementHandler = self._elementHandler
		parser.CharacterDataHandler = self._speechSequence.append
		try:
			# The whole document is supplied at once, so this is the final chunk.
			# Saying so makes expat report truncated documents instead of waiting for more data.
			parser.Parse(xml, True)
		except Exception as e:
			raise ValueError(f"XML: {xml}") from e
		return self._speechSequence
352+
353+
#: The type of generator a tag handler returns:
#: it yields speech commands and neither accepts sent values nor returns a result.
ParseGeneratorT = Generator[SpeechCommand, None, None]
#: The signature of a tag handler:
#: it takes the attributes of an opening tag, or None for a closing tag.
ParseFuncT = Callable[[dict[str, str] | None], Generator[SpeechCommand, None, None]]
356+
357+
class SsmlParser(SpeechXmlParser):
	"""Parses SSML into an NVDA speech sequence.
	Handlers are provided for the say-as, voice, break, prosody and speak tags.
	"""

	#: The attributes of the prosody tags which are currently open, innermost last.
	#: It is reset on every call to L{convertFromXml}.
	_cachedProsodyAttrs: list[dict]

	def convertFromXml(self, xml: str) -> SpeechSequence:
		"""Convert SSML to a speech sequence, starting with no open prosody tags.
		"""
		self._cachedProsodyAttrs = []
		return super().convertFromXml(xml)

	def parseSpeak(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
		"""Handle the speak tag, which is recognized but produces no commands.
		"""
		yield from ()

	def parseSayAs(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
		"""Handle the say-as tag by switching character mode.
		Character mode is enabled by an opening tag with interpret-as="characters"
		and disabled by any other opening tag and by every closing tag.
		"""
		if attrs is None:
			charMode = False
		else:
			charMode = attrs.get("interpret-as") == "characters"
		yield CharacterModeCommand(charMode)

	def parseVoice(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
		"""Handle the voice tag by yielding a language change for its xml:lang attribute.
		Closing tags and tags without that attribute produce nothing.
		"""
		xmlLang = None if attrs is None else attrs.get("xml:lang")
		if xmlLang is not None:
			yield LangChangeCommand(toNvdaLang(xmlLang))

	def parseBreak(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
		"""Handle the break tag by yielding a break for its time attribute.
		Closing tags and tags without a time attribute produce nothing.
		A time which is not a whole number of milliseconds is logged and ignored.
		"""
		if attrs is None:
			return
		if "time" not in attrs:
			return
		timeMatch = RE_TIME_MS.match(attrs["time"])
		if timeMatch is None:
			log.debugWarning(f"Unknown attributes for break tag: {attrs}")
			return
		yield BreakCommand(int(timeMatch["time"]))

	def parseProsody(self, attrs: dict[str, str] | None) -> ParseGeneratorT:
		"""Handle the prosody tag by yielding pitch, volume and rate commands.
		An opening tag yields a command for each supported attribute with a percentage value.
		A closing tag yields commands which set those same attributes back to a multiplier of 1.
		Unparseable values and unknown attributes are logged and skipped.
		"""
		commandClasses = {
			"pitch": PitchCommand,
			"volume": VolumeCommand,
			"rate": RateCommand,
		}
		isOpenTag = attrs is not None
		if isOpenTag:
			# Remember the attributes, as the closing tag does not repeat them.
			self._cachedProsodyAttrs.append(attrs)
		else:
			# Take the attributes of the matching opening tag so they can be reset.
			attrs = self._cachedProsodyAttrs.pop()
		for name, rawValue in attrs.items():
			valueMatch = RE_PERCENTAGE.match(rawValue)
			if valueMatch is None:
				log.debugWarning(f"Attribute {name!r} for prosody tag has unparseable value: {rawValue!r}")
				continue
			commandClass = commandClasses.get(name)
			if commandClass is None:
				log.debugWarning(f"Unknown prosody attribute: {name!r}")
				continue
			multiplier = float(valueMatch["percentage"]) / 100 if isOpenTag else 1
			yield commandClass(multiplier=multiplier)
0 commit comments