11# A part of NonVisual Desktop Access (NVDA)
22# Copyright (C) 2010-2024 NV Access Limited, World Light Information Limited,
3- # Hong Kong Blind Union, Babbage B.V., Julien Cochuyt, Cyrille Bougot
3+ # Hong Kong Blind Union, Babbage B.V., Julien Cochuyt, Cyrille Bougot, Leonard de Ruijter
44# This file is covered by the GNU General Public License.
55# See the file COPYING for more details.
66
7+ import dataclasses
78from enum import IntEnum
9+ from functools import cached_property
10+ import glob
811import os
912import codecs
1013import collections
1114import re
1215from typing import (
1316 Callable ,
1417 Dict ,
18+ Generator ,
1519 Generic ,
1620 List ,
1721 Optional ,
18- Tuple ,
1922 TypeVar ,
2023)
2124
@@ -53,17 +56,17 @@ def fetchLocaleData(self, locale: str, fallback: bool = True) -> _LocaleDataT:
5356 localeList = [locale ]
5457 if fallback and "_" in locale :
5558 localeList .append (locale .split ("_" )[0 ])
56- for l in localeList : # noqa: E741
57- data = self ._dataMap .get (l )
59+ for loc in localeList :
60+ data = self ._dataMap .get (loc )
5861 if data :
59- return data # noqa: E701
62+ return data
6063 try :
61- data = self ._localeDataFactory (l )
64+ data = self ._localeDataFactory (loc )
6265 except LookupError :
6366 data = None
6467 if not data :
65- continue # noqa: E701
66- self ._dataMap [l ] = data
68+ continue
69+ self ._dataMap [loc ] = data
6770 return data
6871 raise LookupError (locale )
6972
@@ -220,7 +223,7 @@ def __repr__(self):
220223 return "SpeechSymbol(%s)" % ", " .join (attrs )
221224
222225
223- class SpeechSymbols ( object ) :
226+ class SpeechSymbols :
224227 """
225228 Contains raw information about the pronunciation of symbols.
226229 It does not handle inheritance of data from other sources, processing of text, etc.
@@ -235,9 +238,9 @@ def __init__(self):
235238
236239 def load (self , fileName : str , allowComplexSymbols : bool = True ) -> None :
237240 """Load symbol information from a file.
238- @ param fileName: The name of the file from which to load symbol information.
239- @ param allowComplexSymbols: Whether to allow complex symbols.
240- @ raise IOError: If the file cannot be read.
241+ : param fileName: The name of the file from which to load symbol information.
242+ : param allowComplexSymbols: Whether to allow complex symbols.
243+ : raise IOError: If the file cannot be read.
241244 """
242245 self .fileName = fileName
243246 with codecs .open (fileName , "r" , "utf_8_sig" , errors = "replace" ) as f :
@@ -340,11 +343,11 @@ def _loadSymbol(self, line):
340343 def save (self , fileName = None ):
341344 """Save symbol information to a file.
342345 @param fileName: The name of the file to which to save symbol information,
343- C{None} to use the file name last passed to L{load} or L{save}.
346+ C{None} to use the file name last passed to L{load} or L{save}.
344347 @type fileName: str
345348 @raise IOError: If the file cannot be written.
346349 @raise ValueError: If C{fileName} is C{None}
347- and L{load} or L{save} has not been called.
350+ and L{load} or L{save} has not been called.
348351 """
349352 if fileName :
350353 self .fileName = fileName
@@ -402,82 +405,56 @@ def _saveSymbol(self, symbol):
402405 return "\t " .join (fields )
403406
404407
405- _noSymbolLocalesCache = set ()
406- _noCLDRLocalesCache = set ()
407-
408-
def _getSpeechSymbolsForLocale(locale: str) -> list[SpeechSymbols]:
    """Collect the symbol data of every enabled symbol dictionary for a locale.

    Dictionaries are visited in the order yielded by
    L{getAvailableSymbolDictionaryDefinitions}; disabled definitions are skipped.
    A dictionary that cannot provide data for the locale is logged and skipped,
    so that one missing dictionary does not prevent the remaining ones from loading.

    :param locale: The locale to fetch symbol data for.
    :return: The loaded symbols, in the same order as their definitions.
    :raise LookupError: If at most one dictionary provided data for the locale.
    """
    symbols: list[SpeechSymbols] = []
    for definition in getAvailableSymbolDictionaryDefinitions():
        if not definition.enabled:
            continue
        try:
            symbols.append(definition.getSymbols(locale))
        except (LookupError, FileNotFoundError):
            log.debugWarning(
                f"Error loading {definition.name!r} symbols for locale {locale!r}",
                exc_info=True,
            )
            # Do not return here: the remaining dictionaries must still be loaded,
            # and the check below must run on the complete list.
            continue
    # NOTE(review): a single entry is presumably just the (possibly empty) user dictionary,
    # which is not real symbol information for the locale - confirm against initialize().
    if len(symbols) <= 1:
        raise LookupError(f"No symbol information for locale {locale!r}")
    return symbols
447424
448425
449- class SpeechSymbolProcessor ( object ) :
426+ class SpeechSymbolProcessor :
450427 """
451428 Handles processing of symbol pronunciation for a locale.
452429 Pronunciation information is taken from one or more L{SpeechSymbols} instances.
453430 """
454431
455432 #: Caches symbol data for locales.
456- localeSymbols : LocaleDataMap [Tuple [SpeechSymbols , SpeechSymbols ]] = LocaleDataMap (
457- _getSpeechSymbolsForLocale ,
458- )
433+ localeSymbols : LocaleDataMap [list [SpeechSymbols ]] = LocaleDataMap (_getSpeechSymbolsForLocale )
434+ sources : list [SpeechSymbols ]
459435
460- def __init__ (self , locale ):
436+ def __init__ (self , locale : str ):
461437 """Constructor.
462438 @param locale: The locale for which symbol pronunciation should be processed.
463- @type locale: str
464439 """
465440 self .locale = locale
466441
467442 # We need to merge symbol data from several sources.
468443 sources = self .sources = []
469- builtin , user = self .localeSymbols .fetchLocaleData (locale , fallback = False )
470- self .builtinSources = [builtin ]
471- self .userSymbols = user
472- sources .append (user )
473- sources .append (builtin )
444+ fetched = self .localeSymbols .fetchLocaleData (locale , fallback = False )
445+ # A slice that reverses a list and ignores the last item (which is the user dictionary)
446+ builtinSlice = slice (- 2 , None , - 1 )
447+ self .builtinSources = fetched [builtinSlice ]
448+ self .userSymbols = fetched [- 1 ]
449+ sources .append (self .userSymbols )
450+ sources .extend (self .builtinSources )
474451
475452 # Always use English as a base.
476453 if locale != "en" :
477454 # Only the builtin data.
478- enBaseSymbols = self .localeSymbols .fetchLocaleData ("en" )[0 ]
479- sources .append ( enBaseSymbols )
480- self .builtinSources .append ( enBaseSymbols )
455+ enBuiltin = self .localeSymbols .fetchLocaleData ("en" )[builtinSlice ]
456+ sources .extend ( enBuiltin )
457+ self .builtinSources .extend ( enBuiltin )
481458
482459 # The computed symbol information from all sources.
483460 symbols = self .computedSymbols = collections .OrderedDict ()
@@ -725,7 +702,7 @@ def isBuiltin(self, symbolIdentifier: str) -> bool:
725702 """Determine whether a symbol is built in.
726703 @param symbolIdentifier: The identifier of the symbol in question.
727704 @return: C{True} if the symbol is built in,
728- C{False} if it was added by the user.
705+ C{False} if it was added by the user.
729706 """
730707 return any (symbolIdentifier in source .symbols for source in self .builtinSources )
731708
@@ -779,9 +756,146 @@ def clearSpeechSymbols():
def handlePostConfigProfileSwitch(prevConf=None):
    """React to a configuration profile switch.

    When the set of symbol dictionaries listed in the speech configuration differs
    between the previous and the current configuration, speech symbols are cleared.
    Nothing happens when no previous configuration is supplied.

    :param prevConf: The configuration that was active before the switch, if any.
    """
    if prevConf:
        previousDictionaries = set(prevConf["speech"]["symbolDictionaries"])
        currentDictionaries = set(config.conf["speech"]["symbolDictionaries"])
        if previousDictionaries != currentDictionaries:
            # Either included or excluded dictionaries, so clear the cache.
            clearSpeechSymbols()
785762
786763
787- config .post_configProfileSwitch .register (handlePostConfigProfileSwitch )
@dataclasses.dataclass(frozen=True, kw_only=True)
class SymbolDictionaryDefinition:
    """Definition of a symbol dictionary whose data is stored in one file per locale.

    Instances are immutable and must be constructed with keyword arguments.
    The file for a locale is found by substituting the ``{locale}`` placeholder in L{path}.
    """

    name: str
    """The name of the dictionary."""
    path: str
    """The path to the dictionary.
    This should be a formattable string, where {locale} is replaced by the locale to fetch a dictionary for.
    The placeholder must occur exactly once; the rest of the path is taken literally.
    """
    displayName: str | None = None
    """The translatable name of the dictionary.
    When not provided, the dictionary can not be visible to the end user.
    """
    allowComplexSymbols: bool = False
    """Whether this dictionary allows complex symbols."""
    mandatory: bool = False
    """Whether this dictionary is mandatory.
    Mandatory dictionaries are always enabled."""
    user: bool = False
    """Whether this is a user dictionary definition."""
    symbols: LocaleDataMap[SpeechSymbols] = dataclasses.field(init=False, repr=False, compare=False)
    """Per-locale symbol data for this dictionary, created in L{__post_init__}."""

    def __post_init__(self):
        """Validate the definition and create the per-locale symbol map.

        :raises ValueError: If the path does not contain exactly one ``{locale}`` placeholder,
            or if a nonmandatory dictionary has no display name.
        """
        if self.path.count("{locale}") != 1:
            raise ValueError(f"Invalid formatable path for dictionary: {self.path!r}")
        if not self.displayName and not self.mandatory:
            raise ValueError("A nonmandatory dictionary without a display name is unsupported")
        # The dataclass is frozen, so normal attribute assignment is blocked.
        object.__setattr__(self, "symbols", LocaleDataMap(self._getSymbols))

    @cached_property
    def userVisible(self) -> bool:
        """Whether this dictionary is visible to end users (i.e. in the GUI).
        Mandatory dictionaries are hidden.
        """
        return not self.mandatory and bool(self.displayName)

    @property
    def enabled(self) -> bool:
        """Whether this dictionary is mandatory or listed in the speech configuration."""
        return self.mandatory or self.name in config.conf["speech"]["symbolDictionaries"]

    def getSymbols(self, locale: str) -> SpeechSymbols:
        """Gets the symbols for a given locale.
        :param locale: The locale to get symbols for.
        :raises FileNotFoundError: When this is not a user dictionary and the locale wasn't found.
        :raises OSError: When this is not a user dictionary and its file could not be loaded.
        """
        return self.symbols.fetchLocaleData(locale, fallback=False)

    def _getSymbols(self, locale: str) -> SpeechSymbols:
        """Load the symbols of this dictionary for a locale.

        For user dictionaries, a missing or unreadable file is logged
        and an empty L{SpeechSymbols} is returned instead of raising.

        :param locale: The locale to load symbols for.
        :raises FileNotFoundError: When this is not a user dictionary and the locale wasn't found.
        :raises OSError: When this is not a user dictionary and its file could not be loaded.
        """
        raiseOnError = not self.user
        symbols = SpeechSymbols()
        if locale not in self.availableLocales:
            msg = f"No {self.name!r} data for locale {locale!r}"
            if raiseOnError:
                raise FileNotFoundError(msg)
            log.debug(msg)
            return symbols
        # Substitute the placeholder literally, consistent with availableLocales,
        # so that other braces in the path can not break str.format.
        fileName = self.path.replace("{locale}", locale)
        try:
            symbols.load(fileName, self.allowComplexSymbols)
        except IOError:
            if raiseOnError:
                raise
            log.error(f"Error loading {self.name!r} data for locale {locale!r}", exc_info=True)
        return symbols

    @cached_property
    def availableLocales(self) -> dict[str, str]:
        """Gets dictionary paths for all available locales.

        :return: A mapping from locale to the path of the dictionary file found for it.
        """
        prefix, suffix = self.path.split("{locale}", 1)
        # Escape the literal parts, so that glob special characters (e.g. "[")
        # in a directory name are not interpreted as wildcards.
        pattern = f"{glob.escape(prefix)}*{glob.escape(suffix)}"
        # Compute the end index explicitly: a negative index of -0 would
        # produce an empty locale when the suffix is empty.
        return {p[len(prefix) : len(p) - len(suffix)]: p for p in glob.glob(pattern)}
839+
# The registered symbol dictionary definitions.
# Filled by initialize() and emptied again by terminate().
_symbolDictionaryDefinitions: list[SymbolDictionaryDefinition] = []
841+
842+
def getAvailableSymbolDictionaryDefinitions() -> Generator[SymbolDictionaryDefinition, None, None]:
    """Yield every registered symbol dictionary definition, in registration order.

    Definitions are yielded regardless of whether they are enabled.
    """
    yield from _symbolDictionaryDefinitions
845+
846+
def initialize():
    """Register the symbol dictionary definitions and hook into config profile switches.

    Definitions are registered in this order: CLDR data, the builtin dictionary,
    dictionaries declared in the manifest of running add-ons, and finally the user dictionary.
    The user dictionary must stay last, since L{SpeechSymbolProcessor} treats
    the last fetched item as the user dictionary.
    """
    # Add symbol dictionary definitions
    _symbolDictionaryDefinitions.append(
        SymbolDictionaryDefinition(
            name="cldr",
            path=os.path.join(globalVars.appDir, "locale", "{locale}", "cldr.dic"),
            # Translators: The name of a symbols dictionary with data from the unicode CLDR.
            displayName=_("Unicode Consortium data (including emoji)"),
        ),
    )
    _symbolDictionaryDefinitions.append(
        SymbolDictionaryDefinition(
            name="builtin",
            path=os.path.join(globalVars.appDir, "locale", "{locale}", "symbols.dic"),
            allowComplexSymbols=True,
            mandatory=True,
        ),
    )
    # NOTE(review): imported locally rather than at module level,
    # presumably to avoid a circular import - confirm.
    import addonHandler

    for addon in addonHandler.getRunningAddons():
        symbolsDict = addon.manifest.get("symbolDictionaries")
        if not symbolsDict:
            continue
        log.debug(f"Found {len(symbolsDict)} symbol dictionary entries in manifest for add-on {addon.name!r}")
        directory = os.path.join(addon.path, "locale", "{locale}")
        for name, dictConfig in symbolsDict.items():
            definition = SymbolDictionaryDefinition(
                name=name,
                # The dictionary name must be interpolated here (f-string),
                # leaving {locale} as the only remaining placeholder in the path.
                path=os.path.join(directory, f"symbols-{name}.dic"),
                displayName=dictConfig["displayName"],
                allowComplexSymbols=False,
                mandatory=dictConfig["mandatory"],
            )
            if not definition.availableLocales:
                log.error(f"No {name!r} symbol dictionary files found for add-on {addon.name!r}")
                continue
            _symbolDictionaryDefinitions.append(definition)
    _symbolDictionaryDefinitions.append(
        SymbolDictionaryDefinition(
            name="user",
            path=WritePaths.getSymbolsConfigFile("{locale}"),
            allowComplexSymbols=False,
            mandatory=True,
            user=True,
        ),
    )

    config.post_configProfileSwitch.register(handlePostConfigProfileSwitch)
896+
897+
def terminate():
    """Undo the work of initialize().

    Unregisters the config profile switch handler, calls clearSpeechSymbols(),
    and removes all registered symbol dictionary definitions.
    """
    config.post_configProfileSwitch.unregister(handlePostConfigProfileSwitch)
    clearSpeechSymbols()
    _symbolDictionaryDefinitions.clear()
901+ _symbolDictionaryDefinitions .clear ()
0 commit comments