Skip to content

Commit 11e0eb3

Browse files
authored
Merge ee95170 into 4c50375
2 parents 4c50375 + ee95170 commit 11e0eb3

6 files changed

Lines changed: 262 additions & 98 deletions

File tree

source/addonHandler/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,13 @@ class AddonManifest(ConfigObj):
10141014
input = boolean(default=true)
10151015
output = boolean(default=true)
10161016
1017+
# Symbol Pronunciation
1018+
[symbolDictionaries]
1019+
# The key is the symbol dictionary file name (not the full path)
1020+
[[__many__]]
1021+
displayName = string()
1022+
mandatory = boolean(default=false)
1023+
10171024
# NOTE: apiVersion:
10181025
# EG: 2019.1.0 or 0.0.0
10191026
# Must have 3 integers separated by dots.
@@ -1054,6 +1061,10 @@ def __init__(self, input, translatedInput=None):
10541061
value = tableConfig.get("displayName")
10551062
if value:
10561063
self["brailleTables"][fileName]["displayName"] = value
1064+
for fileName, dictConfig in self._translatedConfig.get("symbolDictionaries", {}).items():
1065+
value = dictConfig.get("displayName")
1066+
if value:
1067+
self["symbolDictionaries"][fileName]["displayName"] = value
10571068

10581069
@property
10591070
def errors(self):

source/characterProcessing.py

Lines changed: 186 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
11
# A part of NonVisual Desktop Access (NVDA)
22
# Copyright (C) 2010-2024 NV Access Limited, World Light Information Limited,
3-
# Hong Kong Blind Union, Babbage B.V., Julien Cochuyt, Cyrille Bougot
3+
# Hong Kong Blind Union, Babbage B.V., Julien Cochuyt, Cyrille Bougot, Leonard de Ruijter
44
# This file is covered by the GNU General Public License.
55
# See the file COPYING for more details.
66

7+
import dataclasses
78
from enum import IntEnum
9+
from functools import cached_property
10+
import glob
811
import os
912
import codecs
1013
import collections
1114
import re
1215
from typing import (
1316
Callable,
1417
Dict,
18+
Generator,
1519
Generic,
1620
List,
1721
Optional,
18-
Tuple,
1922
TypeVar,
2023
)
2124

@@ -53,17 +56,17 @@ def fetchLocaleData(self, locale: str, fallback: bool = True) -> _LocaleDataT:
5356
localeList = [locale]
5457
if fallback and "_" in locale:
5558
localeList.append(locale.split("_")[0])
56-
for l in localeList: # noqa: E741
57-
data = self._dataMap.get(l)
59+
for loc in localeList:
60+
data = self._dataMap.get(loc)
5861
if data:
59-
return data # noqa: E701
62+
return data
6063
try:
61-
data = self._localeDataFactory(l)
64+
data = self._localeDataFactory(loc)
6265
except LookupError:
6366
data = None
6467
if not data:
65-
continue # noqa: E701
66-
self._dataMap[l] = data
68+
continue
69+
self._dataMap[loc] = data
6770
return data
6871
raise LookupError(locale)
6972

@@ -220,7 +223,7 @@ def __repr__(self):
220223
return "SpeechSymbol(%s)" % ", ".join(attrs)
221224

222225

223-
class SpeechSymbols(object):
226+
class SpeechSymbols:
224227
"""
225228
Contains raw information about the pronunciation of symbols.
226229
It does not handle inheritance of data from other sources, processing of text, etc.
@@ -235,9 +238,9 @@ def __init__(self):
235238

236239
def load(self, fileName: str, allowComplexSymbols: bool = True) -> None:
237240
"""Load symbol information from a file.
238-
@param fileName: The name of the file from which to load symbol information.
239-
@param allowComplexSymbols: Whether to allow complex symbols.
240-
@raise IOError: If the file cannot be read.
241+
:param fileName: The name of the file from which to load symbol information.
242+
:param allowComplexSymbols: Whether to allow complex symbols.
243+
:raise IOError: If the file cannot be read.
241244
"""
242245
self.fileName = fileName
243246
with codecs.open(fileName, "r", "utf_8_sig", errors="replace") as f:
@@ -340,11 +343,11 @@ def _loadSymbol(self, line):
340343
def save(self, fileName=None):
341344
"""Save symbol information to a file.
342345
@param fileName: The name of the file to which to save symbol information,
343-
C{None} to use the file name last passed to L{load} or L{save}.
346+
C{None} to use the file name last passed to L{load} or L{save}.
344347
@type fileName: str
345348
@raise IOError: If the file cannot be written.
346349
@raise ValueError: If C{fileName} is C{None}
347-
and L{load} or L{save} has not been called.
350+
and L{load} or L{save} has not been called.
348351
"""
349352
if fileName:
350353
self.fileName = fileName
@@ -402,82 +405,56 @@ def _saveSymbol(self, symbol):
402405
return "\t".join(fields)
403406

404407

405-
_noSymbolLocalesCache = set()
406-
_noCLDRLocalesCache = set()
407-
408-
409-
def _getSpeechSymbolsForLocale(locale: str) -> Tuple[SpeechSymbols, SpeechSymbols]:
410-
if locale in _noSymbolLocalesCache and (
411-
locale in _noCLDRLocalesCache or not config.conf["speech"]["includeCLDR"]
412-
):
413-
raise LookupError
414-
builtinDataImported = False
415-
builtin = SpeechSymbols()
416-
if config.conf["speech"]["includeCLDR"]:
417-
# Try to load CLDR data when processing is on.
418-
# Load the data before loading other symbols,
419-
# in order to allow translators to override them.
408+
def _getSpeechSymbolsForLocale(locale: str) -> list[SpeechSymbols]:
	"""Collect the symbol data of every enabled symbol dictionary for a locale.

	Dictionaries are visited in the order in which they are yielded by
	L{getAvailableSymbolDictionaryDefinitions}; disabled dictionaries are ignored.
	A dictionary that can not provide data for the locale is logged and skipped,
	so that one missing dictionary does not prevent the remaining ones from being loaded.

	:param locale: The locale to collect symbol data for.
	:return: The loaded symbol data, one entry per dictionary that could be loaded,
		in the same order as the definitions.
	:raise LookupError: If no more than one dictionary could be loaded for the locale.
	"""
	symbols: list[SpeechSymbols] = []
	for definition in getAvailableSymbolDictionaryDefinitions():
		if not definition.enabled:
			continue
		try:
			symbols.append(definition.getSymbols(locale))
		except (LookupError, FileNotFoundError):
			log.debugWarning(
				f"Error loading {definition.name!r} symbols for locale {locale!r}",
				exc_info=True,
			)
			# Only skip this dictionary.
			# Returning here would silently drop every dictionary that comes after it
			# and bypass the sanity check below.
			continue
	if len(symbols) <= 1:
		# NOTE(review): presumably the single remaining entry is the user dictionary,
		# which is loaded without raising; on its own it does not count as symbol information.
		raise LookupError(f"No symbol information for locale {locale!r}")
	return symbols
447424

448425

449-
class SpeechSymbolProcessor(object):
426+
class SpeechSymbolProcessor:
450427
"""
451428
Handles processing of symbol pronunciation for a locale.
452429
Pronunciation information is taken from one or more L{SpeechSymbols} instances.
453430
"""
454431

455432
#: Caches symbol data for locales.
456-
localeSymbols: LocaleDataMap[Tuple[SpeechSymbols, SpeechSymbols]] = LocaleDataMap(
457-
_getSpeechSymbolsForLocale,
458-
)
433+
localeSymbols: LocaleDataMap[list[SpeechSymbols]] = LocaleDataMap(_getSpeechSymbolsForLocale)
434+
sources: list[SpeechSymbols]
459435

460-
def __init__(self, locale):
436+
def __init__(self, locale: str):
461437
"""Constructor.
462438
@param locale: The locale for which symbol pronunciation should be processed.
463-
@type locale: str
464439
"""
465440
self.locale = locale
466441

467442
# We need to merge symbol data from several sources.
468443
sources = self.sources = []
469-
builtin, user = self.localeSymbols.fetchLocaleData(locale, fallback=False)
470-
self.builtinSources = [builtin]
471-
self.userSymbols = user
472-
sources.append(user)
473-
sources.append(builtin)
444+
fetched = self.localeSymbols.fetchLocaleData(locale, fallback=False)
445+
# A slice that reverses a list and ignores the last item (which is the user dictionary)
446+
builtinSlice = slice(-2, None, -1)
447+
self.builtinSources = fetched[builtinSlice]
448+
self.userSymbols = fetched[-1]
449+
sources.append(self.userSymbols)
450+
sources.extend(self.builtinSources)
474451

475452
# Always use English as a base.
476453
if locale != "en":
477454
# Only the builtin data.
478-
enBaseSymbols = self.localeSymbols.fetchLocaleData("en")[0]
479-
sources.append(enBaseSymbols)
480-
self.builtinSources.append(enBaseSymbols)
455+
enBuiltin = self.localeSymbols.fetchLocaleData("en")[builtinSlice]
456+
sources.extend(enBuiltin)
457+
self.builtinSources.extend(enBuiltin)
481458

482459
# The computed symbol information from all sources.
483460
symbols = self.computedSymbols = collections.OrderedDict()
@@ -725,7 +702,7 @@ def isBuiltin(self, symbolIdentifier: str) -> bool:
725702
"""Determine whether a symbol is built in.
726703
@param symbolIdentifier: The identifier of the symbol in question.
727704
@return: C{True} if the symbol is built in,
728-
C{False} if it was added by the user.
705+
C{False} if it was added by the user.
729706
"""
730707
return any(symbolIdentifier in source.symbols for source in self.builtinSources)
731708

@@ -779,9 +756,146 @@ def clearSpeechSymbols():
779756
def handlePostConfigProfileSwitch(prevConf=None):
	"""Clear the cached speech symbols when a profile switch changes the enabled symbol dictionaries.

	:param prevConf: The configuration that was active before the switch.
		When it is missing or empty, nothing is done.
	"""
	if prevConf:
		previousDictionaries = set(prevConf["speech"]["symbolDictionaries"])
		currentDictionaries = set(config.conf["speech"]["symbolDictionaries"])
		if previousDictionaries != currentDictionaries:
			# Either included or excluded dictionaries, so clear the cache.
			clearSpeechSymbols()
785762

786763

787-
config.post_configProfileSwitch.register(handlePostConfigProfileSwitch)
764+
@dataclasses.dataclass(frozen=True, kw_only=True)
class SymbolDictionaryDefinition:
	"""Describes a symbol dictionary and gives access to its per locale symbol data.

	A definition only stores where the dictionary files live and how they should be treated.
	The symbol data itself is loaded on demand per locale and cached in L{symbols}.
	"""

	name: str
	"""The name of the dictionary."""
	path: str
	"""The path to the dictionary.
	This should be a formattable string, where {locale} is replaced by the locale to fetch a dictionary for.
	"""
	displayName: str | None = None
	"""The translatable name of the dictionary.
	When not provided, the dictionary can not be visible to the end user.
	"""
	allowComplexSymbols: bool = False
	"""Whether this dictionary allows complex symbols."""
	mandatory: bool = False
	"""Whether this dictionary is mandatory.
	Mandatory dictionaries are always enabled."""
	user: bool = False
	"""Whether this is a user dictionary definition."""
	symbols: LocaleDataMap[SpeechSymbols] = dataclasses.field(init=False, repr=False, compare=False)
	"""Cache of the loaded symbol data per locale, created in L{__post_init__}."""

	def __post_init__(self):
		"""Validate the definition and create the per locale symbol cache.

		:raise ValueError: If the path does not contain exactly one {locale} placeholder,
			or if the dictionary is neither mandatory nor has a display name.
		"""
		if self.path.count("{locale}") != 1:
			raise ValueError(f"Invalid formatable path for dictionary: {self.path!r}")
		if not self.displayName and not self.mandatory:
			raise ValueError("A nonmandatory dictionary without a display name is unsupported")
		# The dataclass is frozen, so regular attribute assignment is blocked.
		object.__setattr__(self, "symbols", LocaleDataMap(self._getSymbols))

	@cached_property
	def userVisible(self) -> bool:
		"""Whether this dictionary is visible to end users (i.e. in the GUI).
		Mandatory dictionaries are hidden.
		"""
		return not self.mandatory and bool(self.displayName)

	@property
	def enabled(self) -> bool:
		"""Whether this dictionary is enabled.
		This is the case when it is mandatory or when its name is listed in the
		symbolDictionaries setting of the speech section of the configuration.
		"""
		return self.mandatory or self.name in config.conf["speech"]["symbolDictionaries"]

	def getSymbols(self, locale: str) -> SpeechSymbols:
		"""Gets the symbols for a given locale.
		:param locale: The locale to get symbols for.
		:raises FileNotFoundError: When this is not a user dictionary and the locale wasn't found.
		"""
		return self.symbols.fetchLocaleData(locale, fallback=False)

	def _getSymbols(self, locale: str) -> SpeechSymbols:
		"""Load the symbols for a locale from disk; used as the factory of the L{symbols} cache.

		For a user dictionary, a missing or unreadable file is logged
		and results in empty symbol data instead of an exception.

		:param locale: The locale to load symbols for.
		:return: The loaded symbols, which can be empty for a user dictionary.
		:raise FileNotFoundError: When this is not a user dictionary
			and there is no dictionary file for the locale.
		:raise IOError: When this is not a user dictionary and the file can not be read.
		"""
		raiseOnError = not self.user
		symbols = SpeechSymbols()
		if locale not in self.availableLocales:
			msg = f"No {self.name!r} data for locale {locale!r}"
			if raiseOnError:
				raise FileNotFoundError(msg)
			log.debug(msg)
		else:
			try:
				symbols.load(self.path.format(locale=locale), self.allowComplexSymbols)
			except IOError:
				if raiseOnError:
					raise
				log.error(f"Error loading {self.name!r} data for locale {locale!r}", exc_info=True)
		return symbols

	@cached_property
	def availableLocales(self) -> dict[str, str]:
		"""Gets dictionary paths for all available locales.

		The result is computed once per definition, so dictionary files that are created later
		are not picked up by an existing definition.

		:return: A mapping of locale to the path of the dictionary file for that locale.
		"""
		prefix, suffix = self.path.split("{locale}", 1)
		# Escape the literal parts of the path.
		# Otherwise glob special characters in them (e.g. "[" in the name of a profile or add-on folder)
		# would be treated as part of the pattern and no files would be found.
		pattern = f"{glob.escape(prefix)}*{glob.escape(suffix)}"
		localeStart = len(prefix)
		suffixLength = len(suffix)
		# Calculate the end index from the length of the path rather than using a negative index,
		# as a negative index of zero (empty suffix) would result in an empty locale.
		return {p[localeStart : len(p) - suffixLength]: p for p in glob.glob(pattern)}
838+
839+
840+
#: The registered symbol dictionary definitions, in order of registration.
#: Filled by L{initialize} and emptied by L{terminate}.
_symbolDictionaryDefinitions: list[SymbolDictionaryDefinition] = []
841+
842+
843+
def getAvailableSymbolDictionaryDefinitions() -> Generator[SymbolDictionaryDefinition, None, None]:
	"""Generate all registered symbol dictionary definitions, in order of registration.
	This includes definitions that are not enabled.
	"""
	for definition in _symbolDictionaryDefinitions:
		yield definition
845+
846+
847+
def initialize():
	"""Register the available symbol dictionary definitions and start listening for profile switches.

	Definitions are registered in this order: CLDR data, the built-in symbols,
	the dictionaries declared in the manifests of running add-ons, and finally the user dictionary.
	The user dictionary is deliberately registered last.
	"""
	# Add symbol dictionary definitions
	_symbolDictionaryDefinitions.append(
		SymbolDictionaryDefinition(
			name="cldr",
			path=os.path.join(globalVars.appDir, "locale", "{locale}", "cldr.dic"),
			# Translators: The name of a symbols dictionary with data from the unicode CLDR.
			displayName=_("Unicode Consortium data (including emoji)"),
		),
	)
	_symbolDictionaryDefinitions.append(
		SymbolDictionaryDefinition(
			name="builtin",
			path=os.path.join(globalVars.appDir, "locale", "{locale}", "symbols.dic"),
			allowComplexSymbols=True,
			mandatory=True,
		),
	)
	import addonHandler

	for addon in addonHandler.getRunningAddons():
		symbolsDict = addon.manifest.get("symbolDictionaries")
		if not symbolsDict:
			continue
		log.debug(f"Found {len(symbolsDict)} symbol dictionary entries in manifest for add-on {addon.name!r}")
		# The {locale} placeholder must stay in the path as is,
		# it is only filled in when a dictionary is loaded for a particular locale.
		directory = os.path.join(addon.path, "locale", "{locale}")
		for name, dictConfig in symbolsDict.items():
			definition = SymbolDictionaryDefinition(
				name=name,
				# The dictionary name, on the other hand, has to be filled in right away.
				# A literal "{name}" placeholder would never match a file on disk
				# and would make formatting the path with only a locale fail.
				path=os.path.join(directory, f"symbols-{name}.dic"),
				displayName=dictConfig["displayName"],
				allowComplexSymbols=False,
				mandatory=dictConfig["mandatory"],
			)
			if not definition.availableLocales:
				log.error(f"No {name!r} symbol dictionary files found for add-on {addon.name!r}")
				continue
			_symbolDictionaryDefinitions.append(definition)
	_symbolDictionaryDefinitions.append(
		SymbolDictionaryDefinition(
			name="user",
			path=WritePaths.getSymbolsConfigFile("{locale}"),
			allowComplexSymbols=False,
			mandatory=True,
			user=True,
		),
	)

	config.post_configProfileSwitch.register(handlePostConfigProfileSwitch)
896+
897+
898+
def terminate():
	"""Undo what L{initialize} did.
	Stops listening for configuration profile switches, clears the cached speech symbols
	and forgets all registered symbol dictionary definitions.
	"""
	# Unregister first, so a profile switch can no longer trigger the handler during cleanup.
	config.post_configProfileSwitch.unregister(handlePostConfigProfileSwitch)
	clearSpeechSymbols()
	_symbolDictionaryDefinitions.clear()

source/config/configSpec.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#: provide an upgrade step (@see profileUpgradeSteps.py). An upgrade step does not need to be added when
1414
#: just adding a new element to (or removing from) the schema, only when old versions of the config
1515
#: (conforming to old schema versions) will not work correctly with the new schema.
16-
latestSchemaVersion = 11
16+
latestSchemaVersion = 12
1717

1818
#: The configuration specification string
1919
#: @type: String
@@ -37,7 +37,7 @@
3737
trustVoiceLanguage = boolean(default=true)
3838
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
3939
reportNormalizedForCharacterNavigation = boolean(default=true)
40-
includeCLDR = boolean(default=True)
40+
symbolDictionaries = string_list(default=list("cldr"))
4141
beepSpeechModePitch = integer(default=10000,min=50,max=11025)
4242
outputDevice = string(default=default)
4343
autoLanguageSwitching = boolean(default=true)

0 commit comments

Comments
 (0)