Skip to content

Commit ddc8590

Browse files
authored
Merge 56cec12 into 1ef4dfb
2 parents 1ef4dfb + 56cec12 commit ddc8590

3 files changed

Lines changed: 166 additions & 2 deletions

File tree

devDocs/developerGuide.t2t

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Subsequent lines contain a textual identifier used to identify the symbol, a tab
7272
For example:
7373
```
7474
. sentence ending (?<=[^\s.])\.(?=[\"')\s]|$)
75+
dates . \b(\d\d)\.(\d\d)\.(\d{2}|\d{4})\b
7576
```
7677

7778
Again, the English symbols are inherited by all other locales, so you need not include any complex symbols already defined for English.
@@ -98,6 +99,8 @@ Certain characters cannot be typed into the file, so the following special seque
9899
- \f: form feed
99100
- \#: # character (needed because # at the start of a line denotes a comment)
100101
- replacement: The text which should be spoken for the symbol.
102+
If the symbol is a complex symbol, \1, \2, etc. can be used to refer to the groups of the match, which will be inlined in the replacement, allowing for simpler rules.
103+
This also means that to get a \ character in the replacement, one has to type \\.
101104
- level: The symbol level at which the symbol should be spoken.
102105
The symbol level is configured by the user and specifies the amount of symbols that should be spoken.
103106
This field should contain one of the levels "none", "some", "most", "all" or "char", or "-" to use the default.
@@ -133,6 +136,13 @@ It means that the ". sentence ending" complex symbol should be spoken as "point"
133136
Level and preserve are not specified, so they will be taken from English.
134137
A display name is provided so that French users will know what the symbol represents.
135138

139+
```
140+
dates . \1 point \2 point \3 all norep # point de date
141+
```
142+
This line appears in the French symbols.dic file.
143+
It means that the first, second, and third groups of the match will be included, separated by the word "point".
144+
The effect is thus to replace the dots in the date with the word "point".
145+
136146
Please see the file locale\en\symbols.dic for the English definitions which are inherited for all locales.
137147
This is also a good full example.
138148

source/characterProcessing.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,35 @@ def __init__(self, locale):
519519
log.error("Invalid complex symbol regular expression in locale %s: %s" % (locale, e))
520520
raise LookupError
521521

522+
def _replaceGroups(self, m: re.Match, string: str) -> str:
523+
"""Replace matching group references (\\1, \\2, ...) with the corresponding matched groups.
524+
Also replace \\\\ with \\ and reject other escapes, for escaping coherency.
525+
@param m: The currently-matched group
526+
@param string: The match replacement string which may contain group references
527+
"""
528+
result = ''
529+
530+
in_escape = False
531+
for char in string:
532+
if not in_escape:
533+
if char == '\\':
534+
in_escape = True
535+
else:
536+
result += char
537+
else:
538+
if char == '\\':
539+
result += '\\'
540+
elif char >= '0' and char <= '9':
541+
result += m.group(m.lastindex + ord(char) - ord('0'))
542+
else:
543+
log.error("Invalid reference \\%string" % char)
544+
raise LookupError
545+
in_escape = False
546+
if in_escape:
547+
log.error("Unterminated backslash")
548+
raise LookupError
549+
return result
550+
522551
def _regexpRepl(self, m):
523552
group = m.lastgroup
524553

@@ -540,16 +569,19 @@ def _regexpRepl(self, m):
540569
if group == "simple":
541570
# Simple symbol.
542571
symbol = self.computedSymbols[text]
572+
replacement = symbol.replacement
543573
else:
544574
# Complex symbol.
545575
index = int(group[1:])
546576
symbol = self._computedComplexSymbolsList[index]
577+
replacement = self._replaceGroups(m, symbol.replacement)
578+
547579
if symbol.preserve == SYMPRES_ALWAYS or (symbol.preserve == SYMPRES_NOREP and self._level < symbol.level):
548580
suffix = text
549581
else:
550582
suffix = " "
551-
if self._level >= symbol.level and symbol.replacement:
552-
return u" {repl}{suffix}".format(repl=symbol.replacement, suffix=suffix)
583+
if self._level >= symbol.level and replacement:
584+
return u" {repl}{suffix}".format(repl=replacement, suffix=suffix)
553585
else:
554586
return suffix
555587

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# A part of NonVisual Desktop Access (NVDA)
2+
# This file is covered by the GNU General Public License.
3+
# See the file COPYING for more details.
4+
# Copyright (C) 2020 NV Access Limited
5+
6+
"""Unit tests for the characterProcessing module.
7+
"""
8+
9+
import unittest
10+
import re
11+
from characterProcessing import SYMLVL_ALL
12+
from characterProcessing import SpeechSymbolProcessor
13+
from characterProcessing import processSpeechSymbols as process
14+
15+
16+
class TestComplex(unittest.TestCase):
	"""Checks the expansion of group references in complex symbol replacements.
	"""

	def _replace_cb(self, replacement, name=None):
		"""Build a callback suitable for a regular expression substitution.

		The callback expands the replacement string (including its group
		references) for matches whose last group is called name, or for
		every match when name is None. Other matches are returned unchanged.
		"""
		def expand(m):
			if name is not None and m.lastgroup != name:
				# Not the group we were asked to handle: keep the matched text.
				return m.group()
			return SpeechSymbolProcessor._replaceGroups(self, m, replacement)
		return expand

	def _replace(self, string, pattern, replacement, name=None):
		"""Substitute the matches of pattern in string with replacement.

		Group references in the replacement are expanded. When name is
		given, only matches of that named group are substituted.
		"""
		regexp = re.compile(pattern, re.UNICODE)
		callback = self._replace_cb(replacement, name)
		return regexp.sub(callback, string)

	def test_group_replacement(self):
		"""A replacement without any escape is used verbatim.
		"""
		self.assertEqual(
			self._replace(string="1", pattern=r"(\d)", replacement="a"),
			"a"
		)

	def test_backslash_replacement(self):
		"""An escaped backslash yields a single backslash.
		"""
		self.assertEqual(
			self._replace(string="1", pattern=r"(\d)", replacement=r"\\"),
			"\\"
		)

	def test_double_backslash_replacement(self):
		"""Two escaped backslashes yield two backslashes.
		"""
		self.assertEqual(
			self._replace(string="1", pattern=r"(\d)", replacement=r"\\\\"),
			r"\\"
		)

	def test_unknown_escape(self):
		"""An escape other than \\0 ... \\9 and \\\\ in the replacement
		is rejected with an error.
		"""
		self.assertRaises(
			LookupError,
			self._replace,
			string="1",
			pattern=r"(\d)",
			replacement=r"\a"
		)

	def test_missing_group(self):
		"""A reference to a non-existing group is rejected with an error.
		"""
		self.assertRaises(
			IndexError,
			self._replace,
			string="1",
			pattern=r"(\d)",
			replacement=r"\2"
		)

	def test_unterminated_escape(self):
		"""A backslash ending the replacement is rejected with an error,
		as nothing follows it to be escaped.
		"""
		self.assertRaises(
			LookupError,
			self._replace,
			string="1",
			pattern=r"(\d)",
			replacement="\\"
		)

	def test_group_replacements(self):
		"""Group references are expanded to the text of the matching groups.
		"""
		self.assertEqual(
			self._replace(
				string="bar.BAT",
				pattern=r"(([a-z]*)\.([A-Z]*))",
				replacement=r"\2>\1"
			),
			"BAT>bar"
		)

	def test_multiple_group_replacement(self):
		"""Group references stay correct when other groups precede the
		matched one in the pattern.
		"""
		self.assertEqual(
			self._replace(
				string="bar.BAT",
				pattern=r"(baz)|(?P<foo>([a-z]*)\.([A-Z]*))",
				replacement=r"\2>\1",
				name="foo"
			),
			"BAT>bar"
		)

0 commit comments

Comments
 (0)