Skip to content

Commit 432c167

Browse files
authored
Merge 68880c5 into 04ef2f1
2 parents 04ef2f1 + 68880c5 commit 432c167

6 files changed

Lines changed: 480 additions & 90 deletions

File tree

source/NVDAObjects/UIA/winConsoleUIA.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def _getCurrentOffsetInThisLine(self, lineInfo):
296296
charInfo = lineInfo.copy()
297297
charInfo.setEndPoint(self, "endToStart")
298298
text = charInfo._rangeObj.getText(-1)
299-
offset = textUtils.WideStringOffsetConverter(text).wideStringLength
299+
offset = textUtils.WideStringOffsetConverter(text).encodedStringLength
300300
return offset
301301

302302
def _getWordOffsetsInThisLine(self, offset, lineInfo):
@@ -310,7 +310,7 @@ def _getWordOffsetsInThisLine(self, offset, lineInfo):
310310
# not more than two alphanumeric chars in a row.
311311
# Inject two alphanumeric characters at the end to fix this.
312312
lineText += "xx"
313-
lineTextLen = textUtils.WideStringOffsetConverter(lineText).wideStringLength
313+
lineTextLen = textUtils.WideStringOffsetConverter(lineText).encodedStringLength
314314
NVDAHelper.localLib.calculateWordOffsets(
315315
lineText,
316316
lineTextLen,

source/compoundDocuments.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import (
77
Optional,
88
Dict,
9+
Self,
910
)
1011

1112
import textUtils
@@ -202,6 +203,22 @@ def __hash__(self):
202203

203204
def __ne__(self, other):
204205
return not self == other
206+
207+
def moveToPythonicOffset(
    self,
    pythonicOffset: int,
) -> Self:
    """
    Return a copy of this compound TextInfo moved to the given Pythonic offset.

    When the start and end nested TextInfos compare equal, the work is delegated
    to that nested TextInfo and the result is wrapped in a copy of self.
    Otherwise the inherited implementation of this same method is used.

    @param pythonicOffset: the offset, forwarded unchanged to the nested
        TextInfo or to the inherited implementation.
    @returns: a copy of self whose start and end both reference the nested result
        (fast path), or whatever the inherited implementation returns.
    """
    if self._start == self._end:
        # This is an optimization: if nested TextInfo is an OffsetsTextInfo,
        # it will do the job faster.
        nested = self._start.moveToPythonicOffset(pythonicOffset)
        result = self.copy()
        result._start = result._end = nested
        return result
    # Fall back to the inherited implementation of this very method.
    # (The previous code called a differently named method, moveByPythonicOffset.)
    return super().moveToPythonicOffset(pythonicOffset)
220+
221+
205222

206223
class TreeCompoundTextInfo(CompoundTextInfo):
207224
#: Units contained within a single TextInfo.

source/displayModel.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def processWindowChunksInLine(commandList,rects,startIndex,startOffset,endIndex,
7070
for index in range(startIndex,endIndex+1):
7171
item=commandList[index] if index<endIndex else None
7272
if isinstance(item,str):
73-
lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
73+
lastEndOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
7474
else:
7575
hwnd=item.field['hwnd'] if item else None
7676
if lastHwnd is not None and hwnd!=lastHwnd:
@@ -122,7 +122,7 @@ def processFieldsAndRectsRangeReadingdirection(commandList,rects,startIndex,star
122122
for index in range(startIndex,endIndex+1):
123123
item=commandList[index] if index<endIndex else None
124124
if isinstance(item,str):
125-
lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
125+
lastEndOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
126126
elif not item or (isinstance(item,textInfos.FieldCommand) and isinstance(item.field,textInfos.FormatField)):
127127
direction=item.field['direction'] if item else None
128128
if direction is None or (direction!=runDirection):
@@ -136,7 +136,7 @@ def processFieldsAndRectsRangeReadingdirection(commandList,rects,startIndex,star
136136
for i in range(runStartIndex,index,2):
137137
command=commandList[i]
138138
text=commandList[i+1]
139-
rectsEnd = rectsStart + textUtils.WideStringOffsetConverter(text).wideStringLength
139+
rectsEnd = rectsStart + textUtils.WideStringOffsetConverter(text).encodedStringLength
140140
commandList[i+1]=command
141141
shouldReverseText=command.field.get('shouldReverseText',True)
142142
commandList[i]=normalizeRtlString(text[::-1] if shouldReverseText else text)
@@ -278,7 +278,7 @@ def _getSelectionOffsets(self):
278278
if startOffset is None:
279279
startOffset=curOffset
280280
elif isinstance(item,str):
281-
curOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
281+
curOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
282282
if inHighlightChunk:
283283
endOffset=curOffset
284284
else:
@@ -337,7 +337,7 @@ def _get__storyFieldsAndRects(self) -> Tuple[
337337
for index in range(len(commandList)):
338338
item=commandList[index]
339339
if isinstance(item,str):
340-
lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
340+
lastEndOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
341341
displayChunkEndOffsets.append(lastEndOffset)
342342
elif isinstance(item,textInfos.FieldCommand):
343343
if isinstance(item.field,textInfos.FormatField):
@@ -376,7 +376,7 @@ def _getStoryOffsetLocations(self):
376376
baseline=item.field['baseline']
377377
direction=item.field['direction']
378378
elif isinstance(item,str):
379-
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).wideStringLength
379+
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).encodedStringLength
380380
for rect in rects[lastEndOffset:endOffset]:
381381
yield rect,baseline,direction
382382
lastEndOffset=endOffset
@@ -391,7 +391,7 @@ def _getFieldsInRange(self,start,end):
391391
for index in range(len(storyFields)):
392392
item=storyFields[index]
393393
if isinstance(item,str):
394-
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).wideStringLength
394+
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).encodedStringLength
395395
if lastEndOffset<=start<endOffset:
396396
startIndex=index-1
397397
relStart=start-lastEndOffset

source/textInfos/__init__.py

Lines changed: 182 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Optional,
2222
Dict,
2323
Tuple,
24+
Self,
2425
)
2526

2627
import baseObject
@@ -400,13 +401,11 @@ def _get_boundingRects(self):
400401
"""
401402
raise NotImplementedError
402403

403-
def unitIndex(self,unit):
404+
def unitIndex(self,unit: str) -> int:
405+
"""
406+
@param unit: a unit constant for which you want to retrieve an index
407+
@returns: The 1-based index of this unit, out of all the units of this type in the object
404408
"""
405-
@param unit: a unit constant for which you want to retreave an index
406-
@type: string
407-
@returns: The 1-based index of this unit, out of all the units of this type in the object
408-
@rtype: int
409-
"""
410409
raise NotImplementedError
411410

412411
def unitCount(self,unit):
@@ -656,6 +655,183 @@ def getMathMl(self, field):
656655
@raise LookupError: If MathML can't be retrieved for this field.
657656
"""
658657
raise NotImplementedError
658+
659+
def moveToPythonicOffset(
    self,
    pythonicOffset: int,
) -> Self:
    r"""
    Return a collapsed copy of this TextInfo positioned at the given Pythonic offset.

    This function moves textInfos by Pythonic characters, i.e. by indexes into C{self.text}.

    Illustration:
    Suppose we have TextInfo that represents a paragraph of text:
    ```
    > s = paragraphInfo.text
    > s
    'Hello, world!\r'
    ```
    Suppose that we would like to put the cursor at the first letter of the word 'world'.
    That means jumping to index 7:
    ```
    > s[7:]
    'world!\r'
    ```
    Here is how this can be done:
    ```
    > info = paragraphInfo.moveToPythonicOffset(7)
    > info.setEndPoint(paragraphInfo, "endToEnd")
    > info.text
    'world!\r'
    ```

    Background:
    In many applications there is no one-to-one mapping of Pythonic characters and TextInfo characters,
    e.g. when calling TextInfo.move(UNIT_CHARACTER, n).
    There are a couple of reasons for this discrepancy:
    1. In Wide character encoding, some 4-byte unicode characters are represented as two surrogate characters,
        whereas in a pythonic string they would be represented by a single character.
    2. In non-offset TextInfos (e.g. UIATextInfo)
        there is no guarantee that TextInfos.move(UNIT_CHARACTER, 1) would actually move by
        exactly 1 character.
        A good illustration of this is in Microsoft Word with UIA enabled always,
        the first character of a bullet list item would be represented by three pythonic characters:
        * Bullet character "•"
        * Tab character \t
        * And the first character of the list item per se.

    In many use cases (e.g., sentence navigation, style navigation),
    we identify the pythonic character that we would like to move our TextInfo to.
    TextInfos.move(UNIT_CHARACTER, n) would cause many side effects.
    This function provides a clean and reliable way to jump to a given pythonic offset.

    Assumptions:
    1. This function operates on a non-collapsed TextInfo only. In a typical scenario, we might want
        to jump to a certain offset within a paragraph or a line. In this case this function
        should be called on TextInfo representing said paragraph or line.
        The reason for that is that for some implementations we might
        need to access text of paragraph/line in order to accurately compute result offset.
    2. It assumes that 1 character of application-specific TextInfo representation
        maps to 1 or more characters of pythonic representation.
    3. This function is also written with an assumption that a character
        in application-specific TextInfo representation might not map to any pythonic characters,
        although this scenario has never been observed in any applications.
    4. Also this function assumes that most characters have 1:1 mapping between pythonic
        and application-specific representations.
        This assumption is not required, however if this assumption is True, the function will converge fast.
        If this assumption is false, then it might take many iterations to find the right TextInfo.

    Algorithm:
    This generic implementation is essentially a biased binary search.
    On every iteration we operate on a pythonic string and its TextInfo counterpart stored in info variable.
    We would like to reach a certain offset within that pythonic string,
    that is stored in pythonicOffsetLeft variable.
    In every iteration of the loop:
    1. We try to either move from the left end of info by pythonicOffsetLeft characters
        or from the right end by -pythonicOffsetRight characters - depending which move is shorter.
        We store destination point as collapsed TextInfo tmpInfo.
    2. We compute number of pythonic characters from the beginning of info until tmpInfo
        and store it in actualPythonicOffset variable.
    3. We compare actualPythonicOffset with pythonicOffsetLeft: if they are equal,
        then we just found desired TextInfo.
        Otherwise we use tmpInfo as the middle point of binary search and we recurse either to the left
        or to the right, depending where desired offset lies.

    One extra part of the algorithm serves to prevent certain conditions:
    if we happen to move on the step 1 from the same point twice
    in two consecutive iterations of the loop, then on the second time we will move tmpInfo
    exactly to the opposite end of info,
    and the algorithm will fail on sanity check condition in the for loop.
    To avoid this situation we track last move and the direction of last divide
    in variables lastMove and lastRecursedLeft.
    If we detect that we are about to move from the same endpoint for the second time,
    we reduce the count of characters by 1 to make sure
    the algorithm makes some progress on each iteration.

    @param pythonicOffset: index into C{self.text}; must satisfy 0 <= pythonicOffset <= len(self.text).
    @returns: a collapsed TextInfo located at the requested offset.
    @raise ValueError: if pythonicOffset is negative or greater than len(self.text).
    @raise RuntimeError: if a character move reports 0 units moved, if a move lands on
        either end of the current search range, if the retry step shrinks to 0,
        or if the search does not finish within the iteration limit.
    """
    # Total Pythonic Length represents length in python characters of current TextInfo we're working with.
    # We start with self, and then gradually divide and conquer in order to find desired offset.
    totalPythonicOffset = len(self.text)
    if not 0 <= pythonicOffset <= totalPythonicOffset:
        raise ValueError(
            f"pythonicOffset {pythonicOffset} is outside of the valid range [0, {totalPythonicOffset}]"
        )
    if pythonicOffset in (0, totalPythonicOffset):
        # Trivial cases: the answer is one of the ends of self, no searching required.
        result = self.copy()
        result.collapse(end=pythonicOffset > 0)
        return result

    info = self.copy()

    # pythonicOffsetLeft and pythonicOffsetRight represent distance in pythonic characters
    # from left and right ends of info correspondingly until desired location.
    pythonicOffsetLeft = pythonicOffset
    pythonicOffsetRight = totalPythonicOffset - pythonicOffsetLeft

    # We store lastMove - by how many characters we moved last time, and
    # lastRecursedLeft - whether last recursion happened to the left and not to the right -
    # in order to avoid certain corner cases.
    lastMove: int | None = None
    lastRecursedLeft: bool | None = None

    MAX_BINARY_SEARCH_ITERATIONS = 1000
    for __ in range(MAX_BINARY_SEARCH_ITERATIONS):
        tmpInfo = info.copy()
        if pythonicOffsetLeft <= pythonicOffsetRight:
            # Move from the left end of info. Let's compute by how many characters in moveCharacters variable.
            tmpInfo.collapse()
            if lastRecursedLeft is True and lastMove > 0:
                # Here we check that last time we also attempted to move from the same left end.
                # And apparently we overshot last time. In order to avoid infinite loop
                # or overshooting again, reduce movement by 1.
                moveCharacters = lastMove - 1
                if moveCharacters == 0:
                    raise RuntimeError("Unable to find desired offset in TextInfo.")
            else:
                moveCharacters = pythonicOffsetLeft

            code = tmpInfo.move(UNIT_CHARACTER, moveCharacters, endPoint="end")
            lastMove = moveCharacters
            # tmpInfo now spans from the left end of info to the candidate point,
            # so the length of its text is the candidate's pythonic offset.
            actualPythonicOffset = len(tmpInfo.text)
            tmpInfo.collapse(end=True)
        else:
            # Move from the right end of info.
            tmpInfo.collapse(end=True)
            if lastRecursedLeft is False and lastMove < 0:
                # lastMove was negative, so adding +1 to reduce its absolute value
                moveCharacters = lastMove + 1
                if moveCharacters == 0:
                    raise RuntimeError("Unable to find desired offset in TextInfo.")
            else:
                moveCharacters = -pythonicOffsetRight
            code = tmpInfo.move(UNIT_CHARACTER, moveCharacters, endPoint="start")
            lastMove = moveCharacters
            # tmpInfo now spans from the candidate point to the right end of info.
            actualPythonicOffset = totalPythonicOffset - len(tmpInfo.text)
            tmpInfo.collapse()
        if code == 0:
            raise RuntimeError("Move by character operation unexpectedly failed.")
        if actualPythonicOffset <= 0 or actualPythonicOffset >= totalPythonicOffset:
            # Sanity check: the candidate must lie strictly inside the current range,
            # otherwise the range would not shrink.
            raise RuntimeError(f"InvalidState: {actualPythonicOffset=} {totalPythonicOffset=}")
        if actualPythonicOffset == pythonicOffsetLeft:
            return tmpInfo
        elif actualPythonicOffset < pythonicOffsetLeft:
            # Recursing right: the candidate becomes the new left end of info.
            lastRecursedLeft = False
            pythonicOffsetLeft -= actualPythonicOffset
            totalPythonicOffset = pythonicOffsetLeft + pythonicOffsetRight
            info.setEndPoint(tmpInfo, which="startToStart")
        else:  # actualPythonicOffset > pythonicOffsetLeft
            # Recursing left: the candidate becomes the new right end of info.
            lastRecursedLeft = True
            totalPythonicOffset = actualPythonicOffset
            pythonicOffsetRight = totalPythonicOffset - pythonicOffsetLeft
            info.setEndPoint(tmpInfo, which="endToEnd")
    raise RuntimeError("Infinite loop during binary search.")
831+
832+
833+
834+
659835

660836
RE_EOL = re.compile("\r\n|[\n\r]")
661837
def convertToCrlf(text):

0 commit comments

Comments
 (0)