Skip to content

Commit 18fc288

Browse files
authored
Merge d89dc3f into 91f4f14
2 parents 91f4f14 + d89dc3f commit 18fc288

6 files changed

Lines changed: 478 additions & 89 deletions

File tree

source/NVDAObjects/UIA/winConsoleUIA.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def _getCurrentOffsetInThisLine(self, lineInfo):
296296
charInfo = lineInfo.copy()
297297
charInfo.setEndPoint(self, "endToStart")
298298
text = charInfo._rangeObj.getText(-1)
299-
offset = textUtils.WideStringOffsetConverter(text).wideStringLength
299+
offset = textUtils.WideStringOffsetConverter(text).encodedStringLength
300300
return offset
301301

302302
def _getWordOffsetsInThisLine(self, offset, lineInfo):
@@ -310,7 +310,7 @@ def _getWordOffsetsInThisLine(self, offset, lineInfo):
310310
# not more than two alphanumeric chars in a row.
311311
# Inject two alphanumeric characters at the end to fix this.
312312
lineText += "xx"
313-
lineTextLen = textUtils.WideStringOffsetConverter(lineText).wideStringLength
313+
lineTextLen = textUtils.WideStringOffsetConverter(lineText).encodedStringLength
314314
NVDAHelper.localLib.calculateWordOffsets(
315315
lineText,
316316
lineTextLen,

source/compoundDocuments.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import (
77
Optional,
88
Dict,
9+
Self,
910
)
1011

1112
import textUtils
@@ -202,6 +203,22 @@ def __hash__(self):
202203

203204
def __ne__(self, other):
204205
return not self == other
206+
207+
def moveToPythonicOffset(
		self,
		pythonicOffset: int,
) -> Self:
	"""
	Return a copy of this compound TextInfo moved to the given Pythonic (str index) offset.

	When this compound TextInfo's two endpoints compare equal, the request is forwarded
	to the nested TextInfo held in C{_start}, and the result is wrapped in a copy of self.
	Otherwise the inherited implementation of this same method is used.

	@param pythonicOffset: the offset, in Python characters, to move to.
	@returns: a new TextInfo positioned at the requested offset; self is not modified here.
	"""
	if self._start == self._end:
		# This is an optimization: if nested TextInfo is an OffsetsTextInfo,
		# it will do the job faster.
		nested = self._start.moveToPythonicOffset(pythonicOffset)
		result = self.copy()
		# Both endpoints share the single nested TextInfo returned above.
		result._start = result._end = nested
		return result
	# Fall back to the inherited implementation of *this* method.
	# (Previously this called the non-existent name moveByPythonicOffset.)
	return super().moveToPythonicOffset(pythonicOffset)
220+
221+
205222

206223
class TreeCompoundTextInfo(CompoundTextInfo):
207224
#: Units contained within a single TextInfo.

source/displayModel.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def processWindowChunksInLine(commandList,rects,startIndex,startOffset,endIndex,
7070
for index in range(startIndex,endIndex+1):
7171
item=commandList[index] if index<endIndex else None
7272
if isinstance(item,str):
73-
lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
73+
lastEndOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
7474
else:
7575
hwnd=item.field['hwnd'] if item else None
7676
if lastHwnd is not None and hwnd!=lastHwnd:
@@ -122,7 +122,7 @@ def processFieldsAndRectsRangeReadingdirection(commandList,rects,startIndex,star
122122
for index in range(startIndex,endIndex+1):
123123
item=commandList[index] if index<endIndex else None
124124
if isinstance(item,str):
125-
lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
125+
lastEndOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
126126
elif not item or (isinstance(item,textInfos.FieldCommand) and isinstance(item.field,textInfos.FormatField)):
127127
direction=item.field['direction'] if item else None
128128
if direction is None or (direction!=runDirection):
@@ -136,7 +136,7 @@ def processFieldsAndRectsRangeReadingdirection(commandList,rects,startIndex,star
136136
for i in range(runStartIndex,index,2):
137137
command=commandList[i]
138138
text=commandList[i+1]
139-
rectsEnd = rectsStart + textUtils.WideStringOffsetConverter(text).wideStringLength
139+
rectsEnd = rectsStart + textUtils.WideStringOffsetConverter(text).encodedStringLength
140140
commandList[i+1]=command
141141
shouldReverseText=command.field.get('shouldReverseText',True)
142142
commandList[i]=normalizeRtlString(text[::-1] if shouldReverseText else text)
@@ -278,7 +278,7 @@ def _getSelectionOffsets(self):
278278
if startOffset is None:
279279
startOffset=curOffset
280280
elif isinstance(item,str):
281-
curOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
281+
curOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
282282
if inHighlightChunk:
283283
endOffset=curOffset
284284
else:
@@ -337,7 +337,7 @@ def _get__storyFieldsAndRects(self) -> Tuple[
337337
for index in range(len(commandList)):
338338
item=commandList[index]
339339
if isinstance(item,str):
340-
lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
340+
lastEndOffset += textUtils.WideStringOffsetConverter(item).encodedStringLength
341341
displayChunkEndOffsets.append(lastEndOffset)
342342
elif isinstance(item,textInfos.FieldCommand):
343343
if isinstance(item.field,textInfos.FormatField):
@@ -376,7 +376,7 @@ def _getStoryOffsetLocations(self):
376376
baseline=item.field['baseline']
377377
direction=item.field['direction']
378378
elif isinstance(item,str):
379-
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).wideStringLength
379+
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).encodedStringLength
380380
for rect in rects[lastEndOffset:endOffset]:
381381
yield rect,baseline,direction
382382
lastEndOffset=endOffset
@@ -391,7 +391,7 @@ def _getFieldsInRange(self,start,end):
391391
for index in range(len(storyFields)):
392392
item=storyFields[index]
393393
if isinstance(item,str):
394-
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).wideStringLength
394+
endOffset = lastEndOffset + textUtils.WideStringOffsetConverter(item).encodedStringLength
395395
if lastEndOffset<=start<endOffset:
396396
startIndex=index-1
397397
relStart=start-lastEndOffset

source/textInfos/__init__.py

Lines changed: 183 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Optional,
2222
Dict,
2323
Tuple,
24+
Self,
2425
)
2526

2627
import baseObject
@@ -402,11 +403,11 @@ def _get_boundingRects(self):
402403

403404
def unitIndex(self,unit):
	"""
	Retrieve the index of this unit among all units of the same type in the object.
	This implementation always raises C{NotImplementedError}.
	@param unit: a unit constant for which you want to retrieve an index
	@type unit: string
	@returns: The 1-based index of this unit, out of all the units of this type in the object
	@rtype: int
	@raise NotImplementedError: always, in this implementation.
	"""
	raise NotImplementedError
411412

412413
def unitCount(self,unit):
@@ -656,6 +657,183 @@ def getMathMl(self, field):
656657
@raise LookupError: If MathML can't be retrieved for this field.
657658
"""
658659
raise NotImplementedError
660+
661+
def moveToPythonicOffset(
		self,
		pythonicOffset: int,
) -> Self:
	"""
	Return a collapsed copy of this TextInfo positioned at a given Pythonic character offset.

	@param pythonicOffset: index into C{self.text} (as a Python str) to move to;
		must satisfy C{0 <= pythonicOffset <= len(self.text)}.
	@returns: a new collapsed TextInfo at the requested offset; self is not modified.
	@raise ValueError: if pythonicOffset is outside the range above.
	@raise RuntimeError: if moving by character fails, if the search reaches an
		inconsistent state, or if it does not converge within the iteration limit.

	Illustration:
	Suppose we have TextInfo that represents a paragraph of text:
	```
	> s = paragraphInfo.text
	> s
	'Hello, world!\\r'
	```
	Suppose that we would like to put the cursor at the first letter of the word 'world'.
	That means jumping to index 7:
	```
	> s[7:]
	'world!\\r'
	```
	Here is how this can be done:
	```
	> info = paragraphInfo.moveToPythonicOffset(7)
	> info.setEndPoint(paragraphInfo, "endToEnd")
	> info.text
	'world!\\r'
	```

	Background:
	In many applications there is no one-to-one mapping of Pythonic characters and TextInfo characters,
	e.g. when calling TextInfo.move(UNIT_CHARACTER, n).
	There are a couple of reasons for this discrepancy:
	1. In wide character encoding, some 4-byte unicode characters are represented as two surrogate characters,
	whereas in a pythonic string they would be represented by a single character.
	2. In non-offset TextInfos (e.g. UIATextInfo)
	there is no guarantee that TextInfo.move(UNIT_CHARACTER, 1) would actually move by
	exactly 1 character.
	A good illustration of this is in Microsoft Word with UIA enabled always:
	the first character of a bullet list item would be represented by three pythonic characters:
	* Bullet character "•"
	* Tab character \\t
	* And the first character of the list item per se.

	In many use cases (e.g., sentence navigation, style navigation),
	we identify the pythonic character that we would like to move our TextInfo to.
	TextInfo.move(UNIT_CHARACTER, n) would cause many side effects.
	This function provides a clean and reliable way to jump to a given pythonic offset.

	Assumptions:
	1. This function operates on a non-collapsed TextInfo only. In a typical scenario, we might want
	to jump to a certain offset within a paragraph or a line. In this case this function
	should be called on the TextInfo representing said paragraph or line.
	The reason for that is that for some implementations we might
	need to access the text of the paragraph/line in order to accurately compute the result offset.
	2. It assumes that 1 character of application-specific TextInfo representation
	maps to 1 or more characters of pythonic representation.
	3. This function is also written with an assumption that a character
	in application-specific TextInfo representation might not map to any pythonic characters,
	although this scenario has never been observed in any applications.
	4. Also this function assumes that most characters have a 1:1 mapping between pythonic
	and application-specific representations.
	This assumption is not required, however if this assumption is true, the function will converge fast.
	If this assumption is false, then it might take many iterations to find the right TextInfo.

	Algorithm:
	This generic implementation is essentially a biased binary search.
	On every iteration we operate on a pythonic string and its TextInfo counterpart stored in the info variable.
	We would like to reach a certain offset within that pythonic string,
	which is stored in the pythonicOffsetLeft variable.
	In every iteration of the loop:
	1. We try to either move from the left end of info by pythonicOffsetLeft characters
	or from the right end by -pythonicOffsetRight characters - depending on which move is shorter.
	We store the destination point as collapsed TextInfo tmpInfo.
	2. We compute the number of pythonic characters from the beginning of info until tmpInfo
	and store it in the actualPythonicOffset variable.
	3. We compare actualPythonicOffset with pythonicOffsetLeft: if they are equal,
	then we have just found the desired TextInfo.
	Otherwise we use tmpInfo as the middle point of the binary search and we recurse either to the left
	or to the right, depending on where the desired offset lies.

	One extra part of the algorithm serves to prevent certain conditions:
	if we happen to move on step 1 from the same point twice
	in two consecutive iterations of the loop, then on the second time we would move tmpInfo
	exactly to the opposite end of info,
	and the algorithm would fail on the sanity check condition in the for loop.
	To avoid this situation we track the last move and the direction of the last divide
	in variables lastMove and lastRecursedLeft.
	If we detect that we are about to move from the same endpoint for the second time,
	we reduce the count of characters by 1 to make sure
	the algorithm makes some progress on each iteration.
	"""
	# totalPythonicOffset is the length, in Python characters, of the TextInfo we are currently working with.
	# We start with self, and then gradually divide and conquer in order to find the desired offset.
	totalPythonicOffset = len(self.text)
	if pythonicOffset < 0 or pythonicOffset > totalPythonicOffset:
		raise ValueError(
			f"pythonicOffset must be between 0 and {totalPythonicOffset} inclusive; got {pythonicOffset}"
		)
	if pythonicOffset == 0 or pythonicOffset == totalPythonicOffset:
		# The target is one of the two ends: collapsing a copy is enough, no search required.
		result = self.copy()
		result.collapse(end=pythonicOffset > 0)
		return result

	info = self.copy()

	# pythonicOffsetLeft and pythonicOffsetRight represent the distance in pythonic characters
	# from the left and right ends of info respectively to the desired location.
	pythonicOffsetLeft = pythonicOffset
	pythonicOffsetRight = totalPythonicOffset - pythonicOffsetLeft

	# We store lastMove - by how many characters we moved last time, and
	# lastRecursedLeft - whether the last recursion happened to the left and not to the right -
	# in order to avoid certain corner cases.
	lastMove: int | None = None
	lastRecursedLeft: bool | None = None

	MAX_BINARY_SEARCH_ITERATIONS = 1000
	for __ in range(MAX_BINARY_SEARCH_ITERATIONS):
		tmpInfo = info.copy()
		if pythonicOffsetLeft <= pythonicOffsetRight:
			# Move from the left end of info. moveCharacters holds by how many characters.
			tmpInfo.collapse()
			if lastRecursedLeft is True and lastMove > 0:
				# Last time we also moved from this same left end and overshot.
				# In order to avoid an infinite loop or overshooting again, reduce movement by 1.
				moveCharacters = lastMove - 1
				if moveCharacters == 0:
					raise RuntimeError("Unable to find desired offset in TextInfo.")
			else:
				moveCharacters = pythonicOffsetLeft

			code = tmpInfo.move(UNIT_CHARACTER, moveCharacters, endPoint="end")
			lastMove = moveCharacters
			# tmpInfo now spans from the start of info to the landing point.
			actualPythonicOffset = len(tmpInfo.text)
			tmpInfo.collapse(end=True)
		else:
			# Move from the right end of info.
			tmpInfo.collapse(end=True)
			if lastRecursedLeft is False and lastMove < 0:
				# lastMove was negative, so adding +1 reduces its absolute value.
				moveCharacters = lastMove + 1
				if moveCharacters == 0:
					raise RuntimeError("Unable to find desired offset in TextInfo.")
			else:
				moveCharacters = -pythonicOffsetRight
			code = tmpInfo.move(UNIT_CHARACTER, moveCharacters, endPoint="start")
			lastMove = moveCharacters
			# tmpInfo now spans from the landing point to the end of info.
			actualPythonicOffset = totalPythonicOffset - len(tmpInfo.text)
			tmpInfo.collapse()
		if code == 0:
			raise RuntimeError("Move by character operation unexpectedly failed.")
		if actualPythonicOffset <= 0 or actualPythonicOffset >= totalPythonicOffset:
			# Landing on either end of info means no progress can be made.
			raise RuntimeError(f"InvalidState: {actualPythonicOffset=} {totalPythonicOffset=}")
		if actualPythonicOffset == pythonicOffsetLeft:
			return tmpInfo
		elif actualPythonicOffset < pythonicOffsetLeft:
			# Recursing right: the target lies after tmpInfo, so drop everything before it.
			lastRecursedLeft = False
			pythonicOffsetLeft -= actualPythonicOffset
			totalPythonicOffset = pythonicOffsetLeft + pythonicOffsetRight
			info.setEndPoint(tmpInfo, which="startToStart")
		else:  # actualPythonicOffset > pythonicOffsetLeft
			# Recursing left: the target lies before tmpInfo, so drop everything after it.
			lastRecursedLeft = True
			totalPythonicOffset = actualPythonicOffset
			pythonicOffsetRight = totalPythonicOffset - pythonicOffsetLeft
			info.setEndPoint(tmpInfo, which="endToEnd")
	raise RuntimeError("Infinite loop during binary search.")
833+
834+
835+
836+
659837

660838
RE_EOL = re.compile("\r\n|[\n\r]")
661839
def convertToCrlf(text):

0 commit comments

Comments
 (0)