Skip to content

Commit c2ef5e7

Browse files
authored
Merge fd3f2b6 into 442476a
2 parents 442476a + fd3f2b6 commit c2ef5e7

7 files changed

Lines changed: 194 additions & 93 deletions

File tree

source/config/configSpec.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
from io import StringIO
1010
from configobj import ConfigObj
1111

12-
#: The version of the schema outlined in this file. Increment this when modifying the schema and
12+
#: The version of the schema outlined in this file. Increment this when modifying the schema and
1313
#: provide an upgrade step (@see profileUpgradeSteps.py). An upgrade step does not need to be added when
14-
#: just adding a new element to (or removing from) the schema, only when old versions of the config
14+
#: just adding a new element to (or removing from) the schema, only when old versions of the config
1515
#: (conforming to old schema versions) will not work correctly with the new schema.
1616
latestSchemaVersion = 10
1717

@@ -297,6 +297,8 @@
297297
298298
[uwpOcr]
299299
language = string(default="")
300+
autoRefresh = boolean(default=false)
301+
autoRefreshInterval = integer(default=1500, min=100)
300302
301303
[upgrade]
302304
newLaptopKeyboardLayout = boolean(default=false)

source/contentRecog/__init__.py

Lines changed: 49 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
#contentRecog/__init__.py
2-
#A part of NonVisual Desktop Access (NVDA)
3-
#Copyright (C) 2017 NV Access Limited
4-
#This file is covered by the GNU General Public License.
5-
#See the file COPYING for more details.
1+
# A part of NonVisual Desktop Access (NVDA)
2+
# Copyright (C) 2017-2023 NV Access Limited, James Teh, Leonard de Ruijter
3+
# This file is covered by the GNU General Public License.
4+
# See the file COPYING for more details.
65

76
"""Framework for recognition of content; OCR, image recognition, etc.
87
When authors don't provide sufficient information for a screen reader user to determine the content of something,
@@ -14,11 +13,16 @@
1413
"""
1514

1615
from collections import namedtuple
16+
import ctypes
17+
from typing import Callable, Dict, List, Union
1718
import garbageHandler
1819
import cursorManager
1920
import textInfos.offsets
2021
from abc import ABCMeta, abstractmethod
2122
from locationHelper import RectLTWH
23+
from NVDAObjects import NVDAObject
24+
25+
onRecognizeResultCallbackT = Callable[[Union["RecognitionResult", Exception]], None]
2226

2327

2428
class BaseContentRecogTextInfo(cursorManager._ReviewCursorManagerTextInfo):
@@ -31,20 +35,25 @@ class ContentRecognizer(garbageHandler.TrackedObject, metaclass=ABCMeta):
3135
"""Implementation of a content recognizer.
3236
"""
3337

34-
def getResizeFactor(self, width, height):
38+
allowAutoRefresh: bool = False
39+
"""
40+
Whether to allow automatic, periodic refresh when using this recognizer.
41+
This allows the user to see live changes as they occur. However, if a
42+
recognizer uses an internet service or is very resource intensive, this
43+
may be undesirable.
44+
"""
45+
46+
def getResizeFactor(self, width: int, height: int) -> Union[int, float]:
3547
"""Return the factor by which an image must be resized
3648
before it is passed to this recognizer.
3749
@param width: The width of the image in pixels.
38-
@type width: int
3950
@param height: The height of the image in pixels.
40-
@type height: int
4151
@return: The resize factor, C{1} for no resizing.
42-
@rtype: int or float
4352
"""
4453
return 1
4554

4655
@abstractmethod
47-
def recognize(self, pixels, imageInfo, onResult):
56+
def recognize(self, pixels: ctypes.Array, imageInfo: "RecogImageInfo", onResult: onRecognizeResultCallbackT):
4857
"""Asynchronously recognize content from an image.
4958
This method should not block.
5059
Only one recognition can be performed at a time.
@@ -56,9 +65,8 @@ def recognize(self, pixels, imageInfo, onResult):
5665
However, the alpha channel should be ignored.
5766
@type pixels: Two dimensional array (y then x) of L{winGDI.RGBQUAD}
5867
@param imageInfo: Information about the image for recognition.
59-
@type imageInfo: L{RecogImageInfo}
60-
@param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure) as its only argument.
61-
@type onResult: callable
68+
@param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure)
69+
as its only argument.
6270
"""
6371
raise NotImplementedError
6472

@@ -73,17 +81,16 @@ def validateCaptureBounds(self, location: RectLTWH) -> bool:
7381
"""
7482
return True
7583

76-
def validateObject(self, nav):
84+
def validateObject(self, nav: NVDAObject) -> bool:
7785
"""Validation to be performed on the navigator object before content recognition.
7886
@param nav: The navigator object to be validated
79-
@type nav: L{NVDAObjects.NVDAObject}
8087
@return: C{True} or C{False}, depending on whether the navigator object is valid or not.
8188
C{True} for no validation.
82-
@rtype: bool
8389
"""
8490
return True
8591

86-
class RecogImageInfo(object):
92+
93+
class RecogImageInfo:
8794
"""Encapsulates information about a recognized image and
8895
provides functionality to convert coordinates.
8996
An image captured for recognition can begin at any point on the screen.
@@ -97,18 +104,20 @@ class RecogImageInfo(object):
97104
This is done using the L{convertXToScreen} and L{convertYToScreen} methods.
98105
"""
99106

100-
def __init__(self, screenLeft, screenTop, screenWidth, screenHeight, resizeFactor):
107+
def __init__(
108+
self,
109+
screenLeft: int,
110+
screenTop: int,
111+
screenWidth: int,
112+
screenHeight: int,
113+
resizeFactor: Union[int, float]
114+
):
101115
"""
102116
@param screenLeft: The x screen coordinate of the upper-left corner of the image.
103-
@type screenLeft: int
104117
@param screenTop: The y screen coordinate of the upper-left corner of the image.
105-
@type screenTop: int
106118
@param screenWidth: The width of the image on the screen.
107-
@type screenWidth: int
108119
@param screenHeight: The height of the image on the screen.
109-
@type screenHeight: int
110120
@param resizeFactor: The factor by which the image must be resized for recognition.
111-
@type resizeFactor: int or float
112121
@raise ValueError: If the supplied screen coordinates indicate that
113122
the image is not visible; e.g. width or height of 0.
114123
"""
@@ -125,7 +134,14 @@ def __init__(self, screenLeft, screenTop, screenWidth, screenHeight, resizeFacto
125134
self.recogHeight = int(screenHeight * resizeFactor)
126135

127136
@classmethod
128-
def createFromRecognizer(cls, screenLeft, screenTop, screenWidth, screenHeight, recognizer):
137+
def createFromRecognizer(
138+
cls,
139+
screenLeft: int,
140+
screenTop: int,
141+
screenWidth: int,
142+
screenHeight: int,
143+
recognizer: ContentRecognizer
144+
):
129145
"""Convenience method to construct an instance using a L{ContentRecognizer}.
130146
The resize factor is obtained by calling L{ContentRecognizer.getResizeFactor}.
131147
"""
@@ -172,18 +188,20 @@ def makeTextInfo(self, obj, position) -> BaseContentRecogTextInfo:
172188
"""
173189
raise NotImplementedError
174190

191+
175192
# Used internally by LinesWordsResult.
176193
# (Lwr is short for LinesWordsResult.)
177194
LwrWord = namedtuple("LwrWord", ("offset", "left", "top", "width", "height"))
178195

196+
179197
class LinesWordsResult(RecognitionResult):
180198
"""A L{RecognitionResult} which can create TextInfos based on a simple lines/words data structure.
181199
The data structure is a list of lines, wherein each line is a list of words,
182200
wherein each word is a dict containing the keys x, y, width, height and text.
183201
Several OCR engines produce output in a format which can be easily converted to this.
184202
"""
185203

186-
def __init__(self, data, imageInfo):
204+
def __init__(self, data: List[List[Dict[str, Union[str, int]]]], imageInfo: RecogImageInfo):
187205
"""Constructor.
188206
@param data: The lines/words data structure. For example:
189207
[
@@ -196,11 +214,9 @@ def __init__(self, data, imageInfo):
196214
{"x": 117, "y": 105, "width": 11, "height": 9, "text": "Word4"}
197215
]
198216
]
199-
@type data: list of lists of dicts
200217
@param imageInfo: Information about the recognized image.
201218
This is used to convert coordinates in the recognized image
202219
to screen coordinates.
203-
@type imageInfo: L{RecogImageInfo}
204220
"""
205221
self.data = data
206222
self.imageInfo = imageInfo
@@ -223,11 +239,13 @@ def _parseData(self):
223239
# Separate with a space.
224240
self._textList.append(" ")
225241
self.textLen += 1
226-
self.words.append(LwrWord(self.textLen,
242+
self.words.append(LwrWord(
243+
self.textLen,
227244
self.imageInfo.convertXToScreen(word["x"]),
228245
self.imageInfo.convertYToScreen(word["y"]),
229246
self.imageInfo.convertWidthToScreen(word["width"]),
230-
self.imageInfo.convertHeightToScreen(word["height"])))
247+
self.imageInfo.convertHeightToScreen(word["height"]))
248+
)
231249
text = word["text"]
232250
self._textList.append(text)
233251
self.textLen += len(text)
@@ -249,7 +267,7 @@ class LwrTextInfo(BaseContentRecogTextInfo, textInfos.offsets.OffsetsTextInfo):
249267

250268
def __init__(self, obj, position, result):
251269
self.result = result
252-
super(LwrTextInfo, self).__init__(obj, position)
270+
super().__init__(obj, position)
253271

254272
def copy(self):
255273
return self.__class__(self.obj, self.bookmark, self.result)
@@ -315,7 +333,7 @@ class SimpleResultTextInfo(BaseContentRecogTextInfo, textInfos.offsets.OffsetsTe
315333

316334
def __init__(self, obj, position, result):
317335
self.result = result
318-
super(SimpleResultTextInfo, self).__init__(obj, position)
336+
super().__init__(obj, position)
319337

320338
def copy(self):
321339
return self.__class__(self.obj, self.bookmark, self.result)
@@ -325,6 +343,3 @@ def _getStoryText(self):
325343

326344
def _getStoryLength(self):
327345
return len(self.result.text)
328-
329-
def _getStoryText(self):
330-
return self.result.text

0 commit comments

Comments
 (0)