1- #contentRecog/__init__.py
2- #A part of NonVisual Desktop Access (NVDA)
3- #Copyright (C) 2017 NV Access Limited
4- #This file is covered by the GNU General Public License.
5- #See the file COPYING for more details.
1+ # A part of NonVisual Desktop Access (NVDA)
2+ # Copyright (C) 2017-2023 NV Access Limited, James Teh, Leonard de Ruijter
3+ # This file is covered by the GNU General Public License.
4+ # See the file COPYING for more details.
65
76"""Framework for recognition of content; OCR, image recognition, etc.
87When authors don't provide sufficient information for a screen reader user to determine the content of something,
1413"""
1514
1615from collections import namedtuple
16+ import ctypes
17+ from typing import Callable , Dict , List , Union
1718import garbageHandler
19+ from baseObject import AutoPropertyObject
1820import cursorManager
1921import textInfos .offsets
2022from abc import ABCMeta , abstractmethod
2123from locationHelper import RectLTWH
24+ from NVDAObjects import NVDAObject
25+
26+ onRecognizeResultCallbackT = Callable [[Union ["RecognitionResult" , Exception ]], None ]
2227
2328
2429class BaseContentRecogTextInfo (cursorManager ._ReviewCursorManagerTextInfo ):
@@ -27,24 +32,31 @@ class BaseContentRecogTextInfo(cursorManager._ReviewCursorManagerTextInfo):
2732 """
2833
2934
30- class ContentRecognizer (garbageHandler . TrackedObject , metaclass = ABCMeta ):
35+ class ContentRecognizer (AutoPropertyObject ):
3136 """Implementation of a content recognizer.
3237 """
3338
34- def getResizeFactor (self , width , height ):
39+ allowAutoRefresh : bool = False
40+ """
41+ Whether to allow automatic, periodic refresh when using this recognizer.
42+ This allows the user to see live changes as they occur. However, if a
43+ recognizer uses an internet service or is very resource intensive, this
44+ may be undesirable.
45+ """
46+ autoRefreshInterval : int = 1500
47+ """How often (in ms) to perform recognition."""
48+
49+ def getResizeFactor (self , width : int , height : int ) -> Union [int , float ]:
3550 """Return the factor by which an image must be resized
3651 before it is passed to this recognizer.
3752 @param width: The width of the image in pixels.
38- @type width: int
3953 @param height: The height of the image in pixels.
40- @type height: int
4154 @return: The resize factor, C{1} for no resizing.
42- @rtype: int or float
4355 """
4456 return 1
4557
4658 @abstractmethod
47- def recognize (self , pixels , imageInfo , onResult ):
59+ def recognize (self , pixels : ctypes . Array , imageInfo : "RecogImageInfo" , onResult : onRecognizeResultCallbackT ):
4860 """Asynchronously recognize content from an image.
4961 This method should not block.
5062 Only one recognition can be performed at a time.
@@ -56,9 +68,8 @@ def recognize(self, pixels, imageInfo, onResult):
5668 However, the alpha channel should be ignored.
5769 @type pixels: Two dimensional array (y then x) of L{winGDI.RGBQUAD}
5870 @param imageInfo: Information about the image for recognition.
59- @type imageInfo: L{RecogImageInfo}
60- @param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure) as its only argument.
61- @type onResult: callable
71+ @param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure)
72+ as its only argument.
6273 """
6374 raise NotImplementedError
6475
@@ -73,17 +84,16 @@ def validateCaptureBounds(self, location: RectLTWH) -> bool:
7384 """
7485 return True
7586
76- def validateObject (self , nav ) :
87+ def validateObject (self , nav : NVDAObject ) -> bool :
7788 """Validation to be performed on the navigator object before content recognition.
7889 @param nav: The navigator object to be validated
79- @type nav: L{NVDAObjects.NVDAObject}
8090 @return: C{True} or C{False}, depending on whether the navigator object is valid or not.
8191 C{True} for no validation.
82- @rtype: bool
8392 """
8493 return True
8594
86- class RecogImageInfo (object ):
95+
96+ class RecogImageInfo :
8797 """Encapsulates information about a recognized image and
8898 provides functionality to convert coordinates.
8999 An image captured for recognition can begin at any point on the screen.
@@ -97,18 +107,20 @@ class RecogImageInfo(object):
97107 This is done using the L{convertXToScreen} and L{convertYToScreen} methods.
98108 """
99109
100- def __init__ (self , screenLeft , screenTop , screenWidth , screenHeight , resizeFactor ):
110+ def __init__ (
111+ self ,
112+ screenLeft : int ,
113+ screenTop : int ,
114+ screenWidth : int ,
115+ screenHeight : int ,
116+ resizeFactor : Union [int , float ]
117+ ):
101118 """
102119 @param screenLeft: The x screen coordinate of the upper-left corner of the image.
103- @type screenLeft: int
104120 @param screenTop: The y screen coordinate of the upper-left corner of the image.
105- @type screenTop: int
106121 @param screenWidth: The width of the image on the screen.
107- @type screenWidth: int
108122 @param screenHeight: The height of the image on the screen.
109- @type screenHeight: int
110123 @param resizeFactor: The factor by which the image must be resized for recognition.
111- @type resizeFactor: int or float
112124 @raise ValueError: If the supplied screen coordinates indicate that
113125 the image is not visible; e.g. width or height of 0.
114126 """
@@ -125,7 +137,14 @@ def __init__(self, screenLeft, screenTop, screenWidth, screenHeight, resizeFacto
125137 self .recogHeight = int (screenHeight * resizeFactor )
126138
127139 @classmethod
128- def createFromRecognizer (cls , screenLeft , screenTop , screenWidth , screenHeight , recognizer ):
140+ def createFromRecognizer (
141+ cls ,
142+ screenLeft : int ,
143+ screenTop : int ,
144+ screenWidth : int ,
145+ screenHeight : int ,
146+ recognizer : ContentRecognizer
147+ ):
129148 """Convenience method to construct an instance using a L{ContentRecognizer}.
130149 The resize factor is obtained by calling L{ContentRecognizer.getResizeFactor}.
131150 """
@@ -172,18 +191,20 @@ def makeTextInfo(self, obj, position) -> BaseContentRecogTextInfo:
172191 """
173192 raise NotImplementedError
174193
194+
175195# Used internally by LinesWordsResult.
176196# (Lwr is short for LinesWordsResult.)
177197LwrWord = namedtuple ("LwrWord" , ("offset" , "left" , "top" , "width" , "height" ))
178198
199+
179200class LinesWordsResult (RecognitionResult ):
180201 """A L{RecognitionResult} which can create TextInfos based on a simple lines/words data structure.
181202 The data structure is a list of lines, wherein each line is a list of words,
182203 wherein each word is a dict containing the keys x, y, width, height and text.
183204 Several OCR engines produce output in a format which can be easily converted to this.
184205 """
185206
186- def __init__ (self , data , imageInfo ):
207+ def __init__ (self , data : List [ List [ Dict [ str , Union [ str , int ]]]], imageInfo : RecogImageInfo ):
187208 """Constructor.
188209 @param data: The lines/words data structure. For example:
189210 [
@@ -196,11 +217,9 @@ def __init__(self, data, imageInfo):
196217 {"x": 117, "y": 105, "width": 11, "height": 9, "text": "Word4"}
197218 ]
198219 ]
199- @type data: list of lists of dicts
200220 @param imageInfo: Information about the recognized image.
201221 This is used to convert coordinates in the recognized image
202222 to screen coordinates.
203- @type imageInfo: L{RecogImageInfo}
204223 """
205224 self .data = data
206225 self .imageInfo = imageInfo
@@ -223,11 +242,13 @@ def _parseData(self):
223242 # Separate with a space.
224243 self ._textList .append (" " )
225244 self .textLen += 1
226- self .words .append (LwrWord (self .textLen ,
245+ self .words .append (LwrWord (
246+ self .textLen ,
227247 self .imageInfo .convertXToScreen (word ["x" ]),
228248 self .imageInfo .convertYToScreen (word ["y" ]),
229249 self .imageInfo .convertWidthToScreen (word ["width" ]),
230- self .imageInfo .convertHeightToScreen (word ["height" ])))
250+ self .imageInfo .convertHeightToScreen (word ["height" ]))
251+ )
231252 text = word ["text" ]
232253 self ._textList .append (text )
233254 self .textLen += len (text )
@@ -249,7 +270,7 @@ class LwrTextInfo(BaseContentRecogTextInfo, textInfos.offsets.OffsetsTextInfo):
249270
250271 def __init__ (self , obj , position , result ):
251272 self .result = result
252- super (LwrTextInfo , self ).__init__ (obj , position )
273+ super ().__init__ (obj , position )
253274
254275 def copy (self ):
255276 return self .__class__ (self .obj , self .bookmark , self .result )
@@ -315,7 +336,7 @@ class SimpleResultTextInfo(BaseContentRecogTextInfo, textInfos.offsets.OffsetsTe
315336
316337 def __init__ (self , obj , position , result ):
317338 self .result = result
318- super (SimpleResultTextInfo , self ).__init__ (obj , position )
339+ super ().__init__ (obj , position )
319340
320341 def copy (self ):
321342 return self .__class__ (self .obj , self .bookmark , self .result )
@@ -325,6 +346,3 @@ def _getStoryText(self):
325346
326347 def _getStoryLength (self ):
327348 return len (self .result .text )
328-
329- def _getStoryText (self ):
330- return self .result .text
0 commit comments