1- #contentRecog/__init__.py
2- #A part of NonVisual Desktop Access (NVDA)
3- #Copyright (C) 2017 NV Access Limited
4- #This file is covered by the GNU General Public License.
5- #See the file COPYING for more details.
1+ # A part of NonVisual Desktop Access (NVDA)
2+ # Copyright (C) 2017-2023 NV Access Limited, James Teh, Leonard de Ruijter
3+ # This file is covered by the GNU General Public License.
4+ # See the file COPYING for more details.
65
76"""Framework for recognition of content; OCR, image recognition, etc.
87When authors don't provide sufficient information for a screen reader user to determine the content of something,
1413"""
1514
1615from collections import namedtuple
16+ import ctypes
17+ from typing import Callable , Dict , List , Union
1718import garbageHandler
1819import cursorManager
1920import textInfos .offsets
2021from abc import ABCMeta , abstractmethod
2122from locationHelper import RectLTWH
23+ from NVDAObjects import NVDAObject
24+
25+ onRecognizeResultCallbackT = Callable [[Union ["RecognitionResult" , Exception ]], None ]
2226
2327
2428class BaseContentRecogTextInfo (cursorManager ._ReviewCursorManagerTextInfo ):
@@ -31,20 +35,25 @@ class ContentRecognizer(garbageHandler.TrackedObject, metaclass=ABCMeta):
3135 """Implementation of a content recognizer.
3236 """
3337
34- def getResizeFactor (self , width , height ):
38+ allowAutoRefresh : bool = False
39+ """
40+ Whether to allow automatic, periodic refresh when using this recognizer.
41+ This allows the user to see live changes as they occur. However, if a
42+ recognizer uses an internet service or is very resource intensive, this
43+ may be undesirable.
44+ """
45+
46+ def getResizeFactor (self , width : int , height : int ) -> Union [int , float ]:
3547 """Return the factor by which an image must be resized
3648 before it is passed to this recognizer.
3749 @param width: The width of the image in pixels.
38- @type width: int
3950 @param height: The height of the image in pixels.
40- @type height: int
4151 @return: The resize factor, C{1} for no resizing.
42- @rtype: int or float
4352 """
4453 return 1
4554
4655 @abstractmethod
47- def recognize (self , pixels , imageInfo , onResult ):
56+ def recognize (self , pixels : ctypes . Array , imageInfo : "RecogImageInfo" , onResult : onRecognizeResultCallbackT ):
4857 """Asynchronously recognize content from an image.
4958 This method should not block.
5059 Only one recognition can be performed at a time.
@@ -56,9 +65,8 @@ def recognize(self, pixels, imageInfo, onResult):
5665 However, the alpha channel should be ignored.
5766 @type pixels: Two dimensional array (y then x) of L{winGDI.RGBQUAD}
5867 @param imageInfo: Information about the image for recognition.
59- @type imageInfo: L{RecogImageInfo}
60- @param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure) as its only argument.
61- @type onResult: callable
68+ @param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure)
69+ as its only argument.
6270 """
6371 raise NotImplementedError
6472
@@ -73,17 +81,16 @@ def validateCaptureBounds(self, location: RectLTWH) -> bool:
7381 """
7482 return True
7583
76- def validateObject (self , nav ) :
84+ def validateObject (self , nav : NVDAObject ) -> bool :
7785 """Validation to be performed on the navigator object before content recognition
7886 @param nav: The navigator object to be validated
79- @type nav: L{NVDAObjects.NVDAObject}
8087 @return: C{True} or C{False}, depending on whether the navigator object is valid or not.
8188 C{True} for no validation.
82- @rtype: bool
8389 """
8490 return True
8591
86- class RecogImageInfo (object ):
92+
93+ class RecogImageInfo :
8794 """Encapsulates information about a recognized image and
8895 provides functionality to convert coordinates.
8996 An image captured for recognition can begin at any point on the screen.
@@ -97,18 +104,20 @@ class RecogImageInfo(object):
97104 This is done using the L{convertXToScreen} and L{convertYToScreen} methods.
98105 """
99106
100- def __init__ (self , screenLeft , screenTop , screenWidth , screenHeight , resizeFactor ):
107+ def __init__ (
108+ self ,
109+ screenLeft : int ,
110+ screenTop : int ,
111+ screenWidth : int ,
112+ screenHeight : int ,
113+ resizeFactor : Union [int , float ]
114+ ):
101115 """
102116 @param screenLeft: The x screen coordinate of the upper-left corner of the image.
103- @type screenLeft: int
104117 @param screenTop: The y screen coordinate of the upper-left corner of the image.
105- @type screenTop: int
106118 @param screenWidth: The width of the image on the screen.
107- @type screenWidth: int
108119 @param screenHeight: The height of the image on the screen.
109- @type screenHeight: int
110120 @param resizeFactor: The factor by which the image must be resized for recognition.
111- @type resizeFactor: int or float
112121 @raise ValueError: If the supplied screen coordinates indicate that
113122 the image is not visible; e.g. width or height of 0.
114123 """
@@ -125,7 +134,14 @@ def __init__(self, screenLeft, screenTop, screenWidth, screenHeight, resizeFacto
125134 self .recogHeight = int (screenHeight * resizeFactor )
126135
127136 @classmethod
128- def createFromRecognizer (cls , screenLeft , screenTop , screenWidth , screenHeight , recognizer ):
137+ def createFromRecognizer (
138+ cls ,
139+ screenLeft : int ,
140+ screenTop : int ,
141+ screenWidth : int ,
142+ screenHeight : int ,
143+ recognizer : ContentRecognizer
144+ ):
129145 """Convenience method to construct an instance using a L{ContentRecognizer}.
130146 The resize factor is obtained by calling L{ContentRecognizer.getResizeFactor}.
131147 """
@@ -172,18 +188,20 @@ def makeTextInfo(self, obj, position) -> BaseContentRecogTextInfo:
172188 """
173189 raise NotImplementedError
174190
191+
175192# Used internally by LinesWordsResult.
176193# (Lwr is short for LinesWordsResult.)
177194LwrWord = namedtuple ("LwrWord" , ("offset" , "left" , "top" , "width" , "height" ))
178195
196+
179197class LinesWordsResult (RecognitionResult ):
180198 """A L{RecognitionResult} which can create TextInfos based on a simple lines/words data structure.
181199 The data structure is a list of lines, wherein each line is a list of words,
182200 wherein each word is a dict containing the keys x, y, width, height and text.
183201 Several OCR engines produce output in a format which can be easily converted to this.
184202 """
185203
186- def __init__ (self , data , imageInfo ):
204+ def __init__ (self , data : List [ List [ Dict [ str , Union [ str , int ]]]], imageInfo : RecogImageInfo ):
187205 """Constructor.
188206 @param data: The lines/words data structure. For example:
189207 [
@@ -196,11 +214,9 @@ def __init__(self, data, imageInfo):
196214 {"x": 117, "y": 105, "width": 11, "height": 9, "text": "Word4"}
197215 ]
198216 ]
199- @type data: list of lists of dicts
200217 @param imageInfo: Information about the recognized image.
201218 This is used to convert coordinates in the recognized image
202219 to screen coordinates.
203- @type imageInfo: L{RecogImageInfo}
204220 """
205221 self .data = data
206222 self .imageInfo = imageInfo
@@ -223,11 +239,13 @@ def _parseData(self):
223239 # Separate with a space.
224240 self ._textList .append (" " )
225241 self .textLen += 1
226- self .words .append (LwrWord (self .textLen ,
242+ self .words .append (LwrWord (
243+ self .textLen ,
227244 self .imageInfo .convertXToScreen (word ["x" ]),
228245 self .imageInfo .convertYToScreen (word ["y" ]),
229246 self .imageInfo .convertWidthToScreen (word ["width" ]),
230- self .imageInfo .convertHeightToScreen (word ["height" ])))
247+ self .imageInfo .convertHeightToScreen (word ["height" ]))
248+ )
231249 text = word ["text" ]
232250 self ._textList .append (text )
233251 self .textLen += len (text )
@@ -249,7 +267,7 @@ class LwrTextInfo(BaseContentRecogTextInfo, textInfos.offsets.OffsetsTextInfo):
249267
250268 def __init__ (self , obj , position , result ):
251269 self .result = result
252- super (LwrTextInfo , self ).__init__ (obj , position )
270+ super ().__init__ (obj , position )
253271
254272 def copy (self ):
255273 return self .__class__ (self .obj , self .bookmark , self .result )
@@ -315,7 +333,7 @@ class SimpleResultTextInfo(BaseContentRecogTextInfo, textInfos.offsets.OffsetsTe
315333
316334 def __init__ (self , obj , position , result ):
317335 self .result = result
318- super (SimpleResultTextInfo , self ).__init__ (obj , position )
336+ super ().__init__ (obj , position )
319337
320338 def copy (self ):
321339 return self .__class__ (self .obj , self .bookmark , self .result )
@@ -325,6 +343,3 @@ def _getStoryText(self):
325343
326344 def _getStoryLength (self ):
327345 return len (self .result .text )
328-
329- def _getStoryText (self ):
330- return self .result .text
0 commit comments