@@ -1080,34 +1080,64 @@ def extract_text(self, Tj_sep: str = "", TJ_sep: str = "") -> str:
10801080 # Note: we check all strings are TextStringObjects. ByteStringObjects
10811081 # are strings where the byte->string encoding was unknown, so adding
10821082 # them to the text here would be gibberish.
1083+
1084+ space_scale = 1.0
1085+
10831086 for operands , operator in content .operations :
1084- if operator == b_ ("Tj" ):
1087+ if operator == b_ ("Tf" ): # text font
1088+ pass
1089+ elif operator == b_ ("Tfs" ): # text font size
1090+ pass
1091+ elif operator == b_ ("Tc" ): # character spacing
1092+ # See '5.2.1 Character Spacing'
1093+ pass
1094+ elif operator == b_ ("Tw" ): # word spacing
1095+ # See '5.2.2 Word Spacing'
1096+ space_scale = 1.0 + float (operands [0 ])
1097+ elif operator == b_ ("Th" ): # horizontal scaling
1098+ # See '5.2.3 Horizontal Scaling'
1099+ pass
1100+ elif operator == b_ ("Tl" ): # leading
1101+ # See '5.2.4 Leading'
1102+ pass
1103+ elif operator == b_ ("Tmode" ): # text rendering mode
1104+ # See '5.2.5 Text Rendering Mode'
1105+ pass
1106+ elif operator == b_ ("Trise" ): # text rise
1107+ # See '5.2.6 Text Rise'
1108+ pass
1109+ elif operator == b_ ("Tj" ):
1110+ # See 'TABLE 5.6 Text-showing operators'
10851111 _text = operands [0 ]
10861112 if isinstance (_text , TextStringObject ):
10871113 text += Tj_sep
10881114 text += _text
10891115 text += "\n "
10901116 elif operator == b_ ("T*" ):
1117+ # See 'TABLE 5.5 Text-positioning operators'
10911118 text += "\n "
10921119 elif operator == b_ ("'" ):
1120+ # See 'TABLE 5.6 Text-showing operators'
10931121 text += "\n "
10941122 _text = operands [0 ]
10951123 if isinstance (_text , TextStringObject ):
10961124 text += operands [0 ]
10971125 elif operator == b_ ('"' ):
1126+ # See 'TABLE 5.6 Text-showing operators'
10981127 _text = operands [2 ]
10991128 if isinstance (_text , TextStringObject ):
11001129 text += "\n "
11011130 text += _text
11021131 elif operator == b_ ("TJ" ):
1132+ # See 'TABLE 5.6 Text-showing operators'
11031133 for i in operands [0 ]:
11041134 if isinstance (i , TextStringObject ):
11051135 text += TJ_sep
11061136 text += i
1107- elif isinstance (i , NumberObject ):
1137+ elif isinstance (i , ( NumberObject , FloatObject ) ):
11081138 # a positive value decreases and the negative value increases
11091139 # space
1110- if int (i ) < 0 :
1140+ if int (i ) < - space_scale * 250 :
11111141 if len (text ) == 0 or text [- 1 ] != " " :
11121142 text += " "
11131143 else :
0 commit comments