File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1347,15 +1347,19 @@ def read(self, stream: StreamType) -> None:
13471347 for gen , xref_entry in self .xref .items ():
13481348 if gen == 65535 :
13491349 continue
1350- for id in xref_entry :
1350+ xref_k = sorted (
1351+ xref_entry .keys ()
1352+ ) # keys must be processed in ascending order to prevent damage
1353+ for id in xref_k :
13511354 stream .seek (xref_entry [id ], 0 )
13521355 try :
13531356 pid , _pgen = self .read_object_header (stream )
13541357 except ValueError :
13551358 break
13561359 if pid == id - self .xref_index :
1357- self ._zero_xref (gen )
1358- break
1360+ # fixing the index item by item is required for revised PDFs.
1361+ self .xref [gen ][pid ] = self .xref [gen ][id ]
1362+ del self .xref [gen ][id ]
13591363 # if not, then either it's just plain wrong, or the
13601364 # non-zero-index is actually correct
13611365 stream .seek (loc , 0 ) # return to where it was
@@ -1750,11 +1754,6 @@ def _read_xref_subsections(
17501754 elif self .strict :
17511755 raise PdfReadError (f"Unknown xref type: { xref_type } " )
17521756
1753- def _zero_xref (self , generation : int ) -> None :
1754- self .xref [generation ] = {
1755- k - self .xref_index : v for (k , v ) in list (self .xref [generation ].items ())
1756- }
1757-
17581757 def _pairs (self , array : List [int ]) -> Iterable [Tuple [int , int ]]:
17591758 i = 0
17601759 while True :
Original file line number Diff line number Diff line change @@ -1136,3 +1136,11 @@ def test_reader(caplog):
11361136 # ...and now no more required
11371137 reader .pages [0 ].extract_text ()
11381138 assert caplog .text == ""
1139+
1140+
1141+ def test_zeroing_xref ():
1142+ # iss #328
1143+ url = "https://github.com/py-pdf/PyPDF2/files/9066120/UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
1144+ name = "UTA_OSHA.pdf"
1145+ reader = PdfReader (BytesIO (get_pdf_from_url (url , name = name )))
1146+ len (reader .pages )
You can’t perform that action at this time.
0 commit comments