Skip to content

Commit 3fe9e6e

Browse files
authored
ROB: Handle missing destinations in reader (#840)
If a destination is missing, getDestinationPageNumber now returns -1. If `strict=False`, the first page is used as a fallback.

The code triggering the exception was:

```python
from PyPDF2 import PdfFileReader

# https://github.com/mstamy2/PyPDF2/files/6045010/thyroid.pdf
with open("thyroid.pdf", "rb") as f:
    reader = PdfFileReader(f)
    bookmarks = reader.getOutlines()
    for b in bookmarks:
        print(reader.getDestinationPageNumber(b) + 1)  # page count starts from 0
```

The error message was:

PyPDF2.utils.PdfReadError: Unknown Destination Type: 0

Closes #604
Closes #821
1 parent 5e86977 commit 3fe9e6e

File tree

3 files changed

+49
-1
lines changed

3 files changed

+49
-1
lines changed

PyPDF2/_reader.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,8 @@ def _getPageNumberByIndirect(self, indirectRef):
557557
id2num[x.indirectRef.idnum] = i
558558
self._pageId2Num = id2num
559559

560+
if isinstance(indirectRef, NullObject):
561+
return -1
560562
if isinstance(indirectRef, int):
561563
idnum = indirectRef
562564
else:
@@ -595,7 +597,17 @@ def getDestinationPageNumber(self, destination):
595597
def _buildDestination(self, title, array):
    """Build a ``Destination`` from a title and a destination array.

    :param title: title of the destination.
    :param array: destination array; the first two items are the page
        reference and the fit type, any remaining items are passed to
        ``Destination`` as extra positional arguments.
    :return: the ``Destination`` built from ``array``. If building it
        fails with ``PdfReadError`` and the reader is not strict, a
        ``/Fit`` destination pointing at the first page is returned
        instead.
    :raises PdfReadError: if the destination cannot be built and
        ``self.strict`` is true.
    """
    page, typ = array[0:2]
    array = array[2:]
    try:
        return Destination(title, page, typ, *array)
    except PdfReadError:
        # Format rather than concatenate: a title that is not a text
        # string (e.g. a byte string) would make ``+`` raise TypeError
        # here and mask the original PdfReadError.
        warnings.warn("Unknown destination : %s %s" % (title, array))
        if self.strict:
            raise
        # Non-strict mode: fall back to a link to the first page.
        return Destination(
            title, self.getPage(0).indirectRef, TextStringObject("/Fit")
        )
610+
599611

600612
def _buildOutline(self, node):
601613
dest, title, outline = None, None, None

Resources/issue-604.pdf

7.62 MB
Binary file not shown.

Tests/test_reader.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,42 @@ def test_reader_properties():
547547
assert reader.pageMode is None
548548
assert reader.isEncrypted is False
549549

550+
@pytest.mark.parametrize("strict", [True, False])
def test_issue604(strict):
    """Read the outlines of a PDF that contains invalid destinations.

    In strict mode, reading the outlines must raise ``PdfReadError`` with an
    "Unknown Destination" message. In non-strict mode, the outlines must be
    readable and every entry must resolve to a page number without raising.
    """
    with open(os.path.join(RESOURCE_ROOT, "issue-604.pdf"), "rb") as f:
        if strict:
            with pytest.raises(PdfReadError) as exc:
                pdf = PdfFileReader(f, strict=strict)
                pdf.getOutlines()
            assert "Unknown Destination" in exc.value.args[0]
            return  # no usable bookmarks in strict mode

        pdf = PdfFileReader(f, strict=strict)
        bookmarks = pdf.getOutlines()

        def get_dest_pages(entry):
            # An outline entry is either a destination or a (nested) list
            # of entries; resolve recursively to 1-based page numbers.
            if isinstance(entry, list):
                return [get_dest_pages(child) for child in entry]
            return pdf.getDestinationPageNumber(entry) + 1

        # Resolving must succeed for every entry, including the broken ones.
        for bookmark in bookmarks:
            get_dest_pages(bookmark)
550586

551587
def test_decode_permissions():
552588
reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))

0 commit comments

Comments
 (0)