-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Closed
Description
I use pypdf 3.16.2 with Python 3.11.5 (installed from conda-forge) on Arch Linux.
I want to add bookmarks to this pdf (13MB): https://paste.c-net.org/TexasGander
using the code below:
import re
import sys
from os.path import exists, splitext
from pypdf import PdfReader, PdfWriter
def _get_parent_bookmark(current_indent, history_indent, bookmarks):
"""The parent of A is the nearest bookmark whose indent is smaller than A's"""
assert len(history_indent) == len(bookmarks)
if current_indent == 0:
return None
for i in range(len(history_indent) - 1, -1, -1):
# len(history_indent) - 1 ===> 0
if history_indent[i] < current_indent:
return bookmarks[i]
return None
def _parse_bookmark_line(line):
    """Split one bookmark-file line into ``(indent_size, title, page_index)``.

    The line is stripped and split on whitespace; the last token is parsed as
    an integer and decremented by one (presumably the file uses 1-based page
    numbers), and the remaining tokens are joined with single spaces to form
    the title.  ``indent_size`` is the number of leading whitespace characters.

    Returns None when the stripped line has only one token (this includes
    blank lines).  Raises ValueError when the last token is not an integer.
    """
    tokens = re.split(r"\s+", line.strip())
    if len(tokens) == 1:
        return None
    indent_size = len(line) - len(line.lstrip())
    return indent_size, " ".join(tokens[:-1]), int(tokens[-1]) - 1


def addBookmark(pdf_path, bookmark_txt_path, page_offset):
    """Add outline items from a text file to a PDF and write ``<name>-new.pdf``.

    Each usable line of ``bookmark_txt_path`` is ``<title> <page>``.  The
    nesting level of a bookmark is decided by its indent relative to earlier
    lines: a line's parent is the nearest preceding bookmark with a smaller
    indent, and lines without indent are top-level.

    Args:
        pdf_path: path of the PDF to read.
        bookmark_txt_path: path of the UTF-8 bookmark list.
        page_offset: integer added to every parsed page index.

    Returns:
        A status string: an ``"Error: ..."`` message when an input file is
        missing or a target page index falls outside the document, otherwise
        a message naming the written output file.

    Raises:
        ValueError: if the last token of a bookmark line is not an integer.
    """
    if not exists(pdf_path):
        return "Error: No such file: {}".format(pdf_path)
    if not exists(bookmark_txt_path):
        return "Error: No such file: {}".format(bookmark_txt_path)

    with open(bookmark_txt_path, "r", encoding="utf-8") as f:
        bookmark_lines = f.readlines()

    reader = PdfReader(pdf_path)
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    max_pages = len(reader.pages)

    # Parallel lists: indent of every bookmark added so far and the object
    # returned for it, used to look up the parent of later lines.
    bookmarks, history_indent = [], []
    for line in bookmark_lines:
        parsed = _parse_bookmark_line(line)
        if parsed is None:
            continue
        indent_size, title, page = parsed

        target_page = page + page_offset
        # Reject indices on both sides of the valid range; a negative index
        # would otherwise be handed to the writer unchecked.
        if target_page < 0:
            return "Error: page index out of range: %d < 0" % target_page
        if target_page >= max_pages:
            return "Error: page index out of range: %d >= %d" % (target_page, max_pages)

        parent = _get_parent_bookmark(indent_size, history_indent, bookmarks)
        # NOTE: with pypdf 3.16.2 this call was reported to fail with
        # AttributeError: 'DictionaryObject' object has no attribute 'insert_child'
        new_bookmark = writer.add_outline_item(title, target_page, parent=parent)
        history_indent.append(indent_size)
        bookmarks.append(new_bookmark)

    out_path = splitext(pdf_path)[0] + "-new.pdf"
    with open(out_path, "wb") as f:
        writer.write(f)
    return "The bookmarks have been added to %s" % out_path
if __name__ == "__main__":
    # Command-line entry point: expects the script name plus exactly three
    # arguments (PDF path, bookmark text path, integer page offset).
    cli_args = sys.argv
    if len(cli_args) == 4:
        pdf_arg, toc_arg, offset_arg = cli_args[1:]
        # addBookmark returns a status string; show it to the user.
        print(addBookmark(pdf_arg, toc_arg, int(offset_arg)))
    else:
        print("Usage: %s [pdf] [bookmark_txt] [page_offset]" % cli_args[0])
# python bookmarks.py "/path/to/xxx.pdf" "/path/to/toc.txt" page_offset
toc.txt is here: https://paste.c-net.org/HeraldCreated
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels