Skip to content

AttributeError: 'DictionaryObject' object has no attribute 'insert_child' #2233

@Firestar-Reimu

Description

@Firestar-Reimu

I am using pypdf 3.16.2 with Python 3.11.5 (both installed from conda-forge) on Arch Linux.

I want to add bookmarks to this PDF (13 MB): https://paste.c-net.org/TexasGander

using the code below:

import re
import sys

from os.path import exists, splitext
from pypdf import PdfReader, PdfWriter


def _get_parent_bookmark(current_indent, history_indent, bookmarks):
    """Pick the parent for a bookmark with the given indentation.

    Walks the previously seen bookmarks from newest to oldest and returns
    the first one whose recorded indent is strictly smaller than
    ``current_indent``. ``history_indent[i]`` is the indent that belongs to
    ``bookmarks[i]``, so both sequences must have the same length.

    Returns ``None`` when ``current_indent`` is 0 or when no earlier
    bookmark has a smaller indent.
    """
    assert len(history_indent) == len(bookmarks)
    if current_indent == 0:
        return None
    # Pair each earlier bookmark with its indent, most recent first.
    newest_first = zip(reversed(history_indent), reversed(bookmarks))
    for earlier_indent, candidate in newest_first:
        if earlier_indent < current_indent:
            return candidate
    return None


def addBookmark(pdf_path, bookmark_txt_path, page_offset):
    """Add the outline described by a text file to a copy of a PDF.

    Every line of the bookmark file that has at least two
    whitespace-separated fields is read as ``<title words...> <page>``,
    where ``<page>`` is a 1-based page number. Lines with fewer than two
    fields (including blank lines) are skipped. The amount of leading
    whitespace on a line decides its nesting: the parent of a line is the
    nearest earlier line with a smaller indent (see
    ``_get_parent_bookmark``).

    The result is written next to the input file as ``<name>-new.pdf``.

    Args:
        pdf_path: Path of the PDF to read.
        bookmark_txt_path: Path of the UTF-8 text file describing the
            bookmarks.
        page_offset: Number added to every (0-based) page index before the
            bookmark is created.

    Returns:
        A status string: a success message naming the output file, or a
        message starting with ``"Error: "`` when an input file is missing,
        a page field is not an integer, or a computed page index falls
        outside the document.
    """
    if not exists(pdf_path):
        return "Error: No such file: {}".format(pdf_path)
    if not exists(bookmark_txt_path):
        return "Error: No such file: {}".format(bookmark_txt_path)

    with open(bookmark_txt_path, "r", encoding="utf-8") as f:
        bookmark_lines = f.readlines()
    reader = PdfReader(pdf_path)
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)

    page_count = len(reader.pages)
    bookmarks, history_indent = [], []
    # decide the level of each bookmark according to the relative indent size in each line
    #   no indent:          level 1
    #     small indent:     level 2
    #       larger indent:  level 3
    #   ...
    for line in bookmark_lines:
        fields = line.split()
        if len(fields) < 2:
            # Blank line, or a single token with no separate page field.
            continue

        indent_size = len(line) - len(line.lstrip())
        parent = _get_parent_bookmark(indent_size, history_indent, bookmarks)
        history_indent.append(indent_size)

        title = " ".join(fields[:-1])
        try:
            # Page numbers in the text file are 1-based.
            page = int(fields[-1]) - 1
        except ValueError:
            return "Error: invalid page number in line: {!r}".format(line.rstrip("\r\n"))

        target_page = page + page_offset
        # A negative index must be rejected explicitly: it would not be
        # caught by the upper-bound check below.
        if target_page < 0:
            return "Error: page index out of range: %d < 0" % target_page
        if target_page >= page_count:
            return "Error: page index out of range: %d >= %d" % (target_page, page_count)

        # NOTE: the AttributeError reported in this issue
        # ('DictionaryObject' object has no attribute 'insert_child')
        # is raised from inside this call.
        new_bookmark = writer.add_outline_item(title, target_page, parent=parent)
        bookmarks.append(new_bookmark)

    out_path = splitext(pdf_path)[0] + "-new.pdf"
    with open(out_path, "wb") as f:
        writer.write(f)

    return "The bookmarks have been added to %s" % out_path


if __name__ == "__main__":
    import sys

    # Command line: <script> <pdf> <bookmark_txt> <page_offset>
    args = sys.argv
    if len(args) == 4:
        # addBookmark returns a status string; print it either way.
        print(addBookmark(args[1], args[2], int(args[3])))
    else:
        print("Usage: %s [pdf] [bookmark_txt] [page_offset]" % args[0])

# python bookmarks.py "/path/to/xxx.pdf" "/path/to/toc.txt" page_offset

toc.txt is here: https://paste.c-net.org/HeraldCreated

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions