{"id":43019,"date":"2025-07-29T18:18:03","date_gmt":"2025-07-29T09:18:03","guid":{"rendered":"https:\/\/techgym.jp\/?p=43019"},"modified":"2025-10-20T00:06:20","modified_gmt":"2025-10-19T15:06:20","slug":"python-pdf","status":"publish","type":"post","link":"https:\/\/techgym.jp\/column\/python-pdf\/","title":{"rendered":"Python\u3067PDF\u64cd\u4f5c\u3092\u5b8c\u5168\u30de\u30b9\u30bf\u30fc\u3010PyPDF2\u30fbreportlab\u5fb9\u5e95\u30ac\u30a4\u30c9\u3011"},"content":{"rendered":"\n<p><iframe loading=\"lazy\" width=\"560\" height=\"314\" src=\"\/\/www.youtube.com\/embed\/7iX9nAJE0cE\" allowfullscreen=\"allowfullscreen\"><\/iframe><\/p>\n<p>PDF\uff08Portable Document Format\uff09\u306f\u3001\u30d3\u30b8\u30cd\u30b9\u3084\u5b66\u8853\u5206\u91ce\u3067\u6700\u3082\u4f7f\u7528\u3055\u308c\u308b\u6587\u66f8\u5f62\u5f0f\u306e\u4e00\u3064\u3067\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001Python\u3092\u4f7f\u3063\u305fPDF\u64cd\u4f5c\u306e\u65b9\u6cd5\u3092\u3001\u8aad\u307f\u53d6\u308a\u304b\u3089\u4f5c\u6210\u3001\u7de8\u96c6\u307e\u3067\u5fb9\u5e95\u89e3\u8aac\u3057\u307e\u3059\u3002\u521d\u5fc3\u8005\u304b\u3089\u4e0a\u7d1a\u8005\u307e\u3067\u3001\u5b9f\u7528\u7684\u306a\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3068\u3068\u3082\u306b\u5b66\u3079\u308b\u5185\u5bb9\u3067\u3059\u3002<\/p>\n\n<h2>PDF\u3068\u306f<\/h2>\n<p>PDF\u306f\u3001Adobe 
Systems\u304c\u958b\u767a\u3057\u305f\u6587\u66f8\u5f62\u5f0f\u3067\u3001\u7570\u306a\u308b\u30d7\u30e9\u30c3\u30c8\u30d5\u30a9\u30fc\u30e0\u9593\u3067\u3082\u540c\u3058\u30ec\u30a4\u30a2\u30a6\u30c8\u3092\u4fdd\u6301\u3067\u304d\u308b\u7279\u5fb4\u304c\u3042\u308a\u307e\u3059\u3002\u4e3b\u306a\u7528\u9014\uff1a<\/p>\n<ul>\n<li>\u516c\u5f0f\u6587\u66f8\u30fb\u5951\u7d04\u66f8<\/li>\n<li>\u30ec\u30dd\u30fc\u30c8\u30fb\u8ad6\u6587<\/li>\n<li>\u96fb\u5b50\u66f8\u7c4d<\/li>\n<li>\u30d7\u30ec\u30bc\u30f3\u30c6\u30fc\u30b7\u30e7\u30f3\u8cc7\u6599<\/li>\n<li>\u30d5\u30a9\u30fc\u30e0\u30fb\u7533\u8acb\u66f8<\/li>\n<\/ul>\n<h2>\u4e3b\u8981\u306aPython PDF\u30e9\u30a4\u30d6\u30e9\u30ea<\/h2>\n<h3>1. PyPDF2 \/ pypdf<\/h3>\n<ul>\n<li>PDF\u8aad\u307f\u53d6\u308a\u30fb\u5206\u5272\u30fb\u7d50\u5408\u306b\u7279\u5316<\/li>\n<li>\u8efd\u91cf\u3067\u4f7f\u3044\u3084\u3059\u3044<\/li>\n<li>\u30c6\u30ad\u30b9\u30c8\u62bd\u51fa\u6a5f\u80fd<\/li>\n<\/ul>\n<h3>2. reportlab<\/h3>\n<ul>\n<li>PDF\u4f5c\u6210\u306b\u7279\u5316<\/li>\n<li>\u9ad8\u5ea6\u306a\u30ec\u30a4\u30a2\u30a6\u30c8\u5236\u5fa1<\/li>\n<li>\u30b0\u30e9\u30d5\u30fb\u56f3\u8868\u4f5c\u6210<\/li>\n<\/ul>\n<h3>3. pdfplumber<\/h3>\n<ul>\n<li>\u9ad8\u7cbe\u5ea6\u306a\u30c6\u30ad\u30b9\u30c8\u30fb\u8868\u62bd\u51fa<\/li>\n<li>\u5ea7\u6a19\u60c5\u5831\u3082\u53d6\u5f97\u53ef\u80fd<\/li>\n<li>\u30c7\u30fc\u30bf\u5206\u6790\u306b\u6700\u9069<\/li>\n<\/ul>\n<h3>4. 
PyMuPDF (fitz)<\/h3>\n<ul>\n<li>\u9ad8\u901f\u51e6\u7406<\/li>\n<li>\u753b\u50cf\u62bd\u51fa\u30fb\u5909\u63db<\/li>\n<li>\u6ce8\u91c8\u30fb\u7de8\u96c6\u6a5f\u80fd<\/li>\n<\/ul>\n<h2>\u74b0\u5883\u69cb\u7bc9\u3068\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/h2>\n<pre><code class=\"language-bash\"># \u57fa\u672c\u30e9\u30a4\u30d6\u30e9\u30ea\npip install pypdf reportlab pdfplumber\n\n# \u8ffd\u52a0\u30e9\u30a4\u30d6\u30e9\u30ea\npip install PyMuPDF pillow matplotlib\n<\/code><\/pre>\n<h2>PDF\u8aad\u307f\u53d6\u308a\u306e\u57fa\u672c<\/h2>\n<h3>PyPDF\u3067\u30c6\u30ad\u30b9\u30c8\u62bd\u51fa<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nwith open('sample.pdf', 'rb') as file:\n    reader = pypdf.PdfReader(file)\n    text = \"\"\n    for page in reader.pages:\n        text += page.extract_text()\n    print(text)\n<\/code><\/pre>\n<h3>\u7279\u5b9a\u30da\u30fc\u30b8\u306e\u8aad\u307f\u53d6\u308a<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nwith open('sample.pdf', 'rb') as file:\n    reader = pypdf.PdfReader(file)\n    page = reader.pages[0]  # 1\u30da\u30fc\u30b8\u76ee\n    print(page.extract_text())\n<\/code><\/pre>\n<h3>PDF\u306e\u57fa\u672c\u60c5\u5831\u53d6\u5f97<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nwith open('sample.pdf', 'rb') as file:\n    reader = pypdf.PdfReader(file)\n    info = reader.metadata\n    print(f\"\u30da\u30fc\u30b8\u6570: {len(reader.pages)}\")\n    print(f\"\u30bf\u30a4\u30c8\u30eb: {info.title}\")\n    print(f\"\u4f5c\u6210\u8005: {info.author}\")\n<\/code><\/pre>\n<h2>pdfplumber\u3092\u4f7f\u3063\u305f\u9ad8\u7cbe\u5ea6\u62bd\u51fa<\/h2>\n<h3>\u30c6\u30ad\u30b9\u30c8\u3068\u8868\u306e\u62bd\u51fa<\/h3>\n<pre><code class=\"language-python\">import pdfplumber\n\nwith pdfplumber.open('sample.pdf') as pdf:\n    for page in pdf.pages:\n        text = page.extract_text()\n        tables = page.extract_tables()\n        print(f\"\u30c6\u30ad\u30b9\u30c8: {text}\")\n        print(f\"\u8868\u30c7\u30fc\u30bf: 
{tables}\")\n<\/code><\/pre>\n<h3>CSV\u30d5\u30a1\u30a4\u30eb\u3078\u306e\u8868\u30c7\u30fc\u30bf\u51fa\u529b<\/h3>\n<pre><code class=\"language-python\">import pdfplumber\nimport csv\n\nwith pdfplumber.open('table.pdf') as pdf:\n    table = pdf.pages[0].extract_table()\n    with open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:\n        writer = csv.writer(csvfile)\n        writer.writerows(table)\n<\/code><\/pre>\n<h2>PDF\u4f5c\u6210\u306e\u57fa\u672c\uff08reportlab\uff09<\/h2>\n<h3>\u6700\u5c0f\u306ePDF\u4f5c\u6210<\/h3>\n<pre><code class=\"language-python\">from reportlab.pdfgen import canvas\n\nc = canvas.Canvas(\"hello.pdf\")\nc.drawString(100, 750, \"Hello World!\")\nc.save()\n<\/code><\/pre>\n<h3>\u65e5\u672c\u8a9e\u5bfe\u5fdcPDF\u4f5c\u6210<\/h3>\n<pre><code class=\"language-python\">from reportlab.pdfgen import canvas\nfrom reportlab.pdfbase import pdfmetrics\nfrom reportlab.pdfbase.cidfonts import UnicodeCIDFont\n\npdfmetrics.registerFont(UnicodeCIDFont('HeiseiKakuGo-W5'))\nc = canvas.Canvas(\"japanese.pdf\")\nc.setFont('HeiseiKakuGo-W5', 12)\nc.drawString(100, 750, \"\u3053\u3093\u306b\u3061\u306f\u3001\u4e16\u754c\uff01\")\nc.save()\n<\/code><\/pre>\n<h3>\u8907\u6570\u30da\u30fc\u30b8\u306ePDF\u4f5c\u6210<\/h3>\n<pre><code class=\"language-python\">from reportlab.pdfgen import canvas\n\nc = canvas.Canvas(\"multipage.pdf\")\nfor i in range(3):\n    c.drawString(100, 750, f\"Page {i+1}\")\n    c.showPage()  # \u65b0\u3057\u3044\u30da\u30fc\u30b8\nc.save()\n<\/code><\/pre>\n<h2>PDF\u64cd\u4f5c\u306e\u5fdc\u7528<\/h2>\n<h3>PDF\u30d5\u30a1\u30a4\u30eb\u306e\u7d50\u5408<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nmerger = pypdf.PdfWriter()\nfiles = ['file1.pdf', 'file2.pdf', 'file3.pdf']\n\nfor file in files:\n    merger.append(file)\n\nmerger.write('merged.pdf')\nmerger.close()\n<\/code><\/pre>\n<h3>PDF\u30d5\u30a1\u30a4\u30eb\u306e\u5206\u5272<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nwith 
open('source.pdf', 'rb') as file:\n    reader = pypdf.PdfReader(file)\n    for i, page in enumerate(reader.pages):\n        writer = pypdf.PdfWriter()\n        writer.add_page(page)\n        with open(f'page_{i+1}.pdf', 'wb') as output:\n            writer.write(output)\n<\/code><\/pre>\n<h3>PDF\u30da\u30fc\u30b8\u306e\u56de\u8ee2<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nwith open('source.pdf', 'rb') as file:\n    reader = pypdf.PdfReader(file)\n    writer = pypdf.PdfWriter()\n    \n    for page in reader.pages:\n        page.rotate(90)  # 90\u5ea6\u56de\u8ee2\n        writer.add_page(page)\n    \n    with open('rotated.pdf', 'wb') as output:\n        writer.write(output)\n<\/code><\/pre>\n<h2>\u9ad8\u5ea6\u306aPDF\u4f5c\u6210<\/h2>\n<h3>\u8868\u4ed8\u304dPDF\u306e\u4f5c\u6210<\/h3>\n<pre><code class=\"language-python\">from reportlab.lib.pagesizes import A4\nfrom reportlab.platypus import SimpleDocTemplate, Table, TableStyle\nfrom reportlab.lib import colors\n\ndoc = SimpleDocTemplate(\"table.pdf\", pagesize=A4)\ndata = [['\u540d\u524d', '\u5e74\u9f62', '\u8077\u696d'], ['\u7530\u4e2d', '30', '\u30a8\u30f3\u30b8\u30cb\u30a2'], ['\u4f50\u85e4', '25', '\u30c7\u30b6\u30a4\u30ca\u30fc']]\n\ntable = Table(data)\ntable.setStyle(TableStyle([\n    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),\n    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),\n    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),\n    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),\n    ('FONTSIZE', (0, 0), (-1, 0), 14),\n    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),\n    ('BACKGROUND', (0, 1), (-1, -1), colors.beige),\n    ('GRID', (0, 0), (-1, -1), 1, colors.black)\n]))\n\ndoc.build([table])\n<\/code><\/pre>\n<h3>\u30b0\u30e9\u30d5\u4ed8\u304dPDF\u4f5c\u6210<\/h3>\n<pre><code class=\"language-python\">from reportlab.pdfgen import canvas\nfrom reportlab.graphics.shapes import Drawing\nfrom reportlab.graphics.charts.barcharts import VerticalBarChart\nfrom reportlab.graphics.renderPDF import 
drawToFile\n\n# \u30b0\u30e9\u30d5\u4f5c\u6210\ndrawing = Drawing(400, 200)\nchart = VerticalBarChart()\nchart.x = 50\nchart.y = 50\nchart.height = 125\nchart.width = 300\nchart.data = [[10, 20, 30, 40]]\nchart.categoryAxis.categoryNames = ['A', 'B', 'C', 'D']\ndrawing.add(chart)\n\n# PDF\u4fdd\u5b58\ndrawToFile(drawing, 'chart.pdf')\n<\/code><\/pre>\n<h2>\u5b9f\u7528\u7684\u306a\u5fdc\u7528\u4f8b<\/h2>\n<h3>\u8acb\u6c42\u66f8\u751f\u6210\u30b7\u30b9\u30c6\u30e0<\/h3>\n<pre><code class=\"language-python\">from reportlab.pdfgen import canvas\nfrom reportlab.lib.pagesizes import A4\nimport datetime\n\ndef create_invoice(invoice_data):\n    c = canvas.Canvas(\"invoice.pdf\", pagesize=A4)\n    width, height = A4\n    \n    # \u30d8\u30c3\u30c0\u30fc\n    c.setFont(\"Helvetica-Bold\", 16)\n    c.drawString(50, height-50, \"\u8acb\u6c42\u66f8\")\n    \n    # \u65e5\u4ed8\n    c.setFont(\"Helvetica\", 12)\n    c.drawString(50, height-100, f\"\u767a\u884c\u65e5: {datetime.date.today()}\")\n    \n    # \u660e\u7d30\n    y = height - 150\n    for item in invoice_data:\n        c.drawString(50, y, f\"{item['name']}: \u00a5{item['price']:,}\")\n        y -= 20\n    \n    c.save()\n\n# \u4f7f\u7528\u4f8b\ninvoice_data = [{'name': '\u5546\u54c1A', 'price': 1000}, {'name': '\u5546\u54c1B', 'price': 2000}]\ncreate_invoice(invoice_data)\n<\/code><\/pre>\n<h3>Web\u4e0a\u306ePDF\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u30fb\u51e6\u7406<\/h3>\n<pre><code class=\"language-python\">import requests\nimport pypdf\nfrom io import BytesIO\n\ndef download_and_extract(url):\n    response = requests.get(url)\n    pdf_file = BytesIO(response.content)\n    \n    reader = pypdf.PdfReader(pdf_file)\n    text = \"\"\n    for page in reader.pages:\n        text += page.extract_text()\n    \n    return text\n\n# \u4f7f\u7528\u4f8b\uff08\u67b6\u7a7a\u306eURL\uff09\n# text = 
download_and_extract('https:\/\/example.com\/sample.pdf')\n<\/code><\/pre>\n<h3>PDF\u5185\u753b\u50cf\u62bd\u51fa<\/h3>\n<pre><code class=\"language-python\">import fitz  # PyMuPDF\n\ndef extract_images(pdf_path):\n    doc = fitz.open(pdf_path)\n    for page_num in range(doc.page_count):\n        page = doc[page_num]\n        image_list = page.get_images()\n        \n        for img_index, img in enumerate(image_list):\n            xref = img[0]\n            pix = fitz.Pixmap(doc, xref)\n            if pix.n - pix.alpha &lt; 4:  # GRAY or RGB\u30ab\u30e9\u30fc\u30b9\u30da\u30fc\u30b9\n                pix.save(f\"image_page{page_num}_{img_index}.png\")\n            pix = None\n    \n    doc.close()\n<\/code><\/pre>\n<h2>PDF\u30d5\u30a9\u30fc\u30e0\u51e6\u7406<\/h2>\n<h3>\u30d5\u30a9\u30fc\u30e0\u30d5\u30a3\u30fc\u30eb\u30c9\u306e\u8aad\u307f\u53d6\u308a<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nwith open('form.pdf', 'rb') as file:\n    reader = pypdf.PdfReader(file)\n    if reader.is_encrypted:\n        reader.decrypt('')\n    \n    fields = reader.get_form_text_fields()\n    for field_name, field_value in fields.items():\n        print(f\"{field_name}: {field_value}\")\n<\/code><\/pre>\n<h3>\u30d5\u30a9\u30fc\u30e0\u30d5\u30a3\u30fc\u30eb\u30c9\u306e\u66f8\u304d\u8fbc\u307f<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\nreader = pypdf.PdfReader('form.pdf')\nwriter = pypdf.PdfWriter()\n\n# \u30d5\u30a9\u30fc\u30e0\u30d5\u30a3\u30fc\u30eb\u30c9\u306b\u5024\u3092\u8a2d\u5b9a\nwriter.clone_reader_document_root(reader)\nwriter.update_page_form_field_values(\n    writer.pages[0], {'field_name': 'new_value'}\n)\n\nwith open('filled_form.pdf', 'wb') as output:\n    writer.write(output)\n<\/code><\/pre>\n<h2>\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u6700\u9069\u5316<\/h2>\n<h3>\u30e1\u30e2\u30ea\u52b9\u7387\u7684\u306a\u5927\u5bb9\u91cfPDF\u51e6\u7406<\/h3>\n<pre><code class=\"language-python\">import pypdf\n\ndef process_large_pdf(pdf_path, 
output_path):\n    with open(pdf_path, 'rb') as input_file:\n        reader = pypdf.PdfReader(input_file)\n        writer = pypdf.PdfWriter()\n        \n        # \u30da\u30fc\u30b8\u5358\u4f4d\u3067\u51e6\u7406\n        for page_num in range(len(reader.pages)):\n            page = reader.pages[page_num]\n            # \u5fc5\u8981\u306a\u51e6\u7406\u3092\u5b9f\u884c\n            writer.add_page(page)\n            \n            # 100\u30da\u30fc\u30b8\u3054\u3068\u306b\u9032\u6357\u3092\u8868\u793a\n            if page_num % 100 == 0:\n                print(f\"\u51e6\u7406\u4e2d: {page_num}\/{len(reader.pages)}\")\n        \n        with open(output_path, 'wb') as output_file:\n            writer.write(output_file)\n<\/code><\/pre>\n<h3>\u30d0\u30c3\u30c1\u51e6\u7406<\/h3>\n<pre><code class=\"language-python\">import os\nimport pypdf\nfrom concurrent.futures import ThreadPoolExecutor\n\ndef process_pdf(file_path):\n    try:\n        with open(file_path, 'rb') as file:\n            reader = pypdf.PdfReader(file)\n            text = \"\"\n            for page in reader.pages:\n                text += page.extract_text()\n        return f\"{file_path}: {len(text)} characters\"\n    except Exception as e:\n        return f\"{file_path}: Error - {e}\"\n\ndef batch_process(folder_path):\n    pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]\n    full_paths = [os.path.join(folder_path, f) for f in pdf_files]\n    \n    with ThreadPoolExecutor(max_workers=4) as executor:\n        results = list(executor.map(process_pdf, full_paths))\n    \n    return results\n<\/code><\/pre>\n<h2>\u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0<\/h2>\n<h3>\u6697\u53f7\u5316PDF\u5bfe\u5fdc<\/h3>\n<pre><code class=\"language-python\">import pypdf\nimport getpass\n\ndef read_encrypted_pdf(pdf_path):\n    try:\n        with open(pdf_path, 'rb') as file:\n            reader = pypdf.PdfReader(file)\n            \n            if reader.is_encrypted:\n                password = 
getpass.getpass(\"PDF\u30d1\u30b9\u30ef\u30fc\u30c9\u3092\u5165\u529b: \")\n                if not reader.decrypt(password):\n                    print(\"\u30d1\u30b9\u30ef\u30fc\u30c9\u304c\u9593\u9055\u3063\u3066\u3044\u307e\u3059\")\n                    return None\n            \n            text = \"\"\n            for page in reader.pages:\n                text += page.extract_text()\n            return text\n    \n    except Exception as e:\n        print(f\"\u30a8\u30e9\u30fc: {e}\")\n        return None\n<\/code><\/pre>\n<h3>\u5805\u7262\u306aPDF\u51e6\u7406<\/h3>\n<pre><code class=\"language-python\">import pypdf\nimport logging\n\ndef safe_pdf_process(pdf_path):\n    try:\n        with open(pdf_path, 'rb') as file:\n            reader = pypdf.PdfReader(file)\n            \n            # PDF\u306e\u6574\u5408\u6027\u30c1\u30a7\u30c3\u30af\n            if len(reader.pages) == 0:\n                raise ValueError(\"PDF\u306b\u30da\u30fc\u30b8\u304c\u3042\u308a\u307e\u305b\u3093\")\n            \n            results = []\n            for i, page in enumerate(reader.pages):\n                try:\n                    text = page.extract_text()\n                    results.append(text)\n                except Exception as page_error:\n                    logging.warning(f\"\u30da\u30fc\u30b8 {i+1} \u306e\u51e6\u7406\u3067\u30a8\u30e9\u30fc: {page_error}\")\n                    results.append(\"\")\n            \n            return results\n    \n    except Exception as e:\n        logging.error(f\"PDF\u51e6\u7406\u30a8\u30e9\u30fc: {e}\")\n        return 
[]\n<\/code><\/pre>\n<h2>PDF\u30e9\u30a4\u30d6\u30e9\u30ea\u6bd4\u8f03\u8868<\/h2>\n<table>\n<thead>\n<tr>\n<th>\u30e9\u30a4\u30d6\u30e9\u30ea<\/th>\n<th>\u8aad\u307f\u53d6\u308a<\/th>\n<th>\u4f5c\u6210<\/th>\n<th>\u7de8\u96c6<\/th>\n<th>\u901f\u5ea6<\/th>\n<th>\u65e5\u672c\u8a9e\u5bfe\u5fdc<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>PyPDF2\/pypdf<\/td>\n<td>\u25cb<\/td>\n<td>\u25b3<\/td>\n<td>\u25cb<\/td>\n<td>\u4e2d<\/td>\n<td>\u25cb<\/td>\n<\/tr>\n<tr>\n<td>reportlab<\/td>\n<td>\u00d7<\/td>\n<td>\u25ce<\/td>\n<td>\u00d7<\/td>\n<td>\u4e2d<\/td>\n<td>\u25cb<\/td>\n<\/tr>\n<tr>\n<td>pdfplumber<\/td>\n<td>\u25ce<\/td>\n<td>\u00d7<\/td>\n<td>\u00d7<\/td>\n<td>\u4f4e<\/td>\n<td>\u25cb<\/td>\n<\/tr>\n<tr>\n<td>PyMuPDF<\/td>\n<td>\u25ce<\/td>\n<td>\u25cb<\/td>\n<td>\u25ce<\/td>\n<td>\u9ad8<\/td>\n<td>\u25cb<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h2>\u30c8\u30e9\u30d6\u30eb\u30b7\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0<\/h2>\n<h3>\u3088\u304f\u3042\u308b\u554f\u984c\u3068\u5bfe\u51e6\u6cd5<\/h3>\n<pre><code class=\"language-python\"># \u6587\u5b57\u5316\u3051\u5bfe\u7b56\nimport pypdf\n\ndef fix_encoding_issues(pdf_path):\n    with open(pdf_path, 'rb') as file:\n        reader = pypdf.PdfReader(file)\n        text = \"\"\n        for page in reader.pages:\n            page_text = page.extract_text()\n            # \u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u554f\u984c\u306e\u4fee\u6b63\n            try:\n                page_text = page_text.encode('latin1').decode('utf-8')\n            except:\n                pass\n            text += page_text\n        return text\n\n# \u30d5\u30a1\u30a4\u30eb\u30b5\u30a4\u30ba\u6700\u9069\u5316\ndef optimize_pdf_size(input_path, output_path):\n    reader = pypdf.PdfReader(input_path)\n    writer = pypdf.PdfWriter()\n    \n    for page in reader.pages:\n        writer.add_page(page)\n    \n    # \u5727\u7e2e\u8a2d\u5b9a\n    writer.compress_identical_objects()\n    \n    with open(output_path, 'wb') as output:\n        
writer.write(output)\n<\/code><\/pre>\n<h2>\u307e\u3068\u3081<\/h2>\n<p>Python\u3067\u306ePDF\u64cd\u4f5c\u306f\u3001\u9069\u5207\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u9078\u629e\u306b\u3088\u308a\u69d8\u3005\u306a\u51e6\u7406\u304c\u53ef\u80fd\u3067\u3059\u3002\u8aad\u307f\u53d6\u308a\u306b\u306fpypdf\u3084pdfplumber\u3001\u4f5c\u6210\u306b\u306freportlab\u3001\u9ad8\u5ea6\u306a\u7de8\u96c6\u306b\u306fPyMuPDF\u3092\u4f7f\u3044\u5206\u3051\u308b\u3053\u3068\u3067\u3001\u52b9\u7387\u7684\u306aPDF\u51e6\u7406\u30b7\u30b9\u30c6\u30e0\u3092\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u672c\u8a18\u4e8b\u306e\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3092\u53c2\u8003\u306b\u3001\u3042\u306a\u305f\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306b\u6700\u9069\u306aPDF\u30bd\u30ea\u30e5\u30fc\u30b7\u30e7\u30f3\u3092\u5b9f\u88c5\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0\u3068\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u6700\u9069\u5316\u3082\u5fd8\u308c\u305a\u306b\u5b9f\u88c5\u3059\u308b\u3053\u3068\u3067\u3001\u5b9f\u7528\u7684\u306a\u30b7\u30b9\u30c6\u30e0\u304c\u5b8c\u6210\u3057\u307e\u3059\u3002<\/p>\n<h2>\u53c2\u8003\u6587\u732e<\/h2>\n<ul>\n<li>PyPDF2\/pypdf\u516c\u5f0f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8<\/li>\n<li>reportlab\u516c\u5f0f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8<\/li>\n<li>pdfplumber GitHub<\/li>\n<li>PyMuPDF\u516c\u5f0f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u25a0<a href=\"https:\/\/amzn.to\/3VxGkpx\">\u3089\u304f\u3089\u304fPython\u587e &#8211; \u8aad\u3080\u3060\u3051\u3067\u30de\u30b9\u30bf\u30fc<\/a><\/h2>\n\n\n\n<p><iframe loading=\"lazy\" width=\"560\" height=\"314\" src=\"\/\/www.youtube.com\/embed\/7iX9nAJE0cE\" 
allowfullscreen=\"allowfullscreen\"><\/iframe><\/p>\n\n\n\n<p>\u25a0\u30d7\u30ed\u30f3\u30d7\u30c8\u3060\u3051\u3067\u30aa\u30ea\u30b8\u30ca\u30eb\u30a2\u30d7\u30ea\u3092\u958b\u767a\u30fb\u516c\u958b\u3057\u3066\u307f\u305f\uff01\uff01<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"amku7lnfRH\"><a href=\"https:\/\/techgym.jp\/column\/ori-app\/\">\u30d7\u30ed\u30f3\u30d7\u30c8\u3060\u3051\u3067\u30aa\u30ea\u30b8\u30ca\u30eb\u30a2\u30d7\u30ea\u3092\u958b\u767a\u30fb\u516c\u958b\u3057\u3066\u307f\u305f\uff01\uff01<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; visibility: hidden;\" title=\"&#8220;\u30d7\u30ed\u30f3\u30d7\u30c8\u3060\u3051\u3067\u30aa\u30ea\u30b8\u30ca\u30eb\u30a2\u30d7\u30ea\u3092\u958b\u767a\u30fb\u516c\u958b\u3057\u3066\u307f\u305f\uff01\uff01&#8221; &#8212; \u3010\u30c6\u30c3\u30af\u30b8\u30e0\u3011\u683c\u5b89\u30fb\u5bfe\u9762\u578b\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\" src=\"https:\/\/techgym.jp\/column\/ori-app\/embed\/#?secret=oZPOkdCsmP#?secret=amku7lnfRH\" data-secret=\"amku7lnfRH\" width=\"600\" height=\"338\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<p>\u25a0AI\u6642\u4ee3\u306e\u7b2c\u4e00\u6b69\uff01\u300cAI\u99c6\u52d5\u958b\u767a\u30b3\u30fc\u30b9\u300d\u306f\u3058\u3081\u307e\u3057\u305f\uff01<\/p>\n\n\n\n<p>\u30c6\u30c3\u30af\u30b8\u30e0\u6771\u4eac\u672c\u6821\u3067\u5148\u884c\u958b\u59cb\u3002<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"AYEcmgrbLg\"><a 
href=\"https:\/\/techgym.jp\/about\/ai-driven-development\/\">AI\u99c6\u52d5\u958b\u767a\/\u751f\u6210AI\u30a8\u30f3\u30b8\u30cb\u30a2\u30b3\u30fc\u30b9\uff08\u521d\u5fc3\u8005\u5411\u3051\uff09<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; visibility: hidden;\" title=\"&#8220;AI\u99c6\u52d5\u958b\u767a\/\u751f\u6210AI\u30a8\u30f3\u30b8\u30cb\u30a2\u30b3\u30fc\u30b9\uff08\u521d\u5fc3\u8005\u5411\u3051\uff09&#8221; &#8212; \u3010\u30c6\u30c3\u30af\u30b8\u30e0\u3011\u683c\u5b89\u30fb\u5bfe\u9762\u578b\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\" src=\"https:\/\/techgym.jp\/about\/ai-driven-development\/embed\/#?secret=RaxGPscGzh#?secret=AYEcmgrbLg\" data-secret=\"AYEcmgrbLg\" width=\"600\" height=\"338\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<p>\u25a0\u30c6\u30c3\u30af\u30b8\u30e0\u6771\u4eac\u672c\u6821<\/p>\n\n\n\n<p>\u300c\u6b66\u7530\u587e\u300d\u306e\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u7248\u3068\u3044\u3048\u3070\u300c\u30c6\u30c3\u30af\u30b8\u30e0\u300d\u3002<br>\u8b1b\u7fa9\u52d5\u753b\u306a\u3057\u3001\u6559\u79d1\u66f8\u306a\u3057\u3002\u300c\u9032\u6357\u7ba1\u7406\u3068\u30b3\u30fc\u30c1\u30f3\u30b0\u300d\u3067\u52b9\u7387\u5b66\u7fd2\u3002<br>\u3088\u308a\u65e9\u304f\u3001\u3088\u308a\u5b89\u304f\u3001\u3057\u304b\u3082\u5bfe\u9762\u578b\u306e\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\u3067\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"RJGx2rekhC\"><a href=\"https:\/\/techgym.jp\/tokyo\/tokyo_honko\/\">\u30c6\u30c3\u30af\u30b8\u30e0\u6771\u4eac\u672c\u6821<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; 
visibility: hidden;\" title=\"&#8220;\u30c6\u30c3\u30af\u30b8\u30e0\u6771\u4eac\u672c\u6821&#8221; &#8212; \u3010\u30c6\u30c3\u30af\u30b8\u30e0\u3011\u683c\u5b89\u30fb\u5bfe\u9762\u578b\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\" src=\"https:\/\/techgym.jp\/tokyo\/tokyo_honko\/embed\/#?secret=AqF5880EZ5#?secret=RJGx2rekhC\" data-secret=\"RJGx2rekhC\" width=\"600\" height=\"338\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<p>\uff1c\u77ed\u671f\u8b1b\u7fd2\uff1e5\u65e5\u30675\u4e07\u5186\u306e\u300cPython\u30df\u30cb\u30ad\u30e3\u30f3\u30d7\u300d\u958b\u50ac\u4e2d\u3002<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"HaujnJ2YwX\"><a href=\"https:\/\/techgym.jp\/event\/nagatacho_camp\/\">\u3010\u6700\u901f\u30fb\u78ba\u5b9f\u3011\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u653b\u7565\u300c\u6c38\u7530\u753aPython\u30df\u30cb\u30ad\u30e3\u30f3\u30d7\u300d\u30105\u65e5\u9593\u30675\u4e07\u5186\u3011<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; visibility: hidden;\" title=\"&#8220;\u3010\u6700\u901f\u30fb\u78ba\u5b9f\u3011\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u653b\u7565\u300c\u6c38\u7530\u753aPython\u30df\u30cb\u30ad\u30e3\u30f3\u30d7\u300d\u30105\u65e5\u9593\u30675\u4e07\u5186\u3011&#8221; &#8212; \u3010\u30c6\u30c3\u30af\u30b8\u30e0\u3011\u683c\u5b89\u30fb\u5bfe\u9762\u578b\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\" src=\"https:\/\/techgym.jp\/event\/nagatacho_camp\/embed\/#?secret=JpDWupDE98#?secret=HaujnJ2YwX\" data-secret=\"HaujnJ2YwX\" width=\"600\" height=\"338\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" 
scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<p>\uff1c\u67081\u958b\u50ac\uff1e\u653e\u9001\u4f5c\u5bb6\u306b\u3088\u308b\u6620\u50cf\u30c7\u30a3\u30ec\u30af\u30bf\u30fc\u990a\u6210\u8b1b\u5ea7<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"X0lcHgtR7e\"><a href=\"https:\/\/techgym.jp\/event\/video_director\/\">\u73fe\u5f79\u653e\u9001\u4f5c\u5bb6\u304c\u6559\u3048\u308b\u52d5\u753b\u8b1b\u5ea7\uff01\u300e\uff24\uff2f\uff27\uff21\u300f<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; visibility: hidden;\" title=\"&#8220;\u73fe\u5f79\u653e\u9001\u4f5c\u5bb6\u304c\u6559\u3048\u308b\u52d5\u753b\u8b1b\u5ea7\uff01\u300e\uff24\uff2f\uff27\uff21\u300f&#8221; &#8212; \u3010\u30c6\u30c3\u30af\u30b8\u30e0\u3011\u683c\u5b89\u30fb\u5bfe\u9762\u578b\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\" src=\"https:\/\/techgym.jp\/event\/video_director\/embed\/#?secret=DQhXmu5Jxk#?secret=X0lcHgtR7e\" data-secret=\"X0lcHgtR7e\" width=\"600\" height=\"338\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<p>\uff1c\u30aa\u30f3\u30e9\u30a4\u30f3\u7121\u6599\uff1e\u30bc\u30ed\u304b\u3089\u59cb\u3081\u308bPython\u7206\u901f\u8b1b\u5ea7<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"6G2zQ5kD7F\"><a href=\"https:\/\/techgym.jp\/tokyo_python\/\">\u3010\u7121\u6599\u30fb\u30aa\u30f3\u30e9\u30a4\u30f3\u3011\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308bPython\u7206\u901f\u8b1b\u5ea7<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; visibility: hidden;\" 
title=\"&#8220;\u3010\u7121\u6599\u30fb\u30aa\u30f3\u30e9\u30a4\u30f3\u3011\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308bPython\u7206\u901f\u8b1b\u5ea7&#8221; &#8212; \u3010\u30c6\u30c3\u30af\u30b8\u30e0\u3011\u683c\u5b89\u30fb\u5bfe\u9762\u578b\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\" src=\"https:\/\/techgym.jp\/tokyo_python\/embed\/#?secret=8BIEAcbtSd#?secret=6G2zQ5kD7F\" data-secret=\"6G2zQ5kD7F\" width=\"600\" height=\"338\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n","protected":false},"excerpt":{"rendered":"<p>PDF\uff08Portable Document Format\uff09\u306f\u3001\u30d3\u30b8\u30cd\u30b9\u3084\u5b66\u8853\u5206\u91ce\u3067\u6700\u3082\u4f7f\u7528\u3055\u308c\u308b\u6587\u66f8\u5f62\u5f0f\u306e\u4e00\u3064\u3067\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001Python\u3092\u4f7f\u3063\u305fPDF\u64cd\u4f5c\u306e\u65b9\u6cd5\u3092\u3001\u8aad\u307f\u53d6\u308a\u304b\u3089\u4f5c\u6210\u3001\u7de8\u96c6\u307e\u3067\u5fb9\u5e95\u89e3\u8aac\u3057\u307e\u3059\u3002\u521d\u5fc3\u8005\u304b\u3089 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":42501,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[5],"tags":[],"class_list":["post-43019","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-column"],"views":115,"jetpack_featured_media_url":"\/wp-content\/uploads\/2025\/07\/f3403acf5c65aedec0dba821c4c26404.png","jetpack_sharing_enabled":true,"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/posts\/43019","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/comments?post=43019"}],"version-history":[{"count":0,"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/posts\/43019\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/media\/42501"}],"wp:attachment":[{"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/media?parent=43019"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/categories?post=43019"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/techgym.jp\/wp-json\/wp\/v2\/tags?post=43019"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}