Skip to content

Instantly share code, notes, and snippets.

@atuyosi
Created April 27, 2018 12:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atuyosi/796ef46e46a85f3d837f891e551565a2 to your computer and use it in GitHub Desktop.
Save atuyosi/796ef46e46a85f3d837f891e551565a2 to your computer and use it in GitHub Desktop.
暫定版陸自イラク日報テキスト付PDF生成スクリプト
# -*- coding: utf-8 -*-
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
# from reportlab.rl_config import defaultPageSize
from reportlab.lib.pagesizes import A4, landscape
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from reportlab.lib.colors import red,black
from reportlab.lib.units import cm
from pdf2image import convert_from_path, convert_from_bytes
import json
from pathlib import Path
import tempfile
import natsort
import math
# [PDF生成 (ReportLab) | Python-izm](https://www.python-izm.com/third_party/pdf/repotlab/)
# [PythonでKindle用自炊PDFを作ろうと実験 - Qiita](https://qiita.com/Gen6/items/5ace54481d66b653fa15)
def create_pdf(pdffile, output, jsonlist):
    """Write a PDF of page scans with the OCR text drawn on top of each page.

    Every page of ``pdffile`` is rasterised with ``convert_from_path``, placed
    on an A4 page (landscape when the scan is wider than it is tall), and each
    OCR entry from the JSON file at the same index is drawn at the position
    given by its bounding box.

    Args:
        pdffile: Path of the source PDF to rasterise.
        output: Path of the PDF file to create.
        jsonlist: Iterable of path objects (anything with ``.open()``), one
            JSON file per page, in page order. Each file is expected to hold a
            ``textAnnotations`` list whose entries carry ``description`` and
            ``boundingPoly.vertices``.
    """
    font_name = 'HeiseiKakuGo-W5'

    print("processing pdf: {0}".format(pdffile))
    text_pages = _load_text_pages(jsonlist)
    print("json data loaded.")

    with tempfile.TemporaryDirectory() as workdir:
        # The rendered pages are written into workdir, so all drawing happens
        # before the directory is removed.
        page_images = convert_from_path(pdffile, output_folder=workdir)
        print(len(page_images))
        if len(text_pages) != len(page_images):
            print("warning: {0} json files for {1} pages".format(
                len(text_pages), len(page_images)))

        pdf = canvas.Canvas(output)
        pdf.setPageSize(A4)
        pdf.setAuthor('Provide by Information disclosure')
        pdf.setTitle('陸上自衛隊イラク日報PDF')
        pdf.setSubject('')
        pdfmetrics.registerFont(UnicodeCIDFont(font_name))

        for i, image in enumerate(page_images):
            print(f"page: {i}")
            print(image.size)
            image_width, image_height = image.size

            # Wider-than-tall scans go on a landscape page.
            if image_width > image_height:
                page_width, page_height = landscape(A4)
            else:
                page_width, page_height = A4
            pdf.setPageSize((page_width, page_height))

            # Fit the scan inside the page keeping its aspect ratio, centred.
            # The same scale and offsets are applied to the text below so the
            # overlay stays aligned even when the scan is not exactly A4-shaped.
            scale = min(page_width / image_width, page_height / image_height)
            x_offset = (page_width - scale * image_width) / 2
            y_offset = (page_height - scale * image_height) / 2

            # A uniquely named file with a real ".jpg" extension inside the
            # working directory; avoids reopening an already-open
            # NamedTemporaryFile by name, which is not portable.
            jpeg_path = str(Path(workdir) / f"overlay_page_{i}.jpg")
            image.save(jpeg_path, 'JPEG')
            pdf.drawImage(jpeg_path, x_offset, y_offset,
                          width=scale * image_width,
                          height=scale * image_height)

            pdf.setFillColor(red, alpha=0.7)

            # Tolerate fewer JSON files than pages: such pages get no text.
            annotations = text_pages[i] if i < len(text_pages) else []

            # The first entry covers the whole page, so skip it.
            for node in annotations[1:]:
                vertices = node['boundingPoly']['vertices']
                # Vertex 0 supplies the top edge and vertex 3 the anchor point
                # (presumably top-left / bottom-left for upright text).
                _, top_y = _vertex_xy(vertices, 0)
                anchor_x, bottom_y = _vertex_xy(vertices, 3)

                # Box height in pixels -> font size in points, via the same
                # scale used to place the image (never below 1pt).
                fontsize = max(1, math.ceil((bottom_y - top_y) * scale))
                pdf.setFont(font_name, fontsize)

                # PDF y coordinates start at the bottom-left corner, image y
                # coordinates at the top, hence the flip.
                pdf.drawString(x_offset + scale * anchor_x,
                               y_offset + scale * (image_height - bottom_y),
                               node['description'])

            pdf.showPage()

        pdf.save()


def _load_text_pages(jsonlist):
    """Return one list of OCR annotations per JSON file, in the given order."""
    pages = []
    for json_path in jsonlist:
        # JSON is UTF-8; do not depend on the locale's default encoding.
        with json_path.open(encoding='utf-8') as fh:
            raw_data = json.load(fh)
        # A page without recognised text may have no 'textAnnotations' key.
        pages.append(raw_data.get('textAnnotations', []))
    return pages


def _vertex_xy(vertices, index):
    """Return the (x, y) pixel position of one bounding-box vertex as ints."""
    vertex = vertices[index]
    # A coordinate key may be absent when its value is 0 (seen in Google Cloud
    # Vision style output -- confirm for your data); treat it as 0.
    return int(vertex.get('x', 0)), int(vertex.get('y', 0))
if __name__ == "__main__":
    # TODO: take the file name as an argument, or process everything in bulk.
    source_pdf = "pdf/イラク後送業務隊/060720.pdf"
    json_pattern = "./json/イラク後送業務隊/060720_*.json"

    # Collect the per-page JSON files and put them in natural order so the
    # numbered suffixes follow the page sequence.
    matched_json = Path('.').glob(json_pattern)
    ordered_json = natsort.natsorted(list(matched_json))

    create_pdf(source_pdf, output="test_out.pdf", jsonlist=ordered_json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment