Created
April 27, 2018 12:34
-
-
Save atuyosi/796ef46e46a85f3d837f891e551565a2 to your computer and use it in GitHub Desktop.
暫定版陸自イラク日報テキスト付PDF生成スクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from reportlab.pdfgen import canvas | |
from reportlab.pdfbase import pdfmetrics | |
# from reportlab.rl_config import defaultPageSize | |
from reportlab.lib.pagesizes import A4, landscape | |
from reportlab.pdfbase.cidfonts import UnicodeCIDFont | |
from reportlab.lib.colors import red,black | |
from reportlab.lib.units import cm | |
from pdf2image import convert_from_path, convert_from_bytes | |
import json | |
from pathlib import Path | |
import tempfile | |
import natsort | |
import math | |
# [PDF生成 (ReportLab) | Python-izm](https://www.python-izm.com/third_party/pdf/repotlab/) | |
# [PythonでKindle用自炊PDFを作ろうと実験 - Qiita](https://qiita.com/Gen6/items/5ace54481d66b653fa15) | |
def _vertex_xy(vertex):
    """Return the ``(x, y)`` pixel coordinates of one bounding-box vertex.

    A missing ``x`` or ``y`` key is read as 0.
    NOTE(review): the JSON layout (``textAnnotations`` / ``boundingPoly`` /
    ``vertices``) looks like Google Cloud Vision output, which leaves out
    zero-valued coordinates -- confirm against the OCR tool actually used.
    """
    return int(vertex.get('x', 0)), int(vertex.get('y', 0))


def _load_text_pages(jsonlist):
    """Load one list of OCR text annotations per JSON file in *jsonlist*.

    A file without a ``textAnnotations`` key (presumably a page on which no
    text was detected) contributes an empty list instead of raising KeyError.
    """
    text_pages = []
    for path in jsonlist:
        # Binary mode lets ``json`` detect the UTF-8/16/32 encoding itself,
        # independent of the platform default; ``with`` closes the handle.
        with path.open('rb') as fp:
            raw_data = json.load(fp)
        text_pages.append(raw_data.get('textAnnotations', []))
    return text_pages


def create_pdf(pdffile, output, jsonlist):
    """Build a PDF whose pages show the scanned image with OCR text on top.

    Every page of *pdffile* is rendered to an image, placed on an A4 page
    (landscape when the image is wider than tall), and each OCR annotation is
    drawn as semi-transparent red text at the position of its bounding box.

    Args:
        pdffile: Path of the source (image-only) PDF.
        output: Path of the PDF file to write.
        jsonlist: ``pathlib.Path`` objects of the per-page OCR JSON files, in
            page order. Pages beyond the end of this list get no text layer.
    """
    print("processing pdf: {0}".format(pdffile))
    text_pages = _load_text_pages(jsonlist)
    print("json data loaded.")

    font_name = 'HeiseiKakuGo-W5'

    # All page processing stays inside the temporary directory's lifetime:
    # the rendered page images may be read lazily from files stored there.
    with tempfile.TemporaryDirectory() as workdir:
        images_from_path = convert_from_path(pdffile, output_folder=workdir)
        print(len(images_from_path))

        newPdfPage = canvas.Canvas(output)
        newPdfPage.setPageSize(A4)
        # No saveState() here: it was never paired with restoreState() and
        # only left an unbalanced graphics-state push on the first page.
        newPdfPage.setAuthor('Provide by Information disclosure')
        newPdfPage.setTitle('陸上自衛隊イラク日報PDF')
        newPdfPage.setSubject('')
        pdfmetrics.registerFont(UnicodeCIDFont(font_name))

        for i, image in enumerate(images_from_path):
            print(f"page: {i}")
            print(image.size)
            image_width, image_height = image.size

            # Wider-than-tall scans go on a landscape page.
            if image_width > image_height:
                page_width, page_height = landscape(A4)
            else:
                page_width, page_height = A4
            newPdfPage.setPageSize((page_width, page_height))

            # A uniquely named JPEG inside the work directory replaces the
            # NamedTemporaryFile: it has a proper ".jpg" suffix and can be
            # reopened by name on every platform.
            jpeg_path = str(Path(workdir) / "overlay-page-{0:05d}.jpg".format(i))
            image.save(jpeg_path, 'JPEG')
            newPdfPage.drawImage(jpeg_path, 0, 0,
                                 width=page_width, height=page_height,
                                 preserveAspectRatio=True)

            # drawImage() with preserveAspectRatio scales the image uniformly
            # and centres it in the page box; the text layer must apply the
            # same scale and centring offsets to line up with the image.
            scale = min(page_height / image_height, page_width / image_width)
            x_offset = (page_width - scale * image_width) / 2
            y_offset = (page_height - scale * image_height) / 2

            newPdfPage.setFillColor(red, alpha=0.7)

            if i < len(text_pages):
                annotations = text_pages[i]
            else:
                print("no OCR json for page: {0}".format(i))
                annotations = []

            # The first annotation covers the whole page, so skip it.
            for node in annotations[1:]:
                vertices = node['boundingPoly']['vertices']
                # assumes vertex 0 is the top-left and vertex 3 the
                # bottom-left corner of the box -- TODO confirm for rotated text
                _, top_y = _vertex_xy(vertices[0])
                left_x, bottom_y = _vertex_xy(vertices[3])

                # Box height in pixels converted to points with the same
                # scale as the image; clamp so the font size stays positive.
                fontsize = max(1, math.ceil((bottom_y - top_y) * scale))
                newPdfPage.setFont(font_name, fontsize)

                # PDF coordinates have their origin at the bottom-left, image
                # coordinates at the top-left, hence the flipped Y value.
                newPdfPage.drawString(
                    x_offset + scale * left_x,
                    y_offset + scale * (image_height - bottom_y),
                    node['description'])

            newPdfPage.showPage()

        newPdfPage.save()
if __name__ == "__main__":
    # Command-line entry point. All arguments are optional; without any, the
    # script processes the same sample files that used to be hard-coded.
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate a PDF with an OCR text overlay from a scanned PDF.")
    parser.add_argument(
        "pdf", nargs="?", default="pdf/イラク後送業務隊/060720.pdf",
        help="source PDF file")
    parser.add_argument(
        "json_glob", nargs="?", default="./json/イラク後送業務隊/060720_*.json",
        help="glob pattern (relative to the current directory) of the "
             "per-page OCR JSON files")
    parser.add_argument(
        "-o", "--output", default="test_out.pdf",
        help="path of the PDF to write")
    args = parser.parse_args()

    # Natural sort so that e.g. "_10" comes after "_9" and the JSON files
    # line up with the PDF page order.
    json_path_list = natsort.natsorted(Path('.').glob(args.json_glob))
    if not json_path_list:
        print("warning: no json files match {0}".format(args.json_glob))

    create_pdf(args.pdf, output=args.output, jsonlist=json_path_list)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment