Skip to content

Instantly share code, notes, and snippets.

@momijiame
Created October 31, 2021 09:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save momijiame/2d3e0ad7cf92864fdda1ff8719af9372 to your computer and use it in GitHub Desktop.
Save momijiame/2d3e0ad7cf92864fdda1ff8719af9372 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""PDF にした Amazon の領収書から「注文日」「注文番号」「請求総額」を取り出すやつ
# 下準備
$ pip install pymupdf click
# 使い方
$ python amznreceipt.py -f <pdf-filepath>
"""
from __future__ import annotations
import fitz
import click
def _is_digital_order(page_text: str) -> bool:
return 'デジタル注⽂概要' in page_text
def _seek_normal_order(page_text: str) -> tuple[str, str, int]:
"""通常の注文から情報を取り出す"""
lines = page_text.split('\n')
order_date = order_id = billing_amount = None
for line in lines:
if '注文日: ' in line:
order_date = line[line.rfind('注文日: ') + 5:]
if '注⽂番号 ' in line:
order_id = line[line.rfind('注⽂番号 ') + 5:]
if 'ご請求額:¥' in line:
billing_amount = int(line[line.rfind('ご請求額:¥') + 6:].replace(',', ''))
return order_date, order_id, billing_amount
def _seek_digital_order(page_text: str) -> tuple[str, str, int]:
"""デジタル注文から情報を取り出す"""
lines = page_text.split('\n')
order_date = order_id = billing_amount = None
for idx, line in enumerate(lines):
if '注⽂⽇: ' in line:
order_date = line[line.rfind('注⽂⽇: ') + 5:]
if '注⽂番号: ' in line:
order_id = line[line.rfind('注⽂番号: ') + 6:]
if '総計:' in line:
# 次の行に総計があることを仮定する
next_line = lines[idx + 1]
billing_amount = int(next_line.replace('¥', '').replace(',', ''))
return order_date, order_id, billing_amount
# Command-line entry point: prints one comma-separated record
# (order date, order ID, billed amount) for each page of the given PDF.
# Documented with comments rather than a docstring on purpose: click would
# show a docstring in the --help output, which would change behaviour.
@click.command()
@click.option(
    '--filepath', '-f',
    type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True),
    required=True,
    help='Amazon receipt PDF file',
)
def main(filepath: str):
    # Open the PDF; the context manager closes it when the loop is done.
    with fitz.open(filepath) as document:
        for page in document:
            # Text contained in this page.
            text = page.get_text()
            # Digital and regular orders are laid out differently, so pick
            # the extractor that matches the page.
            if _is_digital_order(text):
                fields = _seek_digital_order(text)
            else:
                fields = _seek_normal_order(text)
            # Emit the fields comma-separated; a missing field is written
            # as the string 'None'.
            print(','.join(map(str, fields)))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment