Skip to content

Instantly share code, notes, and snippets.

@nullableVoidPtr
Last active December 10, 2023 00:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nullableVoidPtr/b5193cca8a1b062a42ca229525a05d07 to your computer and use it in GitHub Desktop.
Save nullableVoidPtr/b5193cca8a1b062a42ca229525a05d07 to your computer and use it in GitHub Desktop.
Generate transactions CSV from Commonwealth Bank of Australia PDF statements
#!/usr/bin/env python
# SPDX-License-Identifier: MIT
# Copyright nullableVoidPtr
import sys
import PyPDF2
import re
from decimal import Decimal
from datetime import datetime
from typing import Iterable, Tuple, Optional
def extract(filename: str) -> Iterable[Tuple[str, str, str, str]]:
print(f"Processing {filename}... ", end="")
assert (p := PyPDF2.PdfFileReader(filename)).decrypt("") == 1
rows = 0
year: Optional[int] = None
prev_date: Optional[datetime] = None
prev_balance: Optional[Decimal] = None
for table in (
split[-1]
for page in p.pages
if len(
split := page.extractText().split(
"Date\nTransaction\nDebit\nCredit\nBalance\n"
)
)
> 1
):
for fields in (
fields for row in table.split("\n \n") if len(fields := row.split("\n")) > 1
):
if m := re.match(r"^(\d+) OPENING BALANCE$", fields[1]):
year = int(m.group(1))
prev_date = datetime.strptime(f"{fields[0]} {year}", "%d %b %Y")
fields = fields[4:]
if "BALANCE CARRIED FORWARD" in fields[3]:
break
elif "CREDIT INTEREST EARNED" in fields[1]:
continue
elif "BONUS INTEREST EARNED" in fields[1]:
continue
elif "CLOSING BALANCE" in fields[0]:
break
assert year is not None
date = datetime.strptime(f"{fields[0]} {year}", "%d %b %Y")
if prev_date is not None:
if prev_date > date:
year += 1
date = datetime.strptime(f"{fields[0]} {year}", "%d %b %Y")
if (balance := fields[-1]) == "$0.00":
a, b = fields[-3:-1]
transaction = fields[1:-3]
balance = balance[1:]
else:
a, b = fields[-4:-2]
transaction = fields[1:-4]
balance = ("+" if balance == "CR" else "-") + fields[-2][1:].replace(
",", ""
)
amount = (f"+{b}" if a == "$" else f"-{a}").replace(",", "")
transaction = " ".join(transaction)
if prev_balance is not None:
assert Decimal(balance) == Decimal(prev_balance) + Decimal(amount)
yield (date.strftime("%d/%m/%Y"), amount, transaction, balance)
prev_date = date
prev_balance = Decimal(balance)
rows += 1
print(rows, "transactions recorded.")
def write(filename, transactions):
with open(filename, "w+", newline="") as file:
for date, amount, transaction, balance in transactions:
file.write(f'{date},"{amount}","{transaction}","{balance}"\r\n')
if __name__ == "__main__":
if len(sys.argv) < 3:
print(f"Usage: {sys.argv[0]} filename... output")
else:
closing_balance: Optional[Decimal] = None
transactions = []
for filename in sys.argv[1:-1]:
checked = False
for date, amount, transaction, balance in extract(filename):
if not checked and closing_balance is not None:
assert closing_balance + Decimal(amount) == Decimal(balance)
checked = True
transactions.insert(0, (date, amount, transaction, balance))
closing_balance = Decimal(balance)
write(sys.argv[-1], transactions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment