Skip to content

Instantly share code, notes, and snippets.

@EpicWink
Last active June 7, 2023 09:54
Show Gist options
  • Save EpicWink/fee98be0baf6bf0afc1ebec2f7063567 to your computer and use it in GitHub Desktop.
Save EpicWink/fee98be0baf6bf0afc1ebec2f7063567 to your computer and use it in GitHub Desktop.
Parse Suncorp bank statement and output transactions as CSV
"""Parse Suncorp bank statement text and output transactions as CSV.
Download e-statements, copy transactions into text file, pass this
file as stdin, send stdout to new CSV file.
```shell
cat statement.txt | python parse-suncorp-statement.py > transactions.csv
```
Set `ADDITIONAL_IGNORED_LINES` to exceptional extra lines to be
ignored.
Set `ADDITIONAL_INTERNET_TRANSFER_ACCOUNTS` to known extra external
internet transfer source names/
"""
import io
import re
import csv
import sys
import typing as t
import datetime
import dataclasses
CURRENCIES_REGEX = r"((aud)|(usd))"
ADDITIONAL_IGNORED_LINES = []
ADDITIONAL_INTERNET_TRANSFER_ACCOUNTS = []
@dataclasses.dataclass(slots=True)
class Transaction:
"""Transaction details."""
occurred: datetime.date
"""Transaction date."""
amount: int
"""Transaction amount (cents), negative is withdrawal."""
other_account_name: str
"""Name of other account in transaction."""
reference: str
"""Transaction reference."""
def _log(message: str) -> None:
print(message, file=sys.stderr)
def _parse_amount(amount_text: str, allow_negative: bool = False) -> int:
if allow_negative and amount_text[-1] == "-":
return -_parse_amount(amount_text[:-1])
return round(float(amount_text.replace(",", "")) * 100)
def parse_transactions_from_bank_statement(
lines: t.Iterable[str],
) -> t.Generator[Transaction, None, None]:
transaction_pattern = re.compile(
r"^(?P<day>\d+) (?P<month>[a-zA-Z]{3}) (?P<year>\d{4}) "
r"(?P<account>.+) "
r"(?P<amount>-?(\d+,)*\d+\.\d+) "
r"(?P<balance>-?(\d+,)*\d+\.\d+-?)"
r"( .+)?$"
)
internet_transfer_pattern = re.compile(
r"^((from)|(to)) (?P<account>.+)(?: ref(erence)? no (?P<ref>\S+))?$"
)
page_pattern = re.compile(r"page \d+ of \d+")
footer_pattern = re.compile(r"statement no: .+")
foreign_currency_pattern = re.compile(r"(\d+,)*\d+.\d+ " + CURRENCIES_REGEX)
effective_date_pattern = re.compile(r"effective date \d\d/\d\d/\d{4}")
months = {
"Jan": 1,
"Feb": 2,
"Mar": 3,
"Apr": 4,
"May": 5,
"Jun": 6,
"Jul": 7,
"Aug": 8,
"Sep": 9,
"Oct": 10,
"Nov": 11,
"Dec": 12,
}
ignored_lines = (
"account transactions",
"account transactions continued",
"date transaction details withdrawal deposit balance",
"details are continued on the back of this page",
"aud",
"13 11 55",
"suncorp.com.au",
*ADDITIONAL_IGNORED_LINES,
)
internet_transfer_accounts = (
"internet transfer credit",
"internet transfer debit",
"internet external transfer",
"osko payment",
"rev osko payment",
*ADDITIONAL_INTERNET_TRANSFER_ACCOUNTS,
)
prior_balance = None
transaction = None
for i, line in enumerate(lines):
line = line.strip()
if not line:
continue
line_lowercase = line.lower()
if (
line_lowercase in
or page_pattern.fullmatch(line_lowercase)
or footer_pattern.fullmatch(line_lowercase)
or effective_date_pattern.fullmatch(line_lowercase)
or foreign_currency_pattern.fullmatch(line_lowercase)
):
continue
elif line_lowercase[:15] == "opening balance":
current_balance = _parse_amount(line_lowercase[16:], allow_negative=True)
if prior_balance is None:
prior_balance = current_balance
if prior_balance != current_balance:
raise ValueError(
f"Opening balance (line {i}) doesn't match closing balance: "
f"${prior_balance / 100.0} != ${current_balance / 100.0}"
)
elif line_lowercase[:23] == "balance carried forward":
current_balance = _parse_amount(line_lowercase[24:], allow_negative=True)
if prior_balance != current_balance:
raise ValueError(
f"Carried-forward balance (line {i}) doesn't match prior balance: "
f"${prior_balance / 100.0} != ${current_balance / 100.0}"
)
elif line_lowercase[:23] == "balance brought forward":
current_balance = _parse_amount(line_lowercase[24:], allow_negative=True)
if prior_balance != current_balance:
raise ValueError(
f"Brought-forward balance (line {i}) doesn't match prior balance: "
f"${prior_balance / 100.0} != ${current_balance / 100.0}"
)
elif line_lowercase[:15] == "closing balance":
current_balance = _parse_amount(line_lowercase[16:], allow_negative=True)
if prior_balance != current_balance:
raise ValueError(
f"Closing balance (line {i}) doesn't match prior balance: "
f"${prior_balance / 100.0} != ${current_balance / 100.0}"
)
elif transaction_match := transaction_pattern.fullmatch(line):
if transaction:
yield transaction
day = int(transaction_match["day"])
month = months[transaction_match["month"]]
year = int(transaction_match["year"])
occurred = datetime.date(year=year, month=month, day=day)
current_balance = _parse_amount(
transaction_match['balance'], allow_negative=True
)
amount = _parse_amount(transaction_match["amount"])
if current_balance < prior_balance:
amount *= -1
transaction = Transaction(
occurred,
amount,
other_account_name=transaction_match["account"],
reference="",
)
prior_balance = current_balance
elif transaction:
if internet_transfer_match := internet_transfer_pattern.fullmatch(
line_lowercase,
):
if (
transaction.other_account_name.lower()
not in internet_transfer_accounts
):
_log(f"Other account: {transaction.other_account_name}")
raise ValueError(
f"Unexpected internet transfer reference (line {i}): "
f"{line}"
)
if internet_transfer_match["ref"]:
if transaction.reference:
transaction.reference += "; "
transaction.reference += f"REF NO {internet_transfer_match['ref']}"
transaction.other_account_name = internet_transfer_match['account']
else:
if transaction.reference:
transaction.reference += "; "
transaction.reference += line
else:
_log(f"Current transaction: {transaction}")
raise ValueError(f"Unexpected line ({i}): {line}")
if transaction:
yield transaction
def emit_transactions_csv(
transactions: t.Iterable[Transaction],
stream: t.BinaryIO,
) -> None:
stream_text = io.TextIOWrapper(
stream,
newline="",
encoding="utf-8",
write_through=True,
)
writer = csv.writer(stream_text)
writer.writerow(('date', 'amount ($)', 'other account', 'reference'))
for transaction in transactions:
occurred_text = transaction.occurred.isoformat()
amount_text = str(round(transaction.amount / 100.0, 2))
writer.writerow((
occurred_text,
amount_text,
transaction.other_account_name,
transaction.reference,
)) # fmt: skip
def main() -> None:
transactions = parse_transactions_from_bank_statement(sys.stdin)
emit_transactions_csv(transactions, sys.stdout.buffer)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment