Skip to content

Instantly share code, notes, and snippets.

@lingxiaoyang
Created August 2, 2019 19:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lingxiaoyang/1da78e72b7403348905d14678a041886 to your computer and use it in GitHub Desktop.
Save lingxiaoyang/1da78e72b7403348905d14678a041886 to your computer and use it in GitHub Desktop.
Read scotiabank (Canada) statement into csv.
"""
Read scotiabank (Canada) statement into csv.
1. Concatenate all your statement files into a single PDF.
2. Use Foxit online PDF tool (14-day free trial) to convert the PDF file into XLSX file.
3. Rename the XLSX file as scotia.xlsx, run this script, and output will be written to scotia.csv
"""
import csv
from openpyxl import load_workbook
from decimal import Decimal
import datetime
import re
def parse_date(s):
d = datetime.datetime.strptime(s, '%b %d')
return d.strftime('%m-%d')
if __name__ == "__main__":
out_file = 'scotia.csv'
wb = load_workbook('scotia.xlsx')
ws = wb.active
rows = ws.rows
current_year = None
state = 0
entries = [] # date_str, desc1, desc2, amount, balance
for row in rows:
match = re.search(r'Opening Balance on [a-zA-Z]+ \d+, (\d{4})', str(row[0].value))
if match:
current_year = int(match.group(1))
state = 0
elif state == 0:
if str(row[0].value) == 'Date' and str(row[1].value) == 'Transactions':
state = 1
if str(row[0].value) == 'Date' and 'Transactions' in str(row[1].value) and 'Amounts' in str(row[1].value) and 'withdrawn' in str(row[1].value):
state = 2
else:
if not str(row[0].value).strip() and row[1].value:
assert not entries[-1][2]
entries[-1][2] = str(row[1].value).strip()
else:
try:
d = parse_date(str(row[0].value))
except Exception:
state = 0
else:
date_str = '{}-{}'.format(current_year, d)
if state == 1:
desc = str(row[1].value).strip()
if '\n' in desc:
desc1, desc2 = desc.split('\n')
assert '\n' not in desc2
else:
desc1 = desc
desc2 = ''
desc1 = desc1.strip()
desc2 = desc2.strip()
if desc1 == 'Opening Balance' or desc1 == 'Closing Balance':
continue
amount = None
if row[2].value:
amount = -Decimal(str(row[2].value).replace(',', ''))
else:
amount = Decimal(str(row[3].value).replace(',', ''))
assert amount is not None
balance = Decimal(str(row[4].value).replace(',', ''))
else:
desc = str(row[1].value).strip()
if '\n' in desc:
desc1, desc2 = desc.split('\n')
assert '\n' not in desc2
else:
desc1 = desc
desc2 = ''
desc1 = desc1.strip()
desc2 = desc2.strip()
if desc1 == 'Opening Balance' or desc1 == 'Closing Balance':
continue
match = re.search('([0-9,]+\.\d{2})(.*)$', desc1)
if match:
amount_withdrawn_str = match.group(1)
if match.group(2):
assert not desc2
desc2 = match.group(2)
desc1 = desc1[:-len(desc2)]
desc2 = desc2.strip()
desc1 = desc1[:-len(amount_withdrawn_str)].strip()
amount = -Decimal(amount_withdrawn_str.replace(',', ''))
else:
amount = Decimal(str(row[3].value).replace(',', ''))
balance = Decimal(str(row[4].value).replace(',', ''))
new_entry = [date_str, desc1, desc2, amount, balance]
entries.append(new_entry)
entries.sort(key=lambda l: l[0])
with open(out_file, 'wb') as csvfile:
writer = csv.writer(csvfile)
for e in entries:
writer.writerow(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment