Created
August 2, 2019 19:19
-
-
Save lingxiaoyang/1da78e72b7403348905d14678a041886 to your computer and use it in GitHub Desktop.
Read scotiabank (Canada) statement into csv.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Read scotiabank (Canada) statement into csv. | |
1. Concatenate all your statement files into a single PDF. | |
2. Use Foxit online PDF tool (14-day free trial) to convert the PDF file into XLSX file. | |
3. Rename the XLSX file as scotia.xlsx, run this script, and output will be written to scotia.csv | |
""" | |
import csv | |
from openpyxl import load_workbook | |
from decimal import Decimal | |
import datetime | |
import re | |
def parse_date(s): | |
d = datetime.datetime.strptime(s, '%b %d') | |
return d.strftime('%m-%d') | |
if __name__ == "__main__": | |
out_file = 'scotia.csv' | |
wb = load_workbook('scotia.xlsx') | |
ws = wb.active | |
rows = ws.rows | |
current_year = None | |
state = 0 | |
entries = [] # date_str, desc1, desc2, amount, balance | |
for row in rows: | |
match = re.search(r'Opening Balance on [a-zA-Z]+ \d+, (\d{4})', str(row[0].value)) | |
if match: | |
current_year = int(match.group(1)) | |
state = 0 | |
elif state == 0: | |
if str(row[0].value) == 'Date' and str(row[1].value) == 'Transactions': | |
state = 1 | |
if str(row[0].value) == 'Date' and 'Transactions' in str(row[1].value) and 'Amounts' in str(row[1].value) and 'withdrawn' in str(row[1].value): | |
state = 2 | |
else: | |
if not str(row[0].value).strip() and row[1].value: | |
assert not entries[-1][2] | |
entries[-1][2] = str(row[1].value).strip() | |
else: | |
try: | |
d = parse_date(str(row[0].value)) | |
except Exception: | |
state = 0 | |
else: | |
date_str = '{}-{}'.format(current_year, d) | |
if state == 1: | |
desc = str(row[1].value).strip() | |
if '\n' in desc: | |
desc1, desc2 = desc.split('\n') | |
assert '\n' not in desc2 | |
else: | |
desc1 = desc | |
desc2 = '' | |
desc1 = desc1.strip() | |
desc2 = desc2.strip() | |
if desc1 == 'Opening Balance' or desc1 == 'Closing Balance': | |
continue | |
amount = None | |
if row[2].value: | |
amount = -Decimal(str(row[2].value).replace(',', '')) | |
else: | |
amount = Decimal(str(row[3].value).replace(',', '')) | |
assert amount is not None | |
balance = Decimal(str(row[4].value).replace(',', '')) | |
else: | |
desc = str(row[1].value).strip() | |
if '\n' in desc: | |
desc1, desc2 = desc.split('\n') | |
assert '\n' not in desc2 | |
else: | |
desc1 = desc | |
desc2 = '' | |
desc1 = desc1.strip() | |
desc2 = desc2.strip() | |
if desc1 == 'Opening Balance' or desc1 == 'Closing Balance': | |
continue | |
match = re.search('([0-9,]+\.\d{2})(.*)$', desc1) | |
if match: | |
amount_withdrawn_str = match.group(1) | |
if match.group(2): | |
assert not desc2 | |
desc2 = match.group(2) | |
desc1 = desc1[:-len(desc2)] | |
desc2 = desc2.strip() | |
desc1 = desc1[:-len(amount_withdrawn_str)].strip() | |
amount = -Decimal(amount_withdrawn_str.replace(',', '')) | |
else: | |
amount = Decimal(str(row[3].value).replace(',', '')) | |
balance = Decimal(str(row[4].value).replace(',', '')) | |
new_entry = [date_str, desc1, desc2, amount, balance] | |
entries.append(new_entry) | |
entries.sort(key=lambda l: l[0]) | |
with open(out_file, 'wb') as csvfile: | |
writer = csv.writer(csvfile) | |
for e in entries: | |
writer.writerow(e) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment