Skip to content

Instantly share code, notes, and snippets.

@sachinsmc
Created February 11, 2024 15:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sachinsmc/1ac6290778828612e4f112ddbff1a6f8 to your computer and use it in GitHub Desktop.
Save sachinsmc/1ac6290778828612e4f112ddbff1a6f8 to your computer and use it in GitHub Desktop.
parse endb statement
from lxml import etree, objectify
import sys
import argparse
import re
import os.path
import datetime
def f_colorise(string, color):
colors = {
'red': '\033[91m',
'green': '\033[92m',
'blue': '\033[94m',
'cyan': '\033[96m',
'white': '\033[97m',
'yellow': '\033[93m',
'magenta': '\033[95m',
'grey': '\033[90m',
'black': '\033[90m'
}
if color not in colors:
return string
return colors[color] + string + '\033[0m'
def f_remove_namespace(source_xml):
metadata = source_xml
parser = etree.XMLParser(remove_blank_text=True)
tree = etree.parse(metadata, parser)
root = tree.getroot()
for elem in root.getiterator():
if not hasattr(elem.tag, 'find'):
continue
i = elem.tag.find('}')
if i >= 0:
elem.tag = elem.tag[i+1:]
objectify.deannotate(root, cleanup_namespaces=True)
return tree
def f_xml_parse(source_xml, statement_type):
tree = source_xml
root = tree.getroot()
transaction_list = []
for row in root.iter('Row'):
try:
if len(row.getchildren()) == 5:
'''
Debit:
[0] Date
[1] Description
[2] Debit
[3] Credit
[4] Account Balance
Credit:
[0] Transaction date
[1] Posting date
[2] Description
[3] Card Type
[4] Amount
transaction_list format:
[0] Date
[1] Description
[2] Amount
'''
transaction = []
for cell in row.iter('Cell'):
for data in cell.iter('Data'):
transaction.append(data.text)
try:
if re.search(r'^[0-9]{2}', transaction[0]) is not None:
desc = None
amount = None
date = None
memo = ""
# print(transaction)
# Set correct properties
if statement_type == 'credit':
amount = str(transaction[4]).replace(',', '')
desc = str(transaction[2]).replace(',', '')
date = str(transaction[0]).replace(' ', '/')
else:
# Remove unneeded commas which cause csv problems
if transaction[2] is not None:
amount = str(transaction[2]).replace(',', '')
else:
amount = str(transaction[3]).replace(',', '')
# Date formatting
date = re.search(r'[0-9]{2}\-[0-6]{2}\-[0-9]{4}', transaction[1])
if date is None:
date = datetime.datetime.strptime(str(transaction[0]).replace(' ', '/'), '%d/%b/%Y').strftime('%d/%b/%Y')
else:
date = datetime.datetime.strptime(str(date.group()).replace('-', '/'), '%d/%m/%Y').strftime('%d/%b/%Y')
# Cleanup awful description
desc = str(transaction[1]).replace(',', '')
desc = re.sub(r'POS-PURCHASE CARD NO\.[0-9]+\*\*\*\*\*\*[0-9]+\s*', '', desc)
desc = re.sub(r'^\d{6}\s*', '', desc)
desc = re.sub(r'[0-9]{2}\-[0-9]{2}\-[0-9]{4}\s*', '', desc)
desc = re.sub(r'[0-9]*\.[0-9]*AED\s*', '', desc)
desc = re.sub(r'\s+DUBAI:AE', '', desc)
transaction_list.append({
"date": date,
"description": desc,
"amount": amount,
"memo": memo
})
else:
continue
except IndexError:
continue
except KeyError:
pass
# List may be out of order due to pulling the transaction date from description in debit.
transaction_list = sorted(
transaction_list,
key=lambda x: datetime.datetime.strptime(x['date'], '%d/%b/%Y'), reverse=True
)
return transaction_list
def f_write_csv(transactions, csv_file):
f = open(csv_file, "w")
f.write("Date,Payee,Memo,Amount\n")
for t in transactions:
f.write('{date}, {payee}, {memo}, {amount}\n'.format(
date=t['date'],
payee=t['description'],
memo=t['memo'],
amount=t['amount']
))
f.close()
print(len(transactions), " transactions imported")
def f_display_menu(menu_prompt, menu_list):
"""
Pass a title for the menu, the list of options to display and an additional final option (All, Quit etc.)
Format for the list is:
List: Menu_List
- Dict: item1: property1, item1: property2, item1: property3
- Dict: item2: property1, item2: property2, item2: property3
"""
loop = True
while loop is True:
count = 0
for item in menu_list:
count += 1
print(
# Menu layout
"{list_number:>4} {date} {desc} {amount}".format(
# Set variables for the menu
list_number="[" + str(menu_list.index(item) + 1) + "]",
date=f_colorise(str(item["date"]), "green"),
desc=f_colorise(str(item["description"]), "blue"),
amount=f_colorise(str(item["amount"]), "green") if re.search(r'^-', item["amount"]) is None else f_colorise(str(item["amount"]), "red")
)
)
print("{list_number:>4} All".format(list_number="[" + str(count+1) + "]"))
print("{list_number:>4} Quit".format(list_number="[" + str(count+2) + "]"))
choice = input(menu_prompt)
try:
if int(choice) <= int(count): # if user picks a number then return that entry
return(menu_list[int(choice)])
elif int(choice) == int(count+1): # if user picks all then return full array
return(menu_list)
elif int(choice) == int(count+2): # if user picks quit then quit
print("Exiting...")
loop = False
else:
input("\n" + f_colorise(str(choice), "red") + " is an unknown option. Press enter to retry: ")
except Exception as e:
print("Exception: ", e)
input("\n" + f_colorise(str(choice), "red") + " is invalid, the choice needs to be numeric. Press enter to retry: ")
exit(0)
def main():
statement_type = None
csv_file = None
parser = argparse.ArgumentParser()
parser.add_argument("-i",
required=True,
help="Specify source file",
action="store",
dest="xml_file",
default=None
)
parser.add_argument("-o",
required=False,
help="Specify output csv file",
action="store",
dest="csv_file",
default=None
)
ap_parsed = parser.parse_args()
with open(ap_parsed.xml_file, "r") as f:
for line in f:
if re.search("CURRENT ACCOUNT", line) is not None:
statement_type = "debit"
elif re.search("Credit Card Statement", line) is not None:
statement_type = "credit"
if statement_type is None:
print("Unknown statement type")
sys.exit(0)
if ap_parsed.csv_file is None:
csv_file = 'ynab_import.csv'
else:
csv_file = ap_parsed.csv_file
if os.path.isfile(csv_file):
while True:
reply = str(input('CSV file exists. Overwrite? (y/n): ')).lower().strip()
if reply[0] == 'y':
break
if reply[0] == 'n':
sys.exit(0)
cleaned_xml = f_remove_namespace(ap_parsed.xml_file)
processed_transactions = f_xml_parse(cleaned_xml, statement_type)
recent_transation = f_display_menu("Select oldest uncleared transaction: ", processed_transactions)
delete_trans = False
filtered_transactions = []
for t in processed_transactions:
if t == recent_transation or delete_trans is True:
delete_trans = True
else:
filtered_transactions.append(t)
f_write_csv(filtered_transactions, csv_file)
return
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment