Skip to content

Instantly share code, notes, and snippets.

@oseiskar
Created September 2, 2022 14:11
Show Gist options
  • Save oseiskar/b353dd94bd84f06e9eb8858927f057b7 to your computer and use it in GitHub Desktop.
Save oseiskar/b353dd94bd84f06e9eb8858927f057b7 to your computer and use it in GitHub Desktop.
Convert Nordea corporate electronic account statement NDA format to CSV
"""
Parses the electronic account statements in the data format used by the
Finnish bank Nordea (.nda / "TITO") and converts it to CSV.
Usage:
python parse_nordea_nda.py < Konekielinen-tiliote\(YY-MM-DD\).nda > result.csv
OR parse multiple:
cat Konekielinen-tiliote\(*\).nda | python parse_nordea_nda.py > result.csv
All field are not included in the output. Modify this script to add more fields
if needed.
"""
# Reference for the data format
# https://www.finanssiala.fi/wp-content/uploads/2021/03/Konekielinen_tiliote_palvelukuvaus.pdf
# Did not immediately find out what (uncommon) 8-bit encoding format is
# used for the text fields. This decoding table is incomplete.
DECODING_TABLE = [
'{ä',
'|ö',
'[Ä',
'\\Ö',
# TODO: letters å and Å should be added
]
import io
from collections import OrderedDict
def parse_message(f):
letter = f.read(1)
if len(letter) < 1: raise EOFError()
msg_type_id = f.read(2)
msg_length = int(f.read(3)) - 4 # not sure where 4 comes from
payload = f.read(msg_length)
return letter, msg_type_id, payload
def parse_and_concat(f):
cur_msg = None
cur_extras = []
while True:
try:
_, msg_type_id, payload = parse_message(f)
if msg_type_id == '10':
if cur_msg is not None:
yield(cur_msg, cur_extras)
cur_msg = payload
cur_extras = []
elif msg_type_id == '11':
assert(cur_msg is not None)
cur_extras.append(payload)
except EOFError:
break
if cur_msg is not None:
yield(cur_msg, cur_extras)
def parse_string(s):
s = s.rstrip()
for entry in DECODING_TABLE:
s = s.replace(entry[0], entry[1])
return s
def parse_11_msg(s):
head = s[:2]
return parse_string(s[2:])
def parse_date(d):
return '20' + d[:2] + '-' + d[2:4] + '-' + d[4:6]
def parse_amount(sign, amount):
if sign == '+': sign = ''
return sign + str(int(amount[:-2])) + '.' + amount[-2:]
def parse_10_msg(payload):
f = io.StringIO(payload)
d = OrderedDict({})
d['index'] = f.read(6)
d['id'] = f.read(18)
d['event_date'] = f.read(6)
d['value_date'] = f.read(6)
d['paid_date'] = f.read(6)
d['type'] = f.read(1)
d['code'] = f.read(3)
d['code_description'] = f.read(35)
d['sign'] = f.read(1)
d['amount'] = f.read(18)
d['receipt_code'] = f.read(1)
d['transfer_type'] = f.read(1)
d['name'] = f.read(35)
d['name_source'] = f.read(1)
d['account_number'] = f.read(14)
d['account_changed'] = f.read(1)
d['reference'] = f.read(20)
d['form_number'] = f.read(8)
d['level_id'] = f.read(1)
return {
'raw': d,
'id': d['id'],
'date': parse_date(d['event_date']),
'name': parse_string(d['name']),
'amount': parse_amount(d['sign'], d['amount'])
}
def parse(f):
for msg, extras in parse_and_concat(f):
parsed = parse_10_msg(msg)
parsed['extras'] = [parse_11_msg(e) for e in extras]
yield(parsed)
if __name__ == '__main__':
import sys, csv
w = csv.writer(sys.stdout)
w.writerow(['id','date','amount','name','message'])
for m in parse(sys.stdin):
w.writerow([m['id'], m['date'], m['amount'], m['name'], '|'.join(m['extras'])])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment