Created
September 8, 2022 12:08
-
-
Save thomwiggers/dcbde2c85ead1caf8ff9a46aa4c17c2d to your computer and use it in GitHub Desktop.
Convert ABN AMRO "XLS" format to extract some useful fields
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import csv | |
fieldnames = [ | |
"Rekeningnummer", | |
"Muntsoort", | |
"Transactiedatum", | |
"Rentedatum", | |
"Beginsaldo", | |
"Eindsaldo", | |
"Transactiebedrag", | |
"Omschrijving", | |
] | |
data = [] | |
def find_field(regex, string): | |
if match := re.search(regex, string): | |
return match.group("capture").strip() | |
else: | |
print(f"Didn't find field for {regex} in {string!r}!") | |
return None | |
def process_row(row): | |
omschrijving = row["Omschrijving"] | |
if omschrijving.startswith("ABN AMRO Bank N.V.") and ("BetaalPakket" in omschrijving or "BasisPakket" in omschrijving): | |
row["Naam"] = "ABN AMRO" | |
elif "Apple Pay" in omschrijving or "Betaalpas" in omschrijving: | |
row["Naam"] = find_field(r"(Apple Pay|Betaalpas)\s+(?P<capture>.*?),PAS\d{3}", omschrijving) | |
row["Tegenrekening"] = None | |
elif omschrijving.startswith("SEPA Incasso algemeen doorlopend"): | |
row["Naam"] = find_field(r"Naam: (?P<capture>.*?)\s+Machtiging:", omschrijving) | |
row["Tegenrekening"] = find_field(r"IBAN: (?P<capture>\w{2}\d{2}[A-Z0-9]{4}\d+)", omschrijving) | |
elif omschrijving.startswith("/TRTP/"): | |
row["Naam"] = find_field(r"/NAME/(?P<capture>.*?)/", omschrijving) | |
row["Tegenrekening"] = find_field(r"/IBAN/(?P<capture>.*?)/", omschrijving) | |
row["Omschrijving"] = find_field(r"/REMI/(?P<capture>.*?)/(IBAN|EREF)", omschrijving) or omschrijving | |
elif "SEPA Overboeking " in omschrijving: | |
row["Tegenrekening"] = find_field(r"IBAN((: )|/)(?P<capture>\w{2}\d{2}[A-Z0-9]{4}\d{8,})( |/|$)", omschrijving) | |
row["Naam"] = find_field(r"Naam: (?P<capture>.*?)((\s+Omschrijving:)|$)", omschrijving) | |
elif omschrijving.startswith("STORTING BELEG.") or "DIVIDEND" in omschrijving: | |
if "NT EM MKT" in omschrijving: | |
row["Naam"] = "Northern Trust Emerging Markets" | |
elif "NT WORLD" in omschrijving: | |
row["Naam"] = "Nothern Trust World" | |
else: | |
print(f"I don't know {row}") | |
elif omschrijving.startswith("BEA "): | |
row["Naam"] = find_field(r"^BEA NR:[0-9-A-Z]{6,}\s+\d{2}\.\d{2}\.\d{2}/\d{2}\.\d{2} (?P<capture>.*?),PAS\d{3}.*", omschrijving) | |
elif "Servicekosten" in omschrijving: | |
row["Naam"] = "ABN AMRO Beleggen" | |
else: | |
row["Tegenrekening"] = find_field(r"IBAN((: )|/)(?P<capture>\w{2}\d{2}[A-Z0-9]{4}\d+)( |/)", omschrijving) | |
return row | |
with open('transacties.csv', 'r') as tx: | |
txreader = csv.DictReader(tx, fieldnames=fieldnames) | |
next(txreader) | |
for row in txreader: | |
#print(f"Processing {len(data)}") | |
data.append(process_row(row)) | |
with open('output.csv', 'w') as fh: | |
out_fieldnames = [*fieldnames, "Tegenrekening", "Naam"] | |
txwriter = csv.DictWriter(fh, fieldnames=out_fieldnames) | |
txwriter.writeheader() | |
txwriter.writerows(data) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment