Created
July 30, 2015 19:28
-
-
Save vDorst/68d555a6a90f62fec004 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
filename = "TXT150730193435.TAB" | |
f = open(filename, 'r') | |
csv = open("out.csv",'w') | |
# IBAN Number of known accounts | |
# Savings, Pay and Household | |
Accounts = [ "NL00ABNA0123345567", "NL02ABNA0123345533", "NL01ABNA0123342347"] | |
def AccountNumberToIBAN(AccountNumber): | |
for Account in Accounts: | |
if (re.search(r"" + AccountNumber + "$", Account)): | |
return Account | |
return None | |
def ParseDescription(desc): | |
values = None | |
### SEPA PLAIN: SEPA iDEAL IBAN: NL12RABO0121212212 BIC: RABONL2U Naam: Silver Ocean B.V. Omschrijving: 1232138 1232131233 412321 iBOOD.com iBOOD.com B.V. Kenmerk: 12-12-2014 21:03 002000 0213123238 | |
sepa = re.findall(r"(?P<SEPA>^SEPA.{28})", desc, re.I) | |
if (sepa): | |
values = {} | |
value = sepa[0] | |
values["TRTP"] = value.strip() | |
values["EREF"] = "" | |
values["REMI"] = "" | |
sepa = re.findall(r"(?P<NAME>[A-Za-z]+(?=:\s)):\s(?P<VALUE>[A-Za-z 0-9.-]+(?=\s))", desc, re.I) | |
for line in sepa: | |
key = line[0] | |
if key.upper() == 'OMSCHRIJVING': | |
key = 'REMI' | |
if key.upper() == 'KENMERK': | |
key = 'EREF' | |
if key.upper() == 'NAAM': | |
key = 'NAME' | |
value = line[1] | |
values[key] = value.strip() | |
# print (values) | |
# continue | |
if len(values["REMI"]) > 19: | |
values["REMI"] = values["REMI"][0:18] + values["REMI"][19:] | |
if values["REMI"] == "": | |
values["REMI"] = values["TRTP"] | |
### TRTP ENCODED: /TRTP/SEPA OVERBOEKING/IBAN/NL23ABNA0000000000/BIC/ABNANL2A/NAME/baasd dsdsT CJ/REMI/Nullijn/EREF/NOTPROVIDED | |
trtp = re.findall(r"\/(?P<NAME>[A-Z]{3,4})\/(?P<VALUE>.*?(?:(?=\/[A-Z]{3,4}\/)|$))",desc, re.I) | |
if (trtp): | |
values = {} | |
values["EREF"] = "" | |
values["REMI"] = "" | |
for line in trtp: | |
key = line[0] | |
value = line[1] | |
values[key] = value.strip() | |
# print (values) | |
# continue | |
if values["REMI"] == "": | |
values["REMI"] = values["TRTP"] | |
### BEA: BEA NR:00AJ01 31.01.01/19.54 Van HarenSchoenen132 UDE,PAS333 | |
trtp = re.findall(r"(?P<TRTP>[BG]EA) +(?P<EREF>NR:[a-zA-Z:0-9]+) +(?P<DATE>[0-9.\/]+) +(?P<NAAM>[^,]*)", desc, re.I) | |
if (trtp): | |
values = {} | |
values["TRTP"] = str(trtp[0][0]).strip() | |
values["NAME"] = str(trtp[0][3]).strip() | |
values["EREF"] = str(trtp[0][1]).strip() | |
values["DATE"] = str(trtp[0][2]).strip() | |
values["REMI"] = values["TRTP"] + " " + values["NAME"] | |
# print (values) | |
# continue | |
### OLD: 12.21.22.222 BNP aaaaaaa aaaaaa SCH BETALINGSKENM. 2323233232323323 MAAND* APRIL 01 REF* 1212121-42-41 | |
trtp = re.findall(r"^ ?(?P<IBAN>[0-9.]{12,15})\W+(?P<NAAM>.{32})", desc, re.I) | |
if (trtp): | |
values = {} | |
values["TRTP"] = "OLD" | |
values["IBAN"] = str(trtp[0][0]).strip() | |
values["NAME"] = str(trtp[0][1]).strip() | |
values["EREF"] = "" | |
values["REMI"] = values["TRTP"] + " " + values["NAME"] | |
# print (values) | |
# continue | |
### ABN AMRO Bank N.V. Prive pakket 3,25 | |
abn = re.findall(r"^ABN AMRO.{24} (?P<DESC>.*)", desc, re.I) | |
if (abn): | |
values = {} | |
values["TRTP"] = "ABN AMBRO" | |
values["NAME"] = "ABN AMBRO" | |
values["EREF"] = str(abn[0]).strip() | |
values["REMI"] = values["EREF"] | |
# print (values) | |
# continue | |
if (values == None): | |
# print ("Unkown: ### %s ###" % ( desc )) | |
return None | |
return values | |
def CategoryTransaction(desc): | |
for Account in Accounts: | |
if (re.search(r"" + Account, desc)): | |
return "Bank:Eigenrekening" | |
if (re.search(r"(NL12ABNA1232312312321,NL12ABNA0521221222)", desc, re.I)): | |
return "Bank:Opgeheven" | |
if (re.search(r"(KINDERBIJSLAG|MYWORK)", desc, re.I)): | |
return ["Inkomen", "Inkomen"] | |
if (re.search(r"(jumbo|kruidvat|ALBERT HEIJN|Gall & Gall|Sligro|Etos|Primera|PLUS |C1000|EMTE|LIDL|ALDI)", desc, re.I)): | |
return ["Boodschappen", "Boodschappen"] | |
if (re.search(r"(belastingdienst|Belastingsamenwerking)", desc, re.I)): | |
return ["Belastingen","Belastingen"] | |
if (re.search(r"(hollandsnieuwe|freevoipdeal)", desc, re.I)): | |
return ["Telefoon","GWE"] | |
if (re.search(r"(SCHADEVERZEKERIN|ZLM|DITZO|FBTO|ZORGVERZEKERINGEN|Delta Lloyd|NATIONALE NEDERLANDEN)", desc, re.I)): | |
return ["Verzekeringen","Verzekeringen"] | |
if (re.search(r"(BRABANT WATER)", desc, re.I)): | |
return ["Engerie/Water","GWE"] | |
if (re.search(r"(Hypotheek)", desc, re.I)): | |
return "Hypotheek" | |
if (re.search(r"(GEA)", desc, re.I)): | |
return ["Contanten","Contanten"] | |
if (re.search(r"(Esprit|Hunkemoller|ECCO|MS MODE|Bon ?Prix|Etam|Chique Dress|Schoenenreus|HarenSchoenen|VAN DAL (B.V.|Mannenmode)|C\&A |ZEEMAN|Wibra|Bijou Brigitte)", desc, re.I)): | |
return ["Kleding","Kleding"] | |
if (re.search(r"([0-9]{4} Action|Hema|Ikea|BLOKKER|Gamma|Praxis|Xenos|Boerenbond|Douglas|Rituals|Trekpleister|V\&D |INTRATUIN)", desc, re.I)): | |
return ["Wonen:Diverse","Wonen"] | |
if (re.search(r"(Intertoys|Bart Smit)", desc, re.I)): | |
return ["Wonen:Speelgoed","Wonen"] | |
if (re.search(r"(Silver Ocean|scheerenfoppen|4Launch|MMS Online|MediaMarkt|DIXONS|WEHKAMP|bol\.com)", desc, re.I)): | |
return ["Wonen:Computers","Wonen"] | |
if (re.search(r"(E Friends|DOMINO.?S|La Place|Mc Donald|Cafetaria|Copper Food|Smullers)", desc, re.I)): | |
return ["Uitjes:Uiteten","Uitjes"] | |
if (re.search(r"(TAMOIL|SHELL|TINQ|TANGO)", desc, re.I)): | |
return ["Bezine", "Auto"] | |
if (re.search(r"(Creditcard|PayPal)", desc, re.I)): | |
return "Creditcard" | |
if (re.search(r"(edutel|kpn|onsbrabantnet|BHOSTED.NL|Tuxis Internet|NXS|IS GROUP B.V.|HTTPS|Transip B.V.)", desc, re.I)): | |
return ["Internet", "GWE"] | |
if (re.search(r"(Autocentr)", desc, re.I)): | |
return "Onderhoud:Auto" | |
if (re.search(r"(Apotheken)", desc, re.I)): | |
return "Zorg:Apotheek" | |
if (re.search(r"(Pathe |Take Ten|Industry|bios)", desc, re.I)): | |
return ["Uitjes:Bioscoop","Uitjes"] | |
if (re.search(r"(St.Natuurcentrum Sla|MRTICKET NL|Safaripark Beekse|De Vrije Teuge|NS GROEP)", desc, re.I)): | |
return ["Uitjes:Diverse","Uitjes"] | |
if (re.search(r"(ABN AM.?RO)", desc, re.I)): | |
return "Bank:ABN" | |
if (re.search(r"(center ?par[ck])", desc, re.I)): | |
return "Vakantie:CenterParcs" | |
return "" | |
csv.write('"date-transaction", "account-iban", "description", "budget-name", "category-name", "amount", "opposing-iban","opposing-name","currency-code","date-rent"\n') | |
for line in f: | |
# ABN Amro CSV Format | |
imp = re.search("^(?P<account>[^\t]*)\t(?P<currencycode>[^\t]*)\t(?P<date>[^\t]*)\t(?P<Beginsaldo>[^\t]*)\t(?P<Eindsaldo>[^\t]*)\t(?P<daterent>[^\t]*)\t(?P<amount>[^\t]*)\t(?P<description>[^\t^\n^\r]*)", line) | |
desc = imp.group("description") | |
ParsedDesc = ParseDescription(desc) | |
if (ParsedDesc != None): | |
ParsedDesc["CATEGORY"] = CategoryTransaction(desc) | |
ParsedDesc["BUDGET"] = "" | |
if isinstance(ParsedDesc["CATEGORY"],list): | |
ParsedDesc["BUDGET"] = ParsedDesc["CATEGORY"][1] | |
ParsedDesc["CATEGORY"] = ParsedDesc["CATEGORY"][0] | |
ostr = "-- %s" % ( ParsedDesc["TRTP"] ) | |
for key, value in ParsedDesc.items(): | |
if (key == 'TRTP') | (key == 'BIC'): | |
continue | |
ostr = ostr + "\n\t %4s: %s" % ( key, value ) | |
print (ostr) | |
if ( not ("IBAN" in ParsedDesc.keys())): | |
ParsedDesc["IBAN"] = "" | |
Account = AccountNumberToIBAN(imp.group("account")) | |
if (Account == None): | |
print ("Unknown AccountNumber%s" % (imp.group("account"))) | |
continue | |
Amount = str(imp.group("amount")) | |
Amount = Amount.replace(',','.') | |
csv.write("\"" + imp.group("date") + "\",\"" + Account + "\",\"" + ParsedDesc["REMI"] + "\",\"" + ParsedDesc["BUDGET"] + "\",\"" + ParsedDesc["CATEGORY"] + "\",\"" + Amount + "\",\"" + ParsedDesc["IBAN"] + "\",\"" + ParsedDesc["NAME"] + "\",\"" + imp.group("currencycode") + "\",\"" + imp.group("daterent") + "\"\n") | |
f.close() | |
csv.close() | |
# input("Press Enter to continue...") | |
Also added a line if there is an empty line in the file (mostly on the end of the file) at line 158
if (imp != None):
Maybe an idea to hash the whole line "desc" to try to get an unique ID.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
With "Tikkie" the REMI is sometimes identical. By adding EREF it is fixed. I added after line 45 this to make it unique
else: values["REMI"] = values["REMI"] + "_" + values["EREF"]
When you made 2 transactions on the same day at the same store, also the description was not unique, by adding date to line 70 it would fix that:
values["REMI"] = values["TRTP"] + " " + values["NAME"] + " " + values["DATE"]