Skip to content

Instantly share code, notes, and snippets.

@bennofs
Created October 28, 2022 13:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bennofs/ffa844d2e0556d66227447ad5487b9ca to your computer and use it in GitHub Desktop.
Save bennofs/ffa844d2e0556d66227447ad5487b9ca to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""Process PayPal transaction exports for ledger import
Usage:
process.py TRANSACTIONS ACTIVITY
where TRANSACTIONS is the csv export of PayPal transactions (https://www.paypal.com/reports/statements/custom)
and ACTIVITY is the csv export of PayPal activities (https://www.paypal.com/reports/dlog) for the same time range.
"""
from docopt import docopt
from numpy import dtype
import pandas as pd
import collections
TIMEZONE_TO_OFFSET = {
'CEST': '+0200',
'CET': '+0100',
}
PAYPAL_OLD_REF_TO_NEW_REF = {
'5ZW23UWZAGXG8': '1011989330659',
'5ZW23UXHMN6HU': '1012263618813',
'5ZW23UY7VBNLU': '1012636739885',
'5ZW23UXPSWE2Y': '1012367038479',
'5ZW23UX6DUZSY': '1012074987407',
'5ZW23UYYKFZPY': '1013050998623',
'5ZW23UYNKLBYG': '1012883297255',
'5ZW23UYRZJNMA': '1012940941620',
'5ZW23UYZJC9JL': '1013067190025',
'5ZW23UZ95GH8J': '1013194661480',
}
def generate_record(activity_by_code, txs):
record = {
'amount_bank_deduct': 0,
'amount_convert_base': 0,
'amount_convert_other': 0,
'amount_main': 0,
'currency_main': 'EUR',
'fee': None,
'currency_convert_base': '',
'currency_convert_other': '',
'bank_ref': '',
'bank_name': '',
'desc': None,
'date': None,
'time': None,
'invoice_code': '',
'name': None,
'tx_code': None,
'kind': 'normal',
'bank_currency': None,
'email': None,
}
main_tx = None
for tx in txs:
desc = tx['Beschreibung']
entgelt = tx['Entgelt'] if 'Entgelt' in tx else tx['Entgelt ']
brutto = tx['Brutto'] if 'Brutto' in tx else tx['Brutto ']
if desc == "Bankgutschrift auf PayPal-Konto" or desc == 'Allgemeine Abbuchung' or desc == 'Allgemeine Abbuchung – Bankkonto':
assert entgelt.strip() == '0,00', "fees not supported for bank transactions"
record['amount_bank_deduct'] = brutto
record['bank_name'] = tx['Name der Bank']
record['bank_currency'] = tx['Währung']
bank_refs = [x['Bankreferenz'] for x in activity_by_code[tx['Transaktionscode']]]
bank_refs = [x for x in bank_refs if x and not pd.isna(x)]
if len(bank_refs) != 1:
print(pd.DataFrame(activity_by_code[tx['Transaktionscode']]))
raise RuntimeError("multiple bank refs or no bank ref")
bank_ref = bank_refs[0]
record['bank_ref'] = PAYPAL_OLD_REF_TO_NEW_REF.get(bank_ref, bank_ref)
# if this is the only TX, treat it as main TX as well
if len(txs) != 1:
continue
record['kind'] = 'bank'
if desc == 'Allgemeine Währungsumrechnung':
assert entgelt.strip() == '0,00', "fees not supported for currency transactions"
base = 'base' if tx['Währung'] == 'EUR' or record['currency_convert_other'] else 'other'
record[f'amount_convert_{base}'] = brutto
record[f'currency_convert_{base}'] = tx['Währung']
continue
if main_tx is not None:
print(pd.DataFrame([main_tx, tx]))
print("--")
print(pd.DataFrame(txs))
raise RuntimeError("tx not handled")
main_tx = tx
record['desc'] = tx['Beschreibung']
record['name'] = tx['Name']
dt = tx['timestamp'].tz_convert('Europe/Berlin')
record['date'] = dt.strftime("%Y-%m-%d")
record['time'] = dt.strftime("%H:%M:%S")
record['currency_main'] = tx['Währung']
record['amount_main'] = brutto
record['invoice_code'] = tx['Rechnungsnummer']
record['tx_code'] = tx['Transaktionscode']
record['fee'] = entgelt
record['email'] = tx['Absender (E-Mail-Adresse)'] if 'Absender (E-Mail-Adresse)' in tx else tx['Absender E-Mail-Adresse']
record['timestamp'] = tx['timestamp']
if main_tx is None:
print(pd.DataFrame(txs))
raise RuntimeError("no main tx")
if bool(record['currency_convert_base']) != bool(record['currency_convert_other']):
print(pd.DataFrame(txs))
raise RuntimeError("missing in or out currency convert transaction")
return record
def main():
assert __doc__ is not None
args = docopt(__doc__)
txs = pd.read_csv(args['TRANSACTIONS'], dtype=str)
activity = pd.read_csv(args['ACTIVITY'], dtype=str)
txs_ts = pd.to_datetime(txs['Datum'] + " " + txs['Uhrzeit'] + " " + txs['Zeitzone'], format="%d.%m.%Y %H:%M:%S %Z", utc=True)
txs = txs.assign(timestamp=txs_ts)
activity_ts = pd.to_datetime(activity['Datum'] + " " + activity['Uhrzeit'] + activity['Zeitzone'].map(TIMEZONE_TO_OFFSET), format="%d.%m.%Y %H:%M:%S%z", utc=True)
activity = activity.assign(timestamp=activity_ts)
activity_by_code = collections.defaultdict(list)
for _, r in activity.iterrows():
code = r['Transaktionscode']
activity_by_code[code].append(r)
records = []
for _, rows in txs.groupby('timestamp'):
tx_by_root = {}
has_parent = set()
for _, tx in rows.iterrows():
code = tx['Transaktionscode']
related_code = tx['Zugehöriger Transaktionscode']
if pd.isna(related_code) or not related_code.strip():
tx_by_root.setdefault(code, [])
tx_by_root[code].append(tx)
continue
tx_by_root.setdefault(related_code, [])
tx_by_root[related_code].extend([tx] + tx_by_root.get(code, []))
tx_by_root[code] = tx_by_root[related_code]
has_parent.add(code)
roots = set(tx_by_root) - has_parent
records.extend(generate_record(activity_by_code, tx_by_root[root]) for root in roots)
df = pd.DataFrame.from_records(records)
print(df.to_csv(index=False))
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment