Skip to content

Instantly share code, notes, and snippets.

@oberrich
Last active October 9, 2021 01:48
Show Gist options
  • Save oberrich/c2bcac8d07a9e55bf812aca2091226ce to your computer and use it in GitHub Desktop.
Save oberrich/c2bcac8d07a9e55bf812aca2091226ce to your computer and use it in GitHub Desktop.
Extract Bitcoin transactions with historical prices and fix up time zones in Python
import pandas as pd
import numpy as np
import datetime as dt
import csv, glob, os
from currency_converter import CurrencyConverter
pd.set_option('precision', 9)
filename_tpl = 'output/temp/'+str(int(dt.datetime.utcnow().timestamp()))+'_{}.csv'
currency = CurrencyConverter('http://www.ecb.int/stats/eurofxref/eurofxref-hist.zip', fallback_on_missing_rate=True)
def convert_usd_eur(row, rate_only=False):
if rate_only:
return currency.convert(1.0, 'USD', 'EUR', date=row.name)
return currency.convert(row['HL_AVG'], 'USD', 'EUR', date=row.name)
def convert_btc_eur(row, btc_eur_df, rate_only=False):
btc_eur_at_hour = btc_eur_df.loc[dt.datetime(row.name.year, row.name.month, row.name.day, row.name.hour)]['BTC_EUR']
if rate_only:
return btc_eur_at_hour
return row['Amount'] * btc_eur_at_hour
def parse_coinbase_date(d):
return pd.to_datetime(d, format="%Y-%m-%d %I-%p")
def parse_paypal_date(d):
return pd.to_datetime(d, format="%d/%m/%Y %H:%M:%S")
btc_eur_dict = {
'Date': 'Datum',
'Open': 'Eröffnung (USD)',
'Close': 'Schlusskurs (USD)',
'High': 'Stundenhoch (USD)',
'Low': 'Stundentief (USD)',
'Volume BTC': 'Volumen (BTC)',
'Volume USD': 'Volumen (USD)',
'HL_AVG': 'Mittel aus Stundenhoch/Stundentief (USD)',
'USD_EUR': 'Wechselkurs USD/EUR',
'BTC_EUR': 'Wechselkurs BTC/EUR'
}
# read and parse coinbase historical btc/usd data
btc_eur_df = pd.read_csv('data/Coinbase_BTCUSD_1h.csv', usecols=['Date', 'Open', 'Close', 'Low', 'High', 'Volume BTC', 'Volume USD'], parse_dates=['Date'], date_parser=parse_coinbase_date, index_col=['Date'])
# localize datetime index and convert to german timezone
btc_eur_df.index = btc_eur_df.index.tz_localize('utc').tz_convert('Europe/Berlin')
btc_eur_df['HL_AVG'] = btc_eur_df[['High','Low']].mean(axis=1)
btc_eur_df['USD_EUR'] = btc_eur_df.apply(lambda x: convert_usd_eur(x, True), axis=1)
btc_eur_df['BTC_EUR'] = btc_eur_df.apply(convert_usd_eur, axis=1)
# translate keys to german/human readable and save to disk
btc_eur_df.rename(columns=btc_eur_dict).to_csv(filename_tpl.format('BTCEUR_Umrechnung'), decimal=',', encoding='utf-8-sig')
print(btc_eur_df.head(10))
bitpay_df = pd.read_csv('data/BitPay-Export.csv', usecols=['Date', 'Destination', 'Amount', 'Currency'], parse_dates=['Date'], index_col=['Date'])
# localize datetime index and convert to german timezone
bitpay_df.index = pd.to_datetime(bitpay_df.index).tz_convert('Europe/Berlin')
# determine transaction type from destination column
bitpay_df.Destination = bitpay_df.Destination.fillna('Eingang')
bitpay_df.loc[(bitpay_df.Destination == 'Bitcoin Network Fees'), 'Destination'] = 'Netzwerkgebühren'
bitpay_df.loc[(bitpay_df.Destination != 'Netzwerkgebühren') & (bitpay_df.Destination != 'Eingang'), 'Destination'] = 'Ausgang'
bitpay_df['BTC_EUR'] = bitpay_df.apply(lambda r: convert_btc_eur(r, btc_eur_df, True), axis=1)
bitpay_df['EUR_Amount'] = bitpay_df.apply(lambda r: convert_btc_eur(r, btc_eur_df), axis=1)
bitpay_df.index = bitpay_df.index.rename('Datum')
bitpay_df.rename(columns={
'Destination': 'Transaktionstyp',
'Amount': 'Einnahme Bitcoin (BTC)',
'Currency': 'Währung',
'BTC_EUR': 'Wechselkurs BTC/EUR',
'EUR_Amount': 'Einnahme €'
}).to_csv(filename_tpl.format('BitPay_BTC_Transaktionen'), decimal=',', encoding='utf-8-sig')
print(bitpay_df.head(10))
paypal_tx_files = glob.glob(os.path.join('data', "PayPal*.csv"))
# Date,"Time","TimeZone","Name","Type","Status","Currency","Gross","Fee","Net","From Email Address","To Email Address","Transaction ID","CounterParty Status","Shipping Address","Address Status","Item Title","Item ID","Shipping and Handling Amount","Insurance Amount","Sales Tax","Option 1 Name","Option 1 Value","Option 2 Name","Option 2 Value","Auction Site","Buyer ID","Item URL","Closing Date","Escrow Id","Reference Txn ID","Invoice Number","Custom Number","Quantity","Receipt ID","Balance","Address Line 1","Address Line 2/District/Neighborhood","Town/City","State/Province/Region/County/Territory/Prefecture/Republic","Zip/Postal Code","Country","Contact Phone Number","Subject","Note","Payment Source","Card Type","Transaction Event Code","Payment Tracking ID","Bank Reference ID","Transaction Buyer Country Code","Item Details","Coupons","Special Offers","Loyalty Card Number","Authorization Review Status","Protection Eligibility","Country Code","Balance Impact","Buyer Wallet","Comment 1","Comment 2","Invoice Number","PO Number","Customer Reference Number","Payflow Transaction ID (PNREF)","Tip","Discount","Seller ID","Risk Filter"
paypal_dfs = (pd.read_csv(f, decimal=',', thousands='.', usecols=['Date', 'Time', 'Type', 'Status', 'Currency', 'Gross', 'Fee', 'Net', 'From Email Address', 'To Email Address', 'Balance', 'Payment Source', 'Transaction Buyer Country Code', 'State/Province/Region/County/Territory/Prefecture/Republic', 'Balance Impact', 'Transaction Event Code', 'Transaction ID', 'Reference Txn ID'], date_parser=parse_paypal_date, parse_dates=[['Date', 'Time']]) for f in paypal_tx_files)
paypal_df = pd.concat(paypal_dfs, ignore_index=True)
paypal_df = paypal_df.set_index('Date_Time')
# localize datetime column
paypal_df.index = paypal_df.index.tz_localize('Europe/Berlin')
paypal_df.index = paypal_df.index.rename('Datum')
paypal_dfr = paypal_df.rename(columns={
'State/Province/Region/County/Territory/Prefecture/Republic': 'Bundesstaat',
'From Email Address': 'Absender',
'To Email Address': 'Empfänger',
'Transaction Buyer Country Code': 'Land',
'Currency': 'Währung',
'Type': 'Transaktionstyp'
})
paypal_dfr.to_csv(filename_tpl.format('PayPal_Transactions'), decimal=',', encoding='utf-8-sig', float_format='%.2f')
print(paypal_dfr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment