Skip to content

Instantly share code, notes, and snippets.

@wesm
Created July 13, 2011 01:50
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wesm/1079577 to your computer and use it in GitHub Desktop.
Save wesm/1079577 to your computer and use it in GitHub Desktop.
Summarizing an Interactive Brokers activity statement
"""
A little script for summarizing my Interactive Broker statements
"""
from pandas import *
from pandas.util.testing import set_trace as st
import numpy as np
from BeautifulSoup import BeautifulSoup
def get_table_by_id(tables):
table_dict = {}
for table in tables:
name = _get_attr(table, 'id')
if name:
table_dict[name] = table
return table_dict
def _get_attr(elt, needle):
for kind, value in elt.attrs:
if kind == needle:
return value
return None
def scrape_perf_table(perf_table):
rows = perf_table.findAll('tr')
colnames = ['Symbol', 'Description',
'MTM MTD', 'MTM YTD',
'Realized ST MTD', 'Realized ST YTD',
'Realized LT MTD', 'Realized LT YTD']
all_data = []
for row in rows:
entries = row.findAll('td')
if len(entries) != 11:
continue
row_data = []
for entry in entries:
if _get_attr(entry, 'class') == 'td_spacer':
continue
row_data.append(entry.text.replace(',', ''))
all_data.append(row_data)
converted = {}
for name, col in zip(colnames, zip(*all_data)):
try:
converted[name] = np.array(col, dtype=float)
except:
converted[name] = np.array(col)
return DataFrame(converted, columns=colnames)
def _get_underlying(symbol):
splt = symbol.split()
sym = splt[0]
return sym
if len(splt) > 1:
sym = '%s Option' % sym
return sym
def _get_kind(symbol):
return 'Option' if len(symbol.split()) == 2 else 'Stock'
def get_perf_table(path):
soup = BeautifulSoup(open(path).read())
body = soup.html.body
tables = body.findAll('table')
tables_by_id = get_table_by_id(tables)
perf_table = tables_by_id['tblMTDYTDPerfSum']
frame = scrape_perf_table(perf_table)
frame['Underlying'] = frame['Symbol'].map(_get_underlying)
frame['Kind'] = frame['Symbol'].map(_get_kind)
return frame
def get_grouped_pl(path):
frame = get_perf_table(path)
mtm = {}
realized = {}
for kind, group in frame.groupby('Kind'):
mtm_grouped = group['MTM YTD'].groupby(group['Underlying']).sum().order()
realized_grouped = group['Realized ST YTD'].groupby(group['Underlying']).sum().order()
mtm[kind] = mtm_grouped
realized[kind] = realized_grouped
mtm = DataFrame(mtm).fillna(0)
realized = DataFrame(realized).fillna(0)
return mtm, realized
mtm_2011, realized_2011 = get_grouped_pl('ActivityStatement.html')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment