Skip to content

Instantly share code, notes, and snippets.

@acslater00
Created September 15, 2011 00:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save acslater00/1218208 to your computer and use it in GitHub Desktop.
Save acslater00/1218208 to your computer and use it in GitHub Desktop.
pandas/statsmodel
import numpy as np
import scikits.statsmodels.api as sm
from pandas import *
# Walk-through of basic pandas / statsmodels usage on a table of ticket
# transactions: simple aggregates, boolean filtering, group-by, and
# per-group OLS regressions.
#
# `transactions` is not defined in this snippet; it must already exist in
# the session. The columns read below are: x, price, event_type, event_id,
# venue_id, config_id, predicted, distance and section.
df = DataFrame(transactions)

# easy aggregates
print(np.mean(df['x']))
print(np.mean(df['price']))

# subset of transactions
mlb = df[df['event_type'] == 'mlb']

# group by transactions
gbyevent = df.groupby('event_id')
gbyconfig = df.groupby(['venue_id', 'event_type', 'config_id'])

event_avg_prices = gbyevent['price'].describe()
# .loc selects by event id whether describe() returns a MultiIndexed Series
# (old pandas) or a DataFrame indexed by event id (current pandas); plain
# [...] would be treated as a column lookup in the latter case.
print(event_avg_prices.loc[500200])  # some random event id we care about

for (vid, et, cid), frame in gbyconfig:
    # in here, frame is the subset of transactions for one
    # (venue_id, event_type, config_id) combination;
    # can run regressions or whatever
    print("%s %s %s %s" % (vid, et, cid, frame))

    # run a regression of price on the predicted price and distance
    endo = frame['price']
    exog = frame[['predicted', 'distance']]  # .ix was removed in pandas 1.0

    # With an intercept. This replaces the former pandas.ols(y=..., x=...)
    # call (removed in pandas 0.20), which added an intercept by default.
    model = sm.OLS(endo, sm.add_constant(exog)).fit()
    print(model.summary())

    # Same regressors, no intercept (statsmodels does not add one itself).
    results = sm.OLS(endo, exog).fit()
    print(results.summary())

    # section averages: mean price per section within this config
    sect_avg = frame.groupby('section')['price'].mean()
    # assign() builds a new frame instead of writing a column into the
    # group frame handed out by groupby.
    with_avg = frame.assign(savg=frame['section'].map(sect_avg))

    # regress price on its own section average (no intercept)
    endo = with_avg['price']
    exog = with_avg[['savg']]
    results = sm.OLS(endo, exog).fit()
    print(results.summary())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment