Created
September 15, 2011 00:27
-
-
Save acslater00/1218208 to your computer and use it in GitHub Desktop.
pandas/statsmodel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walkthrough of pandas aggregates / group-bys and OLS regressions over a
# `transactions` data set.
# NOTE(review): `transactions` is not defined anywhere in this snippet; it
# must already be bound (e.g. in the interactive session) before this runs.
import numpy as np
import scikits.statsmodels.api as sm
from pandas import *  # star import supplies DataFrame and ols used below

df = DataFrame(transactions)

# easy aggregates
print(np.mean(df['x']))
print(np.mean(df['price']))

# subset of transactions
mlb = df[df['event_type'] == 'mlb']

# group by transactions
gbyevent = df.groupby('event_id')
gbyconfig = df.groupby(['venue_id', 'event_type', 'config_id'])

# describe() yields the full set of summary statistics per event, not only
# the average, despite the variable name.
event_avg_prices = gbyevent['price'].describe()
print(event_avg_prices[500200])  # some random event id we care about

# NOTE(review): the original indentation was lost; everything below that
# reads `frame` is assumed to belong to the loop body -- confirm.
for (vid, et, cid), frame in gbyconfig:
    # in here, frame is a subset of transactions corresponding to each config
    # can run regressions or whatever
    # Single-argument print with %-formatting gives the same space-separated
    # output under both Python 2 and Python 3.
    print("%s %s %s %s" % (vid, et, cid, frame))

    # run a regression
    # NOTE(review): `.ix` was removed in pandas 1.0 (`.loc` is the
    # replacement) and `pandas.ols` was removed in 0.20; both are kept here
    # because the file targets the older API.
    endo = frame['price']
    exog = frame.ix[:, ['predicted', 'distance']]
    model = ols(y=endo, x=exog)
    print(model)

    # alternative ols (statsmodels...more powerful stuff in there)
    # NOTE(review): sm.OLS does not add an intercept on its own; if one is
    # wanted, exog needs a constant column (sm.add_constant) -- confirm.
    results = sm.OLS(endo, exog).fit()
    print(results.summary())

    # section averages
    sect = frame.groupby('section')
    # Select the column before aggregating so only `price` is averaged,
    # instead of averaging every column and discarding the rest.
    sect_avg = sect['price'].mean()
    # Attach each row's section-level average price as a regressor.
    frame["savg"] = frame["section"].map(lambda s: sect_avg[s])
    endo = frame['price']
    exog = frame.ix[:, ['savg']]
    results = sm.OLS(endo, exog).fit()
    print(results.summary())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment