Created
April 26, 2014 22:45
-
-
Save agibsonccc/11333213 to your computer and use it in GitHub Desktop.
Convert data frame for profit curve
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def transform_column(df, column_name, fn):
    """
    Transform one column of a data frame in place.

    Applies ``fn`` to every value of ``df[column_name]`` and stores the
    result back under the same column name.

    Args:
        df: data frame to modify; it is mutated, not copied.
        column_name: label of the column to transform.
        fn: callable invoked once per value in the column.

    Returns:
        None. The change is visible through ``df`` itself.
    """
    df[column_name] = df[column_name].apply(fn)
def vectorize_feature_index(df, label_column):
    """
    Build a float feature matrix (and label vector) from a data frame.

    Every column except ``label_column`` is treated as a feature. The
    feature matrix is stored in the module-level global ``X`` (existing
    callers read it from there) and is also returned together with the
    label values so callers no longer need the global.

    Scaling and train/test splitting are intentionally not done here;
    apply them to the returned arrays if needed.

    Args:
        df: data frame whose feature columns are all convertible to float.
        label_column: name of the column holding the labels.

    Returns:
        Tuple ``(X, y)``: ``X`` is a 2-D float array with one column per
        feature, ``y`` is the array of values from ``label_column``.

    Raises:
        KeyError: if ``label_column`` is not a column of ``df``.
        ValueError: if a feature value cannot be converted to float.
    """
    global X
    feature_names = []
    for feature_name in df.columns.values:
        # Debug trace of every column seen, label column included.
        print(feature_name)
        # Keep first occurrence only so duplicated column labels are not
        # selected twice.
        if feature_name != label_column and feature_name not in feature_names:
            feature_names.append(feature_name)
    # ``.values`` + builtin ``float`` work on both old and current
    # pandas/NumPy (``as_matrix`` and ``np.float`` have been removed).
    X = df[feature_names].values.astype(float)
    # Labels are the column's values, not its row index.
    y = df[label_column].values
    return X, y
# Load users joined with their events into a single data frame.
# ``session``, ``Users``, ``Event`` and ``query_to_df`` are defined outside
# this snippet.
q = session.query(Users).join(Event).add_entity(Event)
df = query_to_df(session, q)
# Drop identifier / bookkeeping columns; ``axis=1`` is passed by keyword
# because newer pandas no longer accepts it positionally.
df = df.drop(
    ['Users_id', 'Event_id', 'Event_User_Id', 'Event_Meal_Id', 'Users_Created_Date'],
    axis=1,
)
# Single-argument print calls behave the same on Python 2 and Python 3.
print(df.columns)
print(df.Users_date.dtype)
def to_epoch(time_input):
    """
    Convert a datetime to seconds since 1970-01-01 00:00:00.

    Naive datetimes are measured against the naive epoch as-is (i.e. they
    are taken to already be in the desired reference time zone).
    Timezone-aware datetimes are first normalised to UTC so the
    subtraction against the naive epoch is valid.

    Args:
        time_input: a ``datetime``-like value supporting subtraction with
            ``datetime`` and, when aware, ``replace`` / ``utcoffset``.

    Returns:
        Float number of seconds elapsed since the epoch.
    """
    if getattr(time_input, "tzinfo", None) is not None:
        # Shift to UTC and strip tzinfo; aware - naive would raise TypeError.
        time_input = time_input.replace(tzinfo=None) - time_input.utcoffset()
    return (time_input - datetime(1970, 1, 1)).total_seconds()
# Encode categorical columns as numbers via lookup tables defined outside
# this snippet.
# NOTE(review): if these are plain dicts, ``.get`` maps unknown values to
# None -- confirm that is acceptable before the float conversion below.
transform_column(df, 'Event_Type', event_to_num.get)
transform_column(df, 'Users_Campaign_ID', campaign_to_num.get)
# Convert the date columns to seconds since the epoch.
transform_column(df, 'Users_date', to_epoch)
transform_column(df, 'Event_date', to_epoch)
print(df)
# vectorize_feature_index publishes the feature matrix through the
# module-level global ``X``, which is printed next.
vectorize_feature_index(df, 'Event_Type')
print(X)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment