Skip to content

Instantly share code, notes, and snippets.

@shuozhang1985
Created October 14, 2016 16:14
Show Gist options
  • Save shuozhang1985/4d22ca788998ac0610d893e4355d1d48 to your computer and use it in GitHub Desktop.
Save shuozhang1985/4d22ca788998ac0610d893e4355d1d48 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import os
# Work from the data directory so the relative CSV paths used in this script
# resolve against it.
os.chdir("/Users/shuozhang/Desktop/data")
# Tab-separated modelling data; only the distinct zipcode / Weekday / Hour
# values are taken from it here.
df = pd.read_csv('nycmodeldata.csv', sep='\t', encoding='utf-8')

#### create the prediction data set
# Build one row per (weekday, hour, zipcode) combination, with zipcode varying
# fastest, then hour, then weekday.
# The levels are sorted so the row order is reproducible: code that later
# aligns per-hour values to these rows by position depends on hours and
# weekdays coming out in a fixed order, which plain set iteration does not
# guarantee.
zipcode = sorted(set(df['zipcode']))
Weekday = sorted(set(df['Weekday']))
Hour = sorted(set(df['Hour']))

# Derive the grid dimensions from the data instead of hard-coding 198 / 24 / 7.
n_zip = len(zipcode)
n_hour = len(Hour)
n_day = len(Weekday)
n_rows = n_zip * n_hour * n_day

# Full zipcode list repeated once per (weekday, hour) slot.
zipcode1 = zipcode * (n_hour * n_day)
# Each hour repeated for every zipcode, and that pattern tiled once per
# weekday. np.tile replaces the old "list-of-map times 7" construction, which
# only worked on Python 2 where map() returned a list.
Hour3 = np.tile(np.repeat(Hour, n_zip), n_day).astype(int)
# Each weekday repeated for every (hour, zipcode) pair.
Weekday1 = np.repeat(Weekday, n_zip * n_hour)
# Constant columns: month fixed to 9 and rain fixed to 0 for every row.
Month1 = np.repeat([9], n_rows)
Rain = np.repeat([0], n_rows)

df1 = pd.DataFrame(Month1, columns=['Month'])
df1['Weekday'] = Weekday1
df1['Hour'] = Hour3
df1['zipcode'] = zipcode1
# Per-day temperature readings: one column per calendar day (labels such as
# '25-Sep'), one row per reading.
# presumably 24 hourly rows per day, in hour order -- verify against temp.csv
temp = pd.read_csv('temp.csv', sep=',', index_col=0)
temp = temp.reset_index()

# Day columns in the order their readings are laid out along df1's rows: the
# first weekday block of df1 receives '25-Sep', the following ones '19-Sep'
# through '24-Sep'.
# NOTE(review): this assumes the weekday blocks in df1 appear in the matching
# order -- confirm against how Weekday is encoded in the model data.
day_columns = ['25-Sep', '19-Sep', '20-Sep', '21-Sep',
               '22-Sep', '23-Sep', '24-Sep']
readings = np.concatenate([np.array(temp[day]) for day in day_columns], axis=0)

# Every reading is shared by all zipcode rows of its (weekday, hour) slot, so
# the repeat factor is rows-per-reading. Deriving it from df1 replaces the
# hard-coded 198 and turns a size mismatch into an explicit error.
rows_per_reading, leftover = divmod(len(df1), len(readings))
if leftover:
    raise ValueError(
        "df1 has %d rows, which is not a multiple of the %d temperature "
        "readings" % (len(df1), len(readings))
    )

# Repeating the concatenated readings element-wise is equivalent to repeating
# each day's readings separately and concatenating the results.
Temp = np.repeat(readings, rows_per_reading)
df1['Temp'] = Temp
df1['Rain'] = Rain
# One-hot encode the zipcode column: one indicator column per distinct value.
dummies = pd.get_dummies(df1['zipcode'])
# Append the indicator columns to the feature frame and discard the raw
# zipcode column, which the indicators now replace.
df2 = pd.concat([df1, dummies], axis=1).drop(['zipcode'], axis=1)
# Persist the finished prediction set as a tab-separated file (the file name
# is kept exactly as the script has always written it).
df2.to_csv('predicitondatafinal.csv', sep='\t')
#### use the 3 models to make a prediction
# RFR, XGB and ols are not defined in this script; they are assumed to be
# already-fitted models exposing .predict() -- TODO confirm where they come
# from and that df2's columns match the features they were trained on.
#
# The raw predictions are wrapped in named Series aligned to df2's index,
# because pd.concat only accepts Series/DataFrame objects and raises TypeError
# when handed bare NumPy arrays.
pred_y_rf = pd.Series(np.ravel(RFR.predict(df2)), index=df2.index, name='rf')
pred_y_xgb = pd.Series(np.ravel(XGB.predict(df2)), index=df2.index, name='xgb')
# Two-column matrix of base-model predictions: the input of the stacking model.
pred_y_com = pd.concat([pred_y_rf, pred_y_xgb], axis=1)
# Score the stacked inputs with predict(): calling fit() here would try to
# re-train the combiner (with no target) instead of producing predictions.
# A plain array is passed so the call does not depend on column names.
# NOTE(review): assumes ols was fitted on [rf, xgb] in this column order.
pred_y_ensemble = pd.Series(np.ravel(ols.predict(pred_y_com.values)),
                            index=df2.index, name='ensemble')
# Side-by-side table of the three prediction columns.
pred_y_final = pd.concat([pred_y_rf, pred_y_xgb, pred_y_ensemble], axis=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment