Kaggle - House Prices
import numpy as np
from sklearn.model_selection import KFold
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn.preprocessing import MaxAbsScaler
from sklearn.metrics import mean_squared_error

# df is assumed to be the preprocessed, all-numeric House Prices training DataFrame
X = df.drop('SalePrice', axis=1)
Y = df['SalePrice']

# Max-abs scale the features, then reduce them to 100 principal components
xpca = PCA(n_components=100).fit_transform(MaxAbsScaler().fit_transform(X))

kf = KFold(n_splits=10, shuffle=True)
results = []
for train_index, test_index in kf.split(xpca):
    X_train, X_test = xpca[train_index], xpca[test_index]
    Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]

    regr = linear_model.Ridge()
    regr.fit(X_train, Y_train)
    predictions = regr.predict(X_test)

    # RMSE on the raw target, kept for reference
    error = np.sqrt(mean_squared_error(Y_test, predictions))
    # RMSLE, the Kaggle House Prices evaluation metric
    RMSLE = np.sqrt(np.sum((np.log(predictions + 1) - np.log(Y_test + 1)) ** 2) / len(Y_test))
    results += [RMSLE]

print(sum(results) / len(results))
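
The same 10-fold evaluation can be written more compactly with a Pipeline and cross_val_score. This is a minimal sketch, assuming a recent scikit-learn (which ships the built-in 'neg_mean_squared_log_error' scorer) and the same preprocessed df as above; a side benefit is that the scaler and PCA are refit on every training split, so the held-out fold never leaks into the projection.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler

X = df.drop('SalePrice', axis=1)
Y = df['SalePrice']

# Scale -> 100 principal components -> Ridge, refit inside each fold
model = make_pipeline(MaxAbsScaler(), PCA(n_components=100), Ridge())

# The scorer returns negated MSLE, so negate it and take the square root for RMSLE
scores = cross_val_score(model, X, Y,
                         cv=KFold(n_splits=10, shuffle=True),
                         scoring='neg_mean_squared_log_error')
print(np.sqrt(-scores).mean())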