Skip to content

Instantly share code, notes, and snippets.

@tdpetrou
Created November 15, 2019 02:10
Show Gist options
  • Save tdpetrou/fc8d93788f7c20f8e2db8ade58b07ecb to your computer and use it in GitHub Desktop.
Save tdpetrou/fc8d93788f7c20f8e2db8ade58b07ecb to your computer and use it in GitHub Desktop.
# Compare a shallow decision tree trained on raw features against the same
# tree trained on standardized features, and print every test row on which
# the two models' predictions differ.
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

housing = pd.read_csv('http://bit.ly/kagglehousingtrain')
cols = ['GrLivArea', 'GarageArea']
X = housing[cols].values
# The original script used `y` without ever defining it (NameError).
# NOTE(review): 'SalePrice' is assumed to be the target column of this
# dataset (it is in the Kaggle housing training file) -- confirm.
y = housing['SalePrice'].values

# The scaler is fit on the full matrix so that the raw and scaled splits
# below are the same rows under one fixed per-column transform.
ss = StandardScaler()
X_scaled = ss.fit_transform(X)

# Use the second of five shuffled folds as the single train/test split.
kf = KFold(n_splits=5, shuffle=True, random_state=999)
train_idx, test_idx = list(kf.split(X))[1]
X_train, X_test = X[train_idx], X[test_idx]
# The original indexed an undefined `X_t` here; the scaled matrix is
# `X_scaled`.
X_scaled_train, X_scaled_test = X_scaled[train_idx], X_scaled[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

dtr = DecisionTreeRegressor(max_depth=2, random_state=123)

# Fit and score on the raw features.
dtr.fit(X_train, y_train)
print(dtr.score(X_test, y_test))
y_pred = dtr.predict(X_test)

# Refit the same estimator on the scaled features; y_pred was captured
# above, so the raw-feature predictions are preserved.
dtr.fit(X_scaled_train, y_train)
print(dtr.score(X_scaled_test, y_test))
y_pred_scaled = dtr.predict(X_scaled_test)

# Show the test rows where the two models disagree, in both feature spaces.
filt = y_pred != y_pred_scaled
print(y_pred[filt])
print(y_pred_scaled[filt])
print(X_test[filt])
print(X_scaled_test[filt])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment