Skip to content

Instantly share code, notes, and snippets.

@zhukovgreen
Created May 31, 2017 07:02
Show Gist options
  • Save zhukovgreen/3d6192be3d1d98b04f7d02dac568602e to your computer and use it in GitHub Desktop.
Save zhukovgreen/3d6192be3d1d98b04f7d02dac568602e to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import visuals as vs
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn import cross_validation
from sklearn.metrics import r2_score
data = pd.read_csv('housing.csv')
prices = data.MEDV
features = data.drop('MEDV', axis=1)
X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, prices,
test_size=0.2,
random_state=1)
def performance_metric(y_true, y_predict):
""" Calculates and returns the performance score between
true and predicted values based on the metric chosen. """
# TODO: Calculate the performance score between 'y_true' and 'y_predict'
score = r2_score(y_predict, y_true)
# Return the score
return score
# TODO: Import 'make_scorer', 'DecisionTreeRegressor', and 'GridSearchCV'
from sklearn.metrics import make_scorer
from sklearn.model_selection import ShuffleSplit
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
def fit_model(X, y):
""" Performs grid search over the 'max_depth' parameter for a
decision tree regressor trained on the input data [X, y]. """
# Create cross-validation sets from the training data
cv = ShuffleSplit(n_splits=10, test_size=0.20, random_state=0)
# TODO: Create a decision tree regressor object
regressor = DecisionTreeRegressor()
# TODO: Create a dictionary for the parameter 'max_depth' with a range from 1 to 10
params = dict(max_depth=np.arange(1, 11))
# TODO: Transform 'performance_metric' into a scoring function using 'make_scorer'
scoring_fnc = make_scorer(performance_metric)
# TODO: Create the grid search object
grid = GridSearchCV(estimator=regressor, param_grid=params, scoring=scoring_fnc, cv=cv)
# Fit the grid search object to the data to compute the optimal model
grid = grid.fit(X, y)
# Return the optimal model after fitting the data
print grid.best_score_
return grid.best_estimator_
reg = fit_model(features, prices)
regressor = DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
max_leaf_nodes=None, min_impurity_split=1e-07,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False, random_state=None,
splitter='best')
reg1 = regressor.fit(X_train, y_train)
print r2_score(y_test, regressor.predict(X_test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment