Skip to content

Instantly share code, notes, and snippets.

@MattMcMurray
Last active November 10, 2017 16:29
Show Gist options
  • Save MattMcMurray/0809b174c49c4f962fde80d18d1be737 to your computer and use it in GitHub Desktop.
Save MattMcMurray/0809b174c49c4f962fde80d18d1be737 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Linear Regression"
]
},
{
"cell_type": "code",
"execution_count": 261,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 261,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"\n",
"# Baseline: fit a plain linear regression on prepared_data against labels.\n",
"lin_reg = LinearRegression()\n",
"lin_reg.fit(X=prepared_data, y=labels)"
]
},
{
"cell_type": "code",
"execution_count": 262,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predictions: [ 0.26903984 0.19360012 0.19776904 0.23113952 0.17055647]\n",
"Labels: 44864 1\n",
"24799 1\n",
"108547 1\n",
"24979 1\n",
"10221 0\n",
"Name: No-show, dtype: int64\n"
]
}
],
"source": [
"# Spot-check the linear model: push the first five rows through full_pipeline\n",
"# and print the predictions followed by the matching labels.\n",
"some_data_pt = data.head(5)\n",
"some_data_label = labels.head(5)\n",
"some_data_pt_prepared = full_pipeline.transform(some_data_pt)\n",
"print('Predictions:', lin_reg.predict(some_data_pt_prepared))\n",
"print('Labels:', some_data_label)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Not even close...** The model outputs continuous scores while the labels are 0/1."
]
},
{
"cell_type": "code",
"execution_count": 263,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.39229643378648837"
]
},
"execution_count": 263,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import mean_squared_error\n",
"\n",
"# RMSE of the linear model, scored on the full prepared_data / labels set.\n",
"predictions = lin_reg.predict(prepared_data)\n",
"lin_mse = mean_squared_error(y_true=labels, y_pred=predictions)\n",
"lin_rmse = np.sqrt(lin_mse)\n",
"lin_rmse"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DecisionTreeRegressor"
]
},
{
"cell_type": "code",
"execution_count": 264,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n",
" max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
" min_impurity_split=None, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" presort=False, random_state=42, splitter='best')"
]
},
"execution_count": 264,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.tree import DecisionTreeRegressor\n",
"\n",
"# Seeded decision tree regressor, fitted on prepared_data against labels.\n",
"dec_tree = DecisionTreeRegressor(random_state=42)\n",
"dec_tree.fit(X=prepared_data, y=labels)"
]
},
{
"cell_type": "code",
"execution_count": 265,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.2968375571401668"
]
},
"execution_count": 265,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# RMSE of the decision tree, scored on the full prepared_data / labels set.\n",
"tree_predictions = dec_tree.predict(prepared_data)\n",
"tree_mse = mean_squared_error(y_true=labels, y_pred=tree_predictions)\n",
"tree_rmse = np.sqrt(tree_mse)\n",
"tree_rmse"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Better, but still not great."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## RandomForestRegressor"
]
},
{
"cell_type": "code",
"execution_count": 266,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.31167539452639936"
]
},
"execution_count": 266,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.ensemble import RandomForestRegressor\n",
"\n",
"# Seed the forest (same seed as the decision tree) so the RMSE below is\n",
"# reproducible across kernel restarts instead of changing on every run.\n",
"forest_reg = RandomForestRegressor(random_state=42)\n",
"forest_reg.fit(prepared_data, labels)\n",
"\n",
"# RMSE of the forest, scored on the same prepared_data it was just fitted on.\n",
"forest_preds = forest_reg.predict(prepared_data)\n",
"forest_mse = mean_squared_error(labels, forest_preds)\n",
"forest_rmse = np.sqrt(forest_mse)\n",
"forest_rmse"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Worse (on the training data, at least). Let's go back to Decision Trees and see if we can fine-tune that model."
]
},
{
"cell_type": "code",
"execution_count": 283,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"RandomizedSearchCV(cv=None, error_score='raise',\n",
" estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n",
" max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
" min_impurity_split=None, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" presort=False, random_state=42, splitter='best'),\n",
" fit_params=None, iid=True, n_iter=1000, n_jobs=1,\n",
" param_distributions={'splitter': ['best', 'random'], 'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f8c884dae10>, 'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f8c884da4a8>},\n",
" pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
" return_train_score='warn', scoring='neg_mean_squared_error',\n",
" verbose=0)"
]
},
"execution_count": 283,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import RandomizedSearchCV\n",
"from scipy.stats import randint as sp_randint\n",
"\n",
"# Search space for the tree. sp_randint(low, high) draws integers from\n",
"# [low, high), i.e. the upper bound is exclusive.\n",
"params = {\n",
"    'max_depth': sp_randint(10, 100000),\n",
"    'max_features': sp_randint(1, 10),\n",
"    'splitter': ['best', 'random']\n",
"}\n",
"\n",
"dec_tree = DecisionTreeRegressor(random_state=42)\n",
"\n",
"# random_state fixes which candidates are sampled from the distributions, so\n",
"# best_params_ / best_estimator_ are reproducible on a fresh kernel.\n",
"rand_search = RandomizedSearchCV(dec_tree, param_distributions=params,\n",
"                                 n_iter=1000,\n",
"                                 scoring='neg_mean_squared_error',\n",
"                                 random_state=42)\n",
"\n",
"rand_search.fit(prepared_data, labels)"
]
},
{
"cell_type": "code",
"execution_count": 284,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'max_depth': 21, 'max_features': 2, 'splitter': 'best'}"
]
},
"execution_count": 284,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rand_search.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 285,
"metadata": {},
"outputs": [],
"source": [
"final_model = rand_search.best_estimator_"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment