Last active
January 8, 2018 15:58
-
-
Save bicubic/97cec216d2e1e4c093e04ccc738910e3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"%pylab inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def make_data(samples, x_dims):\n", | |
" global x, y, tx, ty, vx, vy, features\n", | |
" y = pd.DataFrame({'y': np.random.normal(0, 10, size=samples)\n", | |
" })\n", | |
"\n", | |
" x = pd.DataFrame()\n", | |
"\n", | |
" for i in range(x_dims):\n", | |
" x['f{0}'.format(i)] = np.random.normal(0, 10, size=samples)\n", | |
" x['f0'] = y * 123.\n", | |
" \n", | |
" HELD_OUT_RATIO = 0.2\n", | |
" slicer = int(len(x) * HELD_OUT_RATIO)\n", | |
"\n", | |
" train_x = x[slicer:]\n", | |
" train_y = y[slicer:]\n", | |
"\n", | |
" val_x = x[:slicer]\n", | |
" val_y = y[:slicer]\n", | |
" \n", | |
" tx = np.array(train_x.values)\n", | |
" tx = tx.reshape((int(len(train_x)/1), x_dims))\n", | |
" ty = train_y.values\n", | |
" ty = ty.reshape((len(ty), 1))\n", | |
" vx = np.array(val_x.values)\n", | |
" vx = vx.reshape((int(len(val_x)/1), x_dims))\n", | |
" vy = val_y.values\n", | |
" vy = vy.reshape((len(vy), 1))\n", | |
" features = (0,x_dims)\n", | |
" return x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>f0</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-1372.739221</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-243.376695</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2160.631416</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>-451.650977</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-2080.222461</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" f0\n", | |
"0 -1372.739221\n", | |
"1 -243.376695\n", | |
"2 2160.631416\n", | |
"3 -451.650977\n", | |
"4 -2080.222461" | |
] | |
}, | |
"execution_count": 48, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"make_data(samples=1000, x_dims=1).head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def test_model(model):\n", | |
" model.fit(tx, ty.transpose()[0])\n", | |
" r = pd.DataFrame(vy)\n", | |
" r['predicted'] = model.predict(vx)\n", | |
" return (r.predicted - r[0]).pow(2).mean()\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn import linear_model\n", | |
"from sklearn.ensemble import RandomForestRegressor\n", | |
"from sklearn.svm import SVR" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"models = [\n", | |
" linear_model.LinearRegression(),\n", | |
" RandomForestRegressor(),\n", | |
" linear_model.Lasso(),\n", | |
" SVR(kernel='rbf'),\n", | |
" SVR(kernel='linear')\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"LinearRegression 1.9294854828388685e-30\n", | |
"RandomForestRegressor 0.03130526902061943\n", | |
"Lasso 5.718565928185423e-07\n", | |
"SVR 100.3770700128741\n", | |
"SVR 0.0010088304997091927\n" | |
] | |
} | |
], | |
"source": [ | |
"for model in models:\n", | |
" name = str(model)\n", | |
" name = name[:name.find('(')]\n", | |
" print(name, test_model(model))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:python3]", | |
"language": "python", | |
"name": "conda-env-python3-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment