Skip to content

Instantly share code, notes, and snippets.

@bicubic
Last active January 8, 2018 15:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bicubic/97cec216d2e1e4c093e04ccc738910e3 to your computer and use it in GitHub Desktop.
Save bicubic/97cec216d2e1e4c093e04ccc738910e3 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def make_data(samples, x_dims):\n",
" global x, y, tx, ty, vx, vy, features\n",
" y = pd.DataFrame({'y': np.random.normal(0, 10, size=samples)\n",
" })\n",
"\n",
" x = pd.DataFrame()\n",
"\n",
" for i in range(x_dims):\n",
" x['f{0}'.format(i)] = np.random.normal(0, 10, size=samples)\n",
" x['f0'] = y * 123.\n",
" \n",
" HELD_OUT_RATIO = 0.2\n",
" slicer = int(len(x) * HELD_OUT_RATIO)\n",
"\n",
" train_x = x[slicer:]\n",
" train_y = y[slicer:]\n",
"\n",
" val_x = x[:slicer]\n",
" val_y = y[:slicer]\n",
" \n",
" tx = np.array(train_x.values)\n",
" tx = tx.reshape((int(len(train_x)/1), x_dims))\n",
" ty = train_y.values\n",
" ty = ty.reshape((len(ty), 1))\n",
" vx = np.array(val_x.values)\n",
" vx = vx.reshape((int(len(val_x)/1), x_dims))\n",
" vy = val_y.values\n",
" vy = vy.reshape((len(vy), 1))\n",
" features = (0,x_dims)\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-1372.739221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-243.376695</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2160.631416</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-451.650977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-2080.222461</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f0\n",
"0 -1372.739221\n",
"1 -243.376695\n",
"2 2160.631416\n",
"3 -451.650977\n",
"4 -2080.222461"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"make_data(samples=1000, x_dims=1).head(5)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def test_model(model):\n",
" model.fit(tx, ty.transpose()[0])\n",
" r = pd.DataFrame(vy)\n",
" r['predicted'] = model.predict(vx)\n",
" return (r.predicted - r[0]).pow(2).mean()\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn import linear_model\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.svm import SVR"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Candidate regressors compared below; hyper-parameters are the library defaults\n",
"# apart from the SVR kernel and the forest's seed.\n",
"models = [\n",
"    linear_model.LinearRegression(),\n",
"    # The forest is built with randomness (bootstrap sampling); pin the seed so that\n",
"    # reruns on the same data give the same score.\n",
"    RandomForestRegressor(random_state=0),\n",
"    linear_model.Lasso(),\n",
"    SVR(kernel='rbf'),\n",
"    SVR(kernel='linear'),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LinearRegression 1.9294854828388685e-30\n",
"RandomForestRegressor 0.03130526902061943\n",
"Lasso 5.718565928185423e-07\n",
"SVR 100.3770700128741\n",
"SVR 0.0010088304997091927\n"
]
}
],
"source": [
"# Fit every candidate and print its class name next to the score test_model returns.\n",
"for model in models:\n",
"    # Take the label from the class itself instead of slicing str(model) at '(':\n",
"    # str.find returns -1 when there is no '(', which would silently drop the last character.\n",
"    name = type(model).__name__\n",
"    print(name, test_model(model))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:python3]",
"language": "python",
"name": "conda-env-python3-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment