Skip to content

Instantly share code, notes, and snippets.

@dschien
Created July 6, 2015 16:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dschien/66b8764c4ad1b7ed4f33 to your computer and use it in GitHub Desktop.
Save dschien/66b8764c4ad1b7ed4f33 to your computer and use it in GitHub Desktop.
sklearn.linear_model.LinearRegression vs statsmodel OLS
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import scipy as sp\n",
"import statsmodels.api as sm\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.linear_model import LinearRegression\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Data from R ISLR package - write.csv(Boston, \"Boston.csv\", col.names = FALSE)\n",
"boston_df = pd.read_csv(\"../../r/Boston.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(47.117263854857882,\n",
" array([ -3.05335819e+09, 3.05335819e+09, 9.31299461e-02,\n",
" -3.29341722e+00]))"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fitting medv ~ lstat + I(lstat^2)\n",
"boston_df[\"lstat^2\"] = boston_df[\"lstat\"] ** 2\n",
"# fitting medv ~ poly(lstat,4). We already have lstat^2 and lstat from previous\n",
"boston_df[\"lstat^4\"] = np.power(boston_df[\"lstat\"], 4)\n",
"boston_df[\"lstat^3\"] = np.power(boston_df[\"lstat\"], 4)\n",
"X = boston_df[[\"lstat^4\", \"lstat^3\", \"lstat^2\", \"lstat\"]]\n",
"y = boston_df[\"medv\"]\n",
"reg7 = LinearRegression()\n",
"reg7.fit(X, y)\n",
"(reg7.intercept_, reg7.coef_)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# X = boston_df[[\"lstat^4\", \"lstat^3\", \"lstat^2\", \"lstat\"]]\n",
"X = sm.add_constant(X)\n",
"# X = boston_df[[1., \"lstat^4\", \"lstat^3\", \"lstat^2\", \"lstat\"]]\n",
"ols = sm.OLS(y,X).fit()\n",
"# ols.summary()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eps = 0.0000000001\n",
"np.all(np.abs(ols.params.values[1:] - reg7.coef_) < eps)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([ -1.17513710e-05, -1.17509020e-05, 9.23027375e-02,\n",
" -3.27115207e+00])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ols.params.values[1:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment