Skip to content

Instantly share code, notes, and snippets.

@jiffyclub
Created May 7, 2014 18:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jiffyclub/cd8e26542758e4e20436 to your computer and use it in GitHub Desktop.
Save jiffyclub/cd8e26542758e4e20436 to your computer and use it in GitHub Desktop.
Comparison of performance using statsmodels.formula.api and statsmodels.api.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:cda612edbb49dc07450f70b9797e0bf67b9d6ae2e7bcb0cf46aa2645bdc961e1"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Formula used by the smf.ols fits below: response `a`, regressors\n",
"# np.exp(b) and np.log(c).\n",
"patsy_exp = 'a ~ np.exp(b) + np.log(c)'"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Synthetic benchmark frame with `num` rows and columns a, b, c.\n",
"num = 5000000\n",
"df = pd.DataFrame(dict(\n",
"    a=np.linspace(10, num, num),\n",
"    b=np.log(np.linspace(500, num, num)),\n",
"    c=np.exp(np.linspace(1, 100, num)),\n",
"))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 60
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Time building the model from the formula plus fitting it.\n",
"%timeit fresult = smf.ols(patsy_exp, data=df).fit()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 loops, best of 3: 2.21 s per loop\n"
]
}
],
"prompt_number": 61
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%timeit\n",
"# Build the design matrix by hand. The formula fit above includes an\n",
"# intercept, while sm.OLS fits exactly the columns it is given, so add a\n",
"# matching constant column here; otherwise the two timings are for\n",
"# different models.\n",
"exog_cols = ['const', 'b', 'c']\n",
"df2 = pd.DataFrame({'const': 1.0,\n",
"                    'b': np.exp(df.b),\n",
"                    'c': np.log(df.c)})\n",
"result = sm.OLS(df.a, df2[exog_cols]).fit()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Single-row inputs for predict(): raw values for the formula model, and the\n",
"# same values already transformed (exp(b), log(c)) for the plain OLS model.\n",
"test_df = pd.DataFrame(dict(b=[2], c=[3]))\n",
"test_df2 = pd.DataFrame(dict(b=np.exp([2]), c=np.log([3])))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fit through the formula API, then predict on the raw test row.\n",
"formula_fit = smf.ols(patsy_exp, data=df).fit()\n",
"formula_fit.predict(test_df)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 58,
"text": [
"array([-530.96187943])"
]
}
],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Rebuild the design matrix here: names assigned inside a %%timeit cell are\n",
"# not kept in the notebook namespace, so `df2` would be undefined after a\n",
"# fresh Restart & Run All.\n",
"# A constant column is included in both the training and the test matrix so\n",
"# this model has the same intercept term as the formula fit; without it the\n",
"# two predictions are for different models and disagree.\n",
"exog_cols = ['const', 'b', 'c']\n",
"df2 = pd.DataFrame({'const': 1.0,\n",
"                    'b': np.exp(df.b),\n",
"                    'c': np.log(df.c)})\n",
"test_exog = pd.DataFrame({'const': 1.0,\n",
"                          'b': test_df2.b,\n",
"                          'c': test_df2.c})\n",
"# Select columns explicitly so train and test share one column order.\n",
"sm.OLS(df.a, df2[exog_cols]).fit().predict(test_exog[exog_cols])"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment