Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jason-curtis/10286720 to your computer and use it in GitHub Desktop.
Save jason-curtis/10286720 to your computer and use it in GitHub Desktop.
Statsmodels Issue 987 - formula support in wls_prediction_std (2)
{
"metadata": {
"name": "",
"signature": "sha256:d95c278a4f5c235b14b85ab9c6888ff0e0b56e3637c67e326a84f15ed991a89e"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Lack of formula support in wls_prediction_std"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://github.com/statsmodels/statsmodels/issues/987\n",
"\n",
"Is it possible to generate exog such that wls_prediction_std will accept it? Or is this just a bug/missing feature?"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from scipy import stats\n",
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels.formula.api as sm\n",
"\n",
"# The important one\n",
"from statsmodels.sandbox.regression.predstd import wls_prediction_std\n",
"\n",
"d = pd.DataFrame({'x': np.random.randn(100) })\n",
"d['y'] = d.x + np.random.randn(100)\n",
"f = sm.ols('y~x', d).fit()\n",
"\n",
"new_exog=pd.DataFrame({'x':[0.5]})"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# This works, which makes me think that this format for new_exog should be sufficient\n",
"print f.predict(new_exog)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[ 0.36164212]\n"
]
}
],
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# ...But this doesn't work\n",
"print wls_prediction_std(f, new_exog)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "wrong shape of exog",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-44-723c5af991d8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# ...But this doesn't work\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mprint\u001b[0m \u001b[0mwls_prediction_std\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_exog\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/statsmodels/sandbox/regression/predstd.pyc\u001b[0m in \u001b[0;36mwls_prediction_std\u001b[1;34m(res, exog, weights, alpha)\u001b[0m\n\u001b[0;32m 79\u001b[0m \u001b[0mexog\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_2d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexog\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcovb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[0mexog\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 81\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'wrong shape of exog'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 82\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexog\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: wrong shape of exog"
]
}
],
"prompt_number": 44
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Workaround 1\n",
"=====\n",
"\n",
"Steal the exog transformation wrapper from model.predict()"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def transform_exog_to_model(fit, exog):\n",
" transform=True\n",
" self=fit\n",
" \n",
" # The following is lifted straight from statsmodels.base.model.Results.predict()\n",
" if transform and hasattr(self.model, 'formula') and exog is not None:\n",
" from patsy import dmatrix\n",
" exog = dmatrix(self.model.data.orig_exog.design_info.builder,\n",
" exog)\n",
"\n",
" if exog is not None:\n",
" exog = np.asarray(exog)\n",
" if exog.ndim == 1 and (self.model.exog.ndim == 1 or\n",
" self.model.exog.shape[1] == 1):\n",
" exog = exog[:, None]\n",
" exog = np.atleast_2d(exog) # needed in count model shape[1]\n",
" \n",
" # end lifted code\n",
" return exog"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print new_exog"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" x\n",
"0 0.5\n",
"\n",
"[1 rows x 1 columns]\n"
]
}
],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"transformed_exog = transform_exog_to_model(f, new_exog)\n",
"print transformed_exog"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[ 1. 0.5]]\n"
]
}
],
"prompt_number": 47
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print wls_prediction_std(f, transformed_exog, weights=[1])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(array([ 0.99865096]), array([-1.62014821]), array([ 2.34343245]))\n"
]
}
],
"prompt_number": 49
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Better fix\n",
"====\n",
"\n",
"wls_prediction_std should actually use Result.predict"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def wls_prediction_std(res, exog=None, weights=None, alpha=0.05):\n",
" covb = res.cov_params()\n",
" if exog is None:\n",
" exog = res.model.exog\n",
" predicted = res.fittedvalues\n",
" if weights is None:\n",
" weights = res.model.weights\n",
" else:\n",
" # Using res.predict() is less hacky than using res.model.predict()\n",
" predicted = res.predict(exog)\n",
"\n",
" if covb.shape[1] != exog.shape[1]:\n",
" # raise ValueError('wrong shape of exog') # Nope\n",
" # Instead of panicking/raising ValueError in this case, transform \n",
" # the exog appropriately so that the interval calculations below can\n",
" # function\n",
" exog = transform_exog_to_model(res, exog)\n",
"\n",
" if weights is None:\n",
" weights = 1.\n",
" else:\n",
" weights = np.asarray(weights)\n",
" if len(weights) > 1 and len(weights) != exog.shape[0]:\n",
" raise ValueError('weights and exog do not have matching shape')\n",
"\n",
" # The rest of this is unaltered\n",
"\n",
" if weights is None:\n",
" weights = res.model.weights\n",
"\n",
"\n",
" # full covariance:\n",
" #predvar = res3.mse_resid + np.diag(np.dot(X2,np.dot(covb,X2.T)))\n",
" # predication variance only\n",
" predvar = res.mse_resid/weights + (exog * np.dot(covb, exog.T).T).sum(1)\n",
" predstd = np.sqrt(predvar)\n",
" tppf = stats.t.isf(alpha/2., res.df_resid)\n",
" interval_u = predicted + tppf * predstd\n",
" interval_l = predicted - tppf * predstd\n",
" return predstd, interval_l, interval_u"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 55
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# And now, does this work?\n",
"print wls_prediction_std(f, new_exog)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(array([ 0.99865096]), array([-1.62014821]), array([ 2.34343245]))\n"
]
}
],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment