Skip to content

Instantly share code, notes, and snippets.

@bashtage
Created July 30, 2020 18:13
Show Gist options
  • Save bashtage/e7c5f7e66e3e2c38fbfdd3dfbdcaf658 to your computer and use it in GitHub Desktop.
Save bashtage/e7c5f7e66e3e2c38fbfdd3dfbdcaf658 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# with QR(q) we should have a fraction (1 - q) of the data points above the quantile regression line\n",
"# unfortunately this is not the case for the statsmodels implementation\n",
"# based on https://www.statsmodels.org/dev/examples/notebooks/generated/quantile_regression.html\n",
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"import numpy as np\n",
"\n",
"data = sm.datasets.engel.load_pandas().data\n",
"data = sm.add_constant(data) # add 'const' column with all 1's\n",
"# NOTE: despite its name, 'income2' holds the square root of income, not its square\n",
"data['income2'] = np.sqrt(data['income'].values)\n",
"\n",
"def compute_foc(q, err, x):\n",
"    \"\"\"Sample average of the quantile-regression first-order condition.\n",
"\n",
"    Each observation contributes ``q * x_i`` when its residual is positive,\n",
"    ``-(1 - q) * x_i`` when it is negative and nothing when it is exactly zero.\n",
"\n",
"    q   : quantile level\n",
"    err : residuals (y - prediction), one per row of x; array or pandas Series\n",
"    x   : 2-D regressor matrix, one row per observation\n",
"\n",
"    Returns a 1-D array holding one averaged score per column of x.\n",
"    \"\"\"\n",
"    # Coerce to ndarray first: callers pass a pandas Series, and Series[:, None]\n",
"    # (multi-dimensional indexing) was deprecated in pandas 1.x and removed in 2.0.\n",
"    sgn = np.sign(np.asarray(err))[:, None]\n",
"    weight = (1 - q) * (sgn < 0) + q * (sgn > 0)\n",
"    return np.mean(weight * sgn * np.asarray(x), axis=0)\n",
" \n",
"df = {}\n",
"\n",
"def get_smcnt(q, data, xcols):\n",
"    \"\"\"Fit statsmodels QuantReg at quantile q and count the points above the fit.\n",
"\n",
"    Prints OK if the number of observations strictly above the fitted values is\n",
"    within 5% of the theoretical count (1 - q) * n, otherwise prints ERROR.\n",
"\n",
"    q     : quantile\n",
"    data  : DF with X and y; the column 'foodexp_pred' is added or overwritten\n",
"    xcols : cols used for X\n",
"\n",
"    Side effect: the averaged first-order conditions are stored in the global\n",
"    dict `df` under the key (q, tuple(xcols)).\n",
"    \"\"\"\n",
"    y = np.log(data['foodexp'].values)\n",
"    X = data[xcols].values\n",
"    qmdl = sm.QuantReg(y, X)\n",
"    res = qmdl.fit(q)\n",
"    pred = np.asarray(qmdl.predict(res.params, exog=X))  # in sample prediction\n",
"    data['foodexp_pred'] = pred\n",
"    # plain ndarray residuals keep compute_foc independent of pandas indexing rules\n",
"    foc = compute_foc(q, y - pred, X)\n",
"    expected = np.round((1 - q) * len(data), 0)  # 1 - q data fraction\n",
"    actual = int(np.sum(y > pred))  # number of points above the QR line\n",
"    # deviation relative to the theoretical count; max() guards a zero denominator\n",
"    rel_dev = np.abs(expected - actual) / max(expected, 1.0)\n",
"    result = 'ERROR' if rel_dev > 0.05 else 'OK'\n",
"    foc_str = ', '.join(f'{v:0.3f}' for v in foc)\n",
"    print('xcols: ' + str(xcols) +\n",
"          ' expected count: ' + str(expected) +\n",
"          ' actual count: ' + str(actual) + ' result: ' + result + ' foc: ' + foc_str)\n",
"    df[(q, tuple(xcols))] = pd.Series(foc, index=xcols)\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"xcols: ['income2'] expected count: 176.0 actual count: 192 result: ERROR foc: -0.062\n",
"xcols: ['income2'] expected count: 118.0 actual count: 137 result: ERROR foc: -0.118\n",
"xcols: ['income2'] expected count: 59.0 actual count: 76 result: ERROR foc: -0.002\n",
"xcols: ['const'] expected count: 176.0 actual count: 176 result: OK foc: -0.001\n",
"xcols: ['const'] expected count: 118.0 actual count: 118 result: OK foc: 0.002\n",
"xcols: ['const'] expected count: 59.0 actual count: 59 result: OK foc: 0.001\n",
"xcols: ['income'] expected count: 176.0 actual count: 206 result: ERROR foc: -3.421\n",
"xcols: ['income'] expected count: 118.0 actual count: 160 result: ERROR foc: 2.139\n",
"xcols: ['income'] expected count: 59.0 actual count: 97 result: ERROR foc: 2.587\n",
"xcols: ['const', 'income'] expected count: 176.0 actual count: 175 result: OK foc: -0.005, -5.872\n",
"xcols: ['const', 'income'] expected count: 118.0 actual count: 118 result: OK foc: 0.002, 2.372\n",
"xcols: ['const', 'income'] expected count: 59.0 actual count: 60 result: OK foc: 0.005, 5.301\n",
"xcols: ['income', 'income2'] expected count: 176.0 actual count: 179 result: OK foc: 1.056, 0.041\n",
"xcols: ['income', 'income2'] expected count: 118.0 actual count: 122 result: OK foc: -0.383, -0.013\n",
"xcols: ['income', 'income2'] expected count: 59.0 actual count: 64 result: ERROR foc: -1.162, -0.037\n",
"xcols: ['const', 'income', 'income2'] expected count: 176.0 actual count: 176 result: OK foc: -0.001, -3.504, -0.067\n",
"xcols: ['const', 'income', 'income2'] expected count: 118.0 actual count: 118 result: OK foc: 0.002, 9.070, 0.129\n",
"xcols: ['const', 'income', 'income2'] expected count: 59.0 actual count: 59 result: OK foc: 0.001, -2.475, -0.002\n"
]
}
],
"source": [
"# with the statsmodels QR, some counts are off\n",
"# plain loops instead of a throw-away list comprehension: get_smcnt is called only\n",
"# for its side effects, and assigning to `_` would shadow IPython's last-output variable\n",
"for xcols in [['income2'], ['const'], ['income'],\n",
"              ['const', 'income'], ['income', 'income2'], ['const', 'income', 'income2']]:\n",
"    for q in [0.25, 0.5, 0.75]:\n",
"        get_smcnt(q, data, xcols)\n",
"sm_scores = pd.DataFrame(df).T"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# try scikit-learn QR implementation\n",
"from sklearn.ensemble import GradientBoostingRegressor\n",
"\n",
"df = {}\n",
"def get_skcnt(q, data, xcols):\n",
"    \"\"\"Fit scikit-learn's quantile-loss GradientBoostingRegressor and count the points above the fit.\n",
"\n",
"    Prints OK if the number of observations strictly above the in-sample\n",
"    predictions is within 5% of the theoretical count (1 - q) * n, otherwise\n",
"    prints ERROR.\n",
"\n",
"    q     : quantile (passed to the regressor as `alpha`)\n",
"    data  : DF with X and y; the column 'foodexp_pred' is added or overwritten\n",
"    xcols : cols used for X\n",
"\n",
"    Side effect: the averaged first-order conditions are stored in the global\n",
"    dict `df` under the key (q, tuple(xcols)).\n",
"    \"\"\"\n",
"    y = np.log(data['foodexp'].values)\n",
"    X = data[xcols].values\n",
"    reg = GradientBoostingRegressor(loss='quantile', alpha=q, random_state=0)\n",
"    reg.fit(X, y)\n",
"    pred = np.asarray(reg.predict(X))  # in sample prediction\n",
"    data['foodexp_pred'] = pred\n",
"    # plain ndarray residuals keep compute_foc independent of pandas indexing rules\n",
"    foc = compute_foc(q, y - pred, X)\n",
"    expected = np.round((1 - q) * len(data), 0)  # 1 - q data fraction\n",
"    actual = int(np.sum(y > pred))  # number of points above the fitted values\n",
"    # deviation relative to the theoretical count; max() guards a zero denominator\n",
"    rel_dev = np.abs(expected - actual) / max(expected, 1.0)\n",
"    result = 'ERROR' if rel_dev > 0.05 else 'OK'\n",
"    foc_str = ', '.join(f'{v:0.3f}' for v in foc)\n",
"    print('xcols: ' + str(xcols) +\n",
"          ' expected count: ' + str(expected) +\n",
"          ' actual count: ' + str(actual) + ' result: ' + result + ' foc: ' + foc_str)\n",
"    df[(q, tuple(xcols))] = pd.Series(foc, index=xcols)\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"xcols: ['income2'] expected count: 176.0 actual count: 176 result: OK foc: 0.148\n",
"xcols: ['income2'] expected count: 118.0 actual count: 121 result: OK foc: 0.769\n",
"xcols: ['income2'] expected count: 59.0 actual count: 57 result: OK foc: -0.234\n",
"xcols: ['const'] expected count: 176.0 actual count: 176 result: OK foc: -0.001\n",
"xcols: ['const'] expected count: 118.0 actual count: 117 result: OK foc: 0.000\n",
"xcols: ['const'] expected count: 59.0 actual count: 59 result: OK foc: 0.001\n",
"xcols: ['income'] expected count: 176.0 actual count: 176 result: OK foc: 11.176\n",
"xcols: ['income'] expected count: 118.0 actual count: 121 result: OK foc: 40.070\n",
"xcols: ['income'] expected count: 59.0 actual count: 57 result: OK foc: -9.010\n",
"xcols: ['const', 'income'] expected count: 176.0 actual count: 176 result: OK foc: -0.001, 11.176\n",
"xcols: ['const', 'income'] expected count: 118.0 actual count: 121 result: OK foc: 0.015, 40.070\n",
"xcols: ['const', 'income'] expected count: 59.0 actual count: 57 result: OK foc: -0.007, -9.010\n",
"xcols: ['income', 'income2'] expected count: 176.0 actual count: 176 result: OK foc: 11.176, 0.148\n",
"xcols: ['income', 'income2'] expected count: 118.0 actual count: 121 result: OK foc: 40.070, 0.769\n",
"xcols: ['income', 'income2'] expected count: 59.0 actual count: 57 result: OK foc: -9.010, -0.234\n",
"xcols: ['const', 'income', 'income2'] expected count: 176.0 actual count: 176 result: OK foc: -0.001, 11.176, 0.148\n",
"xcols: ['const', 'income', 'income2'] expected count: 118.0 actual count: 121 result: OK foc: 0.015, 40.070, 0.769\n",
"xcols: ['const', 'income', 'income2'] expected count: 59.0 actual count: 57 result: OK foc: -0.007, -9.010, -0.234\n"
]
}
],
"source": [
"# with the scikit-learn QR all counts are correct\n",
"# plain loops instead of a throw-away list comprehension: get_skcnt is called only\n",
"# for its side effects, and assigning to `_` would shadow IPython's last-output variable\n",
"for xcols in [['income2'], ['const'], ['income'],\n",
"              ['const', 'income'], ['income', 'income2'], ['const', 'income', 'income2']]:\n",
"    for q in [0.25, 0.5, 0.75]:\n",
"        get_skcnt(q, data, xcols)\n",
"skl_scores = pd.DataFrame(df).T"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>const</th>\n",
" <th>income</th>\n",
" <th>income2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>-0.0617371</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>-0.118013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>-0.00183331</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const,)</th>\n",
" <td>-0.00106383</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const,)</th>\n",
" <td>0.00212766</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const,)</th>\n",
" <td>0.00106383</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>-3.4206</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>2.13916</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>2.58682</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const, income)</th>\n",
" <td>-0.00531915</td>\n",
" <td>-5.87161</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const, income)</th>\n",
" <td>0.00212766</td>\n",
" <td>2.37231</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const, income)</th>\n",
" <td>0.00531915</td>\n",
" <td>5.30085</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>1.05618</td>\n",
" <td>0.0414931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>-0.383333</td>\n",
" <td>-0.0130059</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>-1.16205</td>\n",
" <td>-0.0374565</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>-0.00106383</td>\n",
" <td>-3.50431</td>\n",
" <td>-0.0672399</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>0.00212766</td>\n",
" <td>9.07006</td>\n",
" <td>0.128895</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>0.00106383</td>\n",
" <td>-2.47476</td>\n",
" <td>-0.002353</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" const income income2\n",
"0.25 (income2,) -0.0617371\n",
"0.50 (income2,) -0.118013\n",
"0.75 (income2,) -0.00183331\n",
"0.25 (const,) -0.00106383 \n",
"0.50 (const,) 0.00212766 \n",
"0.75 (const,) 0.00106383 \n",
"0.25 (income,) -3.4206 \n",
"0.50 (income,) 2.13916 \n",
"0.75 (income,) 2.58682 \n",
"0.25 (const, income) -0.00531915 -5.87161 \n",
"0.50 (const, income) 0.00212766 2.37231 \n",
"0.75 (const, income) 0.00531915 5.30085 \n",
"0.25 (income, income2) 1.05618 0.0414931\n",
"0.50 (income, income2) -0.383333 -0.0130059\n",
"0.75 (income, income2) -1.16205 -0.0374565\n",
"0.25 (const, income, income2) -0.00106383 -3.50431 -0.0672399\n",
"0.50 (const, income, income2) 0.00212766 9.07006 0.128895\n",
"0.75 (const, income, income2) 0.00106383 -2.47476 -0.002353"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sm_scores.astype(\"object\").fillna(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>const</th>\n",
" <th>income</th>\n",
" <th>income2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>0.148151</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>0.769132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>-0.23405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const,)</th>\n",
" <td>-0.00106383</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const,)</th>\n",
" <td>0</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const,)</th>\n",
" <td>0.00106383</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>11.1759</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>40.0704</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>-9.01015</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const, income)</th>\n",
" <td>-0.00106383</td>\n",
" <td>11.1759</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const, income)</th>\n",
" <td>0.0148936</td>\n",
" <td>40.0704</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const, income)</th>\n",
" <td>-0.00744681</td>\n",
" <td>-9.01015</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>11.1759</td>\n",
" <td>0.148151</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>40.0704</td>\n",
" <td>0.769132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>-9.01015</td>\n",
" <td>-0.23405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>-0.00106383</td>\n",
" <td>11.1759</td>\n",
" <td>0.148151</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>0.0148936</td>\n",
" <td>40.0704</td>\n",
" <td>0.769132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>-0.00744681</td>\n",
" <td>-9.01015</td>\n",
" <td>-0.23405</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" const income income2\n",
"0.25 (income2,) 0.148151\n",
"0.50 (income2,) 0.769132\n",
"0.75 (income2,) -0.23405\n",
"0.25 (const,) -0.00106383 \n",
"0.50 (const,) 0 \n",
"0.75 (const,) 0.00106383 \n",
"0.25 (income,) 11.1759 \n",
"0.50 (income,) 40.0704 \n",
"0.75 (income,) -9.01015 \n",
"0.25 (const, income) -0.00106383 11.1759 \n",
"0.50 (const, income) 0.0148936 40.0704 \n",
"0.75 (const, income) -0.00744681 -9.01015 \n",
"0.25 (income, income2) 11.1759 0.148151\n",
"0.50 (income, income2) 40.0704 0.769132\n",
"0.75 (income, income2) -9.01015 -0.23405\n",
"0.25 (const, income, income2) -0.00106383 11.1759 0.148151\n",
"0.50 (const, income, income2) 0.0148936 40.0704 0.769132\n",
"0.75 (const, income, income2) -0.00744681 -9.01015 -0.23405"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"skl_scores.astype(\"object\").fillna(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>const</th>\n",
" <th>income</th>\n",
" <th>income2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income2,)</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const,)</th>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const,)</th>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const,)</th>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>True</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>True</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income,)</th>\n",
" <td></td>\n",
" <td>True</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const, income)</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const, income)</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const, income)</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(income, income2)</th>\n",
" <td></td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.25</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.50</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.75</th>\n",
" <th>(const, income, income2)</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" const income income2\n",
"0.25 (income2,) True\n",
"0.50 (income2,) True\n",
"0.75 (income2,) True\n",
"0.25 (const,) False \n",
"0.50 (const,) False \n",
"0.75 (const,) False \n",
"0.25 (income,) True \n",
"0.50 (income,) True \n",
"0.75 (income,) True \n",
"0.25 (const, income) False True \n",
"0.50 (const, income) True True \n",
"0.75 (const, income) True True \n",
"0.25 (income, income2) True True\n",
"0.50 (income, income2) True True\n",
"0.75 (income, income2) True True\n",
"0.25 (const, income, income2) False True True\n",
"0.50 (const, income, income2) True True True\n",
"0.75 (const, income, income2) True True True"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# True where the statsmodels score is smaller in absolute value than the scikit-learn one;\n",
"# entries with no statsmodels score (regressor not in that model) are shown as blanks\n",
"smaller = np.abs(sm_scores) < np.abs(skl_scores)\n",
"smaller.where(sm_scores.notnull()).astype(pd.BooleanDtype()).astype(\"object\").fillna(\"\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment