Skip to content

Instantly share code, notes, and snippets.

@kmike
Last active August 3, 2017 14:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmike/f978d29a250070a7236c0485b1b1f182 to your computer and use it in GitHub Desktop.
Save kmike/f978d29a250070a7236c0485b1b1f182 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.svm import LinearSVC, LinearSVR\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor\n",
"from sklearn.datasets import load_boston, load_iris, load_diabetes, make_classification, make_regression\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.pipeline import make_pipeline, Pipeline\n",
"from sklearn.metrics import r2_score\n",
"import numpy as np\n",
"import pandas as pd\n",
"from scipy.stats import spearmanr, pearsonr\n",
"\n",
"import eli5\n",
"from eli5.sklearn import PermutationImportance"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def dcg_score(y_true, y_score, k=10):\n",
"    \"\"\"Discounted cumulative gain of ``y_true`` ranked by ``y_score``.\n",
"\n",
"    Only the ``k`` highest-scored items contribute; each one adds\n",
"    ``2 ** relevance - 1`` divided by ``log2(rank + 1)`` with 1-based ranks.\n",
"    \"\"\"\n",
"    top_k = np.argsort(y_score)[::-1][:k]\n",
"    relevance = np.take(y_true, top_k)\n",
"    # ranks start at 1, so the log2 arguments run 2, 3, ...\n",
"    log_ranks = np.log2(np.arange(2, len(relevance) + 2))\n",
"    return np.sum((2 ** relevance - 1) / log_ranks)\n",
"\n",
"def ndcg_score(y_true, y_score, k=10):\n",
"    \"\"\"Normalized DCG at ``k``: actual DCG divided by the ideal DCG.\n",
"\n",
"    The ideal DCG is obtained by ranking ``y_true`` by itself. When it is\n",
"    zero (e.g. every relevance is 0) the ratio is undefined, so 0.0 is\n",
"    returned instead of the NaN that dividing by zero would produce.\n",
"    \"\"\"\n",
"    best = dcg_score(y_true, y_true, k)\n",
"    if best == 0:\n",
"        return 0.0\n",
"    actual = dcg_score(y_true, y_score, k)\n",
"    return actual / best"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_classification_datasets():\n",
"    \"\"\"Return ``(name, X, y, feature_names)`` tuples for the classification runs.\n",
"\n",
"    The first entry is iris with a binary target (``target != 0``); the\n",
"    others are ``make_classification`` samples and have no feature names.\n",
"    \"\"\"\n",
"    iris = load_iris()\n",
"    datasets = [('iris_binary', iris.data, iris.target != 0, iris.feature_names)]\n",
"\n",
"    # (label, make_classification keyword arguments), generated in this order\n",
"    synthetic = [\n",
"        ('CLF(n_informative=5, n_redundant=0)',\n",
"         dict(n_informative=5, n_redundant=0)),\n",
"        ('CLF(n_informative=5, n_redundant=4)',\n",
"         dict(n_informative=5, n_redundant=4)),\n",
"        ('CLF(n_informative=1, n_redundant=4)',\n",
"         dict(n_informative=1, n_redundant=4, n_clusters_per_class=1)),\n",
"        ('CLF(n_informative=20, n_redundant=0)',\n",
"         dict(n_informative=20, n_redundant=0)),\n",
"    ]\n",
"    for label, params in synthetic:\n",
"        X, y = make_classification(**params)\n",
"        datasets.append((label, X, y, None))\n",
"\n",
"    return datasets\n",
"\n",
"\n",
"def get_regression_datasets():\n",
"    \"\"\"Return ``(name, X, y, feature_names)`` tuples for the regression runs.\n",
"\n",
"    Two bundled datasets come first (only boston is given feature names),\n",
"    followed by ``make_regression`` samples without feature names.\n",
"    \"\"\"\n",
"    boston = load_boston()\n",
"    diabetes = load_diabetes()\n",
"    datasets = [\n",
"        ('boston', boston.data, boston.target, boston.feature_names),\n",
"        ('diabetese', diabetes.data, diabetes.target, None),\n",
"    ]\n",
"\n",
"    # (label, make_regression keyword arguments), generated in this order\n",
"    synthetic = [\n",
"        ('REG(n_informative=5)', dict(n_informative=5)),\n",
"        ('REG(n_informative=5, effective_rank=2)',\n",
"         dict(n_informative=5, effective_rank=2)),\n",
"        ('REG(n_informative=1)', dict(n_informative=1)),\n",
"        ('REG(n_informative=20)', dict(n_informative=20)),\n",
"    ]\n",
"    for label, params in synthetic:\n",
"        X, y = make_regression(**params)\n",
"        datasets.append((label, X, y, None))\n",
"\n",
"    return datasets\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_classifiers():\n",
"    \"\"\"Return new, default-configured classifier instances to explain.\"\"\"\n",
"    estimator_classes = (\n",
"        LogisticRegression,\n",
"        LinearSVC,\n",
"        RandomForestClassifier,\n",
"        DecisionTreeClassifier,\n",
"    )\n",
"    return [cls() for cls in estimator_classes]\n",
"\n",
"\n",
"def get_regressors():\n",
"    \"\"\"Return new regressors; the two linear ones follow a StandardScaler.\"\"\"\n",
"    scaled_linear = [\n",
"        make_pipeline(StandardScaler(), model)\n",
"        for model in (LinearRegression(), LinearSVR())\n",
"    ]\n",
"    return scaled_linear + [RandomForestRegressor(), DecisionTreeRegressor()]\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_explanations(est, X, y, feature_names):\n",
"    \"\"\"Collect three normalized feature-weight columns for a fitted estimator.\n",
"\n",
"    Columns of the returned DataFrame:\n",
"      * ``w_inspect`` - weights eli5 reports for ``est`` itself;\n",
"      * ``w_pi``      - permutation importance of the prefit ``est`` on ``(X, y)``;\n",
"      * ``w_picv``    - permutation importance computed with ``cv=5``.\n",
"\n",
"    Rows missing from any column are dropped, then every column is divided\n",
"    by the sum of its absolute values.\n",
"    \"\"\"\n",
"    df_inspect = eli5.explain_weights_df(est, feature_names=feature_names, top=100)\n",
"    if isinstance(df_inspect.index, pd.MultiIndex):\n",
"        # drop the outermost index level (droplevel() default) and clear the name\n",
"        df_inspect.index = df_inspect.index.droplevel()\n",
"        df_inspect.index.name = None\n",
"\n",
"    pi = PermutationImportance(est, cv='prefit', n_iter=10).fit(X, y)\n",
"    df_pi = eli5.explain_weights_df(pi, feature_names=feature_names, top=100)\n",
"\n",
"    pi_cv = PermutationImportance(est, cv=5, n_iter=10).fit(X, y)\n",
"    df_picv = eli5.explain_weights_df(pi_cv, feature_names=feature_names, top=100)\n",
"\n",
"    df = pd.concat([df_inspect.weight, df_pi.weight, df_picv.weight], axis=1)\n",
"    df.columns = ['w_inspect', 'w_pi', 'w_picv']\n",
"    # Drop incomplete rows BEFORE normalizing: otherwise the weights of rows\n",
"    # that get dropped still inflate the denominators, and the kept weights\n",
"    # of a column no longer sum (in absolute value) to 1.\n",
"    df = df.dropna()\n",
"    return df / df.abs().sum()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_scores(df):\n",
"    \"\"\"Score the ``w_pi`` and ``w_picv`` columns of ``df`` against ``|w_inspect|``.\n",
"\n",
"    Returns ``{'PI': metrics, 'PICV': metrics}``, where each ``metrics`` dict\n",
"    maps a metric name to its value.\n",
"    \"\"\"\n",
"    reference = df.w_inspect.abs().values\n",
"    no_cutoff = 100000  # large k for the NDCG entry that is not the @5 variant\n",
"\n",
"    result = {}\n",
"    for label, weights in (('PI', df.w_pi), ('PICV', df.w_picv)):\n",
"        candidate = weights.values\n",
"        result[label] = {\n",
"            'SpearmanR': spearmanr(reference, candidate).correlation,\n",
"            'NDCG': ndcg_score(reference, candidate, no_cutoff),\n",
"            'NDCG@5': ndcg_score(reference, candidate, 5),\n",
"            'Pearson': pearsonr(reference, candidate)[0],\n",
"            'L2': np.linalg.norm(reference - candidate),\n",
"        }\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"done: LogisticRegression iris_binary\n",
"done: LinearSVC iris_binary\n",
"done: RandomForestClassifier iris_binary\n",
"done: DecisionTreeClassifier iris_binary\n",
"done: LogisticRegression CLF(n_informative=5, n_redundant=0)\n",
"done: LinearSVC CLF(n_informative=5, n_redundant=0)\n",
"done: RandomForestClassifier CLF(n_informative=5, n_redundant=0)\n",
"done: DecisionTreeClassifier CLF(n_informative=5, n_redundant=0)\n",
"done: LogisticRegression CLF(n_informative=5, n_redundant=4)\n",
"done: LinearSVC CLF(n_informative=5, n_redundant=4)\n",
"done: RandomForestClassifier CLF(n_informative=5, n_redundant=4)\n",
"done: DecisionTreeClassifier CLF(n_informative=5, n_redundant=4)\n",
"done: LogisticRegression CLF(n_informative=1, n_redundant=4)\n",
"done: LinearSVC CLF(n_informative=1, n_redundant=4)\n",
"done: RandomForestClassifier CLF(n_informative=1, n_redundant=4)\n",
"done: DecisionTreeClassifier CLF(n_informative=1, n_redundant=4)\n",
"done: LogisticRegression CLF(n_informative=20, n_redundant=0)\n",
"done: LinearSVC CLF(n_informative=20, n_redundant=0)\n",
"done: RandomForestClassifier CLF(n_informative=20, n_redundant=0)\n",
"done: DecisionTreeClassifier CLF(n_informative=20, n_redundant=0)\n",
"done: LinearRegression boston\n",
"done: LinearSVR boston\n",
"done: RandomForestRegressor boston\n",
"done: DecisionTreeRegressor boston\n",
"done: LinearRegression diabetese\n",
"done: LinearSVR diabetese\n",
"done: RandomForestRegressor diabetese\n",
"done: DecisionTreeRegressor diabetese\n",
"done: LinearRegression REG(n_informative=5)\n",
"done: LinearSVR REG(n_informative=5)\n",
"done: RandomForestRegressor REG(n_informative=5)\n",
"done: DecisionTreeRegressor REG(n_informative=5)\n",
"done: LinearRegression REG(n_informative=5, effective_rank=2)\n",
"done: LinearSVR REG(n_informative=5, effective_rank=2)\n",
"done: RandomForestRegressor REG(n_informative=5, effective_rank=2)\n",
"done: DecisionTreeRegressor REG(n_informative=5, effective_rank=2)\n",
"done: LinearRegression REG(n_informative=1)\n",
"done: LinearSVR REG(n_informative=1)\n",
"done: RandomForestRegressor REG(n_informative=1)\n",
"done: DecisionTreeRegressor REG(n_informative=1)\n",
"done: LinearRegression REG(n_informative=20)\n",
"done: LinearSVR REG(n_informative=20)\n",
"done: RandomForestRegressor REG(n_informative=20)\n",
"done: DecisionTreeRegressor REG(n_informative=20)\n"
]
}
],
"source": [
"def get_name(est):\n",
"    \"\"\"Return the class name of ``est``; for a Pipeline, that of its last step.\"\"\"\n",
"    final = est.steps[-1][1] if isinstance(est, Pipeline) else est\n",
"    return type(final).__name__\n",
"\n",
"# Accumulators filled by _append() for every (estimator, dataset) pair.\n",
"dfs, scores = [], []\n",
"estimators = {}\n",
"\n",
"def _append(X, y, feature_names, dataset_name, est):\n",
"    \"\"\"Fit ``est`` on ``(X, y)`` and record its explanations and scores.\n",
"\n",
"    Appends to the module-level ``dfs`` and ``scores`` lists, stores the\n",
"    fitted estimator in ``estimators`` and prints a progress line.\n",
"    \"\"\"\n",
"    est.fit(X, y)\n",
"    est_name = get_name(est)\n",
"    explanation = get_explanations(est, X, y, feature_names)\n",
"    estimators[est_name, dataset_name] = est\n",
"    dfs.append((est_name, dataset_name, explanation))\n",
"    scores.extend(\n",
"        (est_name, dataset_name, kind, metrics)\n",
"        for kind, metrics in get_scores(explanation).items()\n",
"    )\n",
"    print(\"done: {} {}\".format(est_name, dataset_name))\n",
" \n",
"\n",
"# Classification: every classifier on every classification dataset.\n",
"for dataset_name, X, y, feature_names in get_classification_datasets():\n",
"    for clf in get_classifiers():\n",
"        _append(X, y, feature_names, dataset_name, est=clf)\n",
"\n",
"# Regression: every regressor on every regression dataset.\n",
"for dataset_name, X, y, feature_names in get_regression_datasets():\n",
"    for reg in get_regressors():\n",
"        _append(X, y, feature_names, dataset_name, est=reg)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>L2</th>\n",
" <th>NDCG</th>\n",
" <th>NDCG@5</th>\n",
" <th>Pearson</th>\n",
" <th>SpearmanR</th>\n",
" <th>dataset</th>\n",
" <th>estimator</th>\n",
" <th>type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.672614</td>\n",
" <td>0.984817</td>\n",
" <td>0.984817</td>\n",
" <td>0.833270</td>\n",
" <td>0.632456</td>\n",
" <td>iris_binary</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.672541</td>\n",
" <td>0.984817</td>\n",
" <td>0.984817</td>\n",
" <td>0.833273</td>\n",
" <td>0.632456</td>\n",
" <td>iris_binary</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.675399</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.872404</td>\n",
" <td>0.948683</td>\n",
" <td>iris_binary</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.671430</td>\n",
" <td>0.999950</td>\n",
" <td>0.999950</td>\n",
" <td>0.873153</td>\n",
" <td>0.632456</td>\n",
" <td>iris_binary</td>\n",
" <td>LinearSVC</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.282843</td>\n",
" <td>0.964335</td>\n",
" <td>0.964335</td>\n",
" <td>0.968496</td>\n",
" <td>0.816497</td>\n",
" <td>iris_binary</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.109003</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.993515</td>\n",
" <td>1.000000</td>\n",
" <td>iris_binary</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>iris_binary</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.834689</td>\n",
" <td>0.630930</td>\n",
" <td>0.630930</td>\n",
" <td>0.357553</td>\n",
" <td>0.272166</td>\n",
" <td>iris_binary</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.167495</td>\n",
" <td>0.971622</td>\n",
" <td>0.965187</td>\n",
" <td>0.872985</td>\n",
" <td>0.908544</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.255409</td>\n",
" <td>0.973934</td>\n",
" <td>0.981802</td>\n",
" <td>0.844037</td>\n",
" <td>0.820918</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.123809</td>\n",
" <td>0.969013</td>\n",
" <td>0.932469</td>\n",
" <td>0.893776</td>\n",
" <td>0.934538</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.252516</td>\n",
" <td>0.914352</td>\n",
" <td>0.784295</td>\n",
" <td>0.622974</td>\n",
" <td>0.684211</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.322749</td>\n",
" <td>0.994469</td>\n",
" <td>0.989704</td>\n",
" <td>0.900801</td>\n",
" <td>0.885049</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.277373</td>\n",
" <td>0.946303</td>\n",
" <td>0.900302</td>\n",
" <td>0.769257</td>\n",
" <td>0.374577</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.072285</td>\n",
" <td>0.993702</td>\n",
" <td>0.970213</td>\n",
" <td>0.981011</td>\n",
" <td>0.973103</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.260483</td>\n",
" <td>0.920732</td>\n",
" <td>0.879966</td>\n",
" <td>0.741018</td>\n",
" <td>0.298858</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.273521</td>\n",
" <td>0.981474</td>\n",
" <td>0.973298</td>\n",
" <td>0.812503</td>\n",
" <td>0.817674</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.264550</td>\n",
" <td>0.946682</td>\n",
" <td>0.940374</td>\n",
" <td>0.756563</td>\n",
" <td>0.520301</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.250067</td>\n",
" <td>0.966972</td>\n",
" <td>0.913251</td>\n",
" <td>0.768077</td>\n",
" <td>0.706813</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.304935</td>\n",
" <td>0.957164</td>\n",
" <td>0.938046</td>\n",
" <td>0.744530</td>\n",
" <td>0.711278</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.275022</td>\n",
" <td>0.989233</td>\n",
" <td>0.981340</td>\n",
" <td>0.971646</td>\n",
" <td>0.749610</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>0.263122</td>\n",
" <td>0.955524</td>\n",
" <td>0.922541</td>\n",
" <td>0.930586</td>\n",
" <td>0.383459</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.138652</td>\n",
" <td>0.998331</td>\n",
" <td>0.998283</td>\n",
" <td>0.976383</td>\n",
" <td>0.997411</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.247536</td>\n",
" <td>0.944404</td>\n",
" <td>0.877370</td>\n",
" <td>0.928444</td>\n",
" <td>0.173572</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.336659</td>\n",
" <td>0.990104</td>\n",
" <td>0.978667</td>\n",
" <td>0.918085</td>\n",
" <td>0.875421</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.401469</td>\n",
" <td>0.963876</td>\n",
" <td>0.954015</td>\n",
" <td>0.893001</td>\n",
" <td>0.221249</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.332961</td>\n",
" <td>0.992544</td>\n",
" <td>0.973712</td>\n",
" <td>0.937979</td>\n",
" <td>0.845940</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0.341660</td>\n",
" <td>0.971785</td>\n",
" <td>0.975554</td>\n",
" <td>0.895904</td>\n",
" <td>0.352145</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0.578752</td>\n",
" <td>0.492292</td>\n",
" <td>0.122312</td>\n",
" <td>-0.288927</td>\n",
" <td>0.191127</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.460242</td>\n",
" <td>0.926089</td>\n",
" <td>0.853204</td>\n",
" <td>0.717342</td>\n",
" <td>0.405126</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>0.235361</td>\n",
" <td>0.991899</td>\n",
" <td>1.000000</td>\n",
" <td>0.927546</td>\n",
" <td>0.737489</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>0.189558</td>\n",
" <td>0.980900</td>\n",
" <td>0.999842</td>\n",
" <td>0.927359</td>\n",
" <td>0.506976</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>0.142030</td>\n",
" <td>0.990276</td>\n",
" <td>0.984540</td>\n",
" <td>0.980935</td>\n",
" <td>0.779370</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>0.124729</td>\n",
" <td>0.901260</td>\n",
" <td>0.907927</td>\n",
" <td>0.939584</td>\n",
" <td>0.163636</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>0.107740</td>\n",
" <td>0.988112</td>\n",
" <td>0.977762</td>\n",
" <td>0.975555</td>\n",
" <td>0.999033</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>0.179533</td>\n",
" <td>0.911248</td>\n",
" <td>0.942287</td>\n",
" <td>0.883271</td>\n",
" <td>-0.133398</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>0.367224</td>\n",
" <td>0.999991</td>\n",
" <td>1.000000</td>\n",
" <td>0.967209</td>\n",
" <td>0.999629</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>0.238332</td>\n",
" <td>0.971812</td>\n",
" <td>0.959122</td>\n",
" <td>0.973212</td>\n",
" <td>0.211095</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>0.401022</td>\n",
" <td>0.999889</td>\n",
" <td>1.000000</td>\n",
" <td>0.966992</td>\n",
" <td>0.996933</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>0.284760</td>\n",
" <td>0.980323</td>\n",
" <td>0.978061</td>\n",
" <td>0.977250</td>\n",
" <td>0.509969</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>0.258505</td>\n",
" <td>0.995616</td>\n",
" <td>0.995906</td>\n",
" <td>0.985579</td>\n",
" <td>0.833975</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>0.297123</td>\n",
" <td>0.943655</td>\n",
" <td>0.944391</td>\n",
" <td>0.974220</td>\n",
" <td>0.068419</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>0.067249</td>\n",
" <td>0.999453</td>\n",
" <td>1.000000</td>\n",
" <td>0.993935</td>\n",
" <td>0.998769</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>0.164108</td>\n",
" <td>0.978136</td>\n",
" <td>0.967044</td>\n",
" <td>0.966069</td>\n",
" <td>0.342775</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>0.452882</td>\n",
" <td>0.999998</td>\n",
" <td>1.000000</td>\n",
" <td>0.998365</td>\n",
" <td>0.999654</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>0.314697</td>\n",
" <td>0.972598</td>\n",
" <td>0.969714</td>\n",
" <td>0.998146</td>\n",
" <td>0.065281</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>0.624122</td>\n",
" <td>0.992905</td>\n",
" <td>0.983652</td>\n",
" <td>0.959778</td>\n",
" <td>0.900569</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>0.516824</td>\n",
" <td>0.979657</td>\n",
" <td>0.973410</td>\n",
" <td>0.962190</td>\n",
" <td>0.541348</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>0.017653</td>\n",
" <td>0.999450</td>\n",
" <td>0.998772</td>\n",
" <td>0.999990</td>\n",
" <td>0.610201</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>0.014198</td>\n",
" <td>0.997467</td>\n",
" <td>0.997146</td>\n",
" <td>0.999982</td>\n",
" <td>0.060978</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>0.003633</td>\n",
" <td>0.999976</td>\n",
" <td>0.999910</td>\n",
" <td>0.999997</td>\n",
" <td>0.997796</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>0.014089</td>\n",
" <td>0.997671</td>\n",
" <td>0.995892</td>\n",
" <td>0.999954</td>\n",
" <td>0.069229</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>0.096921</td>\n",
" <td>0.999998</td>\n",
" <td>1.000000</td>\n",
" <td>0.968106</td>\n",
" <td>0.999678</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>0.095981</td>\n",
" <td>0.989010</td>\n",
" <td>0.984276</td>\n",
" <td>0.932558</td>\n",
" <td>0.446691</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>0.120204</td>\n",
" <td>0.987079</td>\n",
" <td>0.973107</td>\n",
" <td>0.899272</td>\n",
" <td>0.822808</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>0.125489</td>\n",
" <td>0.987371</td>\n",
" <td>0.973107</td>\n",
" <td>0.904354</td>\n",
" <td>0.797415</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>0.108996</td>\n",
" <td>0.992210</td>\n",
" <td>0.994836</td>\n",
" <td>0.972288</td>\n",
" <td>0.876028</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>0.207201</td>\n",
" <td>0.780758</td>\n",
" <td>0.687252</td>\n",
" <td>0.502312</td>\n",
" <td>0.156832</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>0.128705</td>\n",
" <td>0.936889</td>\n",
" <td>0.886772</td>\n",
" <td>0.939217</td>\n",
" <td>0.998556</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>0.340442</td>\n",
" <td>0.612293</td>\n",
" <td>0.429809</td>\n",
" <td>0.244711</td>\n",
" <td>0.032248</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PICV</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>88 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" L2 NDCG NDCG@5 Pearson SpearmanR \\\n",
"0 0.672614 0.984817 0.984817 0.833270 0.632456 \n",
"1 0.672541 0.984817 0.984817 0.833273 0.632456 \n",
"2 0.675399 1.000000 1.000000 0.872404 0.948683 \n",
"3 0.671430 0.999950 0.999950 0.873153 0.632456 \n",
"4 0.282843 0.964335 0.964335 0.968496 0.816497 \n",
"5 0.109003 1.000000 1.000000 0.993515 1.000000 \n",
"6 0.000000 1.000000 1.000000 1.000000 1.000000 \n",
"7 0.834689 0.630930 0.630930 0.357553 0.272166 \n",
"8 0.167495 0.971622 0.965187 0.872985 0.908544 \n",
"9 0.255409 0.973934 0.981802 0.844037 0.820918 \n",
"10 0.123809 0.969013 0.932469 0.893776 0.934538 \n",
"11 0.252516 0.914352 0.784295 0.622974 0.684211 \n",
"12 0.322749 0.994469 0.989704 0.900801 0.885049 \n",
"13 0.277373 0.946303 0.900302 0.769257 0.374577 \n",
"14 0.072285 0.993702 0.970213 0.981011 0.973103 \n",
"15 0.260483 0.920732 0.879966 0.741018 0.298858 \n",
"16 0.273521 0.981474 0.973298 0.812503 0.817674 \n",
"17 0.264550 0.946682 0.940374 0.756563 0.520301 \n",
"18 0.250067 0.966972 0.913251 0.768077 0.706813 \n",
"19 0.304935 0.957164 0.938046 0.744530 0.711278 \n",
"20 0.275022 0.989233 0.981340 0.971646 0.749610 \n",
"21 0.263122 0.955524 0.922541 0.930586 0.383459 \n",
"22 0.138652 0.998331 0.998283 0.976383 0.997411 \n",
"23 0.247536 0.944404 0.877370 0.928444 0.173572 \n",
"24 0.336659 0.990104 0.978667 0.918085 0.875421 \n",
"25 0.401469 0.963876 0.954015 0.893001 0.221249 \n",
"26 0.332961 0.992544 0.973712 0.937979 0.845940 \n",
"27 0.341660 0.971785 0.975554 0.895904 0.352145 \n",
"28 0.578752 0.492292 0.122312 -0.288927 0.191127 \n",
"29 0.460242 0.926089 0.853204 0.717342 0.405126 \n",
".. ... ... ... ... ... \n",
"58 0.235361 0.991899 1.000000 0.927546 0.737489 \n",
"59 0.189558 0.980900 0.999842 0.927359 0.506976 \n",
"60 0.142030 0.990276 0.984540 0.980935 0.779370 \n",
"61 0.124729 0.901260 0.907927 0.939584 0.163636 \n",
"62 0.107740 0.988112 0.977762 0.975555 0.999033 \n",
"63 0.179533 0.911248 0.942287 0.883271 -0.133398 \n",
"64 0.367224 0.999991 1.000000 0.967209 0.999629 \n",
"65 0.238332 0.971812 0.959122 0.973212 0.211095 \n",
"66 0.401022 0.999889 1.000000 0.966992 0.996933 \n",
"67 0.284760 0.980323 0.978061 0.977250 0.509969 \n",
"68 0.258505 0.995616 0.995906 0.985579 0.833975 \n",
"69 0.297123 0.943655 0.944391 0.974220 0.068419 \n",
"70 0.067249 0.999453 1.000000 0.993935 0.998769 \n",
"71 0.164108 0.978136 0.967044 0.966069 0.342775 \n",
"72 0.452882 0.999998 1.000000 0.998365 0.999654 \n",
"73 0.314697 0.972598 0.969714 0.998146 0.065281 \n",
"74 0.624122 0.992905 0.983652 0.959778 0.900569 \n",
"75 0.516824 0.979657 0.973410 0.962190 0.541348 \n",
"76 0.017653 0.999450 0.998772 0.999990 0.610201 \n",
"77 0.014198 0.997467 0.997146 0.999982 0.060978 \n",
"78 0.003633 0.999976 0.999910 0.999997 0.997796 \n",
"79 0.014089 0.997671 0.995892 0.999954 0.069229 \n",
"80 0.096921 0.999998 1.000000 0.968106 0.999678 \n",
"81 0.095981 0.989010 0.984276 0.932558 0.446691 \n",
"82 0.120204 0.987079 0.973107 0.899272 0.822808 \n",
"83 0.125489 0.987371 0.973107 0.904354 0.797415 \n",
"84 0.108996 0.992210 0.994836 0.972288 0.876028 \n",
"85 0.207201 0.780758 0.687252 0.502312 0.156832 \n",
"86 0.128705 0.936889 0.886772 0.939217 0.998556 \n",
"87 0.340442 0.612293 0.429809 0.244711 0.032248 \n",
"\n",
" dataset estimator type \n",
"0 iris_binary LogisticRegression PI \n",
"1 iris_binary LogisticRegression PICV \n",
"2 iris_binary LinearSVC PI \n",
"3 iris_binary LinearSVC PICV \n",
"4 iris_binary RandomForestClassifier PI \n",
"5 iris_binary RandomForestClassifier PICV \n",
"6 iris_binary DecisionTreeClassifier PI \n",
"7 iris_binary DecisionTreeClassifier PICV \n",
"8 CLF(n_informative=5, n_redundant=0) LogisticRegression PI \n",
"9 CLF(n_informative=5, n_redundant=0) LogisticRegression PICV \n",
"10 CLF(n_informative=5, n_redundant=0) LinearSVC PI \n",
"11 CLF(n_informative=5, n_redundant=0) LinearSVC PICV \n",
"12 CLF(n_informative=5, n_redundant=0) RandomForestClassifier PI \n",
"13 CLF(n_informative=5, n_redundant=0) RandomForestClassifier PICV \n",
"14 CLF(n_informative=5, n_redundant=0) DecisionTreeClassifier PI \n",
"15 CLF(n_informative=5, n_redundant=0) DecisionTreeClassifier PICV \n",
"16 CLF(n_informative=5, n_redundant=4) LogisticRegression PI \n",
"17 CLF(n_informative=5, n_redundant=4) LogisticRegression PICV \n",
"18 CLF(n_informative=5, n_redundant=4) LinearSVC PI \n",
"19 CLF(n_informative=5, n_redundant=4) LinearSVC PICV \n",
"20 CLF(n_informative=5, n_redundant=4) RandomForestClassifier PI \n",
"21 CLF(n_informative=5, n_redundant=4) RandomForestClassifier PICV \n",
"22 CLF(n_informative=5, n_redundant=4) DecisionTreeClassifier PI \n",
"23 CLF(n_informative=5, n_redundant=4) DecisionTreeClassifier PICV \n",
"24 CLF(n_informative=1, n_redundant=4) LogisticRegression PI \n",
"25 CLF(n_informative=1, n_redundant=4) LogisticRegression PICV \n",
"26 CLF(n_informative=1, n_redundant=4) LinearSVC PI \n",
"27 CLF(n_informative=1, n_redundant=4) LinearSVC PICV \n",
"28 CLF(n_informative=1, n_redundant=4) RandomForestClassifier PI \n",
"29 CLF(n_informative=1, n_redundant=4) RandomForestClassifier PICV \n",
".. ... ... ... \n",
"58 REG(n_informative=5) LinearSVR PI \n",
"59 REG(n_informative=5) LinearSVR PICV \n",
"60 REG(n_informative=5) RandomForestRegressor PI \n",
"61 REG(n_informative=5) RandomForestRegressor PICV \n",
"62 REG(n_informative=5) DecisionTreeRegressor PI \n",
"63 REG(n_informative=5) DecisionTreeRegressor PICV \n",
"64 REG(n_informative=5, effective_rank=2) LinearRegression PI \n",
"65 REG(n_informative=5, effective_rank=2) LinearRegression PICV \n",
"66 REG(n_informative=5, effective_rank=2) LinearSVR PI \n",
"67 REG(n_informative=5, effective_rank=2) LinearSVR PICV \n",
"68 REG(n_informative=5, effective_rank=2) RandomForestRegressor PI \n",
"69 REG(n_informative=5, effective_rank=2) RandomForestRegressor PICV \n",
"70 REG(n_informative=5, effective_rank=2) DecisionTreeRegressor PI \n",
"71 REG(n_informative=5, effective_rank=2) DecisionTreeRegressor PICV \n",
"72 REG(n_informative=1) LinearRegression PI \n",
"73 REG(n_informative=1) LinearRegression PICV \n",
"74 REG(n_informative=1) LinearSVR PI \n",
"75 REG(n_informative=1) LinearSVR PICV \n",
"76 REG(n_informative=1) RandomForestRegressor PI \n",
"77 REG(n_informative=1) RandomForestRegressor PICV \n",
"78 REG(n_informative=1) DecisionTreeRegressor PI \n",
"79 REG(n_informative=1) DecisionTreeRegressor PICV \n",
"80 REG(n_informative=20) LinearRegression PI \n",
"81 REG(n_informative=20) LinearRegression PICV \n",
"82 REG(n_informative=20) LinearSVR PI \n",
"83 REG(n_informative=20) LinearSVR PICV \n",
"84 REG(n_informative=20) RandomForestRegressor PI \n",
"85 REG(n_informative=20) RandomForestRegressor PICV \n",
"86 REG(n_informative=20) DecisionTreeRegressor PI \n",
"87 REG(n_informative=20) DecisionTreeRegressor PICV \n",
"\n",
"[88 rows x 8 columns]"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame([s[3] for s in scores])\n",
"df = df.assign(\n",
" estimator=[s[0] for s in scores],\n",
" dataset=[s[1] for s in scores],\n",
" type=[s[2] for s in scores],\n",
")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>L2</th>\n",
" <th>NDCG</th>\n",
" <th>NDCG@5</th>\n",
" <th>Pearson</th>\n",
" <th>SpearmanR</th>\n",
" <th>dataset</th>\n",
" <th>estimator</th>\n",
" <th>type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.672614</td>\n",
" <td>0.984817</td>\n",
" <td>0.984817</td>\n",
" <td>0.833270</td>\n",
" <td>0.632456</td>\n",
" <td>iris_binary</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.675399</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.872404</td>\n",
" <td>0.948683</td>\n",
" <td>iris_binary</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.282843</td>\n",
" <td>0.964335</td>\n",
" <td>0.964335</td>\n",
" <td>0.968496</td>\n",
" <td>0.816497</td>\n",
" <td>iris_binary</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>iris_binary</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.167495</td>\n",
" <td>0.971622</td>\n",
" <td>0.965187</td>\n",
" <td>0.872985</td>\n",
" <td>0.908544</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.123809</td>\n",
" <td>0.969013</td>\n",
" <td>0.932469</td>\n",
" <td>0.893776</td>\n",
" <td>0.934538</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.322749</td>\n",
" <td>0.994469</td>\n",
" <td>0.989704</td>\n",
" <td>0.900801</td>\n",
" <td>0.885049</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.072285</td>\n",
" <td>0.993702</td>\n",
" <td>0.970213</td>\n",
" <td>0.981011</td>\n",
" <td>0.973103</td>\n",
" <td>CLF(n_informative=5, n_redundant=0)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.273521</td>\n",
" <td>0.981474</td>\n",
" <td>0.973298</td>\n",
" <td>0.812503</td>\n",
" <td>0.817674</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.250067</td>\n",
" <td>0.966972</td>\n",
" <td>0.913251</td>\n",
" <td>0.768077</td>\n",
" <td>0.706813</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.275022</td>\n",
" <td>0.989233</td>\n",
" <td>0.981340</td>\n",
" <td>0.971646</td>\n",
" <td>0.749610</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.138652</td>\n",
" <td>0.998331</td>\n",
" <td>0.998283</td>\n",
" <td>0.976383</td>\n",
" <td>0.997411</td>\n",
" <td>CLF(n_informative=5, n_redundant=4)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.336659</td>\n",
" <td>0.990104</td>\n",
" <td>0.978667</td>\n",
" <td>0.918085</td>\n",
" <td>0.875421</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.332961</td>\n",
" <td>0.992544</td>\n",
" <td>0.973712</td>\n",
" <td>0.937979</td>\n",
" <td>0.845940</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0.578752</td>\n",
" <td>0.492292</td>\n",
" <td>0.122312</td>\n",
" <td>-0.288927</td>\n",
" <td>0.191127</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>0.015446</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.999899</td>\n",
" <td>0.999027</td>\n",
" <td>CLF(n_informative=1, n_redundant=4)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>0.173657</td>\n",
" <td>0.979462</td>\n",
" <td>0.964900</td>\n",
" <td>0.877614</td>\n",
" <td>0.700077</td>\n",
" <td>CLF(n_informative=20, n_redundant=0)</td>\n",
" <td>LogisticRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>0.252788</td>\n",
" <td>0.973859</td>\n",
" <td>0.968925</td>\n",
" <td>0.768863</td>\n",
" <td>0.580477</td>\n",
" <td>CLF(n_informative=20, n_redundant=0)</td>\n",
" <td>LinearSVC</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>0.140510</td>\n",
" <td>0.938507</td>\n",
" <td>0.768774</td>\n",
" <td>0.672920</td>\n",
" <td>0.654109</td>\n",
" <td>CLF(n_informative=20, n_redundant=0)</td>\n",
" <td>RandomForestClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>0.109475</td>\n",
" <td>0.995163</td>\n",
" <td>0.979534</td>\n",
" <td>0.959566</td>\n",
" <td>0.981116</td>\n",
" <td>CLF(n_informative=20, n_redundant=0)</td>\n",
" <td>DecisionTreeClassifier</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>0.242637</td>\n",
" <td>0.999890</td>\n",
" <td>0.999311</td>\n",
" <td>0.952595</td>\n",
" <td>0.989011</td>\n",
" <td>boston</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>0.318675</td>\n",
" <td>0.997746</td>\n",
" <td>0.994291</td>\n",
" <td>0.950851</td>\n",
" <td>0.972527</td>\n",
" <td>boston</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>0.034486</td>\n",
" <td>0.999036</td>\n",
" <td>0.999015</td>\n",
" <td>0.997868</td>\n",
" <td>0.989011</td>\n",
" <td>boston</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0.177042</td>\n",
" <td>0.999770</td>\n",
" <td>0.999955</td>\n",
" <td>0.973999</td>\n",
" <td>0.967033</td>\n",
" <td>boston</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>0.292793</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.967100</td>\n",
" <td>1.000000</td>\n",
" <td>diabetese</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>0.352574</td>\n",
" <td>0.985625</td>\n",
" <td>0.984779</td>\n",
" <td>0.954882</td>\n",
" <td>0.951515</td>\n",
" <td>diabetese</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>0.057059</td>\n",
" <td>0.999937</td>\n",
" <td>1.000000</td>\n",
" <td>0.991997</td>\n",
" <td>0.951515</td>\n",
" <td>diabetese</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>0.120583</td>\n",
" <td>0.927134</td>\n",
" <td>0.916092</td>\n",
" <td>0.929929</td>\n",
" <td>0.915152</td>\n",
" <td>diabetese</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>0.159032</td>\n",
" <td>0.999994</td>\n",
" <td>1.000000</td>\n",
" <td>0.983486</td>\n",
" <td>0.999579</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>0.235361</td>\n",
" <td>0.991899</td>\n",
" <td>1.000000</td>\n",
" <td>0.927546</td>\n",
" <td>0.737489</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>0.142030</td>\n",
" <td>0.990276</td>\n",
" <td>0.984540</td>\n",
" <td>0.980935</td>\n",
" <td>0.779370</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>0.107740</td>\n",
" <td>0.988112</td>\n",
" <td>0.977762</td>\n",
" <td>0.975555</td>\n",
" <td>0.999033</td>\n",
" <td>REG(n_informative=5)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>0.367224</td>\n",
" <td>0.999991</td>\n",
" <td>1.000000</td>\n",
" <td>0.967209</td>\n",
" <td>0.999629</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>0.401022</td>\n",
" <td>0.999889</td>\n",
" <td>1.000000</td>\n",
" <td>0.966992</td>\n",
" <td>0.996933</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>0.258505</td>\n",
" <td>0.995616</td>\n",
" <td>0.995906</td>\n",
" <td>0.985579</td>\n",
" <td>0.833975</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>0.067249</td>\n",
" <td>0.999453</td>\n",
" <td>1.000000</td>\n",
" <td>0.993935</td>\n",
" <td>0.998769</td>\n",
" <td>REG(n_informative=5, effective_rank=2)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>0.452882</td>\n",
" <td>0.999998</td>\n",
" <td>1.000000</td>\n",
" <td>0.998365</td>\n",
" <td>0.999654</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>0.624122</td>\n",
" <td>0.992905</td>\n",
" <td>0.983652</td>\n",
" <td>0.959778</td>\n",
" <td>0.900569</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>0.017653</td>\n",
" <td>0.999450</td>\n",
" <td>0.998772</td>\n",
" <td>0.999990</td>\n",
" <td>0.610201</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>0.003633</td>\n",
" <td>0.999976</td>\n",
" <td>0.999910</td>\n",
" <td>0.999997</td>\n",
" <td>0.997796</td>\n",
" <td>REG(n_informative=1)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>0.096921</td>\n",
" <td>0.999998</td>\n",
" <td>1.000000</td>\n",
" <td>0.968106</td>\n",
" <td>0.999678</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>LinearRegression</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>0.120204</td>\n",
" <td>0.987079</td>\n",
" <td>0.973107</td>\n",
" <td>0.899272</td>\n",
" <td>0.822808</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>LinearSVR</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>0.108996</td>\n",
" <td>0.992210</td>\n",
" <td>0.994836</td>\n",
" <td>0.972288</td>\n",
" <td>0.876028</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>RandomForestRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>0.128705</td>\n",
" <td>0.936889</td>\n",
" <td>0.886772</td>\n",
" <td>0.939217</td>\n",
" <td>0.998556</td>\n",
" <td>REG(n_informative=20)</td>\n",
" <td>DecisionTreeRegressor</td>\n",
" <td>PI</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" L2 NDCG NDCG@5 Pearson SpearmanR \\\n",
"0 0.672614 0.984817 0.984817 0.833270 0.632456 \n",
"2 0.675399 1.000000 1.000000 0.872404 0.948683 \n",
"4 0.282843 0.964335 0.964335 0.968496 0.816497 \n",
"6 0.000000 1.000000 1.000000 1.000000 1.000000 \n",
"8 0.167495 0.971622 0.965187 0.872985 0.908544 \n",
"10 0.123809 0.969013 0.932469 0.893776 0.934538 \n",
"12 0.322749 0.994469 0.989704 0.900801 0.885049 \n",
"14 0.072285 0.993702 0.970213 0.981011 0.973103 \n",
"16 0.273521 0.981474 0.973298 0.812503 0.817674 \n",
"18 0.250067 0.966972 0.913251 0.768077 0.706813 \n",
"20 0.275022 0.989233 0.981340 0.971646 0.749610 \n",
"22 0.138652 0.998331 0.998283 0.976383 0.997411 \n",
"24 0.336659 0.990104 0.978667 0.918085 0.875421 \n",
"26 0.332961 0.992544 0.973712 0.937979 0.845940 \n",
"28 0.578752 0.492292 0.122312 -0.288927 0.191127 \n",
"30 0.015446 1.000000 1.000000 0.999899 0.999027 \n",
"32 0.173657 0.979462 0.964900 0.877614 0.700077 \n",
"34 0.252788 0.973859 0.968925 0.768863 0.580477 \n",
"36 0.140510 0.938507 0.768774 0.672920 0.654109 \n",
"38 0.109475 0.995163 0.979534 0.959566 0.981116 \n",
"40 0.242637 0.999890 0.999311 0.952595 0.989011 \n",
"42 0.318675 0.997746 0.994291 0.950851 0.972527 \n",
"44 0.034486 0.999036 0.999015 0.997868 0.989011 \n",
"46 0.177042 0.999770 0.999955 0.973999 0.967033 \n",
"48 0.292793 1.000000 1.000000 0.967100 1.000000 \n",
"50 0.352574 0.985625 0.984779 0.954882 0.951515 \n",
"52 0.057059 0.999937 1.000000 0.991997 0.951515 \n",
"54 0.120583 0.927134 0.916092 0.929929 0.915152 \n",
"56 0.159032 0.999994 1.000000 0.983486 0.999579 \n",
"58 0.235361 0.991899 1.000000 0.927546 0.737489 \n",
"60 0.142030 0.990276 0.984540 0.980935 0.779370 \n",
"62 0.107740 0.988112 0.977762 0.975555 0.999033 \n",
"64 0.367224 0.999991 1.000000 0.967209 0.999629 \n",
"66 0.401022 0.999889 1.000000 0.966992 0.996933 \n",
"68 0.258505 0.995616 0.995906 0.985579 0.833975 \n",
"70 0.067249 0.999453 1.000000 0.993935 0.998769 \n",
"72 0.452882 0.999998 1.000000 0.998365 0.999654 \n",
"74 0.624122 0.992905 0.983652 0.959778 0.900569 \n",
"76 0.017653 0.999450 0.998772 0.999990 0.610201 \n",
"78 0.003633 0.999976 0.999910 0.999997 0.997796 \n",
"80 0.096921 0.999998 1.000000 0.968106 0.999678 \n",
"82 0.120204 0.987079 0.973107 0.899272 0.822808 \n",
"84 0.108996 0.992210 0.994836 0.972288 0.876028 \n",
"86 0.128705 0.936889 0.886772 0.939217 0.998556 \n",
"\n",
" dataset estimator type \n",
"0 iris_binary LogisticRegression PI \n",
"2 iris_binary LinearSVC PI \n",
"4 iris_binary RandomForestClassifier PI \n",
"6 iris_binary DecisionTreeClassifier PI \n",
"8 CLF(n_informative=5, n_redundant=0) LogisticRegression PI \n",
"10 CLF(n_informative=5, n_redundant=0) LinearSVC PI \n",
"12 CLF(n_informative=5, n_redundant=0) RandomForestClassifier PI \n",
"14 CLF(n_informative=5, n_redundant=0) DecisionTreeClassifier PI \n",
"16 CLF(n_informative=5, n_redundant=4) LogisticRegression PI \n",
"18 CLF(n_informative=5, n_redundant=4) LinearSVC PI \n",
"20 CLF(n_informative=5, n_redundant=4) RandomForestClassifier PI \n",
"22 CLF(n_informative=5, n_redundant=4) DecisionTreeClassifier PI \n",
"24 CLF(n_informative=1, n_redundant=4) LogisticRegression PI \n",
"26 CLF(n_informative=1, n_redundant=4) LinearSVC PI \n",
"28 CLF(n_informative=1, n_redundant=4) RandomForestClassifier PI \n",
"30 CLF(n_informative=1, n_redundant=4) DecisionTreeClassifier PI \n",
"32 CLF(n_informative=20, n_redundant=0) LogisticRegression PI \n",
"34 CLF(n_informative=20, n_redundant=0) LinearSVC PI \n",
"36 CLF(n_informative=20, n_redundant=0) RandomForestClassifier PI \n",
"38 CLF(n_informative=20, n_redundant=0) DecisionTreeClassifier PI \n",
"40 boston LinearRegression PI \n",
"42 boston LinearSVR PI \n",
"44 boston RandomForestRegressor PI \n",
"46 boston DecisionTreeRegressor PI \n",
"48 diabetese LinearRegression PI \n",
"50 diabetese LinearSVR PI \n",
"52 diabetese RandomForestRegressor PI \n",
"54 diabetese DecisionTreeRegressor PI \n",
"56 REG(n_informative=5) LinearRegression PI \n",
"58 REG(n_informative=5) LinearSVR PI \n",
"60 REG(n_informative=5) RandomForestRegressor PI \n",
"62 REG(n_informative=5) DecisionTreeRegressor PI \n",
"64 REG(n_informative=5, effective_rank=2) LinearRegression PI \n",
"66 REG(n_informative=5, effective_rank=2) LinearSVR PI \n",
"68 REG(n_informative=5, effective_rank=2) RandomForestRegressor PI \n",
"70 REG(n_informative=5, effective_rank=2) DecisionTreeRegressor PI \n",
"72 REG(n_informative=1) LinearRegression PI \n",
"74 REG(n_informative=1) LinearSVR PI \n",
"76 REG(n_informative=1) RandomForestRegressor PI \n",
"78 REG(n_informative=1) DecisionTreeRegressor PI \n",
"80 REG(n_informative=20) LinearRegression PI \n",
"82 REG(n_informative=20) LinearSVR PI \n",
"84 REG(n_informative=20) RandomForestRegressor PI \n",
"86 REG(n_informative=20) DecisionTreeRegressor PI "
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_pi = df[df.type==\"PI\"]\n",
"df_pi"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>L2</th>\n",
" <th>NDCG</th>\n",
" <th>NDCG@5</th>\n",
" <th>Pearson</th>\n",
" <th>SpearmanR</th>\n",
" </tr>\n",
" <tr>\n",
" <th>estimator</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>DecisionTreeClassifier</th>\n",
" <td>0.067172</td>\n",
" <td>0.997439</td>\n",
" <td>0.989606</td>\n",
" <td>0.983372</td>\n",
" <td>0.990131</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DecisionTreeRegressor</th>\n",
" <td>0.100825</td>\n",
" <td>0.975222</td>\n",
" <td>0.963415</td>\n",
" <td>0.968772</td>\n",
" <td>0.979390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>LinearRegression</th>\n",
" <td>0.268582</td>\n",
" <td>0.999978</td>\n",
" <td>0.999885</td>\n",
" <td>0.972810</td>\n",
" <td>0.997925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>LinearSVC</th>\n",
" <td>0.327005</td>\n",
" <td>0.980478</td>\n",
" <td>0.957671</td>\n",
" <td>0.848220</td>\n",
" <td>0.803290</td>\n",
" </tr>\n",
" <tr>\n",
" <th>LinearSVR</th>\n",
" <td>0.341993</td>\n",
" <td>0.992524</td>\n",
" <td>0.989305</td>\n",
" <td>0.943220</td>\n",
" <td>0.896974</td>\n",
" </tr>\n",
" <tr>\n",
" <th>LogisticRegression</th>\n",
" <td>0.324789</td>\n",
" <td>0.981496</td>\n",
" <td>0.973374</td>\n",
" <td>0.862891</td>\n",
" <td>0.786834</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RandomForestClassifier</th>\n",
" <td>0.319975</td>\n",
" <td>0.875767</td>\n",
" <td>0.765293</td>\n",
" <td>0.644987</td>\n",
" <td>0.659278</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RandomForestRegressor</th>\n",
" <td>0.103122</td>\n",
" <td>0.996088</td>\n",
" <td>0.995512</td>\n",
" <td>0.988109</td>\n",
" <td>0.840017</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" L2 NDCG NDCG@5 Pearson SpearmanR\n",
"estimator \n",
"DecisionTreeClassifier 0.067172 0.997439 0.989606 0.983372 0.990131\n",
"DecisionTreeRegressor 0.100825 0.975222 0.963415 0.968772 0.979390\n",
"LinearRegression 0.268582 0.999978 0.999885 0.972810 0.997925\n",
"LinearSVC 0.327005 0.980478 0.957671 0.848220 0.803290\n",
"LinearSVR 0.341993 0.992524 0.989305 0.943220 0.896974\n",
"LogisticRegression 0.324789 0.981496 0.973374 0.862891 0.786834\n",
"RandomForestClassifier 0.319975 0.875767 0.765293 0.644987 0.659278\n",
"RandomForestRegressor 0.103122 0.996088 0.995512 0.988109 0.840017"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_pi.groupby('estimator').mean()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>L2</th>\n",
" <th>NDCG</th>\n",
" <th>NDCG@5</th>\n",
" <th>Pearson</th>\n",
" <th>SpearmanR</th>\n",
" </tr>\n",
" <tr>\n",
" <th>dataset</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>CLF(n_informative=1, n_redundant=4)</th>\n",
" <td>0.315954</td>\n",
" <td>0.868735</td>\n",
" <td>0.768673</td>\n",
" <td>0.641759</td>\n",
" <td>0.727879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CLF(n_informative=20, n_redundant=0)</th>\n",
" <td>0.169107</td>\n",
" <td>0.971748</td>\n",
" <td>0.920533</td>\n",
" <td>0.819741</td>\n",
" <td>0.728945</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CLF(n_informative=5, n_redundant=0)</th>\n",
" <td>0.171585</td>\n",
" <td>0.982201</td>\n",
" <td>0.964393</td>\n",
" <td>0.912143</td>\n",
" <td>0.925309</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CLF(n_informative=5, n_redundant=4)</th>\n",
" <td>0.234315</td>\n",
" <td>0.984003</td>\n",
" <td>0.966543</td>\n",
" <td>0.882152</td>\n",
" <td>0.817877</td>\n",
" </tr>\n",
" <tr>\n",
" <th>REG(n_informative=1)</th>\n",
" <td>0.274572</td>\n",
" <td>0.998082</td>\n",
" <td>0.995583</td>\n",
" <td>0.989532</td>\n",
" <td>0.877055</td>\n",
" </tr>\n",
" <tr>\n",
" <th>REG(n_informative=20)</th>\n",
" <td>0.113707</td>\n",
" <td>0.979044</td>\n",
" <td>0.963679</td>\n",
" <td>0.944721</td>\n",
" <td>0.924267</td>\n",
" </tr>\n",
" <tr>\n",
" <th>REG(n_informative=5)</th>\n",
" <td>0.161041</td>\n",
" <td>0.992570</td>\n",
" <td>0.990575</td>\n",
" <td>0.966880</td>\n",
" <td>0.878868</td>\n",
" </tr>\n",
" <tr>\n",
" <th>REG(n_informative=5, effective_rank=2)</th>\n",
" <td>0.273500</td>\n",
" <td>0.998737</td>\n",
" <td>0.998977</td>\n",
" <td>0.978429</td>\n",
" <td>0.957326</td>\n",
" </tr>\n",
" <tr>\n",
" <th>boston</th>\n",
" <td>0.193210</td>\n",
" <td>0.999111</td>\n",
" <td>0.998143</td>\n",
" <td>0.968828</td>\n",
" <td>0.979396</td>\n",
" </tr>\n",
" <tr>\n",
" <th>diabetese</th>\n",
" <td>0.205752</td>\n",
" <td>0.978174</td>\n",
" <td>0.975218</td>\n",
" <td>0.960977</td>\n",
" <td>0.954545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iris_binary</th>\n",
" <td>0.407714</td>\n",
" <td>0.987288</td>\n",
" <td>0.987288</td>\n",
" <td>0.918542</td>\n",
" <td>0.849409</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" L2 NDCG NDCG@5 \\\n",
"dataset \n",
"CLF(n_informative=1, n_redundant=4) 0.315954 0.868735 0.768673 \n",
"CLF(n_informative=20, n_redundant=0) 0.169107 0.971748 0.920533 \n",
"CLF(n_informative=5, n_redundant=0) 0.171585 0.982201 0.964393 \n",
"CLF(n_informative=5, n_redundant=4) 0.234315 0.984003 0.966543 \n",
"REG(n_informative=1) 0.274572 0.998082 0.995583 \n",
"REG(n_informative=20) 0.113707 0.979044 0.963679 \n",
"REG(n_informative=5) 0.161041 0.992570 0.990575 \n",
"REG(n_informative=5, effective_rank=2) 0.273500 0.998737 0.998977 \n",
"boston 0.193210 0.999111 0.998143 \n",
"diabetese 0.205752 0.978174 0.975218 \n",
"iris_binary 0.407714 0.987288 0.987288 \n",
"\n",
" Pearson SpearmanR \n",
"dataset \n",
"CLF(n_informative=1, n_redundant=4) 0.641759 0.727879 \n",
"CLF(n_informative=20, n_redundant=0) 0.819741 0.728945 \n",
"CLF(n_informative=5, n_redundant=0) 0.912143 0.925309 \n",
"CLF(n_informative=5, n_redundant=4) 0.882152 0.817877 \n",
"REG(n_informative=1) 0.989532 0.877055 \n",
"REG(n_informative=20) 0.944721 0.924267 \n",
"REG(n_informative=5) 0.966880 0.878868 \n",
"REG(n_informative=5, effective_rank=2) 0.978429 0.957326 \n",
"boston 0.968828 0.979396 \n",
"diabetese 0.960977 0.954545 \n",
"iris_binary 0.918542 0.849409 "
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_pi.groupby('dataset').mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment