Skip to content

Instantly share code, notes, and snippets.

@armgilles
Created July 8, 2016 16:02
Show Gist options
  • Save armgilles/bc9b9ba5c18144edcd18af038a4f7d42 to your computer and use it in GitHub Desktop.
Re commit for mlxtend package
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.feature_selection import RFECV\n",
"from sklearn.linear_model import LogisticRegression\n",
"import xgboost as xgb\n",
"from mlxtend.classifier import EnsembleVoteClassifier\n",
"# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;\n",
"# sklearn.model_selection exposes the identical cross_val_score API.\n",
"from sklearn.model_selection import cross_val_score\n",
"\n",
"from mlxtend.feature_selection import SequentialFeatureSelector as SFS\n",
"from mlxtend.feature_selection import plot_sequential_feature_selection as plot_sfs\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read data"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = pd.read_csv(\"https://gist.githubusercontent.com/armgilles/53e510e179e98acaa11bc68d6b4ebbce/raw/ef2e3d835720aea6125227479fac500f982b04c8/my_data_test.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col_0</th>\n",
" <th>col_1</th>\n",
" <th>col_2</th>\n",
" <th>col_3</th>\n",
" <th>col_4</th>\n",
" <th>col_5</th>\n",
" <th>col_6</th>\n",
" <th>col_7</th>\n",
" <th>col_8</th>\n",
" <th>col_9</th>\n",
" <th>...</th>\n",
" <th>col_27</th>\n",
" <th>col_28</th>\n",
" <th>col_29</th>\n",
" <th>col_30</th>\n",
" <th>col_31</th>\n",
" <th>col_32</th>\n",
" <th>col_33</th>\n",
" <th>col_34</th>\n",
" <th>col_35</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>-0.557099</td>\n",
" <td>-1.415828</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-1.091727</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0.794916</td>\n",
" <td>-1.735269</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1.295000</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0.254110</td>\n",
" <td>-0.457507</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.295000</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>-0.827502</td>\n",
" <td>-0.138066</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-1.688408</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>-0.016293</td>\n",
" <td>0.500815</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.698318</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 37 columns</p>\n",
"</div>"
],
"text/plain": [
" col_0 col_1 col_2 col_3 col_4 col_5 col_6 col_7 col_8 \\\n",
"0 1 -0.557099 -1.415828 0 1 0 0 0 0 \n",
"1 1 0.794916 -1.735269 1 1 1 0 1 1 \n",
"2 0 0.254110 -0.457507 1 1 0 0 0 1 \n",
"3 1 -0.827502 -0.138066 1 1 0 0 0 0 \n",
"4 1 -0.016293 0.500815 1 1 0 1 1 0 \n",
"\n",
" col_9 ... col_27 col_28 col_29 col_30 col_31 col_32 col_33 \\\n",
"0 -1.091727 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 1.295000 ... 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n",
"2 1.295000 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n",
"3 -1.688408 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.698318 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n",
"\n",
" col_34 col_35 target \n",
"0 0.0 0.0 1.0 \n",
"1 0.0 0.0 1.0 \n",
"2 0.0 0.0 1.0 \n",
"3 0.0 0.0 1.0 \n",
"4 0.0 0.0 1.0 \n",
"\n",
"[5 rows x 37 columns]"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(863, 37)"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y = data.target"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data = data.drop('target', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1.0 0.877173\n",
"0.0 0.122827\n",
"Name: target, dtype: float64"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.value_counts(normalize=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Serious business"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.6755 (+/- 0.05) [Logistic Newton]\n",
"roc_auc: 0.6304 (+/- 0.04) [Xgb1]\n",
"roc_auc: 0.6239 (+/- 0.04) [Xgb2]\n",
"roc_auc: 0.6751 (+/- 0.05) [Logistic lbfgs]\n",
"roc_auc: 0.6838 (+/- 0.04) [Ensemble]\n"
]
}
],
"source": [
"clf1 = LogisticRegression(class_weight='balanced', solver='newton-cg', C=100.0, random_state=17)\n",
"clf2 = xgb.XGBClassifier(n_estimators=5, learning_rate=0.1, max_depth=8, seed=17)\n",
"clf3 = xgb.XGBClassifier(n_estimators=7,learning_rate=0.05, max_depth=6, seed=17)\n",
"clf4 = LogisticRegression(class_weight='balanced', solver='lbfgs', C=100.0, random_state=17)\n",
"\n",
"eclf = EnsembleVoteClassifier(clfs=[clf1, clf2, clf3, clf4], voting='soft', weights=[1, 4, 4, 1])\n",
"\n",
"for clf, label in zip([clf1, clf2, clf3,clf4, eclf], ['Logistic Newton', 'Xgb1', \n",
" 'Xgb2', 'Logistic lbfgs', 'Ensemble']):\n",
"\n",
" scores = cross_val_score(clf, data, y, cv=5, scoring='roc_auc')\n",
" print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Feature selection for clf1 (Logistic Newton)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
}
],
"source": [
"sfs1 = SFS(clf1, \n",
" k_features=25, \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs1 = sfs1.fit(data.values, y.values)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEKCAYAAAAB0GKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcHHWZ+PHPU31M95yZSUggN+FQVA6zcigoWUVBFLwV\nWFcX97fqrseiroIiEhTlWHFR1F2vRX/+3MUVRcAr4IHKAiFyi4QjQC5yJ3P2WVXP74+qTjoz3TPV\nk+npnpnn/XrNq6/6dn27p7qe+t6iqhhjjDFjcRqdAWOMMVODBQxjjDGRWMAwxhgTiQUMY4wxkVjA\nMMYYE4kFDGOMMZHEG52BiSAi1jfYGGNqpKpSy/bTpoShqjX9XXrppZOSZjrsy/d9Cq5HNl9kIFug\ndyjPxy+6mFzBbYr8Nfv31+h9NXv+7LtozL7GY1qUMMz4qSpFz2cgW8TzfQquT77oUnR98q5P3vVw\nPZ/S8SWAAnuG8jyyYRdL53Yyu70FkZouVIwxU5AFjBkqX/ToHcqzrS/L9t4sazfvAcARwXGEWPiX\nSsRwkvERASGViNHWkmDdlj56O1MsntNOMh5rxEcxxkyS2MqVKxudhwN22WWXrRzP51i6dOmkpGmW\nfXm+T99Qng07B9iwY5D+bIGWRJyWhMPhhy0jlYzTkoiRjMdIxBxijoMjUrX0sHjJUtItcQayBbb3\nZUklYqSTY1+DTNZ3MZX/V82QZrruq9nzN1n7uuyyy1i5cuVltaSR8dZlNRMR0enwOepBVRnKu+wa\nyLGjP4eqkkrESEU4sdei6PoM5ArM60qzcHY78di0aR4zZloSEbTGRm8LGNNUruCyJ6xyKrg+iZhD\nazKO49SvrUFV6c8UiMcdDpvXRUc6Ubd9GWMOjAWMGc7zfXqHguqhwVwREaG1JU5ikq/280WPTN7l\nkJ5W5ne3EnOstGFMs7GAMYPlix5Pbe1jKOfS2hK0RTSSH5Y2UskYy+Z10tZipQ1jmokFjBlqKFfk\niS29CEJbqrlOzNmCS67gsWhOG/NmteJY91tjmoIFjBlo10COdVv7m6JUUY3vK32ZAu2pOEsO6qAl\nEZsWjeKer+SKLtm8S8xxSMYdEnGHRMyxcSmm6VnAmEFUled2D7Fx1xBdrckpcQLO5F0KRQ8EEnGH\ntmSC9nScdDJOMh6jJeE0dXtHwfXIFTwGc0V6MwWGcsX9N1AQAQRakwlaW+K0tsRJJWJ7g8l4Pp+q\n4mtQzVc6zkUEIdwfggjh48YFqlzRYzDsqt2WiltpsslZwJghPN/n2e0D7BrI0dXWMiV/mKVR5a7n\n43qKI8EI8kTcoa0lQXtqXyBJxsd3xS7CuL8bX5VcwSNbcOnPFujLFCgW/eA9HaElEatakvBV936u\nouejviLlny+ZoC0VdEbwwm19P7j1fHB9H89XXD943vP3HdulvY12tIsEJwMHQZwgTWsyQXd7kvZU\nYkK7VOeKHv2ZAjv6smQKbpA3VZLxGHNnpelua4k0NsdMPgsYM0C+6LFuax+ZvEtXW0ujszPhqgWS\ncZ33VVCUmBOOXhch5jjEHHAkvHUcHAfijoPjyN6uwYM5Fz88piay9OP5PkXXp+gpvgafT0TCAZLs\nvQ2e2/daTR9bNQgoWgosSsH1KRQ9VCGRcJjdkaIrnaS1JV5z6TRXcOnLFNjenyWbd3FESLfE9xvp\n73o+mbyL5yttLXHmzkrT1Zq02QCaiAWMaa6ZG7ebWVCVU5qgEpTgdr/nYW91TzIeIxF3pmTJLQrX\n88kVPVzXB4HOdILu9hTtqQTpZKxiqak8SOTyQbVi67AgUU3BDbpZo9Dd3sKczhQd6URTVz/OBBYw\nprHdAzme2tZPa7J5G7fN1KMalD6yheCEHo85dLe3MKstSSLuMJAtsqMvS7bgITUEiWr7yhU9coWg\nk8CczhSzO1K0tYycq8zUnwWMaUhV2dKbYeOO
QTqnSOO2mbo8X8kVgtmKCavE0snYhFcl+b6SCfeT\nTDjM60ozy9o7JpUFjGlmOjRuGzOWoueTDds70skYc7uC9o6Jnu/M7M8CxjQy3Ru3jamk6PoM5Yug\nkG6J7w0eVg078SxgTAOZcGbZbX0ZYo5DuzVumxkqWOXRxVdoTyc4qDNlPa0mkAWMKSpYp6LA1nDS\nwLgjtLUk6jqzrDFTSb4YjIlRoDOV4KCuNB3phAWPA2ABY4rJFlx2DuTY3pvFVyVtPaCMGVMpeAAc\n1Jli3qxWaywfBwsYU4Dn+/RnCmztzTKQLRKPCa0tCWJWmjCmJr4qQzkX1/Pobm/h4FlttKesi25U\nFjCaWLbgsnsgx9beLL6vpJt4skBjphJVJVvwyBU9OlJxDulutV6FEVjAaEIF1+OZbQP0ZfLEHIe2\nlJUmjKmXfDFoKE8kHBb2tNHd3mIjyquwgNGEntraR99Qgc7WZKOzYsyMUXR9BnNFYo4wv6eV2R0p\nayAfxgJGk+kdyvP4c310tyWtXtWYBvB8n8FsEQXmdaWZ25W2AYEhCxhNxPN9Htmwm0TMsSsbYxrM\n95WhfBHXU+KxfbMXl2YDLs1gLA7B7MbhrMExx8EJbxMxh5gjwV/Z/anaVjKegGGhtk627slQdH1b\ny9qYJuA4Qkc6qBb2/X2zE5duix6At3dK+9JsxrBvlmNfFWHkWiQxR/bOcJyMBxeIyViwYFZna3LK\nBpRKLGDUQSbvsnl3hi5rtzCm6ewbEDsxJ3LfVzxViq5PruDhayFYAMvzWTi7jYWz2ydkP83AAsYE\nU1XW7xygJRGzkdrGzACOE6xuyLCaZ1+VzbuH6Eglps18cNbfbILtGsgxkCnS2mKx2JiZzBGhPZVg\n3bZ+Cq7X6OxMiLoHDBE5Q0TWisgTInJhhdf/RUQeEJH7ReQREXFFZFb42rMi8lD4+r31zuuBKrge\nG3YO0pG2dgtjTLB6o4jwzLaBvW0iU1lde0mJiAM8AbwKeA5YA5yjqmurbP964AJVPS18/DTwV6q6\nZ4z9NEUvqWe397NrIG9jLowx+9kzmGfhnDbmd7c1Oit7jaeXVL1LGCcAT6rqelUtAjcAbxhl+3OB\n/y57LEyRarP+bIFtvVkrXRhjRuhqTbJp5xD92UKjs3JA6n0yXgBsLHu8KXxuBBFJA2cAPy57WoHb\nRWSNiPxD3XJ5gDxfWb99gLZUwgboGWNGcByhLRVn3da+Kd2e0UxX72cBd6pqb9lzJ6vqcuBM4AMi\nckpjsja67X0ZckXPJhM0xlSVjMdQFZ7dPnXbM+rdlWczsLjs8cLwuUrOYf/qKFR1S3i7Q0RuIqji\nurNS4pUrV+69v2LFClasWDHePNckV3DZuGuIrrS1WxhjRteRTrBnMMe23gyHTHJ7xh133MEdd9xx\nQO9R70bvGPA4QaP3FuBe4FxVfWzYdl3A08BCVc2Gz7UCjqoOikgbcBtwmareVmE/DWn0VlWe3BKs\nu91mS6kaYyLwfaU3k+cFC3sa2ubZdI3equoBHyQ42T8K3KCqj4nI+0TkvWWbvhFYVQoWoXnAnSLy\nAHAPcGulYNFIewbz7BkqWLAwxkTmhEswT8X2DJt8cJyKns+fN+yiJREnEWumpiBjzFQwkC3QkUpw\n+CFdDeks03QljOnsud1D+D4WLIwx49KRTrJnKM/2vuzYGzcJO9uNw2CuaGMujDEHrLO1hWd3DDCY\nKzY6K5FYwKiRr8qz2wdIJ2M25sIYc0BiYXvGU1v7KHp+o7MzJgsYNdrRlyVbcG3VLmPMhGhJxPA8\nZf2OAZq9TdkCRg3yxdLkgjbmwhgzcTpbk+wayLO9L9forIzKLpNr8NzuIWJOsDSjMcZMpK7WJOt3\nDBCPybjOMaqgBKsDer7iq+KXblXxfVD18TQYCzIeFjAiKrgeOwdyNhOtMaYuYo7Q2hLn6W39B/xe\npWVhJVyb
XCrcHw8LGBHtGcwDTKv1eY0xzaUlEWvqOemsDSMCX5UtvRkb0W2MmdEsYEQwmCtSKPo2\nSM8YM6PZGTCC7b3Zpi4mGmPMZLCAMYZ80WP3UJ500gKGMWZms4Axht2DuQPqVWCMMdOFBYxReL6y\ndY/NGWWMMWABY1QD2QKu5xNz7Gsyxhg7E45ia2+GFmu7MMYYwAJGVbmCS3+mQNomGTTGGMACRlW7\nBnJWFWWMMWXsjFiB5/ts7c3ayG5jjCljAaOCvqECnq82K60xxpSxgFHB1t4MrS3WdmGMMeUsYAwz\nlC8ymHNtKhBjjBnGAsYwO/tzxG2SQWOMGcHOjGVcz2dHf5Y2q44yxpgRLGCU6R3KowqONXYbY8wI\nFjBCqsqWPdbYbYwx1VjACA3lXbIFj2TcGruNMaYSCxihHX1ZEnH7Oowxpho7QwIF12PnQM6qo4wx\nZhQWMIA9g3kAHFskyRhjqprxAcNXZUtvxuaNMsaYMcz4gDGYK1Io+iRssJ4xxoxqxp8lt/dmbRoQ\nY4yJYEYHjHzRY/dQnrStqmeMMWOa0QFj92AOAcQau40xZkwzth+p5ytb92TpSFtj92TYuGED1151\nA9u3wdx5cMGF57Bo8eKmSDPZ+zJmqpqxAWMgW8D1fGKOBYxajPfEev47rmfD+s8BbcAQD91/Cdf/\n8PyqaScrzWTvy5ipTFS10Xk4YCKitX6OtZv3UHB90skZGzNrVukkuXjJ/idJ14W+XmHPbmHPHqF3\nt/CNr17Jww9cFKYpGWLJoVfx0lMuxvcF3wf1wfPB9+G+1ZezedOFI9IcsuAqjlv+6Yr5e/D+y9my\nubY0IvDgfZfzXIV0Rz7/Kt741k+SboV0q9LaqrS2Bfe/8ZUr+cPvPjkizVlv+gLXfO0TY36PVjIx\njXbk/Fmoak318TPybOmrMpAtMqutpdFZmVKuveqGsmAB0MaG9Z/jLa+9ms6uz7Bnt0NmCDq7lO4e\nZVa30t2tbNkM+59Yg8ciPke90MeJKY6A44A4we2jD/sV07S1+Zz+Ordi/p58vHqa15w5Mk3pGuOJ\ntZXTDQ367NzhkMlAJiNkM0I2vL/2Uadimj/e4XD5JSnmL/RZsNBn/kJlwUKf7h5F5MBKQVZlZhpt\nRgYME4hyQtmyWbj3njir74pz2y9iVDpJLljo8m//nmFWt9LZpTjDulJ87AM+t940xPCr8aOPVc59\nV6Fi3v74O2XdkyPTHPVC5cyzixXT/GaV8tQTldO87g2V0wD87vbK6ZYfr1x0aa5imo99wKv4mQ4/\nEuYv9Hluk8Oae+I8t8nhuc1CIS8cssBnoP9GdmwfGXQ//5kruOZrH6e1NSj1lLMqM9MsLGDMUNVO\nKFde+x42bVzGvXfFWX13jMEB4YSXepzwUpcd231+/5uRJ8llh8PSZX7VfV1w4Tk8dP8lI6qyLrjw\n/Ianmeh9XfXl81m0eGQQHByELZsdPvpPHju2jwy6d/7e4WXHdOIrzJ6t9Mz26Zmj9MxWHnngRxVL\ndp/8yJW870MX4ThKLAaxWFA6i8Xgumt+WDHNtVdZlZkZv8htGCKSBhar6uP1zVLtam3D8FW5b92O\naVUlVeuP/GMfuJpbb/oUw0/+yeS/8qrTP8XxJ3mc+DKXw47w95YYorRhTFT+JjPNZO6r2vdeavvI\nZGD3LmH3LoddO4XdO4Wvf/kLbFx/+Yj36pp1CS865jN4vuB74HkEtz489cRlZIY+VzHNm972aQ49\nzOfQwzyWLvOZd7DuLdUcyP/YTC3jacOIFDBE5Czgi0BSVQ8VkeOAz6rq2ePL6sSa6QGj2o/83797\nPuIsZfNGZ+/fpo3Cpo0Oj/35s7juZ0e814kvu5jv31j9CtSuPg/MeE7IYwWZWtIcf9KVvPLVn+Lp\ndTGeWefwzDqHXFZYuiwIIE+svZwn1o7sADBWY74dF1NPPRu9VwInAHcAqO
qDInJoTbkzdfNvV1Zu\njD7rVf/KkqWXsGCRH/4pr36Rx8JFPt/6us/tvxxZvTR33uj7WrR48ZhVGqa6RYsXc/0Pz+faq75Q\ndnId/ep9IqvMrrx2ZJVZfx88sy7G0+sc7l9TuQPAHb9x+Mg/ppl3sDLvEJ+585R5B/vMPdinWFjP\n+95l7SUzQdQSxj2qepKIPKCqLw6fe1hVj6l7DiOYiSUM34cH/hRj1c8T/OC7n6dYrK20YFUPU0uj\nq8xeesoVvOWcT7Fti7B9m8O2rQ7btwrbtjo899xnUf/jI9JE6WJsGqeeJYxHReQ8ICYiRwAfBu6q\nNYNmbKP9yH0f7lsTY9XPEqz6RYLOTuWM1xc5+RU+d1RojB6ttDCeK13TOOMp2Y0nTbWSyeVfPJ9F\niyv3NHvnW4rce/fIUsltv4xx2adSvHyFy4knu7QN3wSryppqopYwWoGLgdeET60CLlfVyn0OJ9l0\nKWFUuupftOQSPnLh/+H+NYdx2y8SdPcEQeL01xc5/Ai/ajorLZjxmqgOFK945RWc+NKL+ePvEzzy\nQIwXHetxyqkup5xa5KgX+WzeZMdtI9Wt0bvZTZeAUe2H19l5Ne95/yc54/VFlh1eufuqXamZRoly\nwZLJwOq74tx5R/DX3y+k0ivZvLH2BnYzMepWJSUitwNvU9Xe8HE3cIOqnl57Nk0127dBpQbHo17k\n8k8X5EdNa43RplGiVG+2tsJfn+by16cFI+43bRTec27lBvZNGyYv76Y2Udsw5pSCBYCq7hGRuXXK\n04zU1wtbt8SA2nsuGdNotV6wLFykHH2s8uzTI4/3Rx9JsOL4Dl5ykstLTnQ5/kSPZYf7+40VsdJ0\nY0QNGL6ILFbVDQAisgSY+nVZTcD34ab/SXDNFSle9vJ34rmXsGljbSOVjZmKqjWw/+cN5+MWh1iz\nOsafVsf5xldSZLPwVyd4HHHk0/z4f65n2xbrwtsIURu9zwC+CfweEODlwHtVdVV9sxfNVG3DeOxR\nh8s+laZYhJVX5Dj6WM+unsyMEvV437JZ+NPqOF+55krWPzOy3eP0113Bdd/6+KTlezqoa6O3iMwB\nTgof3qOqO2vMX91MtYAxOABf+WKKW36S4IJP5HnbeQVitkqsMWP627dezeq7Pj/iecf5DIuXXsqx\nyz2OW+5x3HKXI4/ySYTL3diF2Ej1nt68BdgdpnmBiKCqfxgrUVg6uZZgOdjvqOpVw17/F+BvCKq4\nEsBRhG0mY6WdalTh5z9NcOXnUrxihcsv7hikZ7bV7BkTVdCeN7Ld48yzPd7/4QwP3hfjwfvj/Nf3\nkmze6PCCoz2WHf40v73tenbusGqsAxW1Suoq4B3Ao0CpX6eONZeUiDjAE8CrgOeANcA5qrq2yvav\nBy5Q1dNqSdusJYzyq5p0q9C7+3xyuaWsvCLL8uO9uu7bmOmoljFHA/3w8IMxrrzsah5/bGQ11ste\nfgVfvz6YUn60/U3Xkkk9SxhvBJ6nqqP37RzpBOBJVV0PICI3AG8AKgYM4Fzgv8eZtqlUOrC7ey7h\nh7ecz9Jl0+OAM2ay1TJDQUcnnPwKj1ndHpW67z70gPDSoztZcqjPsctdjn2xx7HLgx5ZsZitKVJJ\n1IDxNEF1Ua0BYwGwsezxJoJAMEI4ffoZwAdqTduMKq1Ot2f357juGhuUZMyBqLULb7VqrFe+2ueK\nL/Xz2KMxHro/xt13xvnGV1vYucPh6GM9du6ovA5JlDVFpquoASMDPCgiv6EsaKjqhycwL2cBd5aP\n95iqdu8S7vpj5SU8g8F5xpjJMtpsv8kWOHZ5ULIo2bNbePjBGCsvqjywcOP6oD1y+MqIM0HUgHFL\n+FerzUB52W1h+Fwl57CvOqrWtKxcuXLv/RUrVrBixYracjoBVOGWnyS48rIUXbOEXTttEJ4xjVbr\nRJvdPcqpr3RZfryyedPI3/Bjf0nwip
d0cPyJLie81OMlJ7kcNgEDC+s9I/Hqu/7I6rvuHDMfo6nr\nXFIiEgMeJ2i43gLcC5yrqo8N266LoNproapma0kbbtvwRu9NG4VLL0yzY7vD56/JMqv7GZtYzZgp\nrFoD+3/ecD7qL2XNPTHWrI6z5p44mSE4/iSPI454mht/+A22bqntdz+eCUQPdNLReq64dwRwBfAC\nIFV6XlWXRUh7BvBl9nWNvVJE3hck12+G27wbOF1VzxsrbZV9NCxgeB58/z+TfP3aFv7+/QXe8/68\n9f02ZpqoZWDhmtVxvvLFK9nw7MgeWYsWX8UrXnkx6TSkW5XWViVVdv8H372S1XddNCLdqa+6gk98\n+kJE2PvnOMHtFZddxW9v++SINFFXR7z1pi/ULWDcCVwK/BtBW8P5gKOqn6llZ/XSqICx9i8OF/9L\nmnQaPnd1lkMPqzyTrDFmZqg2sHDZ4Z/mb/7uYrIZyGaETFbIZSGTEbIZYfX/fpa+vpFrsKfTn2HB\nokvx/aDKu/Tn+7Bt62UU8iMXTpt90CWc966LWbzUZ8lSn8VLfbp7gnXb9y+VtNetW21aVX8jwZl5\nPbBSRO4DmiJg1Nvwq4x/uuAcbvnJEdzw/SQf+2SOt55bxHEanUtjTKNV65H1wqOVv31PoXIi4GMf\n8Ln1ppHpTjvD45qvDVZJ41VMs2gRuC787vY4G9Y7bHg2hu/DkqU+u3f/iC2bP8fIxvxoogaMfDiQ\n7kkR+SBB43P7uPY4xVSqJ/zlrZ/hpS//R279zVzmzrOR2saYwHjWXx9vumpprvn6+SxavP8IiN49\nwvpnHS66oFLPr+iiVkkdDzwGzAI+B3QBV6vqPePe8wSqZ5VUtUWNbJEXY0wlzdpLCoafz8RW3Iui\nloBRrU7yxJddzPdvtIBhjJk6DrQNI1LNu4i8RERuEpH7ReTh0t+4cjzF7KuTLGfjKYwxU09pTMpZ\nb/rCuNJHrZJ6HPg48Aj7Jh+kNM9To9WzhLFxwwbe/fbr2bTBxlMYY6aPek4+uENVxzPSe8pbtHgx\nf/cP7+Ub113NssPdMUeJGmPMdBU1YFwqIt8Ghs8l9ZO65KrJrL7rMD560YW85Zxio7NijDENEzVg\nnA88n2DG2r3rYQDTPmAMDsDdd8a54kuZRmfFGGMaKmrAOF5Vn1fXnDSpX69KcMJJLp1djc6JMcY0\nVtTxyXeJyAvqmpMm9fObE7zuDVYVZYwxUUsYJxGsh/EMQRuGEEweeEzdctYE9uwW7rs3zrX/YdVR\nxhgTNWCcUddcNKnbfxnnlFNd2sY/kt4YY6aNMQNGuC7FKlV9/iTkp6n8/OYk57271lVpjTFmehqz\nDUNVPeBxEZlRAw927hD+/HCMU1/pNjorxhjTFKJWSXUDj4rIvZTNk6GqZ9clV03gVz9L8NenFUml\nG50TY4xpDlEDxiV1zUUT+vnNCd77AauOMsaYkkgBQ1V/LyLzgOPDp+5V1e31y1ZjbdksrHvS4eRT\nrTrKGGNKos5W+3bgXuBtwNuB1SLy1npmrJF++bMEp53ukkw2OifGGNM8olZJXUww2ns7gIgcBPwa\nuLFeGWukn9+c4KMXWnWUMcaUizrS2xlWBbWrhrRTyoZnHZ7b5HDiyVYdZYwx5aKWMH4lIquA/w4f\nvwP4RX2y1Fi/uDXB6a8rEo/6zRhjzAwxailBRFoAVPXjwDeAY8K/b6rqhfXP3uSzuaOMMaaysa6j\n7waWi8j3VfVvmebTmT/1pMOe3cJfneA1OivGGNN0xgoYSRE5D3iZiLx5+IvTbQGlX9yc4MyzizjT\nsnXGGGMOzFgB4/3A3wCzgLOGvTatFlBSDaqjrv5yttFZMcaYpjRqwFDVO0XkLmCTqn5+kvLUEI89\n6lAsCMe82KqjjDGmkiiTD/rAtB2kV/LzmxOc+YYCIo3OiTHGNKeotfW/EZG3iEzP06kq/PKWJGee\nbb
2jjDGmmqgB433Aj4CCiPSLyICI9NcxX5Pq4QdiJJLKUS/0G50VY4xpWlEnH+yod0YaqTT2YnqW\nn4wxZmJEnXxQROSdInJJ+HiRiJxQ36xNDt8PRnefaYP1jDFmVFGrpL4OvBQ4L3w8CHytLjmaZPfd\nG6O7Rzn8CKuOMsaY0USdMelEVV0uIg8AqOoeEZkWk3/bVCDGGBNN1BJGUURiBIP1StObT/lLcteF\nVT9PcObZhUZnxRhjml7UgPEV4CZgroh8HrgT+ELdcjVJVv9vnPkLfRYv0UZnxRhjml7UXlI/EJH7\ngFcBArxRVR+ra84mgVVHGWNMdKMGDBFJEcwndTjwCPANVZ0WKwsVC/DrVXE+9LFco7NijDFTwlhV\nUt8DXkIQLF4LfLHuOZok99yZ5LAjfA5ZYNVRxhgTxVhVUi9Q1aMBROQ7wL31z1J9PfPMej796ev5\n1SqX+QuFjRvezqLFixudLWOMaXpjBYy9Ffyq6k71qaSeeWY9r371daxbdxnQxu5dQ5z/jku4/ofn\nW9AwxpgxiGr1KhkR8YCh0kMgDWTC+6qqnXXPYQQioqN9jpJ3vvMyfvCDfwHayp4d4qw3fYFrvvaJ\nuuXPjJ/nK0XXY7wVh6rgq6IKqooS3AKIBK9DeECHaRwR0i1xEjFbSctMX0fOn4Wq1lQKGGs9jNiB\nZam5bN7ss3+wAGhj+7ZG5MaUU1VcTym4HkXPD87eAomYQ1tLYlyrIDoCjuPgCMQch5gjOCI44a0I\n4a3gCIgIqkp/tsCO/hyDuSICtLbEScan1U/BmHGJOtJ7WliwwCEoMO1fwpg7r0EZmqF8Xyl6PgXX\nw/MVNDhZp5IxetpbaG2Jk0rGaUnEGnKV35ZKcEh3G7mCS3+2yM7+HL2DeRClJREnlYgx1atnjRmP\nUaukpoqoVVLD2zBgiMVLrA1jsgzmiriuTywmtKUSdKYTpJPBCTiZiOE08Um44HoMZIvsGsjRlymA\nQjIRI5Vs7nwbU814qqRmVMCAfb2kHl+XZ/58hwsuPMeCxSToG8rT2hJn2bxOUsmpXbB1PZ/BXJHd\ng3n2DObxw2PPEQmqvZzgNhZWh1lpxDQjCxgR+arct24Hs9pa6pgrA8F33TeUZ3ZHiqVzO4iNpzGi\niXm+ksm7FD2PgutTKHoUPJ9C0afgebiej/oErersa1x3RIjHHJJxh7g1rpsGmPBGb2MOhOv59GcL\nLOxpY35P27S80o45Qkc6ASQqvq6qeL7i+orn+3he8LjgeWTzHv3ZAoO5oPe6I0Iy7pCMx3Cc6fdd\nmanPAobGXIcnAAAXn0lEQVSpi3zRI5N3OfzgLmZ3pBqdnYYREeIxIehkVbmnVdHzyRU8Mvki/Zki\n/bkCvh90BU7Eg1JIIuZMy4BrphYLGGbCZfIunu/zgkXdtKcqX3mbfRIxh0TaoSOdYN6soFSSd31y\nBZeBXJG+oQJ9mUKpp/GIwBEljARJgq7E5e9R+XHwZCzsgmxMiQUMM6H6MwVaEjGet6CHVMLGLoyH\niJBKxEglYsxqa2HRbPD8oBSSK3p7Bx6WN9vp3tuRT4Zb4/tBm1JpIKOvivqKVxrUqIq/93nIFl18\nVTrSSWIWOAwWMMwECRq3C/S0J1k6t9MacidYzHFoSzm0TWKJzfV8dvRn2bx7CAjaaqwL8cxmAcMc\nMM/36RsqML+njQWz2+ykMk3EYw6HdLcxuyPFtr4sW/ZkiDsO7am4tafMUHW/DBSRM0RkrYg8ISIX\nVtlmhYg8ICJ/FpHflT3/rIg8FL425WfKnY4KrkdfpsCygztZNKfdgsU0lIzHWDS7nWOWzGZWW5Le\noQKZ/LRYFsfUqK4lDBFxgK8SrNT3HLBGRG5W1bVl23QBXwNeo6qbRWRO2Vv4wApV3VPPfJrxyRZc\niq7PUQu76UwnG50dU2epRIxl8zqZNyvNpl1D7B7M05qMTfmBmCa6
ev+nTwCeVNX1ACJyA/AGYG3Z\nNucBP1bVzQCqurPsNWESSkGmNp7vM5AtkkrEeOGibjthzDBtLQmeN38W/dkCm3YOsmcwT1vKJmic\nCP2ZPH7EMchSdq/U2SGdjJOM168Ldr1/6QuAjWWPNxEEkXJHAomwKqod+Iqqfj98TYHbw2nWv6mq\n36pzfpuG72vQxbGJqniyBZd80SPmCAt62jioK21TgM9gnekkRy3spncoz8ZdQ/QO5WlPJazDwzhl\n8i6pZJznzZ+1tzvzvp5wuvf+3h5xZd3kPF8ZzBXZ1Z+jb6gAElQlTvRcZ81waRgHlgOvJJgR8G4R\nuVtVnwJOVtUtInIQQeB4TFXvbGRmJ8NQrojr+wiCr7q3n3winEZiMk/Snu8zlCt1r0yweE67dbM0\ne4kI3e0pOltb2D2YY9OuQbycoijxWGzv1CfWtjW6YOZmn+ct6Nk/4MqIO1Wlk3EO6kxTcINBs7sG\ncuwZKqC+Eo87pJPxA/7d1jtgbAbKZ/ZbGD5XbhOwU1VzQE5E/gAcCzylqlsAVHWHiNxEUDqpGDBW\nrly59/6KFStYsWLFBH2EyaOq9GcKtLbEOergbhJxZ+/8RNmiy1C2yFDeZShcp0EJpqYozUk0kfM0\nlZcmDulupae9xaqeTFUxRzioM01Pe4p80aPgegzmigxkiwxmi/tN0JiMOyTiMbvoCPm+Mpgt8vwF\nsyZk7FIyHiMZD8bweL7PUN6ldzDPqtt/y71334mE68yMR10nHxSRGPA4QaP3FoI1wc9V1cfKtnk+\ncB1wBtACrAbeATwLOKo6KCJtwG3AZap6W4X9TPnJBz1f6csUmNeVYtGc9lFP/p4fLDSUL3pkCy5D\nOZfBXJGi5++9DgmCiRNMS+E4xGIy5lVeqTTh+T6drUkOntVqpQlzwEoj14PjtchAJjheXd/fu00y\nHtt7rM600eV7BvMsPqidg2e11nU/qkqm4NKfKbCzP8cxS+c01+SDquqJyAcJTvYO8B1VfUxE3he8\nrN9U1bUisgp4GCi1VfxFRA4FbhIRDfP5g0rBYjooXY0tPaiDuV3pMdstYo6QTsZJJ+P7BT3X8ym4\nPq7nU/T8vQElW/DIZov7GtPCOSZKU3CLQK7ghv3urTRhJlb5yPWu1iQHzwqeDy56fHLF4KInW3AZ\nyhfxfN3vwqdUDRuPybSb7TiYybmFeV3puu9LRGhrSdDWEiwQNq73sOnNGyuTd3E9n8MP7qSrzvlx\nvVIwUVwv+KHmisGqd3M6UlaaME2hdMFTdINVGTOli56wG/doytdmH36fssfpcEXHRsrkXRwHjlrQ\n3ZCOAuGSxM1TwjCj6xvKh70iJqdrajwWNEDumzu28QHTmOFKx2mloT1euLxvMSxJA3t7E4bzKyJl\nkyyOfCxkckW29mbYM5gnHpNwzfjJvVAquj5Fz+NF83umVK8yCxg1GMwG9a4HenXi+0pfJk9PR4ql\nB3VMqQPGmEYKqlFjB9Q4nErE6OlIBT2JBnNs783iq05aqcP3lYFcgefNnzXlqn6nVm4bzPV8Fh3U\nzu7BPL2DeZDai7ZFz2cgW2DRnHYOmdXaVOMsjJlJWlvitLa0M7+7lb6hAlv7sntLHa0tibpVz/Zl\n8iya094UVeK1soARUcH1SLXEOHhWKwfPaiVfDFZL29Gfixw8Sl1Vjzyki+72mbuokDHNJOY49HSk\n6OlIkS247BwISh2er7S2TGypoz9ToKcjxSF17hFVL9boHVF/psD8ntaKvQtKwWNnf46BcIzE8ODR\nnymQiDsccUgX6SlWDDVmpvF8n/5Mga29WQayRWKO0JY6sFJHtuAiwPMXdjfFDAnW6F1Hvq90tlae\nYK8lEeOgRHrvKMtSP+c9Q3mEIED1tLWwdF5nUxwoxpjRxRyH7vYU3e0TU+oodXN/4aKeKX0OsIAR\ngecrsZhEKhkk4zHmdKaZUxY8
PF85qCtt0yMYMwWlk3EWza7c1hGlh5WvykC2wJGHdNHaMrVPuVM7\n95MkV3DpaU/VfMIvBQ9jzNRX3tZRSw+rvqECC3rapkW7pQWMCIquT3f71OvRYIypjxE9rKqM6xjI\nFpjVlmR+z/hGVjcbCxhjUFUQpnxR0hgz8YaXOnb2Z9nen0NVicccYo6wdG7HtKmOtrPgGAquT2c6\nMaUbqowx9dfaEmfxQR0smN1G71CBXQM5Fsxum1YLS1nAGEOu4DJvbkejs2GMmSJijsPsjhSzO6Z+\nm8Vwdtk8Bh/oSCUanQ1jjGk4CxijcD2fZNxp+KyWxhjTDCxgjCJb8JjTkbL5nowxBgsYo/J8n65W\n605rjDFgAaMqXxURse60xhgTsoBRRb7oMavNVqAzxpgSCxhV5IsePVNwvnpjjKkXCxjVKLSnrTut\nMcaUWMCooOB6pFvi02qEpjHGHCgLGBXkCh5zOq06yhhjylnAqMD3lY505cWSjDFmprKAMUwtiyUZ\nY8xMYgFjmPEulmSMMdOdBYxhbLEkY4ypzAJGmdJiSW02utsYY0awgFGmtFhS3BZLMsaYEezMWCZX\ncOmZhoueGGPMRLCAUcYWSzLGmOosYIRKiyWlrDutMcZUZAEjVFosyRhjTGUWMEK2WJIxxozOAga2\nWJIxxkRhAYNg7YtuWyzJGGNGZQGDUsCw6ihjjBmNBYyQLZZkjDGjm/EBo+B6tCZtsSRjjBnLjA8Y\nuYLH7E7rTmuMMWOZ8QHDV6XDqqOMMWZMMzpgeL4SdxxabXS3McaMaUYHjGCywRbEFksyxpgxzeiA\nUfR8Zll3WmOMiWRGBwywxZKMMSaqmRswFDpbbbEkY4yJauaeLQV62q07rTHGRDVjA0Y6GbPFkowx\npgYzMmAIMKcjbYslGWNMDURVG52HAyYiOh0+hzHGTBYRQVVrGlMwI0sYxhhjamcBwxhjTCQWMIwx\nxkRiAcMYY0wkdQ8YInKGiKwVkSdE5MIq26wQkQdE5M8i8rta0hpjjJkcdQ0YIuIAXwVOB14InCsi\nzx+2TRfwNeD1qvoi4G1R0x6IO+64Y1LSTNd9Wf6mzr6aPX+Tua9mz99k76tW9S5hnAA8qarrVbUI\n3AC8Ydg25wE/VtXNAKq6s4a04zZdDwD74Y0/zXTdV7PnbzL31ez5m+x91areAWMBsLHs8abwuXJH\nAj0i8jsRWSMif1tDWmOMMZOkGYY6x4HlwCuBNuBuEbm7sVkyxhgzXF1HeovIScBKVT0jfHwRoKp6\nVdk2FwIpVb0sfPxt4JfA5rHSlr2HDfM2xpga1TrSu94ljDXA4SKyBNgCnAOcO2ybm4HrRCQGtAAn\nAl8CHo+QFqj9QxtjjKldXQOGqnoi8kHgNoL2ku+o6mMi8r7gZf2mqq4VkVXAw4AHfFNV/wJQKW09\n82uMMaa6aTH5oDHGmPqbcSO9ReQ7IrJNRB6uIc1CEfmtiDwqIo+IyIcjpGkRkdXhgMRHROTSGvbn\niMj9InJLDWmeFZGHwv3dGzFNl4j8SEQeCz/biRHSHBnu4/7wti/i9/GRcGDmwyLyAxFJRszjP4ff\nX9XvvdL/VES6ReQ2EXlcRFaF432ipHtrmE9PRJZHTHN1+B0+KCI/FpHOCGk+W/b/+pWIHBxlX2Wv\nfUxEfBHpibCvS0VkU/g/u19Ezoi6LxH5UPjZHhGRKyPs64ay/TwjIvdHSHOsiNxdOnZF5CVR8ici\nx4jIXeH3eLOItA9LU/F3O9qxMUqasY6L4ek+FD5f9dgYJU3VY6Na/sper3ZcVNvXmMfGflR1Rv0B\npwDHAQ/XkOZg4LjwfjtB+8rzI6RrDW9jwD3ACRH39xHg/wG31JDHp4HuGr+L7wLnh/fjQGeN6R3g\nOWDRGNvND/OXDB//EHhXhPd/IUFVZUv4Hd4GLIvyPwWuAj4R3r8QuDJiuucBRwC/BZZHTHMa4I
T3\nrwSuiJCmvez+h4B/j3qsAguBXwHPAD0R9nUp8NFafxfAivA7j4eP50TJX9nrXwQ+HWE/q4DXhPdf\nC/wuYv7uBU4J7/8d8NlhaSr+bkc7NkZJM9ZxUS1d1WNjlDRVj41qaSIcF9X2NeaxUf4340oYqnon\nsKfGNFtV9cHw/iDwGBHGhKhqJrzbQnBCHrP+T0QWAmcC364ljwTrQkX+f4ZXOi9X1evDvLqq2l/j\nPk8D1qnqxjG3DE74bSISB1oJAs1YjgJWq2peVT3gD8Cbh29U5X/6BuB74f3vAW+Mkk5VH1fVJwm+\nzxGqpPm1qvrhw3sIfrhjpRkse9gG+AwzyrH6b8DHo+YvNGrHkCrp/pHgZOqG2+yMkKbc24H/jpDG\nB0pX+bMIekhGyd8R4fMAvwbeMixNpd/tQkY5Nqr91iMcF9XSVT02RklT9dgY41w02nExWrrInYZm\nXMA4UCKylOBKZ3WEbR0ReQDYCtyuqmsi7KL0T6+1cUmB2yUY/PgPEbY/FNgpIteHRdFviki6xn2+\ng2EnhIoZU30OuAbYQHAy6FXVX0d4/z8DLw+rEFoJAumiiHmbq6rbwv1vBeZGTHeg3kPQLXxMInK5\niGwgmO3gMxHTnA1sVNVHaszXB8NqkW9Lheq5Ko4EXiEi90gwsHZEVdEo+Xw5sFVV10XY/CPAF8Pv\n4mrgkxF382j4fUAQnBZW27Dsd3sPMC/KsVHLbz1iuqrHxvA0UY6N8jS1HBcV8hf52LCAUYOwjvRG\n4J+HXQVUpKq+qr6Y4EA+UUReMMb7vw7YFl4JCDVEfuBkVV1OcFL9gIicMsb2pQGTXwvTZYCLou5M\nRBLA2cCPImw7i+CqbglB9VS7iJw3VjpVXUtQfXA78AvgAYKedONR994dInIxUFTV/4qyvap+WlUX\nAz8gqHoY6/3TwKcIqhH2Ph1hV18nqMo7juDi5UtR8kdwjHSr6knAJ4D/iZgOgi7wY15MhP6R4De1\nmCB4/GfEdO8hONbXEFyJFyptVOF3O/xYGHFs1PpbHyvdaMdGpTRjHRvlaQh+E5GOiwr7qunYsIAR\nUViVciPwfVW9uZa0YVXP74DRG5TgZOBsEXma4Mf21yLyfyPuY0t4uwO4iWAurtFsIrgi+VP4+EaC\nABLVa4H7wv2N5TTgaVXdHVYt/QR4WZSdqOr1qvoSVV0B9AJPRMzfNhGZBxA2Gm6PmG5cROTvCIL1\nmIGwgv9iWHVKFYcBS4GHROQZgguR+0Rk1NKTqu7QsPIa+BZwfMR8bST4XxGWjn0RmT1WIgnGVL2Z\noK0qiner6k/D/dzI2Mcu4bZPqOrpqno8wVxzI0ozVX63ox4b4/2tV0s32rERYV8jjo0KaSIdF5X2\nVeuxMVMDRq1X7xBc9fxFVb8caQcic0rFu/DK8NXA2tHSqOqnVHWxqi4jGKj4W1V9V4R9tYZXDohI\nG/Aaguqc0fa1DdgoIkeGT70K+MtY+ypTyxXkBuAkEUmJiIT7ijSmRkQOCm8XA28i+AFV3JT9/6e3\nEDSEArybYIBolHTDXxszTdiz5OPA2aqaj5jm8LLX3kj172NvOlX9s6oerKrLVPVQgqD/YlUdHgyH\n76u8B9abqX5sDP8ufkowZQ/hcZJQ1V1jpIHgWH8srIqMsp/NInJquJ9XUf2iYPjnKh0bDvBp4D8q\npKn0ux3r2Bjrt17tuBiRLsKxUSnNWMfGfmlqOC4q7SvqsRHQiK3j0+WP4ITzHJAnOJGdHyHNyQTF\nvgcJqkXuB84YI83R4XYPEvT0ubjGfJ5KxF5SBO0Rpbw9AlwUMd2xBKPxHyS4kuyKmK4V2AF01PB5\nLg0P/IcJGhoTEdP9ITyIHwBWRP2fAt0EDaGPE/T0mRUx3RsJrqyzBDMM/DJCmieB9eH/+37g6xHS\n3Bj+rx4kOGEdUuuxStDzbHhvmEr7+r/h9/4gQRCYF/G7iA
PfD/P5J+DUKPkDrgfeW8P/6mXh+z8A\n3E1wsouS7sPh/3ct8IWov1ugp9qxMUqasY6LSuleO9qxMcq+qh4b1dJEOC6q7WvMY6P8zwbuGWOM\niWSmVkkZY4ypkQUMY4wxkVjAMMYYE4kFDGOMMZFYwDDGGBOJBQxjjDGRWMAwTSucpvlfyx5/TEQi\nzbkU4b2vF5ERExlONAmmxf6LiPxm2PNLRCQj+6aJvz8ciVvr+y8RkYorURoz0SxgmGaWB948fG7/\nRgunvojq74H/o6qvqvDaU6q6XFVfHN6648jOoYxjOpJwdLQxNbGDxjQzF/gm8NHhLwwvIYjIQHh7\nqojcISI/FZGnROQKETlPgsWsHhKRQ8ve5tXh7L5rw4kfSzMMXx1u/6CEM/+G7/sHEbkZeLRCfs6V\nYHGoh0XkivC5SwjWcfiOiFxV4fONmGIinOblOxLMEHufiJwVPr8k3P+fwr+TwiRXAKeEJZR/FpF3\ni8h1Ze93q4i8ovQdicgXJZhB+SQRWR5+V2tE5Jdl8yt9WIKFdh4UkUgTKZoZIurUDvZnf5P9B/QT\nLPbyDNABfAz4TPja9cCby7cNb08FdhNMWZ0kmFfn0vC1DwNfKkv/i/D+4QTTPiSBfwA+FT6fJJg6\nZUn4vgPA4gr5PIRg+ocegouw3xDMHQTBpJOVprpYQjBDcGnKiOvC5z8PnBfe7yKYviINpNi3ANXh\nwJqyz3tL2fu+G/hK2eNbgVeE933gLeH9OPC/wOzw8duB74T3NxNO3UKNi2rZ3/T+q7nO1JjJpKqD\nIvI9gmmcsxGTrdFw4jURWUcwXxAE8/OsKNvuf8J9PBVu93yCiRuPFpG3hdt0Eqy0VgTuVdUNFfZ3\nPMEqcbvDff4AeAXBJHdQfbK6pzSYWr7ca4CzRKS0EE4SWEwwf9FXReQ4gjmBjqj+8atyCWefJVhB\n7kUEa6iUFt8qTRb4EPBfIvJTgvmFjAGwgGGmhC8TXIVfX/acS1ilGp7wytcIL58V1C977LP/MV8+\nkZqEjwX4kKreXp6BcDbVoVHyWOvsx6N5iwaru5Xv/1KCBYmOCdtQqgXPvd9LKFV2P6eqpc8swJ9V\n9eQK7/E6goB3NnCxiLxI960aZ2Ywa8Mwzaw0rfcegtLA35e99ixQWgHuDUBiHO//NgkcRtB4/DjB\n+tL/VOqxJCJHSLDa32juJViZric8mZ8L3BFh/5WCzCqCqjPC/R8X3u0iKGUAvItgyVsIqsk6ytI/\nCxwXfq5F7L+2RPn+HgcOKrWFiEhc9i3wtVhVf0+woFYnQbWgMVbCME2tvARwDfCBsue+BdwcNuCu\novrV/2jTMW8gONl3AO9T1YKIfJtgMZr7w5LLdiqsB77fDlS3ishF7AsSP1PVn0XYf6XXLgeuFZGH\nCU7wzxBc6X8d+LGIvAv4Ffs+78MECxs9AHxXVb8sIs8SNMw/BtxXaX+qWhSRtwLXSbBuSyzc7xPA\n/5NgzXcBvqy1r/Vupimb3twYY0wkViVljDEmEgsYxhhjIrGAYYwxJhILGMYYYyKxgGGMMSYSCxjG\nGGMisYBhjDEmEgsYxhhjIvn/B8IjU8qXVigAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x115b13990>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig1 = plot_sfs(sfs1.get_metric_dict(), kind='std_dev')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.7161</td>\n",
" <td>0.0535366</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.7161</td>\n",
" <td>0.0535366</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.716091</td>\n",
" <td>0.0531213</td>\n",
" <td>[0.690340909091, 0.806704260652, 0.66146326080...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0527245</td>\n",
" <td>0.0263623</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.71601</td>\n",
" <td>0.0554486</td>\n",
" <td>[0.691238038278, 0.812969924812, 0.66114790287...</td>\n",
" <td>(1, 34, 4, 5, 23, 9, 27, 21)</td>\n",
" <td>0.0550345</td>\n",
" <td>0.0275172</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.715621</td>\n",
" <td>0.0560501</td>\n",
" <td>[0.683612440191, 0.812343358396, 0.66099022390...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0556315</td>\n",
" <td>0.0278158</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"10 0.7161 0.0535366 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"9 0.7161 0.0535366 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"11 0.716091 0.0531213 [0.690340909091, 0.806704260652, 0.66146326080... \n",
"8 0.71601 0.0554486 [0.691238038278, 0.812969924812, 0.66114790287... \n",
"12 0.715621 0.0560501 [0.683612440191, 0.812343358396, 0.66099022390... \n",
"\n",
" feature_idx std_dev std_err \n",
"10 (1, 34, 4, 5, 9, 15, 21, 22, 23, 27) 0.0531368 0.0265684 \n",
"9 (1, 34, 4, 5, 9, 15, 21, 23, 27) 0.0531368 0.0265684 \n",
"11 (1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27) 0.0527245 0.0263623 \n",
"8 (1, 34, 4, 5, 23, 9, 27, 21) 0.0550345 0.0275172 \n",
"12 (1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27) 0.0556315 0.0278158 "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_clf1 = pd.DataFrame.from_dict(sfs1.get_metric_dict(confidence_interval=0.90)).T\n",
"# Sort best score first. Use a boolean for `ascending` (integers are rejected by\n",
"# recent pandas) and plain assignment instead of inplace=True, which is\n",
"# discouraged and a hidden-state hazard on notebook re-runs.\n",
"result_clf1 = result_clf1.sort_values('avg_score', ascending=False)\n",
"result_clf1.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We want the best avg_score, so we take the feature_idx of the first row"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"best_features_clf1 = result_clf1.feature_idx.head(1).tolist()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'col_1', u'col_34', u'col_4', u'col_5', u'col_9', u'col_15', u'col_21',\n",
" u'col_22', u'col_23', u'col_27'],\n",
" dtype='object')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"select_features_clf1 = data.columns[best_features_clf1]\n",
"select_features_clf1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's do a cross-validation just to be sure:"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.7161000125992284"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cross_val_score(clf1, data[select_features_clf1], y, cv=5, scoring='roc_auc').mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ok, we get the same result as our best row in result_clf1! :)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's run our EnsembleVoteClassifier again with our new features selections"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.7161 (+/- 0.05) [Logistic Newton]\n",
"roc_auc: 0.6790 (+/- 0.04) [Xgb1]\n",
"roc_auc: 0.6735 (+/- 0.04) [Xgb2]\n",
"roc_auc: 0.7161 (+/- 0.05) [Logistic lbfgs]\n",
"roc_auc: 0.7218 (+/- 0.05) [Ensemble]\n"
]
}
],
"source": [
"for clf, label in zip([clf1, clf2, clf3,clf4, eclf], ['Logistic Newton', 'Xgb1', \n",
" 'Xgb2', 'Logistic lbfgs', 'Ensemble']):\n",
"\n",
" scores = cross_val_score(clf, data[select_features_clf1], y, cv=5, scoring='roc_auc')\n",
" print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We now have a better result for our Ensemble (0.7218 / 0.6838)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Let's try to have the best features selection for our xgb2 (lowest AUC)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
}
],
"source": [
"sfs2 = SFS(clf3, \n",
" k_features=25, \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs2 = sfs2.fit(data.values, y.values)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEPCAYAAABRHfM8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XvcHGV9///XZ3fvc84cwjEh4VjOUjkUBIOC4tcKVK0C\nVi22lZ8/AWlVoEIIiAJBpbZi+y2VL1pLxRaLYGsFqiLlC4VIICcSjiEnEkjI8T7u7szn+8fM3vdm\n7917Z+/ce993su/ng+XenZ3PXtduZuYzc10z15i7IyIiUk1qrCsgIiK7ByUMERFJRAlDREQSUcIQ\nEZFElDBERCQRJQwREUmk7gnDzM4zsxVm9pKZXVPm/S+Z2XNmttDMlphZ3symxO+9bmaL4vefqXdd\nRUSkMqvndRhmlgJeAt4LvAEsAC5y9xUV5v994Cp3Pyd+/Rrwu+6+pW6VFBGRROp9hHEK8LK7r3L3\nHHAfcMEQ818M/KjotaFmMxGRcaHeG+MDgTVFr9fG0wYxszbgPOAnRZMdeNTMFpjZn9WtliIiUlVm\nrCtQ5EPAE+6+tWjaGe6+3sz2IUocy939iTGqn4hIQ6t3wlgHzCh6fVA8rZyL2Lk5CndfH//daGYP\nEDVxDUoYZqYBsUREauTuVsv89W6SWgAcZmYzzayZKCk8VDqTmU0G3g08WDSt3cwmxM87gPcBSysV\n5O41PebNmzcqMXtqWarfyJcVhCH5ICQfBOTyAdl8QF8uT282T082T09fjq9cP5fuvlxNj+KYnsKj\n8JnZ6PN7s3n6cjs/rp87d9C0ao/r584lm4/qnw8C8kFIEIaEYTgufvfdcbmoV8xw1PUIw90DM7sc\neIQoOd3t7svN7LLobb8rnvVC4GF37ykKnw48EB89ZIB73f2RetZXZCTlg5B86HESCMnmQ3pzefpy\nIX25gA1buln42kZCdyqtv9b//2iGjdt6WLZ6c031iGK29H8GxqDyzKz/7cKTN7f2sHjV2zWVtWFr\nD8+v3FTx/ZRFZaXMwCBlhsV1fG3DNpqb0rRk0jRlUmTSKTIpIx3/NatpZ1jqoO59GO7+C+DIkml/\nX/L6B8APSqatBE6sd/1EahG6E4TxIwgJQqcnm+fNbd30ZQP6cnFSyAf9G+XC5j5lRipl0UYwFW0Q\nO1qbMEi8MWxpSjO5o6WmOkcxzTXF9Me111ZWa1OaKUPUz92jdOSFtOSE8fPOvjxBT44gHNgDLvx2\nZpBJp2htStMSJ5Xuvhxv7+glyj1G4Se0OAlROh3I5gN6cwFN6ejfQGoznjq9R9WcOXNGJWYsynKP\nVsL+lTHegy2srKFHa2i0ojrvPO0MtnVnaypnODHDjRvV+p16Om/v6CUXhGTzAfl8GD8PyQYhYbjz\nrrkBs475XdZu6iKdMjLpFOl0iolN6WgveginnfGuqvOUOvX0d9X6lYYVU6+yijfm8RTSwOnvOpO2\n5sqbo2iZjhL1jp4cW4Ishx57Mq+9uZ3Cx5UepBVPKzyf8TsnsSQ+akqnjNamDG0t6f5ElEmnaE7H\nRzfpgYSyp24valXXC/dGi5n5nvA9hiMIQ7r7Ajp7s2ze0Ud3Nr/T+4X1snTFsaIXXjRfoyv8ToWj\ngXT8KLyudQMv41cYOvkwOkrMx0eLUHxECC1NGVqb07Q1Z2iJm8ma4p2CTLyDsLsyM7zGTm8ljN1M\n6E5PX57O3hxbOvvo7MvhHm3gWpvTNKVTausVGQGhO0Gwc1KBKKGE8V8zaMmkaW4aOEpp7u9/SeE4\nYbjzEVI+DAc+NwjJBwyUEYY0p9PsNbGFSe3NtDal67Y+K2HsgdydnmxAd1+OLZ1ZtvdkCdyjBBEv\nnEoQImPD3cmHThj3a+WDkNC9bBNZQcos7vin/6jVik4GCMKQnmyAh05TU4q9JrYypb2F9pYM6dTI\nretKGLsp79/ziE6tDAInmw/Y3p1la3e2v928
Od6DUbOISGPIB1HyCMKQlBlTO5qZNrGVjtYmmnax\nOUwJYxxyd3pzAfkgSga5fEhfPiCbC8gG0emVuSAse1plcyY6zE2N4F6FiOyeQnd6s9H1OThMbGti\nr0mtTGxtonWIEwYqUcIYR4LQ2drVxxubu+jNBv29zGb0d6QWd6iqWUlEknL36LqebB53aGlOM6Wj\nmUltzVEHfVO66mcoYYwD2XzA5s4oUQSh096SoTlT/R9PRGS4cnFrRaFjvimTYkp7C1M6mmmNz/Aq\n3SlVwhhDvdk8b23r4c1tPRgwoa1JFwaJyJjIFzV341ECmRwfgXS0REcgqVSq5oTRsBfujZTO3hwb\ntnSzubOPTNqY1NasPgcRGVOlFx4GobO9O8em7b0Awz7bSgljGEJ3tndneWNLNzt6srQ2ZZjS0ax+\nCBEZl9Ipo70lQ3tLtMkvHbEgKSWMGgRhyObOPtZt7iKbC2lvyTBtQutYV0tEpCbDbQVRwqjBure7\nWL+lm4ltzXS0NI11dURERpUSRg22dWeZ2N68yxfMiIjsjrTlSygIw3hYZP1kItKYtPVLqDcbVBwb\nRkSkEShhJNSTzWsIcBFpaEoYCXX25tUcJSINTVvAhHb0ZGlOMD6LiMieSgkjAXV4i4goYSTSmwvG\nugoiImNOCSOB3qwShoiIEkYCnb05NUeJSMPTVjCB7d1Z3dNCRBqeEkYV/R3eGf1UItLYtBWsojcX\n3T9XRKTRKWFUMXA/bhGRxqaEUUWXOrxFRAAljKq29+TU4S0ighLGkKIO77w6vEVEUMIYUm8uwMOx\nroWIyPighDGEvpw6vEVECpQwhtDZkyOjDm8REUAJY0jbe3K0qMNbRARQwqgoCJ2ebJ5MWm1SIiKg\nhFFRXy7AADMlDBERUMKoqDeX14ggIiJFlDAqUIe3iMjOtEWsILrCWz+PiEiBtohlFDq8NYaUiMgA\nbRHLUIe3iMhgShhl9OUCdXiLiJRQwihjR29WHd4iIiW0VSxjR7c6vEVESmmrWCIInW51eIuIDKKt\nYom+XACow1tEpJQSRokoYajLW0SkVN0ThpmdZ2YrzOwlM7umzPtfMrPnzGyhmS0xs7yZTUkSWw9R\nh7dGqBURKWXu9dubNrMU8BLwXuANYAFwkbuvqDD/7wNXufs5tcSamY/U93hhzRYc1328RWSPdsQB\nU3D3mtreM/WqTOwU4GV3XwVgZvcBFwBlEwZwMfCjYcbusiB0uvvyTGpvqlcRdbNm9Wq+Pf8+3noT\n9p0OV11zEQfPmDEuYlS/sSlruPUTqaTeCeNAYE3R67VEiWAQM2sDzgM+X2vsSMnmo/6L3a3De83q\n1Vz68XtYvepmoAPoYtHCudzz40srbiBGK0b1271+i0LsnpY8ZWTUu0nqI8D73f2z8es/Ak5x9yvL\nzPsx4BPufsEwYn3evHn9r+fMmcOcOXNqru+Wzj5e2bCVKR2tNceOlFpXhh3b4c8/9w0e//VfEm0Y\nCrqYNXs+J//e9WSzkO2Dvj6LnxsvrbiZLZuvHhQzYcLt7LvfDWXLemvDV+nsrC1muHGjFTNU3KTJ\ntzNj5lwsBek0pFLRI52GV16+mbc3Do6ZOu12DjxoLrmckctBPg/5+HkuB507vko+/+VBcZb6Bk1N\n80gZWNEDg77em8rGTJp0O7MPn0trKzS3OK2t0NrqNLfAb5/+Gq+/ds2gmHe88zY+c9m1tLZF87a2\nQmvbwN+3N63i8j+9h7WrBxLNjJm1J6dqMcON25WyRiuhjVdPP/nfPP3kE/2v77xj/rhrkloHFP+6\nB8XTyrmIgeaoWmO58cYbh1fDImPd4V1uZXh+4VxuueMz5HOHsHZNijWr4sfqFGtXp8j2GU6KnTcM\nAB2EHnLs8QHNzdFGpLnFaW6GlhZn/lfzbNk8OGb24Xlu+3Z32fpde1Wexc/VFjPcuNGKGSpuxiF5\nbrqtlzCA
IIAwHHjcMi/P2xsHxxxwYJ6b5veSyThNTZDJQFOzR3+b4PN/kuPZZwbHvfOUHHffux13\nCB3cgfjvZz9VPmbmrDzX3tBLXx/09hp9vfHfPljwVEi5ZWL16/DQvzXT2wu9PbbT375eY/PbPyGf\nv7kotoPVq27m3NO/QUvLPFIpIE5mqRSYOd3dPyGXHRzz+2d/g732uYF02kkZpApJNw0pgzfW3c+2\nrYPjLv34fM4+9zra2p22duho9/7nP/rHfylaPwZivvG1W/jmd66mqTlOtEVG82iwEDcek8ypp5/J\nqaef2f/6zjvm1/wZ9U4YC4DDzGwmsJ4oKVxcOpOZTQbeDXyi1tiR1NmTH9MrvO+47b5BK8OaVTfz\n2T/6Bse/43oOnhFy8MyQc8/Lc1D8fNpezpcuD/jZA12U7k0ef6Jz0SezZcs69HB4YengmJmHwGGH\nh2VjZh4Ci5+rLWa4caMVM1TcrNlw3AlB2ZjDj4QVLwyOmX1Y5RiAAw6EZxkct9/+0NpWW8whs+Gk\nk8uX9cyTzqrXB8ecfmbIt75bOXl+8qM5nn5ycKI56ZQc//DDKKF5GCUydwhD43OX5li4YHDMkUfn\n+OZ3ugjDKOG6FyXeAOZenWfb1sFx6UzIAQeF9HQbXZ2w8c0UPd1Gdze8tALKJcJHfp7mHQ9PIgig\npQVaWouPmu5nx/bBSeYzF8/n7HOuo7XNaWuLYtraCkdczr3fL5+c7rj1Fv7q764u+/uNdlPgaKtr\nwnD3wMwuBx4hOoX3bndfbmaXRW/7XfGsFwIPu3tPtdh61XWsOrzf3GD8968z/PdjGR75eZpyK8Px\n78jxw/u7Kn7GVddcxKKFcwcdpl91zaVjHqP6jU1Zw63fvtOBMslp//2ho3TRBMA58CBYuGBwzMEz\nYMYhlRP1rENh6eLBccce71z62fI7Ol/8fFh25+iDFwR867vbCQL6j5Z6e6Ijri9dkWfposHrlVnI\n/geG9PYYPT2wZXOK3l7o6TF6e4wXl0O59fE/HkzzyM8n0THB6eiA9g6Pnzuvvnw/G9YPTjI3XH0r\nN912DXvvG9LePvh77UqiGU117cMYLSNxWm1PNs+y1ZuZ3NEyQrUqv8ew3/4zeP7ZNL/5VYbHf9XE\n+jeM08/Mc9bZeX716C08+p+D+yI+9Ae38K3vlt+jGaqsRuzcHO/1G82yhhszmv0Ko1HWFz9/Oz97\n4CvUul4NFXfLt66mq8vo6oKuTqOr0+juNm676VZefvFrgz6rY8Jcpk69iY0bjUwG9tk3ZO99nH32\nDdlnX+eZp77OihcG9zlVq+OuHJUM57RaJYzY1q4+Xl6/jSkjlDDKLdht7TeQSl3BIbMO5qz35Djr\n7DwnnBSQyVSOSbLiiYykPS15jmZCq5ac3KFzB2x8K8XGt4yNb6XYtNG4566vs37d4EQzZepcLvzD\n6zlkVsghswNmzQ6Zvr+TSu369kIJYxesfbuTjdt7mdA6Mk1SlRaccz9wK9+9+8sV43aHdkyR3c1o\nJbThbsQrbS9OPu023nPuV1j5WprXX0vx+soU27caM2eFbN/2Vda/MfhMvaRHJT974BYljOF6Ye0W\n3EfuCu/zz7mdFS98fdD0U0+/jh/eP3TzkojsvurdFNjVBatWpvjS5fN55aXBRyXp9A0cduQ89ts/\nZPp+zn77h9Hz/R14nXnX3M3aNTcDE8bdabW7hdCd7t6R6fB2h7vubGHlq02U6zyMOhVFZE918IwZ\nVfscy8Xc8+NL+fb8W4oSTfmjko4OOPrYkN85xnnlpcHbmHPeH/K5q7p5c32KDetTbFhv/PbpDBvW\nG4uf+xc6O29mcGd+MjrCIOrwXrpqM1Mm7Fr/xY7tcM1V7WzaaFx7wwquvvL/qD9CROpiOM1fn/zo\n7Tz9ZKHlw3SEMRwjcQ/vl1akuPxP2znjrDzf/t+9NDcfnHiPQUSkVrUclR
SUP206OR1hsOsd3v/+\nQBM3z23lL2/s5cKP5oZdDxGRetr5qKT2PgwlDGD52i2Ew+jwzmbh9ptbeeyXGe78XjdHHV35IiUR\nkfFAZ0ntQsII3XnutU1MbGuqaZTaNzcYX/hsO1OmOrf/TTeTJg+reBGRMTEe74cx7vXlAsJw6CHN\nS0+TO/vcT3DbTUfxiT/OctkVfdGAbCIiezgljCod3uXORPiPB2/g1jv+lD/42AGjVEsRkbHX8PvG\nnb05MunKP8O35w8eQTYMv8oTv/mnUamfiMh40fAJY0dPjpamyj/DW29CuREro+kiIo2joRNG6NGQ\n5k1DHGEMnLdcTFdsi0jjaeiEkc0FhD50h/dV11zEjJlzGUgahfsKXDQqdRQRGS8a+rTapEOar1m9\nmvPPuZ/Zh+U5ZJZGkBWR3Z9Oq61RV2+OdIJzYqdOnUEY3MiPH9ref+8KEZFG09BNUtt7conu4f3C\n0jRHHh0oWYhIQ2vYhNHf4Z0gYSxdnOaY44JRqJWIyPjVsAkjmwsI3EklGA5k6aI0x52ghCEija1h\nE0ZvLiBpb8+yxWmOOV4JQ0QaW8MmjO6+fKIO784dsGF9ikMP10i0ItLYEicMM2szsyPrWZnRtKMn\nm6jDe9mSNEepw1tEJFnCMLMPAc8Dv4hfn2hmD9WzYvXk7uzozSXv8FZzlIhI4iOMG4FTgK0A7v48\nMKtOdao7B9xJ3OF9rDq8RUQSJ4ycu28rmbb7XyKewLIlaY7VEYaISOIrvZeZ2SVA2swOB64Enqxf\ntcaHHdvhrQ0pZh+mDm8RkaRHGFcAxwB9wD8D24Cr6lWp8UId3iIiAxJtCt29G7gufjQMXX8hIjIg\n6VlSj5rZlKLXU83s4fpVa3xYog5vEZF+SZuk9nb3rYUX7r4F2Lc+VRo/1OEtIjIgacIIzaz/BhBm\nNpM9/Cyp7dtg01vq8BYRKUjanXsd8ISZ/QYw4Ezgs3Wr1TiwbEk0pHk6PdY1EREZH5J2ev/CzE4C\nTosnXeXum+pXrbGnEWpFRHZWy+CDLcBmYDtwtJmdVZ8qjQ8aEkREZGeJjjDMbD7wcWAZUGjUd+Dx\nOtVrzC1bkuaKL/aNdTVERMaNpH0YFwJHuntDbEG3bYW3N6aYdag6vEVECpI2Sb0GNNWzIuPJsiVp\njjpGHd4iIsWSHmF0A8+b2S+JhgcBwN2vrEutxpg6vEVEBkuaMB6KHw1h2ZI073lffqyrISIyriQ9\nrfYH9a7IeLJkUYYrv9QQ3TUiIoklPUvqcOBW4GigtTDd3WfXqV5jZusWY8tmU4e3iEiJpJ3e9wB/\nB+SBs4F/BP6pXpUaS8uWpDj62IBULVeoiIg0gKSbxTZ3/yVg7r7K3W8EPli/ao2dpYsyumBPRKSM\npJ3efWaWAl42s8uBdcCE+lVr7Cxbkuac83JjXQ0RkXEn6RHGF4B2oluz/i7wSeDT9arUWFq6SEOa\ni4iUk/QsqQXx007g0vpVZ2xt2Wxs2WIcMnvP6PAO3SH6D3ePxqN3cBz3ommyk3TKSKdSpFM21lUR\nGVeSniX1TqIhzmcWx7j78QlizwO+TXQ0c7e7zy8zzxzgr4iuJt/o7mfH018nun94COTc/ZQk9R2u\nZYvTHHPc2HZ4uztdfXly+RDbxe1VyiBlKSwFKbOdH6nC32g+iYQO2XxIby5PPghxj8bzLyRWM4sT\niimxSMNJ2odxL/BlYAkDgw9WFfd73Am8F3gDWGBmD7r7iqJ5JgPfBd7n7uvMbO+ijwiBOfEd/upu\n6ZKxG6E2dKezJ0fgzj4TW5k+pZ2mTJS5ym2OKicTI2XRhk12jbuTD50gCMmHTj4IyeVD+vIB2VxA\nX5xYcvlolbCdUsvAs3TK+hPNQMKO/o1S+neS3UjShLHR3YdzpfcpwMvuvgrAzO4DLgBWFM1zCfAT\nd18HUHKfDaO2Idh3ybLFad7/v0a3wz
sInc7eHO7O9Mlt7DulndYmDWI1HpgZTWmjKT30IujuBKET\nFv6GA6/zQUg+DMnmQ4IgJBtE03pzYf+8xeW5O4bheEn6GXykEyUfBo4W+48ctcMg9ZE0Ycwzs+8B\npWNJ/VuVuAOBNUWv1xIlkWJHAE1m9muiM6/+xt1/WCgCeNTMAuAud/+HhPUdlqWL0vzFtb31LKJf\nEIZ09uQwMw6Y2s7ek1ppzihR7I7MjEx6+Btodycs6lMqvKZkeuhR31MYDiSiXD6MjnyCkFzgZHNB\n/xFPJSmzOOEUJZvUQAJSspFKkiaMS4GjiPoYiu+HUS1hJK3DScB7gA7gKTN7yt1fAc5w9/Vmtg9R\n4lju7k+U+5Abb7yx//mcOXOYM2dOTZXY/LaxbZsxc1Z9O7xzQZQo0injoL0nsNfE1qp7sLJnMzOi\nfDNyG+ogdIIwJPRo5yQM47/u5OOjnEKiyReSTRj099sMriM7He6UzmIVpidVGl9c3s5HWQNz9s9b\nodxCIjQGEmMhF7oXJWD3KCnHz8vVKcn3s53+/aIodyedNlqa0jSlU2OajJ9+8r95+smym8/EkiaM\nk939yGF8/jpgRtHrg+JpxdYCm9y9F+g1s8eBE4BX3H09gLtvNLMHiI5OqiaM4Vi2pL4d3tl8QFdv\nnuamFLOmT2TahBbSupxc6iTqkC8csdZ25FpoTgvDorPo4g2pF730ki36rp5xV5wzjWjj3r95tYHN\ncfE2t7CRD8PoaGzgeZQUg7DwNyQfQD6MdgjTaSOTMlKpFOkUZNKp/hMY+hONFZ8sUlzRgRelm//S\nfNCbC+jszbG1M8u27iwQfWZLU5rmzOgmkFNPP5NTTz+z//Wddww6/6iqpAnjSTM72t1fqPHzFwCH\nmdlMYD1wEXBxyTwPAt8xszTRbWBPBe4ws3Yg5e6dZtYBvA+4qcbyE1sanyE10vJByI6eHK3NaQ7d\nbxJTOlp0Vo2Ma+mUkcZqzTNSRnMmzaS2Zg6Y2kE+COnJBnT1ZdnamWV7TzY6C8+M1lFMILtyOn3S\nhHEa0f0wVhL1YVhU7tCn1bp7EF8Z/ggDp9UuN7PL4vi73H2FmT0MLAYKfRUvmNks4AEz87ie97r7\nI8P6lgksXZTmAx8a2Q7vIHS292SZPX0Se01s1RkxIg0sk04xsS3FxLYm9pvSQRDGCaQ3x9buPrZ3\n5yi3KR/U0GX0n+4dzRA1fRXmjeaJZiqXGApHTsNhXq7BclABNrPc9MLZT2PNzDzJ9ygI3Xn21Y1M\n6Wjpnzbn5Il8/8ddI3bRXujO1q4+DtlnItOntI/IZ4rInisInd5snlwQ9h9pDCSFwp94ujHo/f5m\nvLj5bufnA018/Z8dJZqaMkfVI4y4qehhdz+qlg/enWx+29ixw5hxyMgkC3dnW1cfB07rULIQkUTS\nKaOjdXzfCbtqr6u7B8CLZjaj2ry7q6UjfIX31q4s+05u48BpHSPzgSIi40DSPoypwDIzewboKkx0\n9/PrUqtRtmzxyA04uK2rj6kdzRy890Sdzy4ie5SkCWNuXWsxxpYsSvPBC3a9w3tHT472lgyz95uk\nM6FEZI+TqBHG3X9DNJzHxPixPJ62RxiJI4zuvjxNaeOw/Sfr+goR2SMl2rKZ2ceAZ4A/BD4GPG1m\nH61nxUbL25uMrq5d6/DuzeYJPeTwA6ZoeA8R2WMlbZK6juhq77cA4qE6/gu4v14VGy1LF6c5+rhg\n2EOJZ/MBvbmAYw6epkEDRWSPlrTtJFVIFrG3a4gd15YtTnPcMJuj8kFIV2+eow6cSntL0twrIrJ7\nSrqV+0V8NfaP4tcfB35enyqNriWL0nzoD2rv8C5cxX3E/pOZ2Da+z50WERkJQx4lmFkLgLt/Gfh7\n4Pj4cZe7X1P/6tXfssVpjj0hX1NMGF+Yd8g+k5g6obVONRMRGV+qHWE8BZxkZj90908yMsOZjxub\nNh
o9PXDwjOTDikRXcWc5aK8Opk9pq2PtRETGl2oJo9nMLgFON7MPl76Z4AZK49rSxWmOPra2e2dH\nV3G3coCu4haRBlMtYfx/wCeAKcCHSt4bqRsojZmli2prjtrenWXaxBZm7KOruEWk8QyZMNz9CTN7\nEljr7l8fpTqNmmWL05z/kWzi+cPQOXBah4YpF5GGlGTwwRDYIy7SK7V0SZpjT0h2Sq27k0pFd8oS\nEWlESa+l+KWZfcT2oHaYTRtT9PXCQQcn6/DOBSHtLRkdXYhIw0qaMC4D/hXImtl2M9thZtvrWK+6\nW7E0wzE1XOHdlwt1vYWINLREF+65+8R6V2S0LV+a4ZgarvAOwpAJ4/zmJiIi9ZR08EEzsz8ys7nx\n64PN7JT6Vq2+VixtqnmEWo0VJSKNLGmT1N8CvwdcEr/uBL5blxqNkuVLM4k7vEN3UmY0K2GISANL\nmjBOdffPA70A7r4FaK5brepo5cpVfOQjN7Hl7Zv4q9vms2b16qoxuXzIhFZ1eItIY0s6+GDOzNJE\nF+sVhjcf/g0kxsjKlas499zv8OqrNwEd/OyBLhYtnMs9P76Ug2dUvmV5Nh8ybYKGARGRxpb0CONv\ngAeAfc3s68ATwC11q1WdzJ37/f5kEelg9aqb+fb8+4aMC8JQw5eLSMNLepbUvWb2LPBewIAL3X15\nXWtWB+vWhQwki4IO3nqzemxrsxKGiDS2IbeCZtZKNJ7UYcAS4O/dvbaxwMeRAw9MAV3snDS62Hd6\n5ZhCh3dLZo+4X5SIyLBV2wr+AHgnUbL4APDNuteojm6++Y859NB5REkDoIsZM+dy1TUXVYzJ5UMm\ntGU02KCINDxzrzw0hpktcffj4ucZ4Bl3P2m0KpeUmflQ36PYypWruP76e3jx1T4OOCDFVddcNGSH\n946eLPtPbWf/qRrOXET2HGaGu9e0J1wtYSwsThClr8eLWhIGRM1Mz766kSkdLVXn3drZxxEHTmFy\n+255FrGISFnDSRjVenJPKBozyoC2+LUB7u6ThlHP3YqZ6QpvERGq3w+jobeUYehYCprV4S0ikvg6\njIaUzQdMaG1Sh7eICEoYQ8rmQ/VdiIjElDCG4O6064I9ERFACWNIDrolq4hITAmjgjB00ilTh7eI\nSExbwwqy+YCJrc3q8BYRiSlhVJDNh0xs1y1ZRUQKlDAqCN01pLmISBEljAoM3cNbRKSYEkYZQeik\nUymaM0oYIiIFShhlZPOB+i9EREooYZSRy4dMbFPCEBEppoRRRhg67c1KGCIixZQwyjFoadJPIyJS\nTFvFEkGuIQ/UAAAQr0lEQVQY0pRWh7eISCkljBLZfMgk9V+IiAyihFEimw+Z0KYhzUVEStU9YZjZ\neWa2wsxeMrNrKswzx8yeM7OlZvbrWmJHmrvTpiHNRUQGqeuW0cxSwJ3Ae4E3gAVm9qC7ryiaZzLw\nXeB97r7OzPZOGlsvrc3qvxARKVXvI4xTgJfdfZW754D7gAtK5rkE+Im7rwNw9001xI6oIAxpyqRo\nSqulTkSkVL23jAcCa4per42nFTsCmGZmvzazBWb2yRpiR1RfLmRSqzq8RUTKGQ+N9RngJOA9QAfw\nlJk9VeuH3Hjjjf3P58yZw5w5c2quSDYfMKm9veY4EZHx7rHHHuOxxx7bpc+od8JYB8woen1QPK3Y\nWmCTu/cCvWb2OHBCwth+xQljuAxoVYe3iOyBSnekb7rpppo/o95NUguAw8xsppk1AxcBD5XM8yDw\nLjNLm1k7cCqwPGHsiHLXkOYiIpXUdXfa3QMzuxx4hCg53e3uy83ssuhtv8vdV5jZw8BiIADucvcX\nAMrF1quu+SCkpTlNRh3eIiJlmbuPdR12mZl5Ld8jdOfZVzcypaOlf1p3X55J7U3M2ndSPaooIjKu\nmBnubrXEaHc6ls0HTNQZUiIiFSlhFNEV3iIilSlhFGlRh7eISEVK
GEAuCGltUoe3iMhQtIWkcEtW\njVArIjIUJQyiDu8Jreq/EBEZihJGrK1FCUNEZChKGDFd4S0iMrSGTxi5fNThnU41/E8hIjKkht9K\nZvMBk9vV4S0iUk3DJ4xcENKhK7xFRKpq+ISB6wpvEZEkGjphuDuYrvAWEUmioRNGPnDamjOkUzUN\n2Cgi0pAaOmH05QMmtav/QkQkiYZOGPkgZGKrzpASEUmioRMGqP9CRCSphk4YhilhiIgk1LgJw6G9\nRR3eIiJJNW7CACa06foLEZGkGjZhpFLGJN0DQ0QksYZNGM2ZlPovRERq0JAJw4DmTFoJQ0SkBubu\nY12HXWZmXuv3yOYDmjNKGCLSmMwMd6/prJ+GPMIAlCxERGrUsAlDRERqo4QhIiKJKGGIiEgiShgi\nIpKIEoaIiCSihCEiIokoYYiISCJKGCIikogShoiIJKKEISIiiShhiIhIIkoYIiKSiBKGiIgkooQh\nIiKJKGGIiEgiShgiIpKIEoaIiCSihCEiIokoYYiISCJKGCIikogShoiIJFL3hGFm55nZCjN7ycyu\nKfP+u81sq5ktjB/XF733upktMrPnzOyZetdVREQqq2vCMLMUcCfwfuAY4GIzO6rMrI+7+0nx42tF\n00Ngjru/w91PGcm6PfbYY6MSs6eWpfrtPmWN9/qNZlnjvX6jXVat6n2EcQrwsruvcvcccB9wQZn5\nrEK8Uac67qkLgFa84cfsqWWN9/qNZlnjvX6jXVat6p0wDgTWFL1eG08r9Xtm9ryZ/YeZHV003YFH\nzWyBmf1ZPSsqIiJDy4x1BYBngRnu3m1mHwB+ChwRv3eGu683s32IEsdyd39izGoqItLAzN3r9+Fm\npwE3uvt58etrAXf3+UPErAR+1903l0yfB+xw9zvKxNTvS4iI7KHcvVJ3QFn1PsJYABxmZjOB9cBF\nwMXFM5jZdHd/M35+ClES22xm7UDK3TvNrAN4H3BTuUJq/dIiIlK7uiYMdw/M7HLgEaL+krvdfbmZ\nXRa97XcBHzWzzwE5oAf4eBw+HXggPnrIAPe6+yP1rK+IiFRW1yYpERHZczTcld5mdreZvWlmi2uI\nOcjMfmVmy8xsiZldmSCmxcyeji86XBL3wSQtLxVfxPhQDTE1X+RoZpPN7F/NbHn83U5NEHNEXMbC\n+O+2hL/Hn5vZUjNbbGb3mllzwjp+If79Kv7u5f5NzWyqmT1iZi+a2cNmNjlh3EfjegZmdlLCmNvj\n3/B5M/uJmU1KEPPVon+vX5jZfknKKnrvi2YWmtm0BGXNM7O1RRfHnpe0LDO7Iv5uS8zstgRl3VdU\nzkozW5gg5gQze6qw7JrZO5PUz8yON7Mn49/xQTObUBJTdr0datkYIqbaclEad0U8veKyMURMxWWj\nUv2K3q+0XFQqq+qysRN3b6gH8C7gRGBxDTH7ASfGzycALwJHJYhrj/+mgf8BTklY3p8D/wQ8VEMd\nXwOm1vhbfB+4NH6eASbVGJ8C3gAOrjLfAXH9muPXPwY+leDzjwEWAy3xb/gIMDvJvykwH7g6fn4N\ncFvCuCOBw4FfAScljDmHqL8N4Dbg1gQxE4qeXwH8XdJlFTgI+AWwEpiWoKx5wF/Uul4Ac+LfPBO/\n3jtJ/Yre/yZwfYJyHgbeFz//APDrhPV7BnhX/PyPga+WxJRdb4daNoaIqbZcVIqruGwMEVNx2agU\nk2C5qFRW1WWj+NFwRxgenZa7pcaYDe7+fPy8E1hO+etJSuO646ctRBvkqu1/ZnYQ8L+A79VSR2q8\nyDHe0znT3e+J65p39+01lnkO8Kq7r6k6Z7TB7zCzDNBOlGiq+R3gaXfvc/cAeBz4cOlMFf5NLwB+\nED//AXBhkjh3f9HdX6bCxaQVYv7L3cP45f8QrbjVYjqLXnYQjWqQ5HsB/BXw5aT1iw15YkiFuM8R\nbUzz8TybEpZV8DHgRwliQqCw
lz8FWJewfof7wGn2/wV8pCSm3Hp7EEMsG5XW9QTLRaW4isvGEDEV\nl40q26Khlouh4hKfNNRwCWNXmdkhRHs6TyeYN2VmzwEbgEfdfUGCIgr/6LV2LtV6keMsYJOZ3RMf\nit5lZm01lvlxSjYIZSvm/gbwLWA10cZgq7v/V4LPXwqcGTchtBMl0oMT1m1fj8++c/cNwL4J43bV\nZ4D/TDKjmX3NzFYDlwA3JIw5H1jj7ktqrNflcbPI96xM81wFRwBnmdn/mNmvyzUVDVHPM4EN7v5q\ngtn/HPhm/FvcDvxlwmKWxb8HRMnpoEozFq23/wNMT7Js1LKuJ4yruGyUxiRZNopjalkuytQv8bKh\nhFGDuI30fuALJXsBZbl76O7vIFqQT7Wdr2Iv9/kfBN6M9wSMGjI/0UWOJxFtVD9vZu+qMn8GOAn4\nbhzXDVybtDAzawLOB/41wbxTiPbqZhI1T00ws0uqxbn7CqLmg0eBnwPPAUHSOpZ+3DDjEjOz64Cc\nu/9zkvnd/Xp3nwHcS9T0UO3z24CvEDUj9E9OUNTfEjXlnUi08zLoWqYKMkTNnKcBVwP/kjAOotPn\nq+5MxD5HtE7NIEoe/ydh3GeIlvUFRHvi2XIzlVlvS5eFQctGret6tbihlo1yMdWWjeIYonUi0XJR\npqyalg0ljITippT7gR+6+4O1xMZNPb8Ghu5QgjOA883sNaKV7Wwz+8eEZayP/24EHiAax2soa4n2\nSH4bv76fKIEk9QHg2bi8as4BXnP3zXHT0r8BpycpxN3vcfd3uvscYCvwUsL6vWlm0wHiTsO3EsYN\ni5n9MVGyrpoIy/hnSppTKjgUOARYZNEFrgcBz5rZkEdP7r7R48Zr4B+AkxPWaw3RvxXx0XFoZntV\nCzKzNFHT4Y8TlvNpd/9pXM79VF92ied9yd3f7+4nE41TN+hopsJ6O+SyMdx1vVLcUMtGgrIGLRtl\nYhItF+XKqnXZaNSEUeveO0R7PS+4+18nKsBs78LhXbxneC6wYqgYd/+Ku89w99lEFzn+yt0/laCs\n9njPARu4yHFplbLeBNaYWWEYlvcCL1Qrq0gte5CrgdPMrNXMLC5reZJAi4aFwcxmAH9AtAKVnZWd\n/00fIuoIBfg0UGnFH2pZGGp6/3vxmSVfBs53976EMYcVvXchlX+P/jh3X+ru+7n7bHefRZT03+Hu\npcmwtKziM7A+TOVlo/S3+CnwnvgzjgCa3P3tKjEQLevL46bIJOWsM7N3x+W8l8o7BaXfq7BspIDr\ngf9dJqbceltt2ai2rldaLgbFJVg2ysVUWzZ2iqlhuShXVtJlI+IJe8f3lAfRBucNoI9oQ3Zpgpgz\niA77nidqFlkInFcl5rh4vueJzvS5rsZ6vpuEZ0kR9UcU6rYEuDZh3AlEV+M/T7QnOTlhXDuwEZhY\nw/eZFy/4i4k6GpsSxj0eL8TPEQ11n+jfFJhK1BH6ItGZPlMSxl1ItGfdQzQ6wX8miHkZWBX/ey8E\n/jZBzP3xv9XzRBus/WtdVonOPCs9G6ZcWf8Y/+7PEyWB6Ql/iwzww7ievwXenaR+wD3AZ2v4tzo9\n/vzngKeINnZJ4q6M/31XALckXW+BaZWWjSFiqi0X5eI+MNSyMURZFZeNSjEJlotKZVVdNoofunBP\nREQSadQmKRERqZEShoiIJKKEISIiiShhiIhIIkoYIiKSiBKGiIgkooQh41Y8TPM3il5/0cwSjbmU\n4LPvMbNBAxmONIuGxX7BzH5ZMn2mmXXbwDDxC+MrcWv9/JlmdnH1OUV2nRKGjGd9wIdLx/Yfa/HQ\nF0n9CfCn7v7eMu+94u4nufs74r/5YVRnFsMYjiS+OlqkJlpoZDzLA3cBf1H6RukRgpntiP++28we\nM7OfmtkrZnarmV1i0c2sFpnZrKKPOTce3XdFPPBjYYTh2+P5n7d45N/4cx83sweBZWXqc7FFN4
da\nbGa3xtPmEt3H4W4zm1/m+w0aYiIe5uVui0aIfdbMPhRPnxmX/9v4cVoccivwrvgI5Qtm9mkz+07R\n5/3MzM4q/EZm9k2LRlA+zcxOin+rBWb2n0XjK11p0Y12njezRAMpSoNIOrSDHnqM9gPYTnSzl5XA\nROCLwA3xe/cAHy6eN/77bmAz0ZDVzUTj6syL37sSuKMo/ufx88OIhn1oBv4M+Eo8vZlo6JSZ8efu\nAGaUqef+RMM/TCPaCfsl0dhBEA06WW6oi5lEIwQXhoz4Tjz968Al8fPJRMNXtAGtDNyA6jBgQdH3\nfajocz8N/E3R658BZ8XPQ+Aj8fMM8H+BveLXHwPujp+vIx66hRpvqqXHnv2ouc1UZDS5e6eZ/YBo\nGOeehGELPB54zcxeJRovCKLxeeYUzfcvcRmvxPMdRTRw43Fm9ofxPJOI7rSWA55x99VlyjuZ6C5x\nm+My7wXOIhrkDioPVveKR0PLF3sf8CEzK9wIpxmYQTR+0Z1mdiLRmECHV/76FeWJR58luoPcsUT3\nUCncfKswWOAi4J/N7KdE4wuJAChhyG7hr4n2wu8pmpYnblKNN3jF9wgvHhU0LHodsvMyXzyQmsWv\nDbjC3R8trkA8mmrXEHWsdfTjoXzEo7u7FZc/j+iGRMfHfSiVkmf/7xJrLXre6+6F72zAUnc/o8xn\nfJAo4Z0PXGdmx/rAXeOkgakPQ8azwrDeW4iOBv6k6L3XgcId4C4Amobx+X9okUOJOo9fJLq/9P9f\nOGPJzA636G5/Q3mG6M500+KN+cXAYwnKL5dkHiZqOiMu/8T46WSiowyATxHd8haiZrKJRfGvAyfG\n3+tgdr63RHF5LwL7FPpCzCxjAzf4muHuvyG6odYkomZBER1hyLhWfATwLeDzRdP+AXgw7sB9mMp7\n/0MNx7yaaGM/EbjM3bNm9j2im9EsjI9c3qLM/cB3KsB9g5ldy0CS+Hd3//cE5Zd772vAt81sMdEG\nfiXRnv7fAj8xs08Bv2Dg+y4murHRc8D33f2vzex1oo755cCz5cpz95yZfRT4jkX3bUnH5b4E/JNF\n93w34K+99nu9yx5Kw5uLiEgiapISEZFElDBERCQRJQwREUlECUNERBJRwhARkUSUMEREJBElDBER\nSUQJQ0REEvl/dN8n+Ewu0AwAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1167ca390>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig2 = plot_sfs(sfs2.get_metric_dict(), kind='std_dev')"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.699897</td>\n",
" <td>0.0482455</td>\n",
" <td>[0.753289473684, 0.753289473684, 0.65184484389...</td>\n",
" <td>(1, 34, 35, 5, 13, 15, 19, 22, 24, 27, 29, 30,...</td>\n",
" <td>0.0478852</td>\n",
" <td>0.0239426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.699845</td>\n",
" <td>0.0485822</td>\n",
" <td>[0.752093301435, 0.754542606516, 0.65263323872...</td>\n",
" <td>(1, 34, 35, 5, 13, 15, 19, 22, 24, 25, 27, 29,...</td>\n",
" <td>0.0482194</td>\n",
" <td>0.0241097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.699766</td>\n",
" <td>0.0460857</td>\n",
" <td>[0.748056220096, 0.752192982456, 0.65373699148...</td>\n",
" <td>(1, 13, 15, 19, 22, 24, 27, 29, 30)</td>\n",
" <td>0.0457416</td>\n",
" <td>0.0228708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.699766</td>\n",
" <td>0.0460857</td>\n",
" <td>[0.748056220096, 0.752192982456, 0.65373699148...</td>\n",
" <td>(1, 34, 35, 13, 15, 19, 22, 24, 27, 29, 30)</td>\n",
" <td>0.0457416</td>\n",
" <td>0.0228708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.699766</td>\n",
" <td>0.0460857</td>\n",
" <td>[0.748056220096, 0.752192982456, 0.65373699148...</td>\n",
" <td>(1, 34, 13, 15, 19, 22, 24, 27, 29, 30)</td>\n",
" <td>0.0457416</td>\n",
" <td>0.0228708</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"13 0.699897 0.0482455 [0.753289473684, 0.753289473684, 0.65184484389... \n",
"14 0.699845 0.0485822 [0.752093301435, 0.754542606516, 0.65263323872... \n",
"9 0.699766 0.0460857 [0.748056220096, 0.752192982456, 0.65373699148... \n",
"11 0.699766 0.0460857 [0.748056220096, 0.752192982456, 0.65373699148... \n",
"10 0.699766 0.0460857 [0.748056220096, 0.752192982456, 0.65373699148... \n",
"\n",
" feature_idx std_dev std_err \n",
"13 (1, 34, 35, 5, 13, 15, 19, 22, 24, 27, 29, 30,... 0.0478852 0.0239426 \n",
"14 (1, 34, 35, 5, 13, 15, 19, 22, 24, 25, 27, 29,... 0.0482194 0.0241097 \n",
"9 (1, 13, 15, 19, 22, 24, 27, 29, 30) 0.0457416 0.0228708 \n",
"11 (1, 34, 35, 13, 15, 19, 22, 24, 27, 29, 30) 0.0457416 0.0228708 \n",
"10 (1, 34, 13, 15, 19, 22, 24, 27, 29, 30) 0.0457416 0.0228708 "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_clf2 = pd.DataFrame.from_dict(sfs2.get_metric_dict(confidence_interval=0.90)).T\n",
"result_clf2.sort_values('avg_score', ascending=0, inplace=True)\n",
"result_clf2.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So we can improve our AUC score to 0.6998 (standard was 0.6239 and 1st features selection was 0.6735)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"best_features_clf2 = result_clf2.feature_idx.head(1).tolist()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'col_1', u'col_34', u'col_35', u'col_5', u'col_13', u'col_15',\n",
" u'col_19', u'col_22', u'col_24', u'col_27', u'col_29', u'col_30',\n",
" u'col_31'],\n",
" dtype='object')"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"select_features_clf2 = data.columns[best_features_clf2]\n",
"select_features_clf2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As we can see, select_features_clf1 & select_features_clf2 are not the same"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.6998966787830504"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cross_val_score(clf3, data[select_features_clf2], y, cv=5, scoring='roc_auc').mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's run our EnsembleVoteClassifier again (3) with our new features selections (clf3)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.6823 (+/- 0.06) [Logistic Newton]\n",
"roc_auc: 0.7061 (+/- 0.05) [Xgb1]\n",
"roc_auc: 0.6999 (+/- 0.05) [Xgb2]\n",
"roc_auc: 0.6823 (+/- 0.06) [Logistic lbfgs]\n",
"roc_auc: 0.7104 (+/- 0.06) [Ensemble]\n"
]
}
],
"source": [
"for clf, label in zip([clf1, clf2, clf3,clf4, eclf], ['Logistic Newton', 'Xgb1', \n",
" 'Xgb2', 'Logistic lbfgs', 'Ensemble']):\n",
"\n",
" scores = cross_val_score(clf, data[select_features_clf2], y, cv=5, scoring='roc_auc')\n",
" print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Results :\n",
" - Our Ensemble (features selection clf3: 0.7104) is lower than our Ensemble (features selection clf1: 0.7218)\n",
" - But our last Xgb2 is higher than with our first features selection (0.6999 vs 0.6735)\n",
"\n",
"## Questions :\n",
" - It could be nice to use, for each estimator (clf) in our Ensemble (EnsembleVoteClassifier), the best features selection to achieve a better score\n",
" - Do we need to tune (gridsearchCV) our clf on all features or on features selections ?\n",
" - Do we need to tune our Ensemble (Weight) on all features or on features selections (on ensemble) ?\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"___"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using Pipeline to do it "
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
}
],
"source": [
"# CLF1 : Logistic Newton\n",
"sfs1 = SFS(clf1, \n",
" k_features=25, \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"#sfs1 = sfs1.fit(data.values, y.values)\n",
"\n",
"clf1_pipe = Pipeline([('sfs1', sfs1),\n",
" ('Logistic Newton', clf1)])\n",
"\n",
"# CLF2 : Xgb1\n",
"sfs2 = SFS(clf2, \n",
" k_features=25, \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"#sfs2 = sfs2.fit(data.values, y.values)\n",
"\n",
"clf2_pipe = Pipeline([('sfs2', sfs2),\n",
" ('Xgb1', clf2)])\n",
"\n",
"# CLF3 : Xgb2\n",
"sfs3 = SFS(clf3, \n",
" k_features=25, \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"#sfs3 = sfs3.fit(data.values, y.values)\n",
"\n",
"clf3_pipe = Pipeline([('sfs3', sfs3),\n",
"                     ('Xgb2', clf3)])\n",
"\n",
"# CLF4 : Logistic lbfgs\n",
"sfs4 = SFS(clf4, \n",
" k_features=25, \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"#sfs4 = sfs4.fit(data.values, y.values)\n",
"\n",
"clf4_pipe = Pipeline([('sfs4', sfs4),\n",
" ('Logistic lbfgs', clf4)])\n"
]
},
{
"cell_type": "code",
"execution_count": 337,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Ensemble\n",
"eclf = EnsembleVoteClassifier(clfs=[clf1_pipe, clf2_pipe, clf3_pipe, clf4_pipe],\n",
" voting='soft')"
]
},
{
"cell_type": "code",
"execution_count": 281,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.6544 (+/- 0.07) [Logistic Newton]\n",
"roc_auc: 0.6620 (+/- 0.05) [Xgb1]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"roc_auc: 0.6575 (+/- 0.04) [Xgb2]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"roc_auc: 0.6542 (+/- 0.07) [Logistic lbfgs]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"roc_auc: 0.6687 (+/- 0.07) [Ensemble]\n"
]
}
],
"source": [
"for clf, label in zip([clf1_pipe, clf2_pipe, clf3_pipe, clf4_pipe, eclf], \n",
" ['Logistic Newton', 'Xgb1', 'Xgb2', 'Logistic lbfgs', 'Ensemble']):\n",
"\n",
" scores = cross_val_score(clf, data.values, y.values, cv=5, scoring='roc_auc')\n",
" print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))"
]
},
{
"cell_type": "code",
"execution_count": 284,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 25/25"
]
}
],
"source": [
"# CV for our first Estimator Pipeline (Logistic Newton)\n",
"scores = cross_val_score(clf1_pipe, data.values, y.values, cv=5, scoring='roc_auc')"
]
},
{
"cell_type": "code",
"execution_count": 287,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.6544 (+/- 0.07) [Logistic Newton]\n"
]
}
],
"source": [
"print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), 'Logistic Newton'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Our score is **lower** than the previous try (*0.7161*). It seems that our SFS (features selection) doesn't keep the best score but the last features selection (i.e. the score with *SFS.k_features*)"
]
},
{
"cell_type": "code",
"execution_count": 298,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"First SFS for clf1 : [(1, 34, 4, 5, 9, 15, 21, 22, 23, 27)]\n",
"Pipeline SFS with clf1 : (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 25, 27, 29, 30, 31, 34, 35)\n"
]
}
],
"source": [
"# Our first SFS for clf1\n",
"print \"First SFS for clf1 : \" + str(best_features_clf1)\n",
"\n",
"print \"Pipeline SFS with clf1 : \" + str(clf1_pipe.named_steps['sfs1'].k_feature_idx_)"
]
},
{
"cell_type": "code",
"execution_count": 210,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEKCAYAAAAB0GKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcHHWZ+PHPU31M95yZSUggN+FQVA6zcigoWUVBFLwV\nWFcX97fqrseiroIiEhTlWHFR1F2vRX/+3MUVRcAr4IHKAiFyi4QjQC5yJ3P2WVXP74+qTjoz3TPV\nk+npnpnn/XrNq6/6dn27p7qe+t6iqhhjjDFjcRqdAWOMMVODBQxjjDGRWMAwxhgTiQUMY4wxkVjA\nMMYYE4kFDGOMMZHEG52BiSAi1jfYGGNqpKpSy/bTpoShqjX9XXrppZOSZjrsy/d9Cq5HNl9kIFug\ndyjPxy+6mFzBbYr8Nfv31+h9NXv+7LtozL7GY1qUMMz4qSpFz2cgW8TzfQquT77oUnR98q5P3vVw\nPZ/S8SWAAnuG8jyyYRdL53Yyu70FkZouVIwxU5AFjBkqX/ToHcqzrS/L9t4sazfvAcARwXGEWPiX\nSsRwkvERASGViNHWkmDdlj56O1MsntNOMh5rxEcxxkyS2MqVKxudhwN22WWXrRzP51i6dOmkpGmW\nfXm+T99Qng07B9iwY5D+bIGWRJyWhMPhhy0jlYzTkoiRjMdIxBxijoMjUrX0sHjJUtItcQayBbb3\nZUklYqSTY1+DTNZ3MZX/V82QZrruq9nzN1n7uuyyy1i5cuVltaSR8dZlNRMR0enwOepBVRnKu+wa\nyLGjP4eqkkrESEU4sdei6PoM5ArM60qzcHY78di0aR4zZloSEbTGRm8LGNNUruCyJ6xyKrg+iZhD\nazKO49SvrUFV6c8UiMcdDpvXRUc6Ubd9GWMOjAWMGc7zfXqHguqhwVwREaG1JU5ikq/280WPTN7l\nkJ5W5ne3EnOstGFMs7GAMYPlix5Pbe1jKOfS2hK0RTSSH5Y2UskYy+Z10tZipQ1jmokFjBlqKFfk\niS29CEJbqrlOzNmCS67gsWhOG/NmteJY91tjmoIFjBlo10COdVv7m6JUUY3vK32ZAu2pOEsO6qAl\nEZsWjeKer+SKLtm8S8xxSMYdEnGHRMyxcSmm6VnAmEFUled2D7Fx1xBdrckpcQLO5F0KRQ8EEnGH\ntmSC9nScdDJOMh6jJeE0dXtHwfXIFTwGc0V6MwWGcsX9N1AQAQRakwlaW+K0tsRJJWJ7g8l4Pp+q\n4mtQzVc6zkUEIdwfggjh48YFqlzRYzDsqt2WiltpsslZwJghPN/n2e0D7BrI0dXWMiV/mKVR5a7n\n43qKI8EI8kTcoa0lQXtqXyBJxsd3xS7CuL8bX5VcwSNbcOnPFujLFCgW/eA9HaElEatakvBV936u\nouejviLlny+ZoC0VdEbwwm19P7j1fHB9H89XXD943vP3HdulvY12tIsEJwMHQZwgTWsyQXd7kvZU\nYkK7VOeKHv2ZAjv6smQKbpA3VZLxGHNnpelua4k0NsdMPgsYM0C+6LFuax+ZvEtXW0ujszPhqgWS\ncZ33VVCUmBOOXhch5jjEHHAkvHUcHAfijoPjyN6uwYM5Fz88piay9OP5PkXXp+gpvgafT0TCAZLs\nvQ2e2/daTR9bNQgoWgosSsH1KRQ9VCGRcJjdkaIrnaS1JV5z6TRXcOnLFNjenyWbd3FESLfE9xvp\n73o+mbyL5yttLXHmzkrT1Zq02QCaiAWMaa6ZG7ebWVCVU5qgEpTgdr/nYW91TzIeIxF3pmTJLQrX\n88kVPVzXB4HOdILu9hTtqQTpZKxiqak8SOTyQbVi67AgUU3BDbpZo9Dd3sKczhQd6URTVz/OBBYw\nprHdAzme2tZPa7J5G7fN1KMalD6yheCEHo85dLe3MKstSSLuMJAtsqMvS7bgITUEiWr7yhU9coWg\nk8CczhSzO1K0tYycq8zUnwWMaUhV2dKbYeOO
QTqnSOO2mbo8X8kVgtmKCavE0snYhFcl+b6SCfeT\nTDjM60ozy9o7JpUFjGlmOjRuGzOWoueTDds70skYc7uC9o6Jnu/M7M8CxjQy3Ru3jamk6PoM5Yug\nkG6J7w0eVg078SxgTAOZcGbZbX0ZYo5DuzVumxkqWOXRxVdoTyc4qDNlPa0mkAWMKSpYp6LA1nDS\nwLgjtLUk6jqzrDFTSb4YjIlRoDOV4KCuNB3phAWPA2ABY4rJFlx2DuTY3pvFVyVtPaCMGVMpeAAc\n1Jli3qxWaywfBwsYU4Dn+/RnCmztzTKQLRKPCa0tCWJWmjCmJr4qQzkX1/Pobm/h4FlttKesi25U\nFjCaWLbgsnsgx9beLL6vpJt4skBjphJVJVvwyBU9OlJxDulutV6FEVjAaEIF1+OZbQP0ZfLEHIe2\nlJUmjKmXfDFoKE8kHBb2tNHd3mIjyquwgNGEntraR99Qgc7WZKOzYsyMUXR9BnNFYo4wv6eV2R0p\nayAfxgJGk+kdyvP4c310tyWtXtWYBvB8n8FsEQXmdaWZ25W2AYEhCxhNxPN9Htmwm0TMsSsbYxrM\n95WhfBHXU+KxfbMXl2YDLs1gLA7B7MbhrMExx8EJbxMxh5gjwV/Z/anaVjKegGGhtk627slQdH1b\ny9qYJuA4Qkc6qBb2/X2zE5duix6At3dK+9JsxrBvlmNfFWHkWiQxR/bOcJyMBxeIyViwYFZna3LK\nBpRKLGDUQSbvsnl3hi5rtzCm6ewbEDsxJ3LfVzxViq5PruDhayFYAMvzWTi7jYWz2ydkP83AAsYE\nU1XW7xygJRGzkdrGzACOE6xuyLCaZ1+VzbuH6Eglps18cNbfbILtGsgxkCnS2mKx2JiZzBGhPZVg\n3bZ+Cq7X6OxMiLoHDBE5Q0TWisgTInJhhdf/RUQeEJH7ReQREXFFZFb42rMi8lD4+r31zuuBKrge\nG3YO0pG2dgtjTLB6o4jwzLaBvW0iU1lde0mJiAM8AbwKeA5YA5yjqmurbP964AJVPS18/DTwV6q6\nZ4z9NEUvqWe397NrIG9jLowx+9kzmGfhnDbmd7c1Oit7jaeXVL1LGCcAT6rqelUtAjcAbxhl+3OB\n/y57LEyRarP+bIFtvVkrXRhjRuhqTbJp5xD92UKjs3JA6n0yXgBsLHu8KXxuBBFJA2cAPy57WoHb\nRWSNiPxD3XJ5gDxfWb99gLZUwgboGWNGcByhLRVn3da+Kd2e0UxX72cBd6pqb9lzJ6vqcuBM4AMi\nckpjsja67X0ZckXPJhM0xlSVjMdQFZ7dPnXbM+rdlWczsLjs8cLwuUrOYf/qKFR1S3i7Q0RuIqji\nurNS4pUrV+69v2LFClasWDHePNckV3DZuGuIrrS1WxhjRteRTrBnMMe23gyHTHJ7xh133MEdd9xx\nQO9R70bvGPA4QaP3FuBe4FxVfWzYdl3A08BCVc2Gz7UCjqoOikgbcBtwmareVmE/DWn0VlWe3BKs\nu91mS6kaYyLwfaU3k+cFC3sa2ubZdI3equoBHyQ42T8K3KCqj4nI+0TkvWWbvhFYVQoWoXnAnSLy\nAHAPcGulYNFIewbz7BkqWLAwxkTmhEswT8X2DJt8cJyKns+fN+yiJREnEWumpiBjzFQwkC3QkUpw\n+CFdDeks03QljOnsud1D+D4WLIwx49KRTrJnKM/2vuzYGzcJO9uNw2CuaGMujDEHrLO1hWd3DDCY\nKzY6K5FYwKiRr8qz2wdIJ2M25sIYc0BiYXvGU1v7KHp+o7MzJgsYNdrRlyVbcG3VLmPMhGhJxPA8\nZf2OAZq9TdkCRg3yxdLkgjbmwhgzcTpbk+wayLO9L9forIzKLpNr8NzuIWJOsDSjMcZMpK7WJOt3\nDBCPybjOMaqgBKsDer7iq+KXblXxfVD18TQYCzIeFjAiKrgeOwdyNhOtMaYuYo7Q2hLn6W39B/xe\npWVhJVyb
XCrcHw8LGBHtGcwDTKv1eY0xzaUlEWvqOemsDSMCX5UtvRkb0W2MmdEsYEQwmCtSKPo2\nSM8YM6PZGTCC7b3Zpi4mGmPMZLCAMYZ80WP3UJ500gKGMWZms4Axht2DuQPqVWCMMdOFBYxReL6y\ndY/NGWWMMWABY1QD2QKu5xNz7Gsyxhg7E45ia2+GFmu7MMYYwAJGVbmCS3+mQNomGTTGGMACRlW7\nBnJWFWWMMWXsjFiB5/ts7c3ayG5jjCljAaOCvqECnq82K60xxpSxgFHB1t4MrS3WdmGMMeUsYAwz\nlC8ymHNtKhBjjBnGAsYwO/tzxG2SQWOMGcHOjGVcz2dHf5Y2q44yxpgRLGCU6R3KowqONXYbY8wI\nFjBCqsqWPdbYbYwx1VjACA3lXbIFj2TcGruNMaYSCxihHX1ZEnH7Oowxpho7QwIF12PnQM6qo4wx\nZhQWMIA9g3kAHFskyRhjqprxAcNXZUtvxuaNMsaYMcz4gDGYK1Io+iRssJ4xxoxqxp8lt/dmbRoQ\nY4yJYEYHjHzRY/dQnrStqmeMMWOa0QFj92AOAcQau40xZkwzth+p5ytb92TpSFtj92TYuGED1151\nA9u3wdx5cMGF57Bo8eKmSDPZ+zJmqpqxAWMgW8D1fGKOBYxajPfEev47rmfD+s8BbcAQD91/Cdf/\n8PyqaScrzWTvy5ipTFS10Xk4YCKitX6OtZv3UHB90skZGzNrVukkuXjJ/idJ14W+XmHPbmHPHqF3\nt/CNr17Jww9cFKYpGWLJoVfx0lMuxvcF3wf1wfPB9+G+1ZezedOFI9IcsuAqjlv+6Yr5e/D+y9my\nubY0IvDgfZfzXIV0Rz7/Kt741k+SboV0q9LaqrS2Bfe/8ZUr+cPvPjkizVlv+gLXfO0TY36PVjIx\njXbk/Fmoak318TPybOmrMpAtMqutpdFZmVKuveqGsmAB0MaG9Z/jLa+9ms6uz7Bnt0NmCDq7lO4e\nZVa30t2tbNkM+59Yg8ciPke90MeJKY6A44A4we2jD/sV07S1+Zz+Ordi/p58vHqa15w5Mk3pGuOJ\ntZXTDQ367NzhkMlAJiNkM0I2vL/2Uadimj/e4XD5JSnmL/RZsNBn/kJlwUKf7h5F5MBKQVZlZhpt\nRgYME4hyQtmyWbj3njir74pz2y9iVDpJLljo8m//nmFWt9LZpTjDulJ87AM+t940xPCr8aOPVc59\nV6Fi3v74O2XdkyPTHPVC5cyzixXT/GaV8tQTldO87g2V0wD87vbK6ZYfr1x0aa5imo99wKv4mQ4/\nEuYv9Hluk8Oae+I8t8nhuc1CIS8cssBnoP9GdmwfGXQ//5kruOZrH6e1NSj1lLMqM9MsLGDMUNVO\nKFde+x42bVzGvXfFWX13jMEB4YSXepzwUpcd231+/5uRJ8llh8PSZX7VfV1w4Tk8dP8lI6qyLrjw\n/Ianmeh9XfXl81m0eGQQHByELZsdPvpPHju2jwy6d/7e4WXHdOIrzJ6t9Mz26Zmj9MxWHnngRxVL\ndp/8yJW870MX4ThKLAaxWFA6i8Xgumt+WDHNtVdZlZkZv8htGCKSBhar6uP1zVLtam3D8FW5b92O\naVUlVeuP/GMfuJpbb/oUw0/+yeS/8qrTP8XxJ3mc+DKXw47w95YYorRhTFT+JjPNZO6r2vdeavvI\nZGD3LmH3LoddO4XdO4Wvf/kLbFx/+Yj36pp1CS865jN4vuB74HkEtz489cRlZIY+VzHNm972aQ49\nzOfQwzyWLvOZd7DuLdUcyP/YTC3jacOIFDBE5Czgi0BSVQ8VkeOAz6rq2ePL6sSa6QGj2o/83797\nPuIsZfNGZ+/fpo3Cpo0Oj/35s7juZ0e814kvu5jv31j9CtSuPg/MeE7IYwWZWtIcf9KVvPLVn+Lp\ndTGeWefwzDqHXFZYuiwIIE+svZwn1o7sADBWY74dF1NPPRu9VwInAHcAqO
qDInJoTbkzdfNvV1Zu\njD7rVf/KkqWXsGCRH/4pr36Rx8JFPt/6us/tvxxZvTR33uj7WrR48ZhVGqa6RYsXc/0Pz+faq75Q\ndnId/ep9IqvMrrx2ZJVZfx88sy7G0+sc7l9TuQPAHb9x+Mg/ppl3sDLvEJ+585R5B/vMPdinWFjP\n+95l7SUzQdQSxj2qepKIPKCqLw6fe1hVj6l7DiOYiSUM34cH/hRj1c8T/OC7n6dYrK20YFUPU0uj\nq8xeesoVvOWcT7Fti7B9m8O2rQ7btwrbtjo899xnUf/jI9JE6WJsGqeeJYxHReQ8ICYiRwAfBu6q\nNYNmbKP9yH0f7lsTY9XPEqz6RYLOTuWM1xc5+RU+d1RojB6ttDCeK13TOOMp2Y0nTbWSyeVfPJ9F\niyv3NHvnW4rce/fIUsltv4xx2adSvHyFy4knu7QN3wSryppqopYwWoGLgdeET60CLlfVyn0OJ9l0\nKWFUuupftOQSPnLh/+H+NYdx2y8SdPcEQeL01xc5/Ai/ajorLZjxmqgOFK945RWc+NKL+ePvEzzy\nQIwXHetxyqkup5xa5KgX+WzeZMdtI9Wt0bvZTZeAUe2H19l5Ne95/yc54/VFlh1eufuqXamZRoly\nwZLJwOq74tx5R/DX3y+k0ivZvLH2BnYzMepWJSUitwNvU9Xe8HE3cIOqnl57Nk0127dBpQbHo17k\n8k8X5EdNa43RplGiVG+2tsJfn+by16cFI+43bRTec27lBvZNGyYv76Y2Udsw5pSCBYCq7hGRuXXK\n04zU1wtbt8SA2nsuGdNotV6wLFykHH2s8uzTI4/3Rx9JsOL4Dl5ykstLTnQ5/kSPZYf7+40VsdJ0\nY0QNGL6ILFbVDQAisgSY+nVZTcD34ab/SXDNFSle9vJ34rmXsGljbSOVjZmKqjWw/+cN5+MWh1iz\nOsafVsf5xldSZLPwVyd4HHHk0/z4f65n2xbrwtsIURu9zwC+CfweEODlwHtVdVV9sxfNVG3DeOxR\nh8s+laZYhJVX5Dj6WM+unsyMEvV437JZ+NPqOF+55krWPzOy3eP0113Bdd/6+KTlezqoa6O3iMwB\nTgof3qOqO2vMX91MtYAxOABf+WKKW36S4IJP5HnbeQVitkqsMWP627dezeq7Pj/iecf5DIuXXsqx\nyz2OW+5x3HKXI4/ySYTL3diF2Ej1nt68BdgdpnmBiKCqfxgrUVg6uZZgOdjvqOpVw17/F+BvCKq4\nEsBRhG0mY6WdalTh5z9NcOXnUrxihcsv7hikZ7bV7BkTVdCeN7Ld48yzPd7/4QwP3hfjwfvj/Nf3\nkmze6PCCoz2WHf40v73tenbusGqsAxW1Suoq4B3Ao0CpX6eONZeUiDjAE8CrgOeANcA5qrq2yvav\nBy5Q1dNqSdusJYzyq5p0q9C7+3xyuaWsvCLL8uO9uu7bmOmoljFHA/3w8IMxrrzsah5/bGQ11ste\nfgVfvz6YUn60/U3Xkkk9SxhvBJ6nqqP37RzpBOBJVV0PICI3AG8AKgYM4Fzgv8eZtqlUOrC7ey7h\nh7ecz9Jl0+OAM2ay1TJDQUcnnPwKj1ndHpW67z70gPDSoztZcqjPsctdjn2xx7HLgx5ZsZitKVJJ\n1IDxNEF1Ua0BYwGwsezxJoJAMEI4ffoZwAdqTduMKq1Ot2f357juGhuUZMyBqLULb7VqrFe+2ueK\nL/Xz2KMxHro/xt13xvnGV1vYucPh6GM9du6ovA5JlDVFpquoASMDPCgiv6EsaKjqhycwL2cBd5aP\n95iqdu8S7vpj5SU8g8F5xpjJMtpsv8kWOHZ5ULIo2bNbePjBGCsvqjywcOP6oD1y+MqIM0HUgHFL\n+FerzUB52W1h+Fwl57CvOqrWtKxcuXLv/RUrVrBixYracjoBVOGWnyS48rIUXbOEXTttEJ4xjVbr\nRJvdPcqpr3RZfryyedPI3/Bjf0nwip
d0cPyJLie81OMlJ7kcNgEDC+s9I/Hqu/7I6rvuHDMfo6nr\nXFIiEgMeJ2i43gLcC5yrqo8N266LoNproapma0kbbtvwRu9NG4VLL0yzY7vD56/JMqv7GZtYzZgp\nrFoD+3/ecD7qL2XNPTHWrI6z5p44mSE4/iSPI454mht/+A22bqntdz+eCUQPdNLReq64dwRwBfAC\nIFV6XlWXRUh7BvBl9nWNvVJE3hck12+G27wbOF1VzxsrbZV9NCxgeB58/z+TfP3aFv7+/QXe8/68\n9f02ZpqoZWDhmtVxvvLFK9nw7MgeWYsWX8UrXnkx6TSkW5XWViVVdv8H372S1XddNCLdqa+6gk98\n+kJE2PvnOMHtFZddxW9v++SINFFXR7z1pi/ULWDcCVwK/BtBW8P5gKOqn6llZ/XSqICx9i8OF/9L\nmnQaPnd1lkMPqzyTrDFmZqg2sHDZ4Z/mb/7uYrIZyGaETFbIZSGTEbIZYfX/fpa+vpFrsKfTn2HB\nokvx/aDKu/Tn+7Bt62UU8iMXTpt90CWc966LWbzUZ8lSn8VLfbp7gnXb9y+VtNetW21aVX8jwZl5\nPbBSRO4DmiJg1Nvwq4x/uuAcbvnJEdzw/SQf+2SOt55bxHEanUtjTKNV65H1wqOVv31PoXIi4GMf\n8Ln1ppHpTjvD45qvDVZJ41VMs2gRuC787vY4G9Y7bHg2hu/DkqU+u3f/iC2bP8fIxvxoogaMfDiQ\n7kkR+SBB43P7uPY4xVSqJ/zlrZ/hpS//R279zVzmzrOR2saYwHjWXx9vumpprvn6+SxavP8IiN49\nwvpnHS66oFLPr+iiVkkdDzwGzAI+B3QBV6vqPePe8wSqZ5VUtUWNbJEXY0wlzdpLCoafz8RW3Iui\nloBRrU7yxJddzPdvtIBhjJk6DrQNI1LNu4i8RERuEpH7ReTh0t+4cjzF7KuTLGfjKYwxU09pTMpZ\nb/rCuNJHrZJ6HPg48Aj7Jh+kNM9To9WzhLFxwwbe/fbr2bTBxlMYY6aPek4+uENVxzPSe8pbtHgx\nf/cP7+Ub113NssPdMUeJGmPMdBU1YFwqIt8Ghs8l9ZO65KrJrL7rMD560YW85Zxio7NijDENEzVg\nnA88n2DG2r3rYQDTPmAMDsDdd8a54kuZRmfFGGMaKmrAOF5Vn1fXnDSpX69KcMJJLp1djc6JMcY0\nVtTxyXeJyAvqmpMm9fObE7zuDVYVZYwxUUsYJxGsh/EMQRuGEEweeEzdctYE9uwW7rs3zrX/YdVR\nxhgTNWCcUddcNKnbfxnnlFNd2sY/kt4YY6aNMQNGuC7FKlV9/iTkp6n8/OYk57271lVpjTFmehqz\nDUNVPeBxEZlRAw927hD+/HCMU1/pNjorxhjTFKJWSXUDj4rIvZTNk6GqZ9clV03gVz9L8NenFUml\nG50TY4xpDlEDxiV1zUUT+vnNCd77AauOMsaYkkgBQ1V/LyLzgOPDp+5V1e31y1ZjbdksrHvS4eRT\nrTrKGGNKos5W+3bgXuBtwNuB1SLy1npmrJF++bMEp53ukkw2OifGGNM8olZJXUww2ns7gIgcBPwa\nuLFeGWukn9+c4KMXWnWUMcaUizrS2xlWBbWrhrRTyoZnHZ7b5HDiyVYdZYwx5aKWMH4lIquA/w4f\nvwP4RX2y1Fi/uDXB6a8rEo/6zRhjzAwxailBRFoAVPXjwDeAY8K/b6rqhfXP3uSzuaOMMaaysa6j\n7waWi8j3VfVvmebTmT/1pMOe3cJfneA1OivGGNN0xgoYSRE5D3iZiLx5+IvTbQGlX9yc4MyzizjT\nsnXGGGMOzFgB4/3A3wCzgLOGvTatFlBSDaqjrv5yttFZMcaYpjRqwFDVO0XkLmCTqn5+kvLUEI89\n6lAsCMe82KqjjDGmkiiTD/rAtB2kV/LzmxOc+YYCIo3OiTHGNKeotfW/EZG3iEzP06kq/PKWJGee\nbb
2jjDGmmqgB433Aj4CCiPSLyICI9NcxX5Pq4QdiJJLKUS/0G50VY4xpWlEnH+yod0YaqTT2YnqW\nn4wxZmJEnXxQROSdInJJ+HiRiJxQ36xNDt8PRnefaYP1jDFmVFGrpL4OvBQ4L3w8CHytLjmaZPfd\nG6O7Rzn8CKuOMsaY0USdMelEVV0uIg8AqOoeEZkWk3/bVCDGGBNN1BJGUURiBIP1StObT/lLcteF\nVT9PcObZhUZnxRhjml7UgPEV4CZgroh8HrgT+ELdcjVJVv9vnPkLfRYv0UZnxRhjml7UXlI/EJH7\ngFcBArxRVR+ra84mgVVHGWNMdKMGDBFJEcwndTjwCPANVZ0WKwsVC/DrVXE+9LFco7NijDFTwlhV\nUt8DXkIQLF4LfLHuOZok99yZ5LAjfA5ZYNVRxhgTxVhVUi9Q1aMBROQ7wL31z1J9PfPMej796ev5\n1SqX+QuFjRvezqLFixudLWOMaXpjBYy9Ffyq6k71qaSeeWY9r371daxbdxnQxu5dQ5z/jku4/ofn\nW9AwxpgxiGr1KhkR8YCh0kMgDWTC+6qqnXXPYQQioqN9jpJ3vvMyfvCDfwHayp4d4qw3fYFrvvaJ\nuuXPjJ/nK0XXY7wVh6rgq6IKqooS3AKIBK9DeECHaRwR0i1xEjFbSctMX0fOn4Wq1lQKGGs9jNiB\nZam5bN7ss3+wAGhj+7ZG5MaUU1VcTym4HkXPD87eAomYQ1tLYlyrIDoCjuPgCMQch5gjOCI44a0I\n4a3gCIgIqkp/tsCO/hyDuSICtLbEScan1U/BmHGJOtJ7WliwwCEoMO1fwpg7r0EZmqF8Xyl6PgXX\nw/MVNDhZp5IxetpbaG2Jk0rGaUnEGnKV35ZKcEh3G7mCS3+2yM7+HL2DeRClJREnlYgx1atnjRmP\nUaukpoqoVVLD2zBgiMVLrA1jsgzmiriuTywmtKUSdKYTpJPBCTiZiOE08Um44HoMZIvsGsjRlymA\nQjIRI5Vs7nwbU814qqRmVMCAfb2kHl+XZ/58hwsuPMeCxSToG8rT2hJn2bxOUsmpXbB1PZ/BXJHd\ng3n2DObxw2PPEQmqvZzgNhZWh1lpxDQjCxgR+arct24Hs9pa6pgrA8F33TeUZ3ZHiqVzO4iNpzGi\niXm+ksm7FD2PgutTKHoUPJ9C0afgebiej/oErersa1x3RIjHHJJxh7g1rpsGmPBGb2MOhOv59GcL\nLOxpY35P27S80o45Qkc6ASQqvq6qeL7i+orn+3he8LjgeWTzHv3ZAoO5oPe6I0Iy7pCMx3Cc6fdd\nmanPAobGXIcnAAAXn0lEQVSpi3zRI5N3OfzgLmZ3pBqdnYYREeIxIehkVbmnVdHzyRU8Mvki/Zki\n/bkCvh90BU7Eg1JIIuZMy4BrphYLGGbCZfIunu/zgkXdtKcqX3mbfRIxh0TaoSOdYN6soFSSd31y\nBZeBXJG+oQJ9mUKpp/GIwBEljARJgq7E5e9R+XHwZCzsgmxMiQUMM6H6MwVaEjGet6CHVMLGLoyH\niJBKxEglYsxqa2HRbPD8oBSSK3p7Bx6WN9vp3tuRT4Zb4/tBm1JpIKOvivqKVxrUqIq/93nIFl18\nVTrSSWIWOAwWMMwECRq3C/S0J1k6t9MacidYzHFoSzm0TWKJzfV8dvRn2bx7CAjaaqwL8cxmAcMc\nMM/36RsqML+njQWz2+ykMk3EYw6HdLcxuyPFtr4sW/ZkiDsO7am4tafMUHW/DBSRM0RkrYg8ISIX\nVtlmhYg8ICJ/FpHflT3/rIg8FL425WfKnY4KrkdfpsCygztZNKfdgsU0lIzHWDS7nWOWzGZWW5Le\noQKZ/LRYFsfUqK4lDBFxgK8SrNT3HLBGRG5W1bVl23QBXwNeo6qbRWRO2Vv4wApV3VPPfJrxyRZc\niq7PUQu76UwnG50dU2epRIxl8zqZNyvNpl1D7B7M05qMTfmBmCa6
ev+nTwCeVNX1ACJyA/AGYG3Z\nNucBP1bVzQCqurPsNWESSkGmNp7vM5AtkkrEeOGibjthzDBtLQmeN38W/dkCm3YOsmcwT1vKJmic\nCP2ZPH7EMchSdq/U2SGdjJOM168Ldr1/6QuAjWWPNxEEkXJHAomwKqod+Iqqfj98TYHbw2nWv6mq\n36pzfpuG72vQxbGJqniyBZd80SPmCAt62jioK21TgM9gnekkRy3spncoz8ZdQ/QO5WlPJazDwzhl\n8i6pZJznzZ+1tzvzvp5wuvf+3h5xZd3kPF8ZzBXZ1Z+jb6gAElQlTvRcZ81waRgHlgOvJJgR8G4R\nuVtVnwJOVtUtInIQQeB4TFXvbGRmJ8NQrojr+wiCr7q3n3winEZiMk/Snu8zlCt1r0yweE67dbM0\ne4kI3e0pOltb2D2YY9OuQbycoijxWGzv1CfWtjW6YOZmn+ct6Nk/4MqIO1Wlk3EO6kxTcINBs7sG\ncuwZKqC+Eo87pJPxA/7d1jtgbAbKZ/ZbGD5XbhOwU1VzQE5E/gAcCzylqlsAVHWHiNxEUDqpGDBW\nrly59/6KFStYsWLFBH2EyaOq9GcKtLbEOergbhJxZ+/8RNmiy1C2yFDeZShcp0EJpqYozUk0kfM0\nlZcmDulupae9xaqeTFUxRzioM01Pe4p80aPgegzmigxkiwxmi/tN0JiMOyTiMbvoCPm+Mpgt8vwF\nsyZk7FIyHiMZD8bweL7PUN6ldzDPqtt/y71334mE68yMR10nHxSRGPA4QaP3FoI1wc9V1cfKtnk+\ncB1wBtACrAbeATwLOKo6KCJtwG3AZap6W4X9TPnJBz1f6csUmNeVYtGc9lFP/p4fLDSUL3pkCy5D\nOZfBXJGi5++9DgmCiRNMS+E4xGIy5lVeqTTh+T6drUkOntVqpQlzwEoj14PjtchAJjheXd/fu00y\nHtt7rM600eV7BvMsPqidg2e11nU/qkqm4NKfKbCzP8cxS+c01+SDquqJyAcJTvYO8B1VfUxE3he8\nrN9U1bUisgp4GCi1VfxFRA4FbhIRDfP5g0rBYjooXY0tPaiDuV3pMdstYo6QTsZJJ+P7BT3X8ym4\nPq7nU/T8vQElW/DIZov7GtPCOSZKU3CLQK7ghv3urTRhJlb5yPWu1iQHzwqeDy56fHLF4KInW3AZ\nyhfxfN3vwqdUDRuPybSb7TiYybmFeV3puu9LRGhrSdDWEiwQNq73sOnNGyuTd3E9n8MP7qSrzvlx\nvVIwUVwv+KHmisGqd3M6UlaaME2hdMFTdINVGTOli56wG/doytdmH36fssfpcEXHRsrkXRwHjlrQ\n3ZCOAuGSxM1TwjCj6xvKh70iJqdrajwWNEDumzu28QHTmOFKx2mloT1euLxvMSxJA3t7E4bzKyJl\nkyyOfCxkckW29mbYM5gnHpNwzfjJvVAquj5Fz+NF83umVK8yCxg1GMwG9a4HenXi+0pfJk9PR4ql\nB3VMqQPGmEYKqlFjB9Q4nErE6OlIBT2JBnNs783iq05aqcP3lYFcgefNnzXlqn6nVm4bzPV8Fh3U\nzu7BPL2DeZDai7ZFz2cgW2DRnHYOmdXaVOMsjJlJWlvitLa0M7+7lb6hAlv7sntLHa0tibpVz/Zl\n8iya094UVeK1soARUcH1SLXEOHhWKwfPaiVfDFZL29Gfixw8Sl1Vjzyki+72mbuokDHNJOY49HSk\n6OlIkS247BwISh2er7S2TGypoz9ToKcjxSF17hFVL9boHVF/psD8ntaKvQtKwWNnf46BcIzE8ODR\nnymQiDsccUgX6SlWDDVmpvF8n/5Mga29WQayRWKO0JY6sFJHtuAiwPMXdjfFDAnW6F1Hvq90tlae\nYK8lEeOgRHrvKMtSP+c9Q3mEIED1tLWwdF5nUxwoxpjRxRyH7vYU3e0TU+oodXN/4aKeKX0OsIAR\ngecrsZhEKhkk4zHmdKaZUxY8
PF85qCtt0yMYMwWlk3EWza7c1hGlh5WvykC2wJGHdNHaMrVPuVM7\n95MkV3DpaU/VfMIvBQ9jzNRX3tZRSw+rvqECC3rapkW7pQWMCIquT3f71OvRYIypjxE9rKqM6xjI\nFpjVlmR+z/hGVjcbCxhjUFUQpnxR0hgz8YaXOnb2Z9nen0NVicccYo6wdG7HtKmOtrPgGAquT2c6\nMaUbqowx9dfaEmfxQR0smN1G71CBXQM5Fsxum1YLS1nAGEOu4DJvbkejs2GMmSJijsPsjhSzO6Z+\nm8Vwdtk8Bh/oSCUanQ1jjGk4CxijcD2fZNxp+KyWxhjTDCxgjCJb8JjTkbL5nowxBgsYo/J8n65W\n605rjDFgAaMqXxURse60xhgTsoBRRb7oMavNVqAzxpgSCxhV5IsePVNwvnpjjKkXCxjVKLSnrTut\nMcaUWMCooOB6pFvi02qEpjHGHCgLGBXkCh5zOq06yhhjylnAqMD3lY505cWSjDFmprKAMUwtiyUZ\nY8xMYgFjmPEulmSMMdOdBYxhbLEkY4ypzAJGmdJiSW02utsYY0awgFGmtFhS3BZLMsaYEezMWCZX\ncOmZhoueGGPMRLCAUcYWSzLGmOosYIRKiyWlrDutMcZUZAEjVFosyRhjTGUWMEK2WJIxxozOAga2\nWJIxxkRhAYNg7YtuWyzJGGNGZQGDUsCw6ihjjBmNBYyQLZZkjDGjm/EBo+B6tCZtsSRjjBnLjA8Y\nuYLH7E7rTmuMMWOZ8QHDV6XDqqOMMWZMMzpgeL4SdxxabXS3McaMaUYHjGCywRbEFksyxpgxzeiA\nUfR8Zll3WmOMiWRGBwywxZKMMSaqmRswFDpbbbEkY4yJauaeLQV62q07rTHGRDVjA0Y6GbPFkowx\npgYzMmAIMKcjbYslGWNMDURVG52HAyYiOh0+hzHGTBYRQVVrGlMwI0sYxhhjamcBwxhjTCQWMIwx\nxkRiAcMYY0wkdQ8YInKGiKwVkSdE5MIq26wQkQdE5M8i8rta0hpjjJkcdQ0YIuIAXwVOB14InCsi\nzx+2TRfwNeD1qvoi4G1R0x6IO+64Y1LSTNd9Wf6mzr6aPX+Tua9mz99k76tW9S5hnAA8qarrVbUI\n3AC8Ydg25wE/VtXNAKq6s4a04zZdDwD74Y0/zXTdV7PnbzL31ez5m+x91areAWMBsLHs8abwuXJH\nAj0i8jsRWSMif1tDWmOMMZOkGYY6x4HlwCuBNuBuEbm7sVkyxhgzXF1HeovIScBKVT0jfHwRoKp6\nVdk2FwIpVb0sfPxt4JfA5rHSlr2HDfM2xpga1TrSu94ljDXA4SKyBNgCnAOcO2ybm4HrRCQGtAAn\nAl8CHo+QFqj9QxtjjKldXQOGqnoi8kHgNoL2ku+o6mMi8r7gZf2mqq4VkVXAw4AHfFNV/wJQKW09\n82uMMaa6aTH5oDHGmPqbcSO9ReQ7IrJNRB6uIc1CEfmtiDwqIo+IyIcjpGkRkdXhgMRHROTSGvbn\niMj9InJLDWmeFZGHwv3dGzFNl4j8SEQeCz/biRHSHBnu4/7wti/i9/GRcGDmwyLyAxFJRszjP4ff\nX9XvvdL/VES6ReQ2EXlcRFaF432ipHtrmE9PRJZHTHN1+B0+KCI/FpHOCGk+W/b/+pWIHBxlX2Wv\nfUxEfBHpibCvS0VkU/g/u19Ezoi6LxH5UPjZHhGRKyPs64ay/TwjIvdHSHOsiNxdOnZF5CVR8ici\nx4jIXeH3eLOItA9LU/F3O9qxMUqasY6L4ek+FD5f9dgYJU3VY6Na/sper3ZcVNvXmMfGflR1Rv0B\npwDHAQ/XkOZg4LjwfjtB+8rzI6RrDW9jwD3ACRH39xHg/wG31JDHp4HuGr+L7wLnh/fjQGeN6R3g\nOWDRGNvND/OXDB//EHhXhPd/IUFVZUv4Hd4GLIvyPwWuAj4R3r8QuDJiuucBRwC/BZZHTHMa4I
T3\nrwSuiJCmvez+h4B/j3qsAguBXwHPAD0R9nUp8NFafxfAivA7j4eP50TJX9nrXwQ+HWE/q4DXhPdf\nC/wuYv7uBU4J7/8d8NlhaSr+bkc7NkZJM9ZxUS1d1WNjlDRVj41qaSIcF9X2NeaxUf4340oYqnon\nsKfGNFtV9cHw/iDwGBHGhKhqJrzbQnBCHrP+T0QWAmcC364ljwTrQkX+f4ZXOi9X1evDvLqq2l/j\nPk8D1qnqxjG3DE74bSISB1oJAs1YjgJWq2peVT3gD8Cbh29U5X/6BuB74f3vAW+Mkk5VH1fVJwm+\nzxGqpPm1qvrhw3sIfrhjpRkse9gG+AwzyrH6b8DHo+YvNGrHkCrp/pHgZOqG2+yMkKbc24H/jpDG\nB0pX+bMIekhGyd8R4fMAvwbeMixNpd/tQkY5Nqr91iMcF9XSVT02RklT9dgY41w02nExWrrInYZm\nXMA4UCKylOBKZ3WEbR0ReQDYCtyuqmsi7KL0T6+1cUmB2yUY/PgPEbY/FNgpIteHRdFviki6xn2+\ng2EnhIoZU30OuAbYQHAy6FXVX0d4/z8DLw+rEFoJAumiiHmbq6rbwv1vBeZGTHeg3kPQLXxMInK5\niGwgmO3gMxHTnA1sVNVHaszXB8NqkW9Lheq5Ko4EXiEi90gwsHZEVdEo+Xw5sFVV10XY/CPAF8Pv\n4mrgkxF382j4fUAQnBZW27Dsd3sPMC/KsVHLbz1iuqrHxvA0UY6N8jS1HBcV8hf52LCAUYOwjvRG\n4J+HXQVUpKq+qr6Y4EA+UUReMMb7vw7YFl4JCDVEfuBkVV1OcFL9gIicMsb2pQGTXwvTZYCLou5M\nRBLA2cCPImw7i+CqbglB9VS7iJw3VjpVXUtQfXA78AvgAYKedONR994dInIxUFTV/4qyvap+WlUX\nAz8gqHoY6/3TwKcIqhH2Ph1hV18nqMo7juDi5UtR8kdwjHSr6knAJ4D/iZgOgi7wY15MhP6R4De1\nmCB4/GfEdO8hONbXEFyJFyptVOF3O/xYGHFs1PpbHyvdaMdGpTRjHRvlaQh+E5GOiwr7qunYsIAR\nUViVciPwfVW9uZa0YVXP74DRG5TgZOBsEXma4Mf21yLyfyPuY0t4uwO4iWAurtFsIrgi+VP4+EaC\nABLVa4H7wv2N5TTgaVXdHVYt/QR4WZSdqOr1qvoSVV0B9AJPRMzfNhGZBxA2Gm6PmG5cROTvCIL1\nmIGwgv9iWHVKFYcBS4GHROQZgguR+0Rk1NKTqu7QsPIa+BZwfMR8bST4XxGWjn0RmT1WIgnGVL2Z\noK0qiner6k/D/dzI2Mcu4bZPqOrpqno8wVxzI0ozVX63ox4b4/2tV0s32rERYV8jjo0KaSIdF5X2\nVeuxMVMDRq1X7xBc9fxFVb8caQcic0rFu/DK8NXA2tHSqOqnVHWxqi4jGKj4W1V9V4R9tYZXDohI\nG/Aaguqc0fa1DdgoIkeGT70K+MtY+ypTyxXkBuAkEUmJiIT7ijSmRkQOCm8XA28i+AFV3JT9/6e3\nEDSEArybYIBolHTDXxszTdiz5OPA2aqaj5jm8LLX3kj172NvOlX9s6oerKrLVPVQgqD/YlUdHgyH\n76u8B9abqX5sDP8ufkowZQ/hcZJQ1V1jpIHgWH8srIqMsp/NInJquJ9XUf2iYPjnKh0bDvBp4D8q\npKn0ux3r2Bjrt17tuBiRLsKxUSnNWMfGfmlqOC4q7SvqsRHQiK3j0+WP4ITzHJAnOJGdHyHNyQTF\nvgcJqkXuB84YI83R4XYPEvT0ubjGfJ5KxF5SBO0Rpbw9AlwUMd2xBKPxHyS4kuyKmK4V2AF01PB5\nLg0P/IcJGhoTEdP9ITyIHwBWRP2fAt0EDaGPE/T0mRUx3RsJrqyzBDMM/DJCmieB9eH/+37g6xHS\n3Bj+rx4kOGEdUuuxStDzbHhvmEr7+r/h9/4gQRCYF/G7iA
PfD/P5J+DUKPkDrgfeW8P/6mXh+z8A\n3E1wsouS7sPh/3ct8IWov1ugp9qxMUqasY6LSuleO9qxMcq+qh4b1dJEOC6q7WvMY6P8zwbuGWOM\niWSmVkkZY4ypkQUMY4wxkVjAMMYYE4kFDGOMMZFYwDDGGBOJBQxjjDGRWMAwTSucpvlfyx5/TEQi\nzbkU4b2vF5ERExlONAmmxf6LiPxm2PNLRCQj+6aJvz8ciVvr+y8RkYorURoz0SxgmGaWB948fG7/\nRgunvojq74H/o6qvqvDaU6q6XFVfHN6648jOoYxjOpJwdLQxNbGDxjQzF/gm8NHhLwwvIYjIQHh7\nqojcISI/FZGnROQKETlPgsWsHhKRQ8ve5tXh7L5rw4kfSzMMXx1u/6CEM/+G7/sHEbkZeLRCfs6V\nYHGoh0XkivC5SwjWcfiOiFxV4fONmGIinOblOxLMEHufiJwVPr8k3P+fwr+TwiRXAKeEJZR/FpF3\ni8h1Ze93q4i8ovQdicgXJZhB+SQRWR5+V2tE5Jdl8yt9WIKFdh4UkUgTKZoZIurUDvZnf5P9B/QT\nLPbyDNABfAz4TPja9cCby7cNb08FdhNMWZ0kmFfn0vC1DwNfKkv/i/D+4QTTPiSBfwA+FT6fJJg6\nZUn4vgPA4gr5PIRg+ocegouw3xDMHQTBpJOVprpYQjBDcGnKiOvC5z8PnBfe7yKYviINpNi3ANXh\nwJqyz3tL2fu+G/hK2eNbgVeE933gLeH9OPC/wOzw8duB74T3NxNO3UKNi2rZ3/T+q7nO1JjJpKqD\nIvI9gmmcsxGTrdFw4jURWUcwXxAE8/OsKNvuf8J9PBVu93yCiRuPFpG3hdt0Eqy0VgTuVdUNFfZ3\nPMEqcbvDff4AeAXBJHdQfbK6pzSYWr7ca4CzRKS0EE4SWEwwf9FXReQ4gjmBjqj+8atyCWefJVhB\n7kUEa6iUFt8qTRb4EPBfIvJTgvmFjAGwgGGmhC8TXIVfX/acS1ilGp7wytcIL58V1C977LP/MV8+\nkZqEjwX4kKreXp6BcDbVoVHyWOvsx6N5iwaru5Xv/1KCBYmOCdtQqgXPvd9LKFV2P6eqpc8swJ9V\n9eQK7/E6goB3NnCxiLxI960aZ2Ywa8Mwzaw0rfcegtLA35e99ixQWgHuDUBiHO//NgkcRtB4/DjB\n+tL/VOqxJCJHSLDa32juJViZric8mZ8L3BFh/5WCzCqCqjPC/R8X3u0iKGUAvItgyVsIqsk6ytI/\nCxwXfq5F7L+2RPn+HgcOKrWFiEhc9i3wtVhVf0+woFYnQbWgMVbCME2tvARwDfCBsue+BdwcNuCu\novrV/2jTMW8gONl3AO9T1YKIfJtgMZr7w5LLdiqsB77fDlS3ishF7AsSP1PVn0XYf6XXLgeuFZGH\nCU7wzxBc6X8d+LGIvAv4Ffs+78MECxs9AHxXVb8sIs8SNMw/BtxXaX+qWhSRtwLXSbBuSyzc7xPA\n/5NgzXcBvqy1r/Vupimb3twYY0wkViVljDEmEgsYxhhjIrGAYYwxJhILGMYYYyKxgGGMMSYSCxjG\nGGMisYBhjDEmEgsYxhhjIvn/B8IjU8qXVigAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x117baa4d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig1bis = plot_sfs(clf1_pipe.named_steps['sfs1'].get_metric_dict(), kind='std_dev')\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "As you can see on this graph, the best performance is reached with 10 features, yet during the pipeline process we end up using the performance of 25 features"
]
},
{
"cell_type": "code",
"execution_count": 219,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.7161</td>\n",
" <td>0.0535366</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.7161</td>\n",
" <td>0.0535366</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.716091</td>\n",
" <td>0.0531213</td>\n",
" <td>[0.690340909091, 0.806704260652, 0.66146326080...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0527245</td>\n",
" <td>0.0263623</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.71601</td>\n",
" <td>0.0554486</td>\n",
" <td>[0.691238038278, 0.812969924812, 0.66114790287...</td>\n",
" <td>(1, 34, 4, 5, 23, 9, 27, 21)</td>\n",
" <td>0.0550345</td>\n",
" <td>0.0275172</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.715621</td>\n",
" <td>0.0560501</td>\n",
" <td>[0.683612440191, 0.812343358396, 0.66099022390...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0556315</td>\n",
" <td>0.0278158</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"10 0.7161 0.0535366 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"9 0.7161 0.0535366 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"11 0.716091 0.0531213 [0.690340909091, 0.806704260652, 0.66146326080... \n",
"8 0.71601 0.0554486 [0.691238038278, 0.812969924812, 0.66114790287... \n",
"12 0.715621 0.0560501 [0.683612440191, 0.812343358396, 0.66099022390... \n",
"\n",
" feature_idx std_dev std_err \n",
"10 (1, 34, 4, 5, 9, 15, 21, 22, 23, 27) 0.0531368 0.0265684 \n",
"9 (1, 34, 4, 5, 9, 15, 21, 23, 27) 0.0531368 0.0265684 \n",
"11 (1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27) 0.0527245 0.0263623 \n",
"8 (1, 34, 4, 5, 23, 9, 27, 21) 0.0550345 0.0275172 \n",
"12 (1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27) 0.0556315 0.0278158 "
]
},
"execution_count": 219,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_clf1_pipe = pd.DataFrame.from_dict(clf1_pipe.named_steps['sfs1'].get_metric_dict(confidence_interval=0.90)).T\n",
"result_clf1_pipe.sort_values('avg_score', ascending=0, inplace=True)\n",
"result_clf1_pipe.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "## How can we use the feature selection with the best performance (SFS) during a Pipeline process?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"------------------------------"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "## Manually find the best k_features for SFS and fit our ensemble"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### clf1 - Logistic Newton"
]
},
{
"cell_type": "code",
"execution_count": 301,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 10/10"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (1, 34, 4, 5, 9, 15, 21, 22, 23, 27)\n",
"Score : 0.716100012599\n"
]
}
],
"source": [
"sfs1_manual = SFS(clf1, \n",
" k_features=10, #seems the best \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs1_manual = sfs1_manual.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs1_manual.k_feature_idx_)\n",
"print \"Score : \" + str(sfs1_manual.k_score_)"
]
},
{
"cell_type": "code",
"execution_count": 303,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.7161</td>\n",
" <td>0.0682963</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.7161</td>\n",
" <td>0.0682963</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.71601</td>\n",
" <td>0.0707353</td>\n",
" <td>[0.691238038278, 0.812969924812, 0.66114790287...</td>\n",
" <td>(1, 34, 4, 5, 23, 9, 27, 21)</td>\n",
" <td>0.0550345</td>\n",
" <td>0.0275172</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"9 0.7161 0.0682963 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"10 0.7161 0.0682963 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"8 0.71601 0.0707353 [0.691238038278, 0.812969924812, 0.66114790287... \n",
"\n",
" feature_idx std_dev std_err \n",
"9 (1, 34, 4, 5, 9, 15, 21, 23, 27) 0.0531368 0.0265684 \n",
"10 (1, 34, 4, 5, 9, 15, 21, 22, 23, 27) 0.0531368 0.0265684 \n",
"8 (1, 34, 4, 5, 23, 9, 27, 21) 0.0550345 0.0275172 "
]
},
"execution_count": 303,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sfs1_manual_df = pd.DataFrame.from_dict(sfs1_manual.get_metric_dict()).T\n",
"sfs1_manual_df.sort_values('avg_score', ascending=0, inplace=True)\n",
"sfs1_manual_df.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### clf2 - Xgboost 1"
]
},
{
"cell_type": "code",
"execution_count": 307,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)\n",
"Score : 0.630448411818\n"
]
}
],
"source": [
"# Looking for best number of k_features\n",
"sfs2_manual = SFS(clf2, \n",
" k_features=len(data.columns), #All features\n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs2_manual = sfs2_manual.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs2_manual.k_feature_idx_)\n",
"print \"Score : \" + str(sfs2_manual.k_score_)"
]
},
{
"cell_type": "code",
"execution_count": 306,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.709337</td>\n",
" <td>0.0569933</td>\n",
" <td>[0.740879186603, 0.776002506266, 0.68148848943...</td>\n",
" <td>(1, 34, 3, 5, 35, 33, 8, 13, 15, 19, 22, 23, 2...</td>\n",
" <td>0.0443427</td>\n",
" <td>0.0221714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.709308</td>\n",
" <td>0.0563241</td>\n",
" <td>[0.740879186603, 0.773966165414, 0.67959634184...</td>\n",
" <td>(1, 34, 3, 5, 35, 33, 8, 13, 15, 19, 22, 27, 2...</td>\n",
" <td>0.043822</td>\n",
" <td>0.021911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.708846</td>\n",
" <td>0.0564633</td>\n",
" <td>[0.739683014354, 0.775062656642, 0.68117313150...</td>\n",
" <td>(1, 34, 3, 5, 35, 33, 8, 11, 13, 15, 19, 22, 2...</td>\n",
" <td>0.0439304</td>\n",
" <td>0.0219652</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"15 0.709337 0.0569933 [0.740879186603, 0.776002506266, 0.68148848943... \n",
"14 0.709308 0.0563241 [0.740879186603, 0.773966165414, 0.67959634184... \n",
"16 0.708846 0.0564633 [0.739683014354, 0.775062656642, 0.68117313150... \n",
"\n",
" feature_idx std_dev std_err \n",
"15 (1, 34, 3, 5, 35, 33, 8, 13, 15, 19, 22, 23, 2... 0.0443427 0.0221714 \n",
"14 (1, 34, 3, 5, 35, 33, 8, 13, 15, 19, 22, 27, 2... 0.043822 0.021911 \n",
"16 (1, 34, 3, 5, 35, 33, 8, 11, 13, 15, 19, 22, 2... 0.0439304 0.0219652 "
]
},
"execution_count": 306,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sfs2_manual_df = pd.DataFrame.from_dict(sfs2_manual.get_metric_dict()).T\n",
"sfs2_manual_df.sort_values('avg_score', ascending=0, inplace=True)\n",
"sfs2_manual_df.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "**--> 15 seems to be the best number of k_features**"
]
},
{
"cell_type": "code",
"execution_count": 308,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 15/15"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (1, 34, 3, 5, 35, 33, 8, 13, 15, 19, 22, 23, 27, 29, 30)\n",
"Score : 0.709336603474\n"
]
}
],
"source": [
"sfs2_manual = SFS(clf2, \n",
" k_features=15, # Best numbers \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs2_manual = sfs2_manual.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs2_manual.k_feature_idx_)\n",
"print \"Score : \" + str(sfs2_manual.k_score_)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### clf3 - Xgboost 2"
]
},
{
"cell_type": "code",
"execution_count": 309,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)\n",
"Score : 0.623919955669\n"
]
}
],
"source": [
"# Looking for best number of k_features\n",
"sfs3_manual = SFS(clf3, \n",
" k_features=len(data.columns), #All features\n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs3_manual = sfs3_manual.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs3_manual.k_feature_idx_)\n",
"print \"Score : \" + str(sfs3_manual.k_score_)"
]
},
{
"cell_type": "code",
"execution_count": 310,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.699897</td>\n",
" <td>0.0615464</td>\n",
" <td>[0.753289473684, 0.753289473684, 0.65184484389...</td>\n",
" <td>(1, 34, 35, 5, 13, 15, 19, 22, 24, 27, 29, 30,...</td>\n",
" <td>0.0478852</td>\n",
" <td>0.0239426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.699845</td>\n",
" <td>0.061976</td>\n",
" <td>[0.752093301435, 0.754542606516, 0.65263323872...</td>\n",
" <td>(1, 34, 35, 5, 13, 15, 19, 22, 24, 25, 27, 29,...</td>\n",
" <td>0.0482194</td>\n",
" <td>0.0241097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.699766</td>\n",
" <td>0.0587912</td>\n",
" <td>[0.748056220096, 0.752192982456, 0.65373699148...</td>\n",
" <td>(1, 19, 24, 13, 30, 29)</td>\n",
" <td>0.0457416</td>\n",
" <td>0.0228708</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"13 0.699897 0.0615464 [0.753289473684, 0.753289473684, 0.65184484389... \n",
"14 0.699845 0.061976 [0.752093301435, 0.754542606516, 0.65263323872... \n",
"6 0.699766 0.0587912 [0.748056220096, 0.752192982456, 0.65373699148... \n",
"\n",
" feature_idx std_dev std_err \n",
"13 (1, 34, 35, 5, 13, 15, 19, 22, 24, 27, 29, 30,... 0.0478852 0.0239426 \n",
"14 (1, 34, 35, 5, 13, 15, 19, 22, 24, 25, 27, 29,... 0.0482194 0.0241097 \n",
"6 (1, 19, 24, 13, 30, 29) 0.0457416 0.0228708 "
]
},
"execution_count": 310,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sfs3_manual_df = pd.DataFrame.from_dict(sfs3_manual.get_metric_dict()).T\n",
"sfs3_manual_df.sort_values('avg_score', ascending=0, inplace=True)\n",
"sfs3_manual_df.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "**--> 13 seems to be the best number of k_features**"
]
},
{
"cell_type": "code",
"execution_count": 312,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 13/13"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (1, 34, 35, 5, 13, 15, 19, 22, 24, 27, 29, 30, 31)\n",
"Score : 0.699896678783\n"
]
}
],
"source": [
"sfs3_manual = SFS(clf3, \n",
" k_features=13, # Best numbers \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs3_manual = sfs3_manual.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs3_manual.k_feature_idx_)\n",
"print \"Score : \" + str(sfs3_manual.k_score_)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### clf4 - Logistic lbfgs"
]
},
{
"cell_type": "code",
"execution_count": 313,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)\n",
"Score : 0.675050736414\n"
]
}
],
"source": [
"# Looking for best number of k_features\n",
"sfs4_manual = SFS(clf4, \n",
" k_features=len(data.columns), #All features\n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs4_manual = sfs4_manual.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs4_manual.k_feature_idx_)\n",
"print \"Score : \" + str(sfs4_manual.k_score_)"
]
},
{
"cell_type": "code",
"execution_count": 314,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.7161</td>\n",
" <td>0.0682963</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.7161</td>\n",
" <td>0.0682963</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.716091</td>\n",
" <td>0.0677664</td>\n",
" <td>[0.690340909091, 0.806704260652, 0.66146326080...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0527245</td>\n",
" <td>0.0263623</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"9 0.7161 0.0682963 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"10 0.7161 0.0682963 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"11 0.716091 0.0677664 [0.690340909091, 0.806704260652, 0.66146326080... \n",
"\n",
" feature_idx std_dev std_err \n",
"9 (1, 34, 4, 5, 9, 15, 21, 23, 27) 0.0531368 0.0265684 \n",
"10 (1, 34, 4, 5, 9, 15, 21, 22, 23, 27) 0.0531368 0.0265684 \n",
"11 (1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27) 0.0527245 0.0263623 "
]
},
"execution_count": 314,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sfs4_manual_df = pd.DataFrame.from_dict(sfs4_manual.get_metric_dict()).T\n",
"sfs4_manual_df.sort_values('avg_score', ascending=0, inplace=True)\n",
"sfs4_manual_df.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**--> 9 seems to be the best number of k_features**"
]
},
{
"cell_type": "code",
"execution_count": 315,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 9/9"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (1, 34, 4, 5, 9, 15, 21, 23, 27)\n",
"Score : 0.716100012599\n"
]
}
],
"source": [
"sfs4_manual = SFS(clf4, \n",
" k_features=9, #Best numbers \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs4_manual = sfs4_manual.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs4_manual.k_feature_idx_)\n",
"print \"Score : \" + str(sfs4_manual.k_score_)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Eclf - Ensemble"
]
},
{
"cell_type": "code",
"execution_count": 317,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf1_pipe_manual = Pipeline([('sfs1_manual', sfs1_manual),\n",
" ('Logistic Newton', clf1)])\n",
"\n",
"clf2_pipe_manual = Pipeline([('sfs2_manual', sfs2_manual),\n",
" ('Xgb1', clf2)])\n",
"\n",
"clf3_pipe_manual = Pipeline([('sfs3_manual', sfs3_manual),\n",
" ('Xgb2', clf3)])\n",
"\n",
"clf4_pipe_manual = Pipeline([('sfs4_manual', sfs4_manual),\n",
" ('Logistic lbfgs', clf4)])\n",
"\n",
"\n",
"\n",
"eclf = EnsembleVoteClassifier(clfs=[clf1_pipe_manual, clf2_pipe_manual, \n",
" clf3_pipe_manual, clf4_pipe_manual],\n",
" voting='soft')\n"
]
},
{
"cell_type": "code",
"execution_count": 318,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 15/15"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.6387 (+/- 0.08) [Logistic Newton]\n",
"roc_auc: 0.6698 (+/- 0.05) [Xgb1]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 13/13"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"roc_auc: 0.6603 (+/- 0.04) [Xgb2]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 9/9"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"roc_auc: 0.6474 (+/- 0.08) [Logistic lbfgs]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 9/9"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"roc_auc: 0.6564 (+/- 0.08) [Ensemble]\n"
]
}
],
"source": [
"for clf, label in zip([clf1_pipe_manual, clf2_pipe_manual, clf3_pipe_manual, clf4_pipe_manual, eclf], \n",
" ['Logistic Newton', 'Xgb1', 'Xgb2', 'Logistic lbfgs', 'Ensemble']):\n",
"\n",
" scores = cross_val_score(clf, data.values, y.values, cv=5, scoring='roc_auc')\n",
" print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Looking for the bad score in the SFS Pipeline:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### The score of our CLF1 was 0.6387 (+/- 0.08) with the pipeline process and 0.7161 (+/- 0.05) with the manual SFS. Searching why ..."
]
},
{
"cell_type": "code",
"execution_count": 329,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature selection (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)\n",
"Score : 0.675488559599\n"
]
}
],
"source": [
"# SFS with full numbers of k_features\n",
"sfs1_manual_all = SFS(clf1, \n",
" k_features=len(data.columns), \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" cv=5)\n",
"sfs1_manual_all = sfs1_manual_all.fit(data.values, y.values)\n",
"\n",
"print \"Feature selection \" + str(sfs1_manual_all.k_feature_idx_)\n",
"print \"Score : \" + str(sfs1_manual_all.k_score_)\n"
]
},
{
"cell_type": "code",
"execution_count": 330,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.662581</td>\n",
" <td>0.068987</td>\n",
" <td>[0.691686602871, 0.743577694236, 0.61857458215...</td>\n",
" <td>(1,)</td>\n",
" <td>0.0536742</td>\n",
" <td>0.0268371</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.694117</td>\n",
" <td>0.0623785</td>\n",
" <td>[0.706040669856, 0.768327067669, 0.64001892147...</td>\n",
" <td>(1, 9)</td>\n",
" <td>0.0485326</td>\n",
" <td>0.0242663</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.701039</td>\n",
" <td>0.0758659</td>\n",
" <td>[0.698863636364, 0.791823308271, 0.65231788079...</td>\n",
" <td>(1, 23, 9)</td>\n",
" <td>0.0590263</td>\n",
" <td>0.0295131</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.707582</td>\n",
" <td>0.0730441</td>\n",
" <td>[0.713217703349, 0.804824561404, 0.66083254493...</td>\n",
" <td>(27, 1, 23, 9)</td>\n",
" <td>0.0568308</td>\n",
" <td>0.0284154</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.711155</td>\n",
" <td>0.0800364</td>\n",
" <td>[0.708283492823, 0.814223057644, 0.63954588457...</td>\n",
" <td>(23, 1, 27, 5, 9)</td>\n",
" <td>0.0622711</td>\n",
" <td>0.0311355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.712508</td>\n",
" <td>0.0834603</td>\n",
" <td>[0.705293062201, 0.817355889724, 0.63923052664...</td>\n",
" <td>(1, 34, 5, 23, 9, 27)</td>\n",
" <td>0.064935</td>\n",
" <td>0.0324675</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.715193</td>\n",
" <td>0.0813482</td>\n",
" <td>[0.67793062201, 0.825031328321, 0.655786818038...</td>\n",
" <td>(1, 34, 5, 23, 9, 27, 21)</td>\n",
" <td>0.0632917</td>\n",
" <td>0.0316458</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.71601</td>\n",
" <td>0.0707353</td>\n",
" <td>[0.691238038278, 0.812969924812, 0.66114790287...</td>\n",
" <td>(1, 34, 4, 5, 23, 9, 27, 21)</td>\n",
" <td>0.0550345</td>\n",
" <td>0.0275172</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.7161</td>\n",
" <td>0.0682963</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.7161</td>\n",
" <td>0.0682963</td>\n",
" <td>[0.689443779904, 0.807644110276, 0.66146326080...</td>\n",
" <td>(1, 34, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0531368</td>\n",
" <td>0.0265684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.716091</td>\n",
" <td>0.0677664</td>\n",
" <td>[0.690340909091, 0.806704260652, 0.66146326080...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0527245</td>\n",
" <td>0.0263623</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.715621</td>\n",
" <td>0.0715027</td>\n",
" <td>[0.683612440191, 0.812343358396, 0.66099022390...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27)</td>\n",
" <td>0.0556315</td>\n",
" <td>0.0278158</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.714314</td>\n",
" <td>0.0710532</td>\n",
" <td>[0.682864832536, 0.80545112782, 0.65862503942,...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27, 30)</td>\n",
" <td>0.0552818</td>\n",
" <td>0.0276409</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.713406</td>\n",
" <td>0.0729</td>\n",
" <td>[0.68211722488, 0.80701754386, 0.659098076317,...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 11, 15, 16, 21, 22, 23, 2...</td>\n",
" <td>0.0567186</td>\n",
" <td>0.0283593</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.713279</td>\n",
" <td>0.0727949</td>\n",
" <td>[0.68211722488, 0.80701754386, 0.659098076317,...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 11, 15, 16, 17, 21, 22, 2...</td>\n",
" <td>0.0566369</td>\n",
" <td>0.0283184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.711636</td>\n",
" <td>0.0710219</td>\n",
" <td>[0.683014354067, 0.802631578947, 0.65767896562...</td>\n",
" <td>(1, 34, 35, 4, 5, 9, 11, 15, 16, 17, 21, 22, 2...</td>\n",
" <td>0.0552574</td>\n",
" <td>0.0276287</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.710515</td>\n",
" <td>0.0770536</td>\n",
" <td>[0.689892344498, 0.806547619048, 0.66162093976...</td>\n",
" <td>(1, 4, 5, 9, 11, 15, 16, 17, 21, 22, 23, 25, 2...</td>\n",
" <td>0.0599503</td>\n",
" <td>0.0299751</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.707521</td>\n",
" <td>0.0758251</td>\n",
" <td>[0.69019138756, 0.802631578947, 0.660674865973...</td>\n",
" <td>(0, 1, 4, 5, 9, 11, 15, 16, 17, 21, 22, 23, 25...</td>\n",
" <td>0.0589945</td>\n",
" <td>0.0294972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.704932</td>\n",
" <td>0.0744002</td>\n",
" <td>[0.688397129187, 0.795739348371, 0.66051718700...</td>\n",
" <td>(0, 1, 4, 5, 9, 11, 15, 16, 17, 21, 22, 23, 25...</td>\n",
" <td>0.0578859</td>\n",
" <td>0.0289429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.702572</td>\n",
" <td>0.0710805</td>\n",
" <td>[0.694976076555, 0.785087719298, 0.65815200252...</td>\n",
" <td>(0, 1, 4, 5, 9, 10, 11, 15, 16, 17, 21, 22, 23...</td>\n",
" <td>0.0553031</td>\n",
" <td>0.0276515</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>0.700254</td>\n",
" <td>0.0736301</td>\n",
" <td>[0.6875, 0.787593984962, 0.655944497004, 0.740...</td>\n",
" <td>(0, 1, 4, 5, 9, 10, 11, 14, 15, 16, 17, 21, 22...</td>\n",
" <td>0.0572868</td>\n",
" <td>0.0286434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.697627</td>\n",
" <td>0.0758364</td>\n",
" <td>[0.673444976077, 0.791353383459, 0.65373699148...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 14, 15, 16, 17, 21,...</td>\n",
" <td>0.0590033</td>\n",
" <td>0.0295016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.697175</td>\n",
" <td>0.0577431</td>\n",
" <td>[0.685107655502, 0.750939849624, 0.64963733837...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 14, 15, 16, 17,...</td>\n",
" <td>0.0449261</td>\n",
" <td>0.022463</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.697112</td>\n",
" <td>0.057829</td>\n",
" <td>[0.685107655502, 0.750939849624, 0.64932198044...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0449929</td>\n",
" <td>0.0224965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.695608</td>\n",
" <td>0.0601725</td>\n",
" <td>[0.683911483254, 0.75313283208, 0.645537685273...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0468162</td>\n",
" <td>0.0234081</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.692777</td>\n",
" <td>0.0605234</td>\n",
" <td>[0.683313397129, 0.752506265664, 0.65436770734...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0470892</td>\n",
" <td>0.0235446</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0.690958</td>\n",
" <td>0.0590519</td>\n",
" <td>[0.682715311005, 0.750313283208, 0.65216020182...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0459444</td>\n",
" <td>0.0229722</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0.690835</td>\n",
" <td>0.0589853</td>\n",
" <td>[0.682416267943, 0.750313283208, 0.65216020182...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0458926</td>\n",
" <td>0.0229463</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.687406</td>\n",
" <td>0.0600364</td>\n",
" <td>[0.66716507177, 0.750313283208, 0.653421633554...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0467104</td>\n",
" <td>0.0233552</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>0.687284</td>\n",
" <td>0.0599615</td>\n",
" <td>[0.666866028708, 0.75, 0.653421633554, 0.73478...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0466521</td>\n",
" <td>0.0233261</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>0.684083</td>\n",
" <td>0.057131</td>\n",
" <td>[0.665669856459, 0.747807017544, 0.65247555976...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0444498</td>\n",
" <td>0.0222249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>0.684023</td>\n",
" <td>0.057163</td>\n",
" <td>[0.665370813397, 0.747807017544, 0.65247555976...</td>\n",
" <td>(0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,...</td>\n",
" <td>0.0444748</td>\n",
" <td>0.0222374</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>0.68181</td>\n",
" <td>0.0593787</td>\n",
" <td>[0.665968899522, 0.747493734336, 0.65405234941...</td>\n",
" <td>(0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, ...</td>\n",
" <td>0.0461987</td>\n",
" <td>0.0230993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>0.678433</td>\n",
" <td>0.0560664</td>\n",
" <td>[0.669258373206, 0.745614035088, 0.64932198044...</td>\n",
" <td>(0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 1...</td>\n",
" <td>0.0436216</td>\n",
" <td>0.0218108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>0.680614</td>\n",
" <td>0.056615</td>\n",
" <td>[0.671949760766, 0.74373433584, 0.658152002523...</td>\n",
" <td>(0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14...</td>\n",
" <td>0.0440484</td>\n",
" <td>0.0220242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>0.675489</td>\n",
" <td>0.0656649</td>\n",
" <td>[0.625897129187, 0.75469924812, 0.65752128666,...</td>\n",
" <td>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...</td>\n",
" <td>0.0510896</td>\n",
" <td>0.0255448</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"1 0.662581 0.068987 [0.691686602871, 0.743577694236, 0.61857458215... \n",
"2 0.694117 0.0623785 [0.706040669856, 0.768327067669, 0.64001892147... \n",
"3 0.701039 0.0758659 [0.698863636364, 0.791823308271, 0.65231788079... \n",
"4 0.707582 0.0730441 [0.713217703349, 0.804824561404, 0.66083254493... \n",
"5 0.711155 0.0800364 [0.708283492823, 0.814223057644, 0.63954588457... \n",
"6 0.712508 0.0834603 [0.705293062201, 0.817355889724, 0.63923052664... \n",
"7 0.715193 0.0813482 [0.67793062201, 0.825031328321, 0.655786818038... \n",
"8 0.71601 0.0707353 [0.691238038278, 0.812969924812, 0.66114790287... \n",
"9 0.7161 0.0682963 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"10 0.7161 0.0682963 [0.689443779904, 0.807644110276, 0.66146326080... \n",
"11 0.716091 0.0677664 [0.690340909091, 0.806704260652, 0.66146326080... \n",
"12 0.715621 0.0715027 [0.683612440191, 0.812343358396, 0.66099022390... \n",
"13 0.714314 0.0710532 [0.682864832536, 0.80545112782, 0.65862503942,... \n",
"14 0.713406 0.0729 [0.68211722488, 0.80701754386, 0.659098076317,... \n",
"15 0.713279 0.0727949 [0.68211722488, 0.80701754386, 0.659098076317,... \n",
"16 0.711636 0.0710219 [0.683014354067, 0.802631578947, 0.65767896562... \n",
"17 0.710515 0.0770536 [0.689892344498, 0.806547619048, 0.66162093976... \n",
"18 0.707521 0.0758251 [0.69019138756, 0.802631578947, 0.660674865973... \n",
"19 0.704932 0.0744002 [0.688397129187, 0.795739348371, 0.66051718700... \n",
"20 0.702572 0.0710805 [0.694976076555, 0.785087719298, 0.65815200252... \n",
"21 0.700254 0.0736301 [0.6875, 0.787593984962, 0.655944497004, 0.740... \n",
"22 0.697627 0.0758364 [0.673444976077, 0.791353383459, 0.65373699148... \n",
"23 0.697175 0.0577431 [0.685107655502, 0.750939849624, 0.64963733837... \n",
"24 0.697112 0.057829 [0.685107655502, 0.750939849624, 0.64932198044... \n",
"25 0.695608 0.0601725 [0.683911483254, 0.75313283208, 0.645537685273... \n",
"26 0.692777 0.0605234 [0.683313397129, 0.752506265664, 0.65436770734... \n",
"27 0.690958 0.0590519 [0.682715311005, 0.750313283208, 0.65216020182... \n",
"28 0.690835 0.0589853 [0.682416267943, 0.750313283208, 0.65216020182... \n",
"29 0.687406 0.0600364 [0.66716507177, 0.750313283208, 0.653421633554... \n",
"30 0.687284 0.0599615 [0.666866028708, 0.75, 0.653421633554, 0.73478... \n",
"31 0.684083 0.057131 [0.665669856459, 0.747807017544, 0.65247555976... \n",
"32 0.684023 0.057163 [0.665370813397, 0.747807017544, 0.65247555976... \n",
"33 0.68181 0.0593787 [0.665968899522, 0.747493734336, 0.65405234941... \n",
"34 0.678433 0.0560664 [0.669258373206, 0.745614035088, 0.64932198044... \n",
"35 0.680614 0.056615 [0.671949760766, 0.74373433584, 0.658152002523... \n",
"36 0.675489 0.0656649 [0.625897129187, 0.75469924812, 0.65752128666,... \n",
"\n",
" feature_idx std_dev std_err \n",
"1 (1,) 0.0536742 0.0268371 \n",
"2 (1, 9) 0.0485326 0.0242663 \n",
"3 (1, 23, 9) 0.0590263 0.0295131 \n",
"4 (27, 1, 23, 9) 0.0568308 0.0284154 \n",
"5 (23, 1, 27, 5, 9) 0.0622711 0.0311355 \n",
"6 (1, 34, 5, 23, 9, 27) 0.064935 0.0324675 \n",
"7 (1, 34, 5, 23, 9, 27, 21) 0.0632917 0.0316458 \n",
"8 (1, 34, 4, 5, 23, 9, 27, 21) 0.0550345 0.0275172 \n",
"9 (1, 34, 4, 5, 9, 15, 21, 23, 27) 0.0531368 0.0265684 \n",
"10 (1, 34, 4, 5, 9, 15, 21, 22, 23, 27) 0.0531368 0.0265684 \n",
"11 (1, 34, 35, 4, 5, 9, 15, 21, 22, 23, 27) 0.0527245 0.0263623 \n",
"12 (1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27) 0.0556315 0.0278158 \n",
"13 (1, 34, 35, 4, 5, 9, 11, 15, 21, 22, 23, 27, 30) 0.0552818 0.0276409 \n",
"14 (1, 34, 35, 4, 5, 9, 11, 15, 16, 21, 22, 23, 2... 0.0567186 0.0283593 \n",
"15 (1, 34, 35, 4, 5, 9, 11, 15, 16, 17, 21, 22, 2... 0.0566369 0.0283184 \n",
"16 (1, 34, 35, 4, 5, 9, 11, 15, 16, 17, 21, 22, 2... 0.0552574 0.0276287 \n",
"17 (1, 4, 5, 9, 11, 15, 16, 17, 21, 22, 23, 25, 2... 0.0599503 0.0299751 \n",
"18 (0, 1, 4, 5, 9, 11, 15, 16, 17, 21, 22, 23, 25... 0.0589945 0.0294972 \n",
"19 (0, 1, 4, 5, 9, 11, 15, 16, 17, 21, 22, 23, 25... 0.0578859 0.0289429 \n",
"20 (0, 1, 4, 5, 9, 10, 11, 15, 16, 17, 21, 22, 23... 0.0553031 0.0276515 \n",
"21 (0, 1, 4, 5, 9, 10, 11, 14, 15, 16, 17, 21, 22... 0.0572868 0.0286434 \n",
"22 (0, 1, 3, 4, 5, 9, 10, 11, 14, 15, 16, 17, 21,... 0.0590033 0.0295016 \n",
"23 (0, 1, 3, 4, 5, 9, 10, 11, 12, 14, 15, 16, 17,... 0.0449261 0.022463 \n",
"24 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0449929 0.0224965 \n",
"25 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0468162 0.0234081 \n",
"26 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0470892 0.0235446 \n",
"27 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0459444 0.0229722 \n",
"28 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0458926 0.0229463 \n",
"29 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0467104 0.0233552 \n",
"30 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0466521 0.0233261 \n",
"31 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0444498 0.0222249 \n",
"32 (0, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16,... 0.0444748 0.0222374 \n",
"33 (0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, ... 0.0461987 0.0230993 \n",
"34 (0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 1... 0.0436216 0.0218108 \n",
"35 (0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14... 0.0440484 0.0220242 \n",
"36 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,... 0.0510896 0.0255448 "
]
},
"execution_count": 330,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sfs1_manual_all = pd.DataFrame.from_dict(sfs1_manual_all.get_metric_dict()).T\n",
"sfs1_manual_all"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"No SFS avg_score matches the score found during the Pipeline process (0.6387)."
]
},
{
"cell_type": "code",
"execution_count": 331,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from IPython.display import Image"
]
},
{
"cell_type": "code",
"execution_count": 335,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<img src=\"http://i.giphy.com/ePeHKwWSed0Ag.gif\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 335,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Image(url=\"http://i.giphy.com/ePeHKwWSed0Ag.gif\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"[GitHub issue](https://github.com/rasbt/mlxtend/issues/41)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Untuned model + feature selection VS tuned model + feature selection"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The goal is to compare the scores of two logistic regressions (one classic & one tuned), each with feature selection."
]
},
{
"cell_type": "code",
"execution_count": 339,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"model_classic = LogisticRegression(class_weight='balanced', random_state=17)\n",
"model_tuned = LogisticRegression(class_weight='balanced', solver='newton-cg', C=100.0, random_state=17)"
]
},
{
"cell_type": "code",
"execution_count": 348,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best features selection for classic model['col_1', 'col_34', 'col_25', 'col_21', 'col_23', 'col_9', 'col_27', 'col_15']\n",
"roc_auc: 0.7106 (+/- 0.06) [Classic Model]\n"
]
}
],
"source": [
"# Classic model \n",
"sfs_classic = SFS(model_classic, \n",
" k_features=len(data.columns), \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" print_progress=True,\n",
" cv=5)\n",
"sfs_classic = sfs_classic.fit(data.values, y.values)\n",
"\n",
"result_sfs_classic = pd.DataFrame.from_dict(sfs_classic.get_metric_dict()).T\n",
"result_sfs_classic.sort_values('avg_score', ascending=0, inplace=True)\n",
"features_sfs_classic = result_sfs_classic.feature_idx.head(1).tolist()\n",
"select_features_sfs_classic = data.columns[features_sfs_classic]\n",
"print \"Best features selection for classic model\" + str(list(select_features_sfs_classic))\n",
"\n",
"scores = cross_val_score(model_classic, data[select_features_sfs_classic], y, cv=5, scoring='roc_auc')\n",
"print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), \"Classic Model\"))"
]
},
{
"cell_type": "code",
"execution_count": 349,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best features selection for tuned model['col_1', 'col_34', 'col_4', 'col_5', 'col_9', 'col_15', 'col_21', 'col_23', 'col_27']\n",
"roc_auc: 0.7161 (+/- 0.05) [Tuned Model]\n"
]
}
],
"source": [
"# Tuned model \n",
"sfs_tuned = SFS(model_tuned, \n",
" k_features=len(data.columns), \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" print_progress=True,\n",
" cv=5)\n",
"sfs_tuned = sfs_tuned.fit(data.values, y.values)\n",
"\n",
"result_sfs_tuned = pd.DataFrame.from_dict(sfs_tuned.get_metric_dict()).T\n",
"result_sfs_tuned.sort_values('avg_score', ascending=0, inplace=True)\n",
"features_sfs_tuned = result_sfs_tuned.feature_idx.head(1).tolist()\n",
"select_features_sfs_tuned = data.columns[features_sfs_tuned]\n",
"print \"Best features selection for tuned model\" + str(list(select_features_sfs_tuned))\n",
"\n",
"scores = cross_val_score(model_tuned, data[select_features_sfs_tuned], y, cv=5, scoring='roc_auc')\n",
"print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), \"Tuned Model\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The feature selections for these models **are not the same**."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### The tuned model has a better score (0.7161) than the classic model (0.7106)"
]
},
{
"cell_type": "code",
"execution_count": 350,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.7133 (+/- 0.06) [Tuned Model with classic SFS]\n"
]
}
],
"source": [
"# Tuned model with Features selection of Classic model :\n",
"scores = cross_val_score(model_tuned, data[select_features_sfs_classic], y, cv=5, scoring='roc_auc')\n",
"print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), \"Tuned Model with classic SFS\"))"
]
},
{
"cell_type": "code",
"execution_count": 351,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"roc_auc: 0.7080 (+/- 0.06) [Classic Model with tuned SFS]\n"
]
}
],
"source": [
"# Classic model with Features selection of Tuned model :\n",
"scores = cross_val_score(model_classic, data[select_features_sfs_tuned], y, cv=5, scoring='roc_auc')\n",
"print(\"roc_auc: %0.4f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), \"Classic Model with tuned SFS\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Both cross-over scores are lower than each model paired with its own feature selection."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Let's try to find the best score for a model (find the best tuned model + feature selection)"
]
},
{
"cell_type": "code",
"execution_count": 354,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking for C : 0.01 and solver : newton-cg\n",
"Looking for C : 0.01 and solver : lbfgs"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 0.01 and solver : liblinear"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 0.01 and solver : sag"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/babou/anaconda/lib/python2.7/site-packages/sklearn/linear_model/sag.py:267: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
" \"the coef_ did not converge\", ConvergenceWarning)\n",
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 1 and solver : newton-cg"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 1 and solver : lbfgs"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 1 and solver : liblinear"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 1 and solver : sag"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 10 and solver : newton-cg"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 10 and solver : lbfgs"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 10 and solver : liblinear"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 10 and solver : sag"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 50 and solver : newton-cg"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 50 and solver : lbfgs"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 50 and solver : liblinear"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 50 and solver : sag"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 70 and solver : newton-cg"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 70 and solver : lbfgs"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 70 and solver : liblinear"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 70 and solver : sag"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 100 and solver : newton-cg"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 100 and solver : lbfgs"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 100 and solver : liblinear"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Looking for C : 100 and solver : sag"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 36/36"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Model's params\n",
"C_params = [0.01 , 1, 10, 50, 70, 100]\n",
"solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag']\n",
"\n",
"my_result_list = []\n",
"for C_param in C_params:\n",
" for solver in solvers:\n",
" print \"Looking for C : %s and solver : %s\" % (C_param, solver)\n",
" model = LogisticRegression(class_weight='balanced', random_state=17, \n",
" solver=solver, C=C_param)\n",
" sfs = SFS(model, \n",
" k_features=len(data.columns), \n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" print_progress=False,\n",
" cv=5)\n",
" sfs = sfs.fit(data.values, y.values)\n",
"\n",
" result_sfs = pd.DataFrame.from_dict(sfs.get_metric_dict()).T\n",
" result_sfs.sort_values('avg_score', ascending=0, inplace=True)\n",
" features_sfs = result_sfs.feature_idx.head(1).tolist()\n",
" select_features_sfs = list(data.columns[features_sfs])\n",
"\n",
" scores = cross_val_score(model, data[select_features_sfs], y, cv=5, scoring='roc_auc')\n",
" my_result_list.append({'C' : C_param,\n",
" 'solver' : solver,\n",
" 'auc' : scores.mean(),\n",
" 'std' : scores.std(),\n",
" 'best_columns' : select_features_sfs})\n",
" \n",
"my_result = pd.DataFrame(my_result_list)"
]
},
{
"cell_type": "code",
"execution_count": 408,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>auc</th>\n",
" <th>best_columns</th>\n",
" <th>solver</th>\n",
" <th>std</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>100.0</td>\n",
" <td>0.716163</td>\n",
" <td>[col_1, col_34, col_4, col_5, col_9, col_15, c...</td>\n",
" <td>sag</td>\n",
" <td>0.053320</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>70.0</td>\n",
" <td>0.716100</td>\n",
" <td>[col_1, col_34, col_4, col_5, col_9, col_15, c...</td>\n",
" <td>lbfgs</td>\n",
" <td>0.053137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>70.0</td>\n",
" <td>0.716100</td>\n",
" <td>[col_1, col_34, col_4, col_5, col_9, col_15, c...</td>\n",
" <td>newton-cg</td>\n",
" <td>0.053137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>50.0</td>\n",
" <td>0.716100</td>\n",
" <td>[col_1, col_34, col_4, col_5, col_9, col_15, c...</td>\n",
" <td>liblinear</td>\n",
" <td>0.053137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>50.0</td>\n",
" <td>0.716100</td>\n",
" <td>[col_1, col_34, col_4, col_5, col_9, col_15, c...</td>\n",
" <td>newton-cg</td>\n",
" <td>0.053137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>50.0</td>\n",
" <td>0.716100</td>\n",
" <td>[col_1, col_34, col_4, col_5, col_9, col_15, c...</td>\n",
" <td>lbfgs</td>\n",
" <td>0.053137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>100.0</td>\n",
" <td>0.716100</td>\n",
" <td>[col_1, col_34, col_4, col_5, col_9, col_15, c...</td>\n",
" <td>liblinear</td>\n",
" <td>0.053137</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C auc best_columns \\\n",
"23 100.0 0.716163 [col_1, col_34, col_4, col_5, col_9, col_15, c... \n",
"17 70.0 0.716100 [col_1, col_34, col_4, col_5, col_9, col_15, c... \n",
"16 70.0 0.716100 [col_1, col_34, col_4, col_5, col_9, col_15, c... \n",
"14 50.0 0.716100 [col_1, col_34, col_4, col_5, col_9, col_15, c... \n",
"12 50.0 0.716100 [col_1, col_34, col_4, col_5, col_9, col_15, c... \n",
"13 50.0 0.716100 [col_1, col_34, col_4, col_5, col_9, col_15, c... \n",
"22 100.0 0.716100 [col_1, col_34, col_4, col_5, col_9, col_15, c... \n",
"\n",
" solver std \n",
"23 sag 0.053320 \n",
"17 lbfgs 0.053137 \n",
"16 newton-cg 0.053137 \n",
"14 liblinear 0.053137 \n",
"12 newton-cg 0.053137 \n",
"13 lbfgs 0.053137 \n",
"22 liblinear 0.053137 "
]
},
"execution_count": 408,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"my_result.sort_values('auc', ascending=0, inplace=True)\n",
"my_result.head(7)"
]
},
{
"cell_type": "code",
"execution_count": 361,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The parameters for a logistic regression is solver = sag & C = 100 with [['col_1', 'col_34', 'col_4', 'col_5', 'col_9', 'col_15', 'col_21', 'col_23', 'col_27']]\n"
]
}
],
"source": [
"print \"The parameters for a logistic regression is solver = sag & C = 100 with \" + str(my_result.best_columns.head(1).tolist())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using ColumnSelector as @rasbt said ([link](https://github.com/rasbt/mlxtend/issues/41#issuecomment-212673089))"
]
},
{
"cell_type": "code",
"execution_count": 362,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from mlxtend.feature_selection import ColumnSelector"
]
},
{
"cell_type": "code",
"execution_count": 414,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['col_1', 'col_34', 'col_25', 'col_21', 'col_23', 'col_9', 'col_27', 'col_15']\n"
]
}
],
"source": [
"model_classic = LogisticRegression(class_weight='balanced', random_state=17)\n",
"\n",
"sfs1 = SFS(model_classic, \n",
" k_features=len(data.columns),\n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" print_progress=False,\n",
" cv=5)\n",
"\n",
"sfs1.fit(data.values, y.values)\n",
"\n",
"\n",
"result_sfs = pd.DataFrame.from_dict(sfs1.get_metric_dict()).T\n",
"result_sfs.sort_values('avg_score', ascending=0, inplace=True)\n",
"features_sfs = result_sfs.feature_idx.head(1).tolist()\n",
"select_features_sfs = list(data.columns[features_sfs])\n",
"\n",
"col_sel = ColumnSelector(cols=select_features_sfs)\n",
"print col_sel.cols"
]
},
{
"cell_type": "code",
"execution_count": 415,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(1, 34, 25, 21, 23, 9, 27, 15)"
]
},
"execution_count": 415,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get features selection with the best score in SFS.get_metric_dict [We can improve it maybe]\n",
"sfs1.get_metric_dict()[sorted(sfs1.get_metric_dict().keys(), key=lambda x: (sfs1.get_metric_dict()[x]['avg_score']), reverse=True)[0]]['feature_idx']"
]
},
{
"cell_type": "code",
"execution_count": 416,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_score</th>\n",
" <th>ci_bound</th>\n",
" <th>cv_scores</th>\n",
" <th>feature_idx</th>\n",
" <th>std_dev</th>\n",
" <th>std_err</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.710604</td>\n",
" <td>0.0784646</td>\n",
" <td>[0.681519138756, 0.808270676692, 0.68905707978...</td>\n",
" <td>(1, 34, 25, 21, 23, 9, 27, 15)</td>\n",
" <td>0.0610481</td>\n",
" <td>0.0305241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.710604</td>\n",
" <td>0.0784646</td>\n",
" <td>[0.681519138756, 0.808270676692, 0.68905707978...</td>\n",
" <td>(1, 34, 9, 15, 21, 22, 23, 25, 27)</td>\n",
" <td>0.0610481</td>\n",
" <td>0.0305241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.710604</td>\n",
" <td>0.0784646</td>\n",
" <td>[0.681519138756, 0.808270676692, 0.68905707978...</td>\n",
" <td>(1, 34, 35, 9, 15, 21, 22, 23, 25, 27)</td>\n",
" <td>0.0610481</td>\n",
" <td>0.0305241</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_score ci_bound cv_scores \\\n",
"8 0.710604 0.0784646 [0.681519138756, 0.808270676692, 0.68905707978... \n",
"9 0.710604 0.0784646 [0.681519138756, 0.808270676692, 0.68905707978... \n",
"10 0.710604 0.0784646 [0.681519138756, 0.808270676692, 0.68905707978... \n",
"\n",
" feature_idx std_dev std_err \n",
"8 (1, 34, 25, 21, 23, 9, 27, 15) 0.0610481 0.0305241 \n",
"9 (1, 34, 9, 15, 21, 22, 23, 25, 27) 0.0610481 0.0305241 \n",
"10 (1, 34, 35, 9, 15, 21, 22, 23, 25, 27) 0.0610481 0.0305241 "
]
},
"execution_count": 416,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_sfs.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Let's do a GridSearchCV with ColumnSelector"
]
},
{
"cell_type": "code",
"execution_count": 419,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'SequentialFeatureSelector' object has no attribute 'subsets_'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-419-c1e0d06c465d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m cv=5)\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mcol_sel1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mColumnSelector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msfs1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_metric_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msfs1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_metric_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msfs1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_metric_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'avg_score'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreverse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'feature_idx'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m pipe = Pipeline([('sfs1', sfs1),\n",
"\u001b[0;32m/Users/babou/anaconda/lib/python2.7/site-packages/mlxtend/feature_selection/sequential_feature_selector.pyc\u001b[0m in \u001b[0;36mget_metric_dict\u001b[0;34m(self, confidence_interval)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_metric_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfidence_interval\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.95\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mfdict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdeepcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubsets_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfdict\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0mstd_dev\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubsets_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cv_scores'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'SequentialFeatureSelector' object has no attribute 'subsets_'"
]
}
],
"source": [
"clf1 = LogisticRegression(class_weight='balanced', random_state=17)\n",
"\n",
"sfs1 = SFS(model_classic, \n",
" k_features=len(data.columns),\n",
" forward=True, \n",
" floating=False, \n",
" scoring='roc_auc',\n",
" print_progress=False,\n",
" cv=5)\n",
"\n",
"col_sel1 = ColumnSelector(cols=sfs1.get_metric_dict()[sorted(sfs1.get_metric_dict().keys(), key=lambda x: (sfs1.get_metric_dict()[x]['avg_score']), reverse=True)[0]]['feature_idx'])\n",
"\n",
"pipe = Pipeline([('col_sel1', col_sel1),\n",
" ('model_classic', model_classic)])\n",
"\n",
"\n",
"params = {\"model_classic__C\" : [0.01 , 1, 10, 50, 70, 100],\n",
"          \"model_classic__solver\" : ['newton-cg', 'lbfgs', 'liblinear', 'sag']}\n",
"\n",
"grid = GridSearchCV(pipe, params, cv=5, scoring='roc_auc')\n",
"grid.fit(data.values, y.values)"
]
},
{
"cell_type": "code",
"execution_count": 426,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<img src=\"http://i.giphy.com/Y39CB1ynF9sxG.gif\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 426,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Image(url=\"http://i.giphy.com/Y39CB1ynF9sxG.gif\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment