Created
November 6, 2021 11:08
-
-
Save BadreeshShetty/8a30f5ff4e5890e52d35a044d53e1882 to your computer and use it in GitHub Desktop.
NLP ML (Built-In)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### For TF-IDFVectorizer " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:122: FutureWarning: You are accessing a training score ('split0_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n", | |
" warnings.warn(*warn_args, **warn_kwargs)\n", | |
"C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:122: FutureWarning: You are accessing a training score ('split1_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n", | |
" warnings.warn(*warn_args, **warn_kwargs)\n", | |
"C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:122: FutureWarning: You are accessing a training score ('split2_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n", | |
" warnings.warn(*warn_args, **warn_kwargs)\n", | |
"C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:122: FutureWarning: You are accessing a training score ('split3_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n", | |
" warnings.warn(*warn_args, **warn_kwargs)\n", | |
"C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:122: FutureWarning: You are accessing a training score ('split4_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n", | |
" warnings.warn(*warn_args, **warn_kwargs)\n", | |
"C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:122: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n", | |
" warnings.warn(*warn_args, **warn_kwargs)\n", | |
"C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:122: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n", | |
" warnings.warn(*warn_args, **warn_kwargs)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mean_fit_time</th>\n", | |
" <th>std_fit_time</th>\n", | |
" <th>mean_score_time</th>\n", | |
" <th>std_score_time</th>\n", | |
" <th>param_max_depth</th>\n", | |
" <th>param_n_estimators</th>\n", | |
" <th>params</th>\n", | |
" <th>split0_test_score</th>\n", | |
" <th>split1_test_score</th>\n", | |
" <th>split2_test_score</th>\n", | |
" <th>...</th>\n", | |
" <th>mean_test_score</th>\n", | |
" <th>std_test_score</th>\n", | |
" <th>rank_test_score</th>\n", | |
" <th>split0_train_score</th>\n", | |
" <th>split1_train_score</th>\n", | |
" <th>split2_train_score</th>\n", | |
" <th>split3_train_score</th>\n", | |
" <th>split4_train_score</th>\n", | |
" <th>mean_train_score</th>\n", | |
" <th>std_train_score</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>55.497712</td>\n", | |
" <td>13.798953</td>\n", | |
" <td>0.863907</td>\n", | |
" <td>0.499025</td>\n", | |
" <td>90</td>\n", | |
" <td>150</td>\n", | |
" <td>{'max_depth': 90, 'n_estimators': 150}</td>\n", | |
" <td>0.978475</td>\n", | |
" <td>0.977538</td>\n", | |
" <td>0.975741</td>\n", | |
" <td>...</td>\n", | |
" <td>0.975031</td>\n", | |
" <td>0.002984</td>\n", | |
" <td>1</td>\n", | |
" <td>0.998877</td>\n", | |
" <td>0.998877</td>\n", | |
" <td>0.999326</td>\n", | |
" <td>0.999102</td>\n", | |
" <td>0.998877</td>\n", | |
" <td>0.999012</td>\n", | |
" <td>0.000180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>38.151178</td>\n", | |
" <td>2.753181</td>\n", | |
" <td>0.527299</td>\n", | |
" <td>0.081872</td>\n", | |
" <td>None</td>\n", | |
" <td>150</td>\n", | |
" <td>{'max_depth': None, 'n_estimators': 150}</td>\n", | |
" <td>0.978475</td>\n", | |
" <td>0.977538</td>\n", | |
" <td>0.973944</td>\n", | |
" <td>...</td>\n", | |
" <td>0.973594</td>\n", | |
" <td>0.004131</td>\n", | |
" <td>2</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>52.709507</td>\n", | |
" <td>11.039492</td>\n", | |
" <td>0.590059</td>\n", | |
" <td>0.170319</td>\n", | |
" <td>None</td>\n", | |
" <td>300</td>\n", | |
" <td>{'max_depth': None, 'n_estimators': 300}</td>\n", | |
" <td>0.978475</td>\n", | |
" <td>0.974843</td>\n", | |
" <td>0.973046</td>\n", | |
" <td>...</td>\n", | |
" <td>0.973594</td>\n", | |
" <td>0.003496</td>\n", | |
" <td>2</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>79.593953</td>\n", | |
" <td>4.012067</td>\n", | |
" <td>0.853713</td>\n", | |
" <td>0.067788</td>\n", | |
" <td>90</td>\n", | |
" <td>300</td>\n", | |
" <td>{'max_depth': 90, 'n_estimators': 300}</td>\n", | |
" <td>0.976682</td>\n", | |
" <td>0.975741</td>\n", | |
" <td>0.973046</td>\n", | |
" <td>...</td>\n", | |
" <td>0.973235</td>\n", | |
" <td>0.002874</td>\n", | |
" <td>4</td>\n", | |
" <td>0.999326</td>\n", | |
" <td>0.999102</td>\n", | |
" <td>0.999102</td>\n", | |
" <td>0.999102</td>\n", | |
" <td>0.999102</td>\n", | |
" <td>0.999147</td>\n", | |
" <td>0.000090</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>70.385356</td>\n", | |
" <td>11.837262</td>\n", | |
" <td>4.052717</td>\n", | |
" <td>1.883449</td>\n", | |
" <td>60</td>\n", | |
" <td>300</td>\n", | |
" <td>{'max_depth': 60, 'n_estimators': 300}</td>\n", | |
" <td>0.977578</td>\n", | |
" <td>0.973944</td>\n", | |
" <td>0.971249</td>\n", | |
" <td>...</td>\n", | |
" <td>0.972337</td>\n", | |
" <td>0.003148</td>\n", | |
" <td>5</td>\n", | |
" <td>0.993711</td>\n", | |
" <td>0.992591</td>\n", | |
" <td>0.994387</td>\n", | |
" <td>0.993938</td>\n", | |
" <td>0.993264</td>\n", | |
" <td>0.993578</td>\n", | |
" <td>0.000612</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 22 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mean_fit_time std_fit_time mean_score_time std_score_time \\\n", | |
"7 55.497712 13.798953 0.863907 0.499025 \n", | |
"10 38.151178 2.753181 0.527299 0.081872 \n", | |
"11 52.709507 11.039492 0.590059 0.170319 \n", | |
"8 79.593953 4.012067 0.853713 0.067788 \n", | |
"5 70.385356 11.837262 4.052717 1.883449 \n", | |
"\n", | |
" param_max_depth param_n_estimators \\\n", | |
"7 90 150 \n", | |
"10 None 150 \n", | |
"11 None 300 \n", | |
"8 90 300 \n", | |
"5 60 300 \n", | |
"\n", | |
" params split0_test_score \\\n", | |
"7 {'max_depth': 90, 'n_estimators': 150} 0.978475 \n", | |
"10 {'max_depth': None, 'n_estimators': 150} 0.978475 \n", | |
"11 {'max_depth': None, 'n_estimators': 300} 0.978475 \n", | |
"8 {'max_depth': 90, 'n_estimators': 300} 0.976682 \n", | |
"5 {'max_depth': 60, 'n_estimators': 300} 0.977578 \n", | |
"\n", | |
" split1_test_score split2_test_score ... mean_test_score \\\n", | |
"7 0.977538 0.975741 ... 0.975031 \n", | |
"10 0.977538 0.973944 ... 0.973594 \n", | |
"11 0.974843 0.973046 ... 0.973594 \n", | |
"8 0.975741 0.973046 ... 0.973235 \n", | |
"5 0.973944 0.971249 ... 0.972337 \n", | |
"\n", | |
" std_test_score rank_test_score split0_train_score split1_train_score \\\n", | |
"7 0.002984 1 0.998877 0.998877 \n", | |
"10 0.004131 2 1.000000 1.000000 \n", | |
"11 0.003496 2 1.000000 1.000000 \n", | |
"8 0.002874 4 0.999326 0.999102 \n", | |
"5 0.003148 5 0.993711 0.992591 \n", | |
"\n", | |
" split2_train_score split3_train_score split4_train_score \\\n", | |
"7 0.999326 0.999102 0.998877 \n", | |
"10 1.000000 1.000000 1.000000 \n", | |
"11 1.000000 1.000000 1.000000 \n", | |
"8 0.999102 0.999102 0.999102 \n", | |
"5 0.994387 0.993938 0.993264 \n", | |
"\n", | |
" mean_train_score std_train_score \n", | |
"7 0.999012 0.000180 \n", | |
"10 1.000000 0.000000 \n", | |
"11 1.000000 0.000000 \n", | |
"8 0.999147 0.000090 \n", | |
"5 0.993578 0.000612 \n", | |
"\n", | |
"[5 rows x 22 columns]" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"rf = RandomForestClassifier()\n", | |
"param = {'n_estimators': [10, 150, 300],\n", | |
" 'max_depth': [30, 60, 90, None]}\n", | |
"\n", | |
"gs = GridSearchCV(rf, param, cv=5, n_jobs=-1)# n_jobs=-1 for parallelizing search\n", | |
"gs_fit = gs.fit(X_tfidf_feat, data['label'])\n", | |
"pd.DataFrame(gs_fit.cv_results_).sort_values('mean_test_score', ascending=False).head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.2" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": false, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment