Skip to content

Instantly share code, notes, and snippets.

@lowener
Last active July 19, 2022 12:19
Show Gist options
  • Save lowener/af17f0aa8455eec34213580a4334b3a3 to your computer and use it in GitHub Desktop.
Save lowener/af17f0aa8455eec34213580a4334b3a3 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "5ff606c1-a2c1-461f-9ef1-e912d5735c79",
"metadata": {},
"source": [
"# CountVectorizer + Complement\n",
"Complement naive Bayes models should be coupled with a CountVectorizer to have the best results."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "da779d1d-d21f-4a45-909b-4a84f4b8d1c9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3 0.360943\n",
"1 0.274531\n",
"2 0.256485\n",
"4 0.108042\n",
"Name: CATEGORY, dtype: float64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEFCAYAAAD9mKAdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAd8UlEQVR4nO3df7RddX3m8fdjgjFAgQQChSSYKBGE1FETA9ZfaJSkgwN0FtTLWIk2NgPFls50qqCrKxbNDMx0ypQ1QpuRSEAHiKmWjJbBFAYcLSYGRDBghquJ5JqQBBMxoAQDz/yxv1f3PZy778/cS5LntdZZZ5/P3t/v+e59b85z968T2SYiIqI3LxvtAURExEtbgiIiIholKCIiolGCIiIiGiUoIiKiUYIiIiIaJSgOEJI2SXp3L/PeJmnDSI/ppUSVz0naJWltP9vcKOnT+3hcZ0rq2pfv0cf7f1LS58v0iZKeljRmmPr+W0l/UaaHdT3zOz2yEhQHAdv/1/bJfS1X/9A4AL0VeA8wxfac1pmSPijpGyM/rJcO24/bPtz2803L9Xdb2b7Y9qeGY2ySLOmkWt/9+p2O4ZGgiBEhaewoD+GVwCbbz4zyOA4Kw7VXEi8NCYoDy+slPSTpKUm3SXoFvHi3X9LHJP1Y0m5JGyTNlTQf+DjwvnL44btl2RMkrZK0U1KnpD+s9TNe0vJyOOdRSR9teZ9N5b0eAp6RNFbS5ZJ+UN77EUm/W1v+g5K+KekaST+V9ENJv13qmyVtl7Sgt5XvbaySFgKfBd5c1u0vW9q9Fvjb2vyf1mZPkPTVMt41kl5da3eKpNXl/TZI+r2GsU0sh762lO31D70s17R9TpJ0b/n5PinptlJX2Wbby7yHJM3spf/ppY/dklYDx9TmTSt/uY8trz9Yfga7JW2U9P7etpWqw3TXS/pHSc8A71SbQ3eSPl7GvknS+2v1eyR9uPb6V3stkr5eyt8t7/m+Nr/Try19/FTSeknn1ObdKOkz7X6OA9l2BzXbeRwAD2ATsBY4AZgIPApcXOadCXSV6ZOBzcAJ5fU04NVl+pPA51v6vRe4DngF8HpgBzC3zLuqzJ8ATAEe6n6f2pgeBKYC40vtgjLGlwHvA54Bji/zPgjsBT4EjAE+DTwOfAYYB5wF7AYO72UbNI31g8A3Grbfi+YDNwI7gTnAWOALwK1l3mFlO36ozHsj8CRwWi/9fxW4rWyrQ4B3tP5s+rF9bgE+Uea9Anhrqc8D7geOAgS8trtNm3HcB/x12Z5vL9vz87XfBZf1OQz4GXBymXd897o1bKungLfUxncj8Onaeu6tvfc7yrp1938P8OHefh5lXCfVXv9qu5Xt2Un1h87LgXeV9TrZff8c+73tDuZH9igOLNfa3mJ7J/C/qD4sWz1P9Q/1VEmH2N5k+wftOpM0lerY/sdsP2v7Qaq/zD9QFvk94D/a3mW7C7i2lzFttv0LANtfLGN8wfZtwGNU/4C7bbT9OVfHyW+jCpkrbe+x/TXgOeCk1jfpx1gH60u219reS/UB8/pSfy/VoazP2d5r+wHg74Hz24zteOB3qIJ7l+1f2r633Zv1sX1+SXUI7YSyjt+o1X8DOAWQ7Udtb20zjhOBNwF/Ubbn16l+T3rzAjBT0njbW22vb1gW4Hbb3yxjf7aXZbrf+16q8Ox1L2wAzgAOB66y/Zztu4GvABfWlunt59ivbXewS1AcWJ6oTf+c6h9PD7Y7gT+l2nvYLulWSSf00t8JwE7bu2u1HwGTa/M31+bVp9vWJF0k6cFyiOCnwExqhz+AbbXp7nBprb1ovfox1sHqbZu+Eji9ez3Kurwf+M02fUwtY9vV15v1sX0+SvVX79pyeOUPAMoH43+n2vPaJmmppCPadH8CsMs9z9P8qN04yjLvAy4GtpbDNqf0Mfx2P/+6du/d2+/eQJwAbLb9Qkvf9Z9925/jALbdQS1BcRCy/T9tv5Xqw87A1d2zWhbdAkyU9Bu12onAj8v0VqpDTt2mtnu77glJrwT+B/AR4GjbRwHfo/rwG6q+xtqXgX6N8mbgXttH1R6H276kl2UnSjqqqcO+to/tJ2z/oe0TgH8LXKdyJZDta23PAk4DXgP8eZu32Ep1zuWwWu3E3sZj+07b76E67PT9MjbofVv1tQ3bvfeWMv0McGhtXrvA7c0WYKqk+udZv3/2/dx2B7UExUFG0smS3iVpHPAs1V/o3ZdDbgOmdf+Ds70Z+GfgP0l6haTXAQupdt0BVgBXSJogaTLVB1yTw6g+THaUsXyI6i/mIevHWPuyDZgi6eX9XP4rwGskfUDSIeXxpnKyt3VsW4E7qD7YJ5Rl396mz8btI+kCSd3BvKss+3x539MlHUL1gfssv/6Z1sfxI2Ad8JeSXi7prcC/ardyko6TdE75YN8DPE3P35OBbKu67vd+G9Xhuy+W+oPAv5Z0aAm/hS3ttgGv6qXPNVTr/dGybc8s63VrX4Pp77Y72CUoDj7jqE5CP0m1O34s1UlA+PU/2p9IeqBMX0h1knML8GVgse3VZd6VQBewEfgnYCXVh0pbth8B/ivVCdVtwG8B3xyOlerHWPtyN7AeeELSk30tXA5xnQV0lPd7gmrPbFwvTT5AdTz8+8B2qsN/rX32tX3eBKyR9DSwCrjM9kbgCKq/9ndRHXL5CfBXvYzj3wCnU53cXQzc1MtyLwP+rKzbTqqTz39U5g1oW9U8Uca4hSrAL7b9/TLvGqrzT9uA5bw44D8JLC+H5Hqc17D9HHAO1XmgJ6kuaLio1neTgWy7g5bs/MdFMTwkXQJ02H7HaI8lIoZP9ihi0CQdL+ktkl4m6WSqv0C/PNrjiojh1WdQSFpWbkb5Xkv9j1XdZLRe0n+u1a9QdbPTBknzavVZkh4u866VpFIfp+rmsM5yI8y0WpsFkh4rj15vtIpR83Lg76iuWb8buJ1qtz8iDiB9HnoqJ92eBm6yPbPU3kl148/ZtvdIOtb2dkmnUt0UNIfqkrV/Al5j+3lVX8R2GfAt4B+prq+/Q9IfAa+zfbGkDuB3bb9P0kSqE2+zqU7a3Q/M6s8lhhERMXz63KMoN+XsbClfQnVzy56yzPZSP5fqjsc95SRbJzCn3HB0hO37XCXTTcB5tTbLy/RKYG7Z25gHrLbdff35amD+INczIiIGabDnKF4DvK0cKrpX0ptKfTI9b7rpKrXJZbq13qNNuWvyKeDohr4iImIEDfYbPcdSfWfNGVSX7K2Q9Cra3zjlhjqDbNODpEXAIoDDDjts1imn9HUDaURE1N1///1P2p7Ubt5gg6KL6rtTTPV1Ai9Qfc1AFz3vzp1Cdc10Fz3v4O2uU2vTpepbK4+kOtTVRfXFX/U297QbjO2lwFKA2bNne926dYNcrYiIg5Oktl/nAoM/9PQPVN/QiKTXUF398iTVTUAd5Uqm6cAMYG25M3W3pDPK+YeLqK6QobTpvqLpfODuEkB3AmeVO1knUN3cdOcgxxsREYPU5x6FpFuo/rI/RtX3vy8GlgHLyiWzzwELyof7ekkrgEeovlL4Uv/6f8u6hOrrfsdTfZ3BHaV+A3CzpE6qPYkOANs7JX0K+HZZ7kpX34oaEREj6IC7MzuHniIiBk7S/bZnt5uXO7MjIqJRgiIiIholKCIiolGCIiIiGiUoIiKi0WBvuDuoTLv8q6M9hH7ZdNXZoz2EiDgAZY8iIiIaJSgiIqJRgiIiIholKCIiolGCIiIiGiUoIiKiUYIiIiIaJSgiIqJRgiIiIholKCIiolGCIiIiGiUoIiKiUZ9BIWmZpO3l/8dunfcfJFnSMbXaFZI6JW2QNK9WnyXp4TLvWkkq9XGSbiv1NZKm1doskPRYeSwY8tpGRMSA9WeP4kZgfmtR0lTgPcDjtdqpQAdwWmlznaQxZfb1wCJgRnl097kQ2GX7JOAa4OrS10RgMXA6MAdYLGnCwFYvIiKGqs+gsP11YGebWdcAHwVcq50L3Gp7j+2NQCcwR9LxwBG277Nt4CbgvFqb5WV6JTC37G3MA1bb3ml7F7CaNoEVERH71qDOUUg6B/ix7e+2zJoMbK697iq1yWW6td6jje29wFPA0Q19RUTECBrwf1wk6VDgE8BZ7Wa3qbmhPtg2rWNaRHVYixNPPLHdIhERMUiD2aN4NTAd+K6kTcAU4AFJv0n1V//U2rJTgC2lPqVNnXobSWOBI6kOdfXW14vYXmp7tu3ZkyZNGsQqRUREbwYcFLYftn2s7Wm2p1F9oL/R9hPAKqCjXMk0neqk9VrbW4Hdks4o5x8uAm4vXa4Cuq9oOh+4u5zHuBM4S9KEchL7rFKLiIgR1OehJ0m3AGcCx0jqAhbbvqHdsrbXS1oBPALsBS61/XyZfQnVFVTjgTvKA+AG4GZJnVR7Eh2lr52SPgV8uyx3pe12J9UjImIf6jMobF/Yx/xpLa+XAEvaLLcOmNmm/ixwQS99LwOW9TXGiIjYd3JndkRENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENOozKCQtk7Rd0vdqtf8i6fuSHpL0ZUlH1eZdIalT0gZJ82r1WZIeLvOulaRSHyfptlJfI2larc0CSY+Vx4LhWumIiOi//uxR3AjMb6mtBmbafh3w/4ArACSdCnQAp5U210kaU9pcDywCZpRHd58LgV22TwKuAa4ufU0EFgOnA3OAxZImDHwVIyJiKPoMCttfB3a21L5me295+S1gSpk+F7jV9h7bG4FOYI6k44EjbN9n28BNwHm1NsvL9EpgbtnbmAestr3T9i6qcGoNrIiI2MeG4xzFHwB3lOnJwObavK5Sm1ymW+s92pTweQo4uqGvF5G0SNI6Set27NgxpJWJiIiehhQUkj4B7AW+0F1qs5gb6oNt07NoL7U92/bsSZMmNQ86IiIGZNBBUU4uvxd4fzmcBNVf/VNri00BtpT6lDb1Hm0kjQWOpDrU1VtfERExggYVFJLmAx8DzrH989qsVUBHuZJpOtVJ67W2twK7JZ1Rzj9cBNxea9N9RdP5wN0leO4EzpI0oZzEPqvUIiJiBI3tawFJtwBnAsdI6qK6EukKYBywulzl+i3bF9teL2kF8AjVIalLbT9furqE6gqq8VTnNLrPa9wA3Cypk2pPogPA9k5JnwK+XZa70naPk+oREbHv9RkUti9sU76hYfklwJI29XXAzDb1Z4ELeulrGbCsrzFGRMS+02dQRAy3aZd/dbSH0C+brjp7tIcQ8ZKQr/CIiIhGCYqIiGiUoIiIiEYJioiIaJSgiIiIRgmKiIholKCIiIhGCYqIiGiUoIiIiEa5MztiP5c73WNfyx5FREQ0SlBERESjBEVERDRKUERERKMERURENEpQREREowRFREQ06jMoJC2TtF3S92q1iZJWS3qsPE+ozbtCUqekDZLm1eqzJD1c5l2r8p9tSxon6bZSXyNpWq3NgvIej0laMGxrHRER/dafPYobgfkttcuBu2zPAO4qr5F0KtABnFbaXCdpTGlzPbAImFEe3X0uBHbZPgm4Bri69DURWAycDswBFtcDKSIiRkafQWH768DOlvK5wPIyvRw4r1a/1fYe2xuBTmCOpOOBI2zfZ9vATS1tuvtaCcwtexvzgNW2d9reBazmxYEVERH72GDPURxneytAeT621CcDm2vLdZXa5DLdWu/RxvZe4Cng6Ia+IiJiBA33yWy1qbmhPtg2Pd9UWiRpnaR1O3bs6NdAIyKifwYbFNvK4STK8/ZS7wKm1pabAmwp9Slt6j3aSBoLHEl1qKu3vl7E9lLbs23PnjRp0iBXKSIi2hlsUKwCuq9CWgDcXqt3lCuZplOdtF5bDk/tlnRGOf9wUUub7r7OB+4u5zHuBM6SNKGcxD6r1CIiYgT1+TXjkm4BzgSOkdRFdSXSVcAKSQuBx4ELAGyvl7QCeATYC1xq+/nS1SVUV1CNB+4oD4AbgJsldVLtSXSUvnZK+hTw7bLclbZbT6pHRMQ+1mdQ2L6wl1lze1l+CbCkTX0dMLNN/VlK0LSZtwxY1tcYIyJi38md2RER0ShBERERjRIUERHRKEERERGNEhQREdEoQREREY0SFBER0ShBERERjRIUERHRKEERERGNEhQREdEoQREREY0SFBER0ShBERERjRIUERHRKEERERGNEhQREdEoQREREY2GFBSS/p2k9ZK+J+kWSa+QNFHSakmPlecJteWvkNQpaYOkebX6LEkPl3nXSlKpj5N0W6mvkTRtKOONiIiBG3RQSJoM/Akw2/ZMYAzQAVwO3GV7BnBXeY2kU8v804D5wHWSxpTurgcWATPKY36pLwR22T4JuAa4erDjjYiIwRnqoaexwHhJY4FDgS3AucDyMn85cF6ZPhe41fYe2xuBTmCOpOOBI2zfZ9vATS1tuvtaCczt3tuIiIiRMeigsP1j4K+Ax4GtwFO2vwYcZ3trWWYrcGxpMhnYXOuiq9Qml+nWeo82tvcCTwFHD3bMERExcEM59DSB6i/+6cAJwGGSfr+pSZuaG+pNbVrHskjSOknrduzY0TzwiIgYkKEceno3sNH2Dtu/BL4E/DawrRxOojxvL8t3AVNr7adQHarqKtOt9R5tyuGtI4GdrQOxvdT2bNuzJ02aNIRVioiIVkMJiseBMyQdWs4bzAUeBVYBC8oyC4Dby/QqoKNcyTSd6qT12nJ4arekM0o/F7W06e7rfODuch4jIiJGyNjBNrS9RtJK4AFgL/AdYClwOLBC0kKqMLmgLL9e0grgkbL8pbafL91dAtwIjAfuKA+AG4CbJXVS7Ul0DHa8ERExOIMOCgDbi4HFLeU9VHsX7ZZfAixpU18HzGxTf5YSNBERMTpyZ3ZERDRKUERERKMERURENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENEpQREREowRFREQ0SlBERESjIQWFpKMkrZT0fUmPSnqzpImSVkt6rDxPqC1/haROSRskzavVZ0l6uMy7VpJKfZyk20p9jaRpQxlvREQM3FD3KP4G+N+2TwH+BfAocDlwl+0ZwF3lNZJOBTqA04D5wHWSxpR+rgcWATPKY36pLwR22T4JuAa4eojjjYiIARp0UEg6Ang7cAOA7eds/xQ4F1heFlsOnFemzwVutb3H9kagE5gj6XjgCNv32TZwU0ub7r5WAnO79zYiImJkDGWP4lXADuBzkr4j6bOSDgOOs70VoDwfW5afDGyute8qtcllurXeo43tvcBTwNFDGHNERAzQUIJiLPBG4HrbbwCeoRxm6kW7PQE31Jva9OxYWiRpnaR1O3bsaB51REQMyFCCogvosr2mvF5JFRzbyuEkyvP22vJTa+2nAFtKfUqbeo82ksYCRwI7Wwdie6nt2bZnT5o0aQirFBERrQYdFLafADZLOrmU5gKPAKuABaW2ALi9TK8COsqVTNOpTlqvLYendks6o5x/uKilTXdf5wN3l/MYERExQsYOsf0fA1+Q9HLgh8CHqMJnhaSFwOPABQC210taQRUme4FLbT9f+rkEuBEYD9xRHlCdKL9ZUifVnkTHEMcbEREDNKSgsP0gMLvNrLm9LL8EWNKmvg6Y2ab+LCVoIiJidOTO7IiIaJSgiIiIRgmKiIholKCIiIhGCYqIiGiUoIiIiEYJioiIaJSgiIiIRgmKiIholKCIiIhGCYqIiGiUoIiIiEYJioiIaJSgiIiIRkP9/ygiIg4Y0y7/6mgPoV82XXX2iL5f9igiIqJRgiIiIholKCIiotGQg0LSGEnfkfSV8nqipNWSHivPE2rLXiGpU9IGSfNq9VmSHi7zrpWkUh8n6bZSXyNp2lDHGxERAzMcexSXAY/WXl8O3GV7BnBXeY2kU4EO4DRgPnCdpDGlzfXAImBGecwv9YXALtsnAdcAVw/DeCMiYgCGFBSSpgBnA5+tlc8Flpfp5cB5tfqttvfY3gh0AnMkHQ8cYfs+2wZuamnT3ddKYG733kZERIyMoe5R/Dfgo8ALtdpxtrcClOdjS30ysLm2XFepTS7TrfUebWzvBZ4Cjh7imCMiYgAGHRSS3gtst31/f5u0qbmh3tSmdSyLJK2TtG7Hjh39HE5ERPTHUPYo3gKcI2kTcCvwLkmfB7aVw0mU5+1l+S5gaq39FGBLqU9pU+/RRtJY4EhgZ+tAbC+1Pdv27EmTJg1hlSIiotWgg8L2Fban2J5GdZL6btu/D6wCFpTFFgC3l+lVQEe5kmk61UnrteXw1G5JZ5TzDxe1tOnu6/zyHi/ao4iIiH1nX3yFx1XACkkLgceBCwBsr5e0AngE2Atcavv50uYS4EZgPHBHeQDcANwsqZNqT6JjH4w3IiIaDEtQ2L4HuKdM/wSY28tyS4AlberrgJlt6s9SgiYiIkZH7syOiIhGCYqIiGiUoIiIiEYJioiIaJSgiIiIRgmKiIholKCIiIhGCYqIiGiUoIiIiEYJioiIaJSgiIiIRgmKiIholKCIiIhGCYqIiGiUoIiIiEYJioiIaJSgiIiIRgmKiIhoNOigkDRV0v+R9Kik9ZIuK/WJklZLeqw8T6i1uUJSp6QNkubV6rMkPVzmXStJpT5O0m2lvkbStCGsa0REDMJQ9ij2An9m+7XAGcClkk4FLgfusj0DuKu8pszrAE4D5gPXSRpT+roeWATMKI/5pb4Q2GX7JOAa4OohjDciIgZh0EFhe6vtB8r0buBRYDJwLrC8LLYcOK9MnwvcanuP7Y1AJzBH0vHAEbbvs23gppY23X2tBOZ2721ERMTIGJZzFOWQ0BuANcBxtrdCFSbAsWWxycDmWrOuUptcplvrPdrY3gs8BRw9HGOOiIj+GXJQSDoc+HvgT23/rGnRNjU31JvatI5hkaR1ktbt2LGjryFHRMQADCkoJB1CFRJfsP2lUt5WDidRnreXehcwtdZ8CrCl1Ke0qfdoI2kscCSws3Uctpfanm179qRJk4ayShER0WIoVz0JuAF41PZf12atAhaU6QXA7bV6R7mSaTrVSeu15fDUbklnlD4vamnT3df5wN3lPEZERIyQsUNo+xbgA8DDkh4stY8DVwErJC0EHgcuALC9XtIK4BGqK6Yutf18aXcJcCMwHrijPKAKopsldVLtSXQMYbwRETEIgw4K29+g/TkEgLm9tFkCLGlTXwfMbFN/lhI0ERExOnJndkRENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENEpQREREowRFREQ0SlBERESjBEVERDRKUERERKMERURENNovgkLSfEkbJHVKuny0xxMRcTB5yQeFpDHAZ4DfAU4FLpR06uiOKiLi4PGSDwpgDtBp+4e2nwNuBc4d5TFFRBw0ZHu0x9BI0vnAfNsfLq8/AJxu+yO1ZRYBi8rLk4ENIz7QgTsGeHK0B3EAyfYcXtmew2d/2ZavtD2p3YyxIz2SQVCbWo90s70UWDoywxkektbZnj3a4zhQZHsOr2zP4XMgbMv94dBTFzC19noKsGWUxhIRcdDZH4Li28AMSdMlvRzoAFaN8pgiIg4aL/lDT7b3SvoIcCcwBlhme/0oD2s47FeHyvYD2Z7DK9tz+Oz32/IlfzI7IiJG1/5w6CkiIkZRgiIiIholKCIiolGCYoRImiPpTWX6VEn/XtK/HO1xRUg6RdJcSYe31OeP1pgOJJJuGu0xDFVOZo8ASYupvqtqLLAaOB24B3g3cKftJaM3ugOLpA/Z/txoj2N/IelPgEuBR4HXA5fZvr3Me8D2G0dxePsdSa2X7gt4J3A3gO1zRnxQwyBBMQIkPUz1j3Ac8AQwxfbPJI0H1th+3WiO70Ai6XHbJ472OPYX5XfzzbafljQNWAncbPtvJH3H9htGd4T7F0kPAI8An6X6BgkBt1Dd/4Xte0dvdIP3kr+P4gCx1/bzwM8l/cD2zwBs/0LSC6M8tv2OpId6mwUcN5JjOQCMsf00gO1Nks4EVkp6Je2/PieazQYuAz4B/LntByX9Yn8NiG4JipHxnKRDbf8cmNVdlHQkkKAYuOOAecCulrqAfx754ezXnpD0etsPApQ9i/cCy4DfGtWR7YdsvwBcI+mL5XkbB8Dn7H6/AvuJt9veA7/6Rep2CLBgdIa0X/sKcHj3h1udpHtGfDT7t4uAvfWC7b3ARZL+bnSGtP+z3QVcIOls4GejPZ6hyjmKiIholMtjIyKiUYIiIiIaJSgiIqJRgiIiIholKCIiotH/B7Zo3hlb7o2iAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# First let's visualize the class imbalance\n",
"\n",
"dataset['CATEGORY'].value_counts().to_pandas().plot(kind='bar', title='histogram of the class distributions')\n",
"dataset['CATEGORY'].value_counts() / len(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 252,
"id": "65a127f2-be0d-4ab8-b87f-45aabbe4a7fa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 56.8 ms, sys: 12.3 ms, total: 69.1 ms\n",
"Wall time: 69.2 ms\n",
"CPU times: user 22.4 ms, sys: 7.27 ms, total: 29.7 ms\n",
"Wall time: 29.8 ms\n"
]
},
{
"data": {
"text/plain": [
"0.9502959251403809"
]
},
"execution_count": 252,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vec = CountVectorizer(stop_words='english', ngram_range=(1,3))\n",
"x_train = vec.fit_transform(X_train_text)\n",
"x_test = vec.transform(X_test_text)\n",
"\n",
"cnb = ComplementNB()\n",
"%time cnb.fit(x_train, y_train)\n",
"%time cnb.score(x_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 253,
"id": "73005bfc-6ffb-45af-9809-00d5345c597a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 67.5 ms, sys: 31.8 ms, total: 99.3 ms\n",
"Wall time: 99.5 ms\n",
"CPU times: user 26.6 ms, sys: 11.4 ms, total: 38 ms\n",
"Wall time: 37.7 ms\n"
]
},
{
"data": {
"text/plain": [
"0.9449836611747742"
]
},
"execution_count": 253,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vec = CountVectorizer(stop_words='english', ngram_range=(1,3))\n",
"x_train = vec.fit_transform(X_train_text)\n",
"x_test = vec.transform(X_test_text)\n",
"x_train_np, x_test_np = x_train.get(), x_test.get()\n",
"y_train_np, y_test_np = y_train.to_numpy(), y_test.to_numpy()\n",
"\n",
"cnb = ComplementNB_sk()\n",
"%time mnb.fit(x_train_np, y_train_np)\n",
"%time mnb.score(x_test_np, y_test_np)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment