indiranell/Deepchecks-ptodetector2.ipynb

## Deepchecks-ptodetector2.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 251,
   "id": "cdf70556",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import SnowballStemmer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.feature_selection import SelectKBest, chi2\n",
    "from deepchecks.nlp import TextData\n",
    "from deepchecks.nlp.suites import full_suite\n",
    "from deepchecks.nlp.suites import data_integrity\n",
    "from deepchecks.nlp.suites import train_test_validation\n",
    "from deepchecks.nlp.suites import model_evaluation\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "id": "7185dc88",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('pto_messages.csv', names=['sentence', 'label'])\n",
    "data = data.sample(frac=1).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "id": "3823aa3f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentence</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>My work isnt completed yet in college so will ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sorry to hear that Take care Shiva</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Down with Flu cant make it today</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>My dental appointment is postponed to tomorrow...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>My pto tomorrow has been cancelled</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            sentence  label\n",
       "0  My work isnt completed yet in college so will ...      1\n",
       "1                 Sorry to hear that Take care Shiva      0\n",
       "2                   Down with Flu cant make it today      1\n",
       "3  My dental appointment is postponed to tomorrow...      1\n",
       "4                 My pto tomorrow has been cancelled      0"
      ]
     },
     "execution_count": 219,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "id": "d2607269",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Pre-processing\n",
    "stemmer = SnowballStemmer('english')\n",
    "words = stopwords.words(\"english\")\n",
    "NOT_STOP_WORDS = ['not','off','be','will','before','after','out']\n",
    "ADD_STOP_WORDS = ['today', 'tomorrow', 'yesterday']\n",
    "for word in NOT_STOP_WORDS:\n",
    "    words.remove(word)\n",
    "for word in ADD_STOP_WORDS:\n",
    "    words.append(word)\n",
    "#data['cleaned'] = data['sentence'].apply(lambda x: \" \".join([stemmer.stem(i) for i in re.sub(\"[^a-zA-Z]\", \" \", x).split() if i not in words]).lower())\n",
    "X_train, X_test, y_train, y_test = train_test_split(data['sentence'], data.label, test_size=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 253,
   "id": "17455c58",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8726114649681529\n"
     ]
    }
   ],
   "source": [
    "pipeline = Pipeline([('vect', TfidfVectorizer(ngram_range=(1, 4), stop_words=words, sublinear_tf=True)),\n",
    "                     ('chi',  SelectKBest(chi2, k=1000)),  \n",
    "                     ('lgr', LogisticRegression(C=1.0, penalty='l2', max_iter=1000))])\n",
    "                    #('rfc',RandomForestClassifier(n_estimators=100))])\n",
    "\n",
    "model = pipeline.fit(X_train, y_train)\n",
    "accuracy_score = model.score(X_test, y_test)\n",
    "\n",
    "print (accuracy_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 255,
   "id": "20fa2e9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "        progress {\n",
       "            -webkit-appearance: none;\n",
       "            border: none;\n",
       "            border-radius: 3px;\n",
       "            width: 300px;\n",
       "            height: 20px;\n",
       "            vertical-align: middle;\n",
       "            margin-right: 10px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-bar {\n",
       "            border-radius: 3px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-value {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "        progress::-moz-progress-bar {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "    </style>\n",
       "    "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "938c172f693b479584f2e6605aa19333",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_MJEOJ14ZBPX1IE690ZW2D539V\">Data Integrity Sui…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#Data integrity\n",
    "train = TextData(X_train, label=y_train, task_type='text_classification')\n",
    "test = TextData(X_test, label=y_test, task_type='text_classification')\n",
    "data_integrity_suite = data_integrity()\n",
    "\n",
    "data_integrity_suite.run(train, test)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 257,
   "id": "2344dcb6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "        progress {\n",
       "            -webkit-appearance: none;\n",
       "            border: none;\n",
       "            border-radius: 3px;\n",
       "            width: 300px;\n",
       "            height: 20px;\n",
       "            vertical-align: middle;\n",
       "            margin-right: 10px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-bar {\n",
       "            border-radius: 3px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-value {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "        progress::-moz-progress-bar {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "    </style>\n",
       "    "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "266fbe3b6400468e94a7333a379af81b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_NF14E7GML76C67VHMEDOQS00D\">Train Test Validat…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#Train Test Evaluation\n",
    "train_test_validation().run(train, test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "id": "e30d6caf",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "        progress {\n",
       "            -webkit-appearance: none;\n",
       "            border: none;\n",
       "            border-radius: 3px;\n",
       "            width: 300px;\n",
       "            height: 20px;\n",
       "            vertical-align: middle;\n",
       "            margin-right: 10px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-bar {\n",
       "            border-radius: 3px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-value {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "        progress::-moz-progress-bar {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "    </style>\n",
       "    "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "00d861061f5a435abd48a36ecbb0f6d0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_NRX7VYKUW92IZ19B7BF7CCGWE\">Model Evaluation S…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "accuracy_score = model.score(X_test, y_test)\n",
    "train_preds = model.predict(X_train)  # Get predicted labels (0 or 1)\n",
    "train_probs = model.predict_proba(X_train)  # Get predicted probabilities for positive class (class 1)\n",
    "test_preds = model.predict(X_test)  # Get predicted labels (0 or 1)\n",
    "test_probs = model.predict_proba(X_test)  # Get predicted probabilities for positive class (class 1)\n",
    "\n",
    "model_evaluation().run(train, test,    \n",
    "                                train_predictions=train_preds,\n",
    "                                test_predictions=test_preds,\n",
    "                                train_probabilities=train_probs,\n",
    "                                test_probabilities=test_probs)                        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c535b6c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "suite = full_suite()\n",
    "suite.run(train_dataset=train,\n",
    "    test_dataset=test,\n",
    "    with_display=True,\n",
    "    train_predictions=train_preds,\n",
    "    test_predictions=test_preds,\n",
    "    train_probabilities=train_probs,\n",
    "    test_probabilities=test_probs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 205,
   "id": "c7450be0",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "380e83b027ba4057b639ed62c0144496",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HTML(value='<h4><b>Confusion Matrix Report</b></h4>'), HTML(value='<p>Calculate the confusion m…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from deepchecks.nlp.checks import ConfusionMatrixReport\n",
    "check = ConfusionMatrixReport()\n",
    "result = check.run(train, predictions=train_preds)\n",
    "result.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06bf6904",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9878494",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4350888e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f4a97de",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 251,
	"id": "cdf70556",
	"metadata": {},
	"outputs": [],
	"source": [
	"import re\n",
	"import pandas as pd\n",
	"from nltk.corpus import stopwords\n",
	"from nltk.stem import SnowballStemmer\n",
	"from sklearn.feature_extraction.text import TfidfVectorizer\n",
	"from sklearn.pipeline import Pipeline\n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.feature_selection import SelectKBest, chi2\n",
	"from deepchecks.nlp import TextData\n",
	"from deepchecks.nlp.suites import full_suite\n",
	"from deepchecks.nlp.suites import data_integrity\n",
	"from deepchecks.nlp.suites import train_test_validation\n",
	"from deepchecks.nlp.suites import model_evaluation\n",
	"from sklearn.ensemble import RandomForestClassifier\n",
	"from sklearn.linear_model import LogisticRegression"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 226,
	"id": "7185dc88",
	"metadata": {},
	"outputs": [],
	"source": [
	"data = pd.read_csv('pto_messages.csv', names=['sentence', 'label'])\n",
	"data = data.sample(frac=1).reset_index(drop=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 219,
	"id": "3823aa3f",
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>sentence</th>\n",
	" <th>label</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>My work isnt completed yet in college so will ...</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>Sorry to hear that Take care Shiva</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>Down with Flu cant make it today</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>My dental appointment is postponed to tomorrow...</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>My pto tomorrow has been cancelled</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" sentence label\n",
	"0 My work isnt completed yet in college so will ... 1\n",
	"1 Sorry to hear that Take care Shiva 0\n",
	"2 Down with Flu cant make it today 1\n",
	"3 My dental appointment is postponed to tomorrow... 1\n",
	"4 My pto tomorrow has been cancelled 0"
	]
	},
	"execution_count": 219,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"data.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 227,
	"id": "d2607269",
	"metadata": {},
	"outputs": [],
	"source": [
	"#Pre-processing\n",
	"stemmer = SnowballStemmer('english')\n",
	"words = stopwords.words(\"english\")\n",
	"NOT_STOP_WORDS = ['not','off','be','will','before','after','out']\n",
	"ADD_STOP_WORDS = ['today', 'tomorrow', 'yesterday']\n",
	"for word in NOT_STOP_WORDS:\n",
	" words.remove(word)\n",
	"for word in ADD_STOP_WORDS:\n",
	" words.append(word)\n",
	"#data['cleaned'] = data['sentence'].apply(lambda x: \" \".join([stemmer.stem(i) for i in re.sub(\"[^a-zA-Z]\", \" \", x).split() if i not in words]).lower())\n",
	"X_train, X_test, y_train, y_test = train_test_split(data['sentence'], data.label, test_size=0.1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 253,
	"id": "17455c58",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"0.8726114649681529\n"
	]
	}
	],
	"source": [
	"pipeline = Pipeline([('vect', TfidfVectorizer(ngram_range=(1, 4), stop_words=words, sublinear_tf=True)),\n",
	" ('chi', SelectKBest(chi2, k=1000)), \n",
	" ('lgr', LogisticRegression(C=1.0, penalty='l2', max_iter=1000))])\n",
	" #('rfc',RandomForestClassifier(n_estimators=100))])\n",
	"\n",
	"model = pipeline.fit(X_train, y_train)\n",
	"accuracy_score = model.score(X_test, y_test)\n",
	"\n",
	"print (accuracy_score)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 255,
	"id": "20fa2e9d",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"\n",
	" <style>\n",
	" progress {\n",
	" -webkit-appearance: none;\n",
	" border: none;\n",
	" border-radius: 3px;\n",
	" width: 300px;\n",
	" height: 20px;\n",
	" vertical-align: middle;\n",
	" margin-right: 10px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-bar {\n",
	" border-radius: 3px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-value {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" progress::-moz-progress-bar {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" </style>\n",
	" "
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/html": []
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "938c172f693b479584f2e6605aa19333",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_MJEOJ14ZBPX1IE690ZW2D539V\">Data Integrity Sui…"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"#Data integrity\n",
	"train = TextData(X_train, label=y_train, task_type='text_classification')\n",
	"test = TextData(X_test, label=y_test, task_type='text_classification')\n",
	"data_integrity_suite = data_integrity()\n",
	"\n",
	"data_integrity_suite.run(train, test)\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 257,
	"id": "2344dcb6",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"\n",
	" <style>\n",
	" progress {\n",
	" -webkit-appearance: none;\n",
	" border: none;\n",
	" border-radius: 3px;\n",
	" width: 300px;\n",
	" height: 20px;\n",
	" vertical-align: middle;\n",
	" margin-right: 10px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-bar {\n",
	" border-radius: 3px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-value {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" progress::-moz-progress-bar {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" </style>\n",
	" "
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/html": []
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
	]
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "266fbe3b6400468e94a7333a379af81b",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_NF14E7GML76C67VHMEDOQS00D\">Train Test Validat…"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"#Train Test Evaluation\n",
	"train_test_validation().run(train, test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 254,
	"id": "e30d6caf",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
	]
	},
	{
	"data": {
	"text/html": [
	"\n",
	" <style>\n",
	" progress {\n",
	" -webkit-appearance: none;\n",
	" border: none;\n",
	" border-radius: 3px;\n",
	" width: 300px;\n",
	" height: 20px;\n",
	" vertical-align: middle;\n",
	" margin-right: 10px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-bar {\n",
	" border-radius: 3px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-value {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" progress::-moz-progress-bar {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" </style>\n",
	" "
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/html": []
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "00d861061f5a435abd48a36ecbb0f6d0",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_NRX7VYKUW92IZ19B7BF7CCGWE\">Model Evaluation S…"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"accuracy_score = model.score(X_test, y_test)\n",
	"train_preds = model.predict(X_train) # Get predicted labels (0 or 1)\n",
	"train_probs = model.predict_proba(X_train) # Get predicted probabilities for positive class (class 1)\n",
	"test_preds = model.predict(X_test) # Get predicted labels (0 or 1)\n",
	"test_probs = model.predict_proba(X_test) # Get predicted probabilities for positive class (class 1)\n",
	"\n",
	"model_evaluation().run(train, test, \n",
	" train_predictions=train_preds,\n",
	" test_predictions=test_preds,\n",
	" train_probabilities=train_probs,\n",
	" test_probabilities=test_probs) \n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "c535b6c4",
	"metadata": {},
	"outputs": [],
	"source": [
	"suite = full_suite()\n",
	"suite.run(train_dataset=train,\n",
	" test_dataset=test,\n",
	" with_display=True,\n",
	" train_predictions=train_preds,\n",
	" test_predictions=test_preds,\n",
	" train_probabilities=train_probs,\n",
	" test_probabilities=test_probs)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 205,
	"id": "c7450be0",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
	]
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "380e83b027ba4057b639ed62c0144496",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"VBox(children=(HTML(value='<h4><b>Confusion Matrix Report</b></h4>'), HTML(value='<p>Calculate the confusion m…"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"from deepchecks.nlp.checks import ConfusionMatrixReport\n",
	"check = ConfusionMatrixReport()\n",
	"result = check.run(train, predictions=train_preds)\n",
	"result.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "06bf6904",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "f9878494",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "4350888e",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "1f4a97de",
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.12"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}