indiranell/pto_detector.ipynb Secret

## pto_detector.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "cdf70556",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import SnowballStemmer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.feature_selection import SelectKBest, chi2\n",
    "from deepchecks.nlp import TextData\n",
    "from deepchecks.nlp.suites import full_suite\n",
    "from deepchecks.nlp.suites import data_integrity\n",
    "from deepchecks.nlp.suites import train_test_validation\n",
    "from deepchecks.nlp.suites import model_evaluation\n",
    "from sklearn.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "7185dc88",
   "metadata": {},
   "outputs": [],
   "source": [
    "# names= supplies the column labels, so pandas reads every line of the file\n",
    "# (including the first) as a data row.\n",
    "data = pd.read_csv('pto_messages.csv', names=['sentence', 'label'])\n",
    "# Shuffle all rows with a fixed seed so a fresh run reproduces the same row\n",
    "# order; the unseeded shuffle gave a different order on every run.\n",
    "data = data.sample(frac=1, random_state=42).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "3823aa3f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentence</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>I m still not feeling wellfever is gone but se...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Will be taking the day off as Im not feeling well</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>so yesterday itself you thought about it</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>I have some emergency work I have to go now</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>taking</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            sentence  label\n",
       "0  I m still not feeling wellfever is gone but se...      1\n",
       "1  Will be taking the day off as Im not feeling well      1\n",
       "2           so yesterday itself you thought about it      0\n",
       "3        I have some emergency work I have to go now      0\n",
       "4                                             taking      0"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first five rows of the shuffled frame as the cell output.\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "d2607269",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pre-processing\n",
    "# Stemming / regex cleaning is currently disabled: the stemmer is created but\n",
    "# not applied, and the raw sentences are what gets split below.\n",
    "stemmer = SnowballStemmer('english')\n",
    "\n",
    "# Words in the NLTK English stop-word list that are kept as features here.\n",
    "NOT_STOP_WORDS = ['not', 'off', 'be', 'will', 'before', 'after', 'out']\n",
    "# Extra words to treat as stop words on top of the NLTK list.\n",
    "ADD_STOP_WORDS = ['today', 'tomorrow', 'yesterday']\n",
    "\n",
    "# Filter rather than list.remove(): remove() raises ValueError if a word is\n",
    "# absent from the installed NLTK list, which would abort the whole cell.\n",
    "words = [word for word in stopwords.words(\"english\") if word not in NOT_STOP_WORDS]\n",
    "words.extend(word for word in ADD_STOP_WORDS if word not in words)\n",
    "\n",
    "# Hold out 10% of the rows for testing. random_state pins the split so results\n",
    "# are reproducible; stratify keeps the label proportions equal in both parts.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data['sentence'], data.label, test_size=0.1, random_state=42, stratify=data.label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "17455c58",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8974358974358975\n"
     ]
    }
   ],
   "source": [
    "# TF-IDF over 1- to 4-grams, then the 1000 features with the highest chi2\n",
    "# score, then a random forest.\n",
    "pipeline = Pipeline([\n",
    "    ('vect', TfidfVectorizer(ngram_range=(1, 4), stop_words=words, sublinear_tf=True)),\n",
    "    ('chi', SelectKBest(chi2, k=1000)),\n",
    "    # Fixed seed: without it every run grows a different forest and the\n",
    "    # accuracy printed below changes between runs.\n",
    "    ('rfc', RandomForestClassifier(n_estimators=100, random_state=42)),\n",
    "])\n",
    "\n",
    "# Pipeline.fit returns the fitted pipeline itself, so `model` and `pipeline`\n",
    "# refer to the same object.\n",
    "model = pipeline.fit(X_train, y_train)\n",
    "\n",
    "# For a classifier, score() is the mean accuracy on the given data.\n",
    "accuracy_score = model.score(X_test, y_test)\n",
    "print(accuracy_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "20fa2e9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "        progress {\n",
       "            -webkit-appearance: none;\n",
       "            border: none;\n",
       "            border-radius: 3px;\n",
       "            width: 300px;\n",
       "            height: 20px;\n",
       "            vertical-align: middle;\n",
       "            margin-right: 10px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-bar {\n",
       "            border-radius: 3px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-value {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "        progress::-moz-progress-bar {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "    </style>\n",
       "    "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cff84a605b43477eba59256ffafe097f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_GBN0PKFT2TNG60JQ4BMWMFXGM\">Data Integrity Sui…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Data integrity\n",
    "# Wrap each split (sentences plus labels) as a deepchecks text-classification dataset.\n",
    "train = TextData(X_train, label=y_train, task_type='text_classification')\n",
    "test = TextData(X_test, label=y_test, task_type='text_classification')\n",
    "\n",
    "# Build the suite, then run it on both splits; the result is the cell output.\n",
    "data_integrity_suite = data_integrity()\n",
    "data_integrity_suite.run(train_dataset=train, test_dataset=test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "2344dcb6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "        progress {\n",
       "            -webkit-appearance: none;\n",
       "            border: none;\n",
       "            border-radius: 3px;\n",
       "            width: 300px;\n",
       "            height: 20px;\n",
       "            vertical-align: middle;\n",
       "            margin-right: 10px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-bar {\n",
       "            border-radius: 3px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-value {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "        progress::-moz-progress-bar {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "    </style>\n",
       "    "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4f3fe89150bb402b98c3903977bb3af1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_41IT4B12ZYS9KQAEQXYMKP74C\">Train Test Validat…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Train/test validation\n",
    "# Pass the fitted model's class list explicitly; otherwise deepchecks warns\n",
    "# that it could not find the model's classes and falls back to the classes\n",
    "# observed in the data.\n",
    "train_test_validation().run(train, test, model_classes=model.classes_.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "e30d6caf",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "        progress {\n",
       "            -webkit-appearance: none;\n",
       "            border: none;\n",
       "            border-radius: 3px;\n",
       "            width: 300px;\n",
       "            height: 20px;\n",
       "            vertical-align: middle;\n",
       "            margin-right: 10px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-bar {\n",
       "            border-radius: 3px;\n",
       "            background-color: aliceblue;\n",
       "        }\n",
       "        progress::-webkit-progress-value {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "        progress::-moz-progress-bar {\n",
       "            background-color: #9d60fb;\n",
       "        }\n",
       "    </style>\n",
       "    "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fc385d09711f4ce8b083f53c9b187b14",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_38JMFB4AMHRC5JGA59KUAYPGM\">Model Evaluation S…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Hard label predictions, one per sentence.\n",
    "train_preds = model.predict(X_train)\n",
    "test_preds = model.predict(X_test)\n",
    "# predict_proba returns one column per class (ordered as model.classes_),\n",
    "# not just the probability of the positive class.\n",
    "train_probs = model.predict_proba(X_train)\n",
    "test_probs = model.predict_proba(X_test)\n",
    "\n",
    "# model_classes tells deepchecks which classes the model knows, instead of\n",
    "# letting it infer them from the observed labels (which triggers a warning).\n",
    "model_evaluation().run(\n",
    "    train, test,\n",
    "    train_predictions=train_preds,\n",
    "    test_predictions=test_preds,\n",
    "    train_probabilities=train_probs,\n",
    "    test_probabilities=test_probs,\n",
    "    model_classes=model.classes_.tolist(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c535b6c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the deepchecks full suite on both splits, reusing the predictions and\n",
    "# probabilities computed in the model-evaluation cell.\n",
    "suite = full_suite()\n",
    "suite.run(\n",
    "    train_dataset=train,\n",
    "    test_dataset=test,\n",
    "    with_display=True,\n",
    "    train_predictions=train_preds,\n",
    "    test_predictions=test_preds,\n",
    "    train_probabilities=train_probs,\n",
    "    test_probabilities=test_probs,\n",
    "    # Explicit class list so deepchecks does not have to infer it from the\n",
    "    # observed labels.\n",
    "    model_classes=model.classes_.tolist(),\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 103,
	"id": "cdf70556",
	"metadata": {},
	"outputs": [],
	"source": [
	"import re\n",
	"import pandas as pd\n",
	"from nltk.corpus import stopwords\n",
	"from nltk.stem import SnowballStemmer\n",
	"from sklearn.feature_extraction.text import TfidfVectorizer\n",
	"from sklearn.pipeline import Pipeline\n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.feature_selection import SelectKBest, chi2\n",
	"from deepchecks.nlp import TextData\n",
	"from deepchecks.nlp.suites import full_suite\n",
	"from deepchecks.nlp.suites import data_integrity\n",
	"from deepchecks.nlp.suites import train_test_validation\n",
	"from deepchecks.nlp.suites import model_evaluation\n",
	"from sklearn.ensemble import RandomForestClassifier"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 121,
	"id": "7185dc88",
	"metadata": {},
	"outputs": [],
	"source": [
	"# names= supplies the column labels, so pandas reads every line of the file\n",
	"# (including the first) as a data row.\n",
	"data = pd.read_csv('pto_messages.csv', names=['sentence', 'label'])\n",
	"# Shuffle all rows with a fixed seed so a fresh run reproduces the same row\n",
	"# order; the unseeded shuffle gave a different order on every run.\n",
	"data = data.sample(frac=1, random_state=42).reset_index(drop=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 122,
	"id": "3823aa3f",
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>sentence</th>\n",
	" <th>label</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>I m still not feeling wellfever is gone but se...</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>Will be taking the day off as Im not feeling well</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>so yesterday itself you thought about it</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>I have some emergency work I have to go now</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>taking</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" sentence label\n",
	"0 I m still not feeling wellfever is gone but se... 1\n",
	"1 Will be taking the day off as Im not feeling well 1\n",
	"2 so yesterday itself you thought about it 0\n",
	"3 I have some emergency work I have to go now 0\n",
	"4 taking 0"
	]
	},
	"execution_count": 122,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Show the first five rows of the shuffled frame as the cell output.\n",
	"data.head(n=5)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 123,
	"id": "d2607269",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Pre-processing\n",
	"# Stemming / regex cleaning is currently disabled: the stemmer is created but\n",
	"# not applied, and the raw sentences are what gets split below.\n",
	"stemmer = SnowballStemmer('english')\n",
	"\n",
	"# Words in the NLTK English stop-word list that are kept as features here.\n",
	"NOT_STOP_WORDS = ['not', 'off', 'be', 'will', 'before', 'after', 'out']\n",
	"# Extra words to treat as stop words on top of the NLTK list.\n",
	"ADD_STOP_WORDS = ['today', 'tomorrow', 'yesterday']\n",
	"\n",
	"# Filter rather than list.remove(): remove() raises ValueError if a word is\n",
	"# absent from the installed NLTK list, which would abort the whole cell.\n",
	"words = [word for word in stopwords.words(\"english\") if word not in NOT_STOP_WORDS]\n",
	"words.extend(word for word in ADD_STOP_WORDS if word not in words)\n",
	"\n",
	"# Hold out 10% of the rows for testing. random_state pins the split so results\n",
	"# are reproducible; stratify keeps the label proportions equal in both parts.\n",
	"X_train, X_test, y_train, y_test = train_test_split(\n",
	"    data['sentence'], data.label, test_size=0.1, random_state=42, stratify=data.label)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 124,
	"id": "17455c58",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"0.8974358974358975\n"
	]
	}
	],
	"source": [
	"# TF-IDF over 1- to 4-grams, then the 1000 features with the highest chi2\n",
	"# score, then a random forest.\n",
	"pipeline = Pipeline([\n",
	"    ('vect', TfidfVectorizer(ngram_range=(1, 4), stop_words=words, sublinear_tf=True)),\n",
	"    ('chi', SelectKBest(chi2, k=1000)),\n",
	"    # Fixed seed: without it every run grows a different forest and the\n",
	"    # accuracy printed below changes between runs.\n",
	"    ('rfc', RandomForestClassifier(n_estimators=100, random_state=42)),\n",
	"])\n",
	"\n",
	"# Pipeline.fit returns the fitted pipeline itself, so `model` and `pipeline`\n",
	"# refer to the same object.\n",
	"model = pipeline.fit(X_train, y_train)\n",
	"\n",
	"# For a classifier, score() is the mean accuracy on the given data.\n",
	"accuracy_score = model.score(X_test, y_test)\n",
	"print(accuracy_score)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 135,
	"id": "20fa2e9d",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"\n",
	" <style>\n",
	" progress {\n",
	" -webkit-appearance: none;\n",
	" border: none;\n",
	" border-radius: 3px;\n",
	" width: 300px;\n",
	" height: 20px;\n",
	" vertical-align: middle;\n",
	" margin-right: 10px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-bar {\n",
	" border-radius: 3px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-value {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" progress::-moz-progress-bar {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" </style>\n",
	" "
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/html": []
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "cff84a605b43477eba59256ffafe097f",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_GBN0PKFT2TNG60JQ4BMWMFXGM\">Data Integrity Sui…"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"# Data integrity\n",
	"# Wrap each split (sentences plus labels) as a deepchecks text-classification dataset.\n",
	"train = TextData(X_train, label=y_train, task_type='text_classification')\n",
	"test = TextData(X_test, label=y_test, task_type='text_classification')\n",
	"\n",
	"# Build the suite, then run it on both splits; the result is the cell output.\n",
	"data_integrity_suite = data_integrity()\n",
	"data_integrity_suite.run(train_dataset=train, test_dataset=test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 136,
	"id": "2344dcb6",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"\n",
	" <style>\n",
	" progress {\n",
	" -webkit-appearance: none;\n",
	" border: none;\n",
	" border-radius: 3px;\n",
	" width: 300px;\n",
	" height: 20px;\n",
	" vertical-align: middle;\n",
	" margin-right: 10px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-bar {\n",
	" border-radius: 3px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-value {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" progress::-moz-progress-bar {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" </style>\n",
	" "
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/html": []
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
	]
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "4f3fe89150bb402b98c3903977bb3af1",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_41IT4B12ZYS9KQAEQXYMKP74C\">Train Test Validat…"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"# Train/test validation\n",
	"# Pass the fitted model's class list explicitly; otherwise deepchecks warns\n",
	"# that it could not find the model's classes and falls back to the classes\n",
	"# observed in the data.\n",
	"train_test_validation().run(train, test, model_classes=model.classes_.tolist())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 138,
	"id": "e30d6caf",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"deepchecks - WARNING - Could not find model's classes, using the observed classes. In order to make sure the classes used by the model are inferred correctly, please use the model_classes argument\n"
	]
	},
	{
	"data": {
	"text/html": [
	"\n",
	" <style>\n",
	" progress {\n",
	" -webkit-appearance: none;\n",
	" border: none;\n",
	" border-radius: 3px;\n",
	" width: 300px;\n",
	" height: 20px;\n",
	" vertical-align: middle;\n",
	" margin-right: 10px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-bar {\n",
	" border-radius: 3px;\n",
	" background-color: aliceblue;\n",
	" }\n",
	" progress::-webkit-progress-value {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" progress::-moz-progress-bar {\n",
	" background-color: #9d60fb;\n",
	" }\n",
	" </style>\n",
	" "
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/html": []
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "fc385d09711f4ce8b083f53c9b187b14",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Accordion(children=(VBox(children=(HTML(value='\\n<h1 id=\"summary_38JMFB4AMHRC5JGA59KUAYPGM\">Model Evaluation S…"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"# Hard label predictions, one per sentence.\n",
	"train_preds = model.predict(X_train)\n",
	"test_preds = model.predict(X_test)\n",
	"# predict_proba returns one column per class (ordered as model.classes_),\n",
	"# not just the probability of the positive class.\n",
	"train_probs = model.predict_proba(X_train)\n",
	"test_probs = model.predict_proba(X_test)\n",
	"\n",
	"# model_classes tells deepchecks which classes the model knows, instead of\n",
	"# letting it infer them from the observed labels (which triggers a warning).\n",
	"model_evaluation().run(\n",
	"    train, test,\n",
	"    train_predictions=train_preds,\n",
	"    test_predictions=test_preds,\n",
	"    train_probabilities=train_probs,\n",
	"    test_probabilities=test_probs,\n",
	"    model_classes=model.classes_.tolist(),\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "c535b6c4",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Run the deepchecks full suite on both splits, reusing the predictions and\n",
	"# probabilities computed in the model-evaluation cell.\n",
	"suite = full_suite()\n",
	"suite.run(\n",
	"    train_dataset=train,\n",
	"    test_dataset=test,\n",
	"    with_display=True,\n",
	"    train_predictions=train_preds,\n",
	"    test_predictions=test_preds,\n",
	"    train_probabilities=train_probs,\n",
	"    test_probabilities=test_probs,\n",
	"    # Explicit class list so deepchecks does not have to infer it from the\n",
	"    # observed labels.\n",
	"    model_classes=model.classes_.tolist(),\n",
	")"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.12"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}