Skip to content

Instantly share code, notes, and snippets.

@chetanambi
Created May 16, 2022 04:50
Show Gist options
  • Save chetanambi/018d2ee53999971a78c10dd3eb09a0e9 to your computer and use it in GitHub Desktop.
Save chetanambi/018d2ee53999971a78c10dd3eb09a0e9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "8852c2de",
"metadata": {},
"source": [
"# Scikit-learn Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "7af6e3be",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import set_config\n",
"set_config(display=\"diagram\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1af3565a",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline, make_union\n",
"from sklearn.pipeline import Pipeline, FeatureUnion\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector\n",
"from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.preprocessing import FunctionTransformer\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.svm import SVC"
]
},
{
"cell_type": "markdown",
"id": "b99653b8",
"metadata": {},
"source": [
"## Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ba14bb80",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>alcohol</th>\n",
" <th>malic_acid</th>\n",
" <th>ash</th>\n",
" <th>alcalinity_of_ash</th>\n",
" <th>magnesium</th>\n",
" <th>total_phenols</th>\n",
" <th>flavanoids</th>\n",
" <th>nonflavanoid_phenols</th>\n",
" <th>proanthocyanins</th>\n",
" <th>color_intensity</th>\n",
" <th>hue</th>\n",
" <th>od280/od315_of_diluted_wines</th>\n",
" <th>proline</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>14.23</td>\n",
" <td>1.71</td>\n",
" <td>2.43</td>\n",
" <td>15.6</td>\n",
" <td>127.0</td>\n",
" <td>2.80</td>\n",
" <td>3.06</td>\n",
" <td>0.28</td>\n",
" <td>2.29</td>\n",
" <td>5.64</td>\n",
" <td>1.04</td>\n",
" <td>3.92</td>\n",
" <td>1065.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13.20</td>\n",
" <td>1.78</td>\n",
" <td>2.14</td>\n",
" <td>11.2</td>\n",
" <td>100.0</td>\n",
" <td>2.65</td>\n",
" <td>2.76</td>\n",
" <td>0.26</td>\n",
" <td>1.28</td>\n",
" <td>4.38</td>\n",
" <td>1.05</td>\n",
" <td>3.40</td>\n",
" <td>1050.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13.16</td>\n",
" <td>2.36</td>\n",
" <td>2.67</td>\n",
" <td>18.6</td>\n",
" <td>101.0</td>\n",
" <td>2.80</td>\n",
" <td>3.24</td>\n",
" <td>0.30</td>\n",
" <td>2.81</td>\n",
" <td>5.68</td>\n",
" <td>1.03</td>\n",
" <td>3.17</td>\n",
" <td>1185.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14.37</td>\n",
" <td>1.95</td>\n",
" <td>2.50</td>\n",
" <td>16.8</td>\n",
" <td>113.0</td>\n",
" <td>3.85</td>\n",
" <td>3.49</td>\n",
" <td>0.24</td>\n",
" <td>2.18</td>\n",
" <td>7.80</td>\n",
" <td>0.86</td>\n",
" <td>3.45</td>\n",
" <td>1480.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13.24</td>\n",
" <td>2.59</td>\n",
" <td>2.87</td>\n",
" <td>21.0</td>\n",
" <td>118.0</td>\n",
" <td>2.80</td>\n",
" <td>2.69</td>\n",
" <td>0.39</td>\n",
" <td>1.82</td>\n",
" <td>4.32</td>\n",
" <td>1.04</td>\n",
" <td>2.93</td>\n",
" <td>735.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n",
"0 14.23 1.71 2.43 15.6 127.0 2.80 \n",
"1 13.20 1.78 2.14 11.2 100.0 2.65 \n",
"2 13.16 2.36 2.67 18.6 101.0 2.80 \n",
"3 14.37 1.95 2.50 16.8 113.0 3.85 \n",
"4 13.24 2.59 2.87 21.0 118.0 2.80 \n",
"\n",
" flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n",
"0 3.06 0.28 2.29 5.64 1.04 \n",
"1 2.76 0.26 1.28 4.38 1.05 \n",
"2 3.24 0.30 2.81 5.68 1.03 \n",
"3 3.49 0.24 2.18 7.80 0.86 \n",
"4 2.69 0.39 1.82 4.32 1.04 \n",
"\n",
" od280/od315_of_diluted_wines proline target \n",
"0 3.92 1065.0 0 \n",
"1 3.40 1050.0 0 \n",
"2 3.17 1185.0 0 \n",
"3 3.45 1480.0 0 \n",
"4 2.93 735.0 0 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.datasets import load_wine\n",
"data = load_wine()\n",
"\n",
"df = pd.DataFrame(data=data.data, columns=data.feature_names)\n",
"df['target'] = data.target\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "245edb5d",
"metadata": {},
"outputs": [],
"source": [
"X = df.drop(['target'], axis=1)\n",
"y = df['target']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)\n",
"\n",
"pipe = Pipeline(steps=[\n",
" ('scaler', StandardScaler()), \n",
" ('svc', SVC())\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2da38b96",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 {color: black;background-color: white;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 pre{padding: 0;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-toggleable {background-color: white;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-estimator:hover {background-color: #d4ebff;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-item {z-index: 1;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-parallel-item:only-child::after {width: 0;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-8f38e087-9352-4ebe-b955-77a3f28f8875 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-8f38e087-9352-4ebe-b955-77a3f28f8875\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;scaler&#x27;, StandardScaler()), (&#x27;svc&#x27;, SVC())])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"26c04d94-5875-42f1-a43c-e7bdef631b96\" type=\"checkbox\" ><label for=\"26c04d94-5875-42f1-a43c-e7bdef631b96\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;scaler&#x27;, StandardScaler()), (&#x27;svc&#x27;, SVC())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"27f6a2f1-faa7-4c00-a690-75c4bdf2043e\" type=\"checkbox\" ><label for=\"27f6a2f1-faa7-4c00-a690-75c4bdf2043e\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">StandardScaler</label><div class=\"sk-toggleable__content\"><pre>StandardScaler()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator 
sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"3227972b-22bb-4641-ac7d-bbb400e60381\" type=\"checkbox\" ><label for=\"3227972b-22bb-4641-ac7d-bbb400e60381\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "089ed2a5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9777777777777777"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe.fit(X_train, y_train)\n",
"y_pred = pipe.predict(X_test)\n",
"accuracy_score(y_test, y_pred)"
]
},
{
"cell_type": "markdown",
"id": "e11b0078",
"metadata": {},
"source": [
"## make_pipeline"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "facfd8ee",
"metadata": {},
"outputs": [],
"source": [
"pipe = make_pipeline(StandardScaler(), \n",
" SVC())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a93f3bca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca {color: black;background-color: white;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca pre{padding: 0;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-toggleable {background-color: white;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-estimator:hover {background-color: #d4ebff;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-item {z-index: 1;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-parallel-item:only-child::after {width: 0;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-4000e8c0-7d5d-4f1f-8e1c-e4aa2131a7ca\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;standardscaler&#x27;, StandardScaler()), (&#x27;svc&#x27;, SVC())])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"aabb46eb-7366-46ce-a883-70903c292740\" type=\"checkbox\" ><label for=\"aabb46eb-7366-46ce-a883-70903c292740\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;standardscaler&#x27;, StandardScaler()), (&#x27;svc&#x27;, SVC())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"a45ba295-3dc5-4c6e-9b67-a6517bb2a138\" type=\"checkbox\" ><label for=\"a45ba295-3dc5-4c6e-9b67-a6517bb2a138\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">StandardScaler</label><div class=\"sk-toggleable__content\"><pre>StandardScaler()</pre></div></div></div><div class=\"sk-item\"><div 
class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"eae48d0d-fd26-4c2a-af34-0f9798c79344\" type=\"checkbox\" ><label for=\"eae48d0d-fd26-4c2a-af34-0f9798c79344\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('standardscaler', StandardScaler()), ('svc', SVC())])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "8b501543",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9777777777777777"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe.fit(X_train, y_train)\n",
"y_pred = pipe.predict(X_test)\n",
"accuracy_score(y_test, y_pred)"
]
},
{
"cell_type": "markdown",
"id": "ea23320a",
"metadata": {},
"source": [
"## FeatureUnion"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ec9b36b9",
"metadata": {},
"outputs": [],
"source": [
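"# Data source: Kaggle spam text message classification dataset\n",
"# https://www.kaggle.com/datasets/team-ai/spam-text-message-classification\n",
"# The next cell reads the CSV by relative path, so it must be in the working directory."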
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "9bd11ffb",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('SPAM text message 20170820 - Data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "09e97c58",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Category</th>\n",
" <th>Message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ham</td>\n",
" <td>Go until jurong point, crazy.. Available only ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ham</td>\n",
" <td>Ok lar... Joking wif u oni...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>spam</td>\n",
" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ham</td>\n",
" <td>U dun say so early hor... U c already then say...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ham</td>\n",
" <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Category Message\n",
"0 ham Go until jurong point, crazy.. Available only ...\n",
"1 ham Ok lar... Joking wif u oni...\n",
"2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
"3 ham U dun say so early hor... U c already then say...\n",
"4 ham Nah I don't think he goes to usf, he lives aro..."
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head() "
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a05bad42",
"metadata": {},
"outputs": [],
"source": [
"X = df['Message']\n",
"y = df['Category']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, stratify=y)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4f938b3a",
"metadata": {},
"outputs": [],
"source": [
"def get_text_length(x):\n",
"    \"\"\"Return len() of each item in x as a column array of shape (n_items, 1).\"\"\"\n",
"    return np.array([len(t) for t in x]).reshape(-1, 1)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "92af37a8",
"metadata": {},
"outputs": [],
"source": [
"preprocessor = FeatureUnion([ \n",
" ('tf-idf', TfidfVectorizer()),\n",
" ('length', FunctionTransformer(get_text_length, validate=False))\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "76defce3",
"metadata": {},
"outputs": [],
"source": [
"pipe = make_pipeline(preprocessor,\n",
" SGDClassifier(random_state=42))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8355b9fc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 {color: black;background-color: white;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 pre{padding: 0;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-toggleable {background-color: white;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-estimator:hover {background-color: #d4ebff;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-item {z-index: 1;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-parallel-item:only-child::after {width: 0;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-8bf3e149-8cfb-4444-b318-f0f68bb1a470\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;featureunion&#x27;,\n",
" FeatureUnion(transformer_list=[(&#x27;tf-idf&#x27;, TfidfVectorizer()),\n",
" (&#x27;length&#x27;,\n",
" FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;))])),\n",
" (&#x27;sgdclassifier&#x27;, SGDClassifier(random_state=42))])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"0e273a53-9df4-4aab-a4c5-aa12ab0b2887\" type=\"checkbox\" ><label for=\"0e273a53-9df4-4aab-a4c5-aa12ab0b2887\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;featureunion&#x27;,\n",
" FeatureUnion(transformer_list=[(&#x27;tf-idf&#x27;, TfidfVectorizer()),\n",
" (&#x27;length&#x27;,\n",
" FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;))])),\n",
" (&#x27;sgdclassifier&#x27;, SGDClassifier(random_state=42))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"fc69ff17-b19d-4ce3-ada7-349aad45642c\" type=\"checkbox\" ><label for=\"fc69ff17-b19d-4ce3-ada7-349aad45642c\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">featureunion: FeatureUnion</label><div class=\"sk-toggleable__content\"><pre>FeatureUnion(transformer_list=[(&#x27;tf-idf&#x27;, TfidfVectorizer()),\n",
" (&#x27;length&#x27;,\n",
" FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;))])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><label>tf-idf</label></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"d9337d0a-cfed-4b18-86ac-27087c0c815e\" type=\"checkbox\" ><label for=\"d9337d0a-cfed-4b18-86ac-27087c0c815e\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">TfidfVectorizer</label><div class=\"sk-toggleable__content\"><pre>TfidfVectorizer()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><label>length</label></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"4f418d28-9524-4449-aab8-a8fa2affe421\" type=\"checkbox\" ><label for=\"4f418d28-9524-4449-aab8-a8fa2affe421\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">FunctionTransformer</label><div class=\"sk-toggleable__content\"><pre>FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;)</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"3678efb9-bb59-46e8-aeea-117141b7202b\" type=\"checkbox\" ><label for=\"3678efb9-bb59-46e8-aeea-117141b7202b\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SGDClassifier</label><div class=\"sk-toggleable__content\"><pre>SGDClassifier(random_state=42)</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('featureunion',\n",
" FeatureUnion(transformer_list=[('tf-idf', TfidfVectorizer()),\n",
" ('length',\n",
" FunctionTransformer(func=<function get_text_length at 0x000002678C36D700>))])),\n",
" ('sgdclassifier', SGDClassifier(random_state=42))])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "a18be6ec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.8657573582196698"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe.fit(X_train, y_train)\n",
"y_pred = pipe.predict(X_test)\n",
"accuracy_score(y_test, y_pred)"
]
},
{
"cell_type": "markdown",
"id": "1aca0965",
"metadata": {},
"source": [
"## make_union"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "2c2d9a6f",
"metadata": {},
"outputs": [],
"source": [
"preprocessor = make_union(TfidfVectorizer(),\n",
" FunctionTransformer(get_text_length, validate=False))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "eb5ad45a",
"metadata": {},
"outputs": [],
"source": [
"pipe = make_pipeline(preprocessor, \n",
" SGDClassifier(random_state=42))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "d77b574b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa {color: black;background-color: white;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa pre{padding: 0;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-toggleable {background-color: white;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-estimator:hover {background-color: #d4ebff;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-item {z-index: 1;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-parallel-item:only-child::after {width: 0;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-231dab39-45ba-4dd4-8a97-125ed8c822aa div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-231dab39-45ba-4dd4-8a97-125ed8c822aa\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;featureunion&#x27;,\n",
" FeatureUnion(transformer_list=[(&#x27;tfidfvectorizer&#x27;,\n",
" TfidfVectorizer()),\n",
" (&#x27;functiontransformer&#x27;,\n",
" FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;))])),\n",
" (&#x27;sgdclassifier&#x27;, SGDClassifier(random_state=42))])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"97acfb7e-9f7b-4922-a11c-71d194ffcc34\" type=\"checkbox\" ><label for=\"97acfb7e-9f7b-4922-a11c-71d194ffcc34\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;featureunion&#x27;,\n",
" FeatureUnion(transformer_list=[(&#x27;tfidfvectorizer&#x27;,\n",
" TfidfVectorizer()),\n",
" (&#x27;functiontransformer&#x27;,\n",
" FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;))])),\n",
" (&#x27;sgdclassifier&#x27;, SGDClassifier(random_state=42))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"5cdb9ce3-effa-4ef0-b096-5aa73b90de91\" type=\"checkbox\" ><label for=\"5cdb9ce3-effa-4ef0-b096-5aa73b90de91\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">featureunion: FeatureUnion</label><div class=\"sk-toggleable__content\"><pre>FeatureUnion(transformer_list=[(&#x27;tfidfvectorizer&#x27;, TfidfVectorizer()),\n",
" (&#x27;functiontransformer&#x27;,\n",
" FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;))])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><label>tfidfvectorizer</label></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"86a44f47-5102-437c-b66f-ad5a101a6ba4\" type=\"checkbox\" ><label for=\"86a44f47-5102-437c-b66f-ad5a101a6ba4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">TfidfVectorizer</label><div class=\"sk-toggleable__content\"><pre>TfidfVectorizer()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><label>functiontransformer</label></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"c4b0d346-0e99-4a69-9460-876b10773f20\" type=\"checkbox\" ><label for=\"c4b0d346-0e99-4a69-9460-876b10773f20\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">FunctionTransformer</label><div class=\"sk-toggleable__content\"><pre>FunctionTransformer(func=&lt;function get_text_length at 0x000002678C36D700&gt;)</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"4a016713-e72c-4a3f-be72-3323037cb1d5\" type=\"checkbox\" ><label for=\"4a016713-e72c-4a3f-be72-3323037cb1d5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SGDClassifier</label><div class=\"sk-toggleable__content\"><pre>SGDClassifier(random_state=42)</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('featureunion',\n",
" FeatureUnion(transformer_list=[('tfidfvectorizer',\n",
" TfidfVectorizer()),\n",
" ('functiontransformer',\n",
" FunctionTransformer(func=<function get_text_length at 0x000002678C36D700>))])),\n",
" ('sgdclassifier', SGDClassifier(random_state=42))])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "095f358b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.8657573582196698"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Train on the training split, predict the held-out split, and report accuracy.\n",
"y_pred = pipe.fit(X_train, y_train).predict(X_test)\n",
"accuracy_score(y_true=y_test, y_pred=y_pred)"
]
},
{
"cell_type": "markdown",
"id": "fb0d19bd",
"metadata": {},
"source": [
"## ColumnTransformer"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "1976b978",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the Titanic training data and preview the first five rows.\n",
"df = pd.read_csv('titanic_train.csv')\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "b21607e3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PassengerId 0\n",
"Survived 0\n",
"Pclass 0\n",
"Name 0\n",
"Sex 0\n",
"Age 177\n",
"SibSp 0\n",
"Parch 0\n",
"Ticket 0\n",
"Fare 0\n",
"Cabin 687\n",
"Embarked 2\n",
"dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the missing values in every column.\n",
"df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "2743c2c1",
"metadata": {},
"outputs": [],
"source": [
"# Separate the target column from the feature columns.\n",
"y = df['Survived']\n",
"X = df.drop(columns='Survived')\n",
"\n",
"# Hold out 20% of the rows; stratify on the target so both splits keep its class balance.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=0, stratify=y\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "dbfb9f26",
"metadata": {},
"outputs": [],
"source": [
"# Numeric columns: fill missing values with the median, then standardise.\n",
"num_features = ['Age', 'SibSp', 'Parch', 'Fare']\n",
"numeric_transformer = Pipeline(steps=[\n",
"    (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
"    (\"scaler\", StandardScaler()),\n",
"])\n",
"\n",
"# Categorical columns: one-hot encode, with handle_unknown set to \"ignore\".\n",
"cat_features = ['Pclass', 'Sex', 'Embarked']\n",
"categorical_transformer = OneHotEncoder(handle_unknown=\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "ddca5e7e",
"metadata": {},
"outputs": [],
"source": [
"# Send each column group to its own transformer; remainder='drop' discards the other columns.\n",
"preprocessor = ColumnTransformer(\n",
"    remainder='drop',\n",
"    transformers=[\n",
"        (\"numerical transformer\", numeric_transformer, num_features),\n",
"        (\"categorical transformer\", categorical_transformer, cat_features),\n",
"    ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "e2f108f2",
"metadata": {},
"outputs": [],
"source": [
"# Chain the column preprocessing with an SVC left at its default parameters.\n",
"pipe = make_pipeline(preprocessor, SVC())"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "bde4caac",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-43e8648f-2592-42dd-a22b-691b111f99eb {color: black;background-color: white;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb pre{padding: 0;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-toggleable {background-color: white;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-estimator:hover {background-color: #d4ebff;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-item {z-index: 1;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-parallel-item:only-child::after {width: 0;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-43e8648f-2592-42dd-a22b-691b111f99eb div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-43e8648f-2592-42dd-a22b-691b111f99eb\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;numerical transformer&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;,\n",
" &#x27;Fare&#x27;]),\n",
" (&#x27;categorical transformer&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" [&#x27;Pclass&#x27;, &#x27;Sex&#x27;,\n",
" &#x27;Embarked&#x27;])])),\n",
" (&#x27;svc&#x27;, SVC())])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"c6f14350-0c6b-4470-a996-38d16449b516\" type=\"checkbox\" ><label for=\"c6f14350-0c6b-4470-a996-38d16449b516\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;numerical transformer&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;,\n",
" &#x27;Fare&#x27;]),\n",
" (&#x27;categorical transformer&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" [&#x27;Pclass&#x27;, &#x27;Sex&#x27;,\n",
" &#x27;Embarked&#x27;])])),\n",
" (&#x27;svc&#x27;, SVC())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"bbcaba45-0e4e-402b-873a-6092aa113fa7\" type=\"checkbox\" ><label for=\"bbcaba45-0e4e-402b-873a-6092aa113fa7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">columntransformer: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;numerical transformer&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;, StandardScaler())]),\n",
" [&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;, &#x27;Fare&#x27;]),\n",
" (&#x27;categorical transformer&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" [&#x27;Pclass&#x27;, &#x27;Sex&#x27;, &#x27;Embarked&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"fe6bcdb5-01bc-479e-a9a9-17cfc2f94c0d\" type=\"checkbox\" ><label for=\"fe6bcdb5-01bc-479e-a9a9-17cfc2f94c0d\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">numerical transformer</label><div class=\"sk-toggleable__content\"><pre>[&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;, &#x27;Fare&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"730c8910-6a7f-4e04-8854-9dcdf2649ce1\" type=\"checkbox\" ><label for=\"730c8910-6a7f-4e04-8854-9dcdf2649ce1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;median&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"5ac2bd0d-3dc7-4d6f-aa2e-88efd51440ba\" type=\"checkbox\" ><label for=\"5ac2bd0d-3dc7-4d6f-aa2e-88efd51440ba\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">StandardScaler</label><div class=\"sk-toggleable__content\"><pre>StandardScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"2a7409f7-988d-47f0-8507-5ae769d05813\" type=\"checkbox\" ><label for=\"2a7409f7-988d-47f0-8507-5ae769d05813\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">categorical transformer</label><div 
class=\"sk-toggleable__content\"><pre>[&#x27;Pclass&#x27;, &#x27;Sex&#x27;, &#x27;Embarked&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"c6ee8358-a81e-409c-abbc-81b8aa3cd3d9\" type=\"checkbox\" ><label for=\"c6ee8358-a81e-409c-abbc-81b8aa3cd3d9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"083eccc6-c392-4e85-8692-54a84707d7aa\" type=\"checkbox\" ><label for=\"083eccc6-c392-4e85-8692-54a84707d7aa\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('columntransformer',\n",
" ColumnTransformer(transformers=[('numerical transformer',\n",
" Pipeline(steps=[('imputer',\n",
" SimpleImputer(strategy='median')),\n",
" ('scaler',\n",
" StandardScaler())]),\n",
" ['Age', 'SibSp', 'Parch',\n",
" 'Fare']),\n",
" ('categorical transformer',\n",
" OneHotEncoder(handle_unknown='ignore'),\n",
" ['Pclass', 'Sex',\n",
" 'Embarked'])])),\n",
" ('svc', SVC())])"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e45548cb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.8100558659217877"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit on the training split, then evaluate on the held-out split.\n",
"pipe.fit(X_train, y_train)\n",
"y_pred = pipe.predict(X_test)\n",
"# accuracy_score expects (y_true, y_pred): ground truth first, as in the earlier scoring cell.\n",
"accuracy_score(y_test, y_pred)"
]
},
{
"cell_type": "markdown",
"id": "1ae32686",
"metadata": {},
"source": [
"## make_column_transformer"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "fc471b93",
"metadata": {},
"outputs": [],
"source": [
"# Build the preprocessor from (transformer, columns) pairs; no step names are passed here.\n",
"preprocessor = make_column_transformer(\n",
"    (numeric_transformer, num_features),\n",
"    (categorical_transformer, cat_features),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "c9c6ca3b",
"metadata": {},
"outputs": [],
"source": [
"# Chain the helper-built preprocessor with an SVC left at its default parameters.\n",
"pipe = make_pipeline(preprocessor, SVC())"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "0eed1cb2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 {color: black;background-color: white;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 pre{padding: 0;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-toggleable {background-color: white;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-estimator:hover {background-color: #d4ebff;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-item {z-index: 1;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-parallel-item:only-child::after {width: 0;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-8ba09fbd-7c5f-429a-aaa1-5ca252852533\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;pipeline&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;,\n",
" &#x27;Fare&#x27;]),\n",
" (&#x27;onehotencoder&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" [&#x27;Pclass&#x27;, &#x27;Sex&#x27;,\n",
" &#x27;Embarked&#x27;])])),\n",
" (&#x27;svc&#x27;, SVC())])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"70a51e2d-fa6e-4366-90d2-9f75d6334e5c\" type=\"checkbox\" ><label for=\"70a51e2d-fa6e-4366-90d2-9f75d6334e5c\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;pipeline&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;,\n",
" &#x27;Fare&#x27;]),\n",
" (&#x27;onehotencoder&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" [&#x27;Pclass&#x27;, &#x27;Sex&#x27;,\n",
" &#x27;Embarked&#x27;])])),\n",
" (&#x27;svc&#x27;, SVC())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"dfce7075-38e4-4299-93cc-61846212b48d\" type=\"checkbox\" ><label for=\"dfce7075-38e4-4299-93cc-61846212b48d\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">columntransformer: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;pipeline&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;, StandardScaler())]),\n",
" [&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;, &#x27;Fare&#x27;]),\n",
" (&#x27;onehotencoder&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" [&#x27;Pclass&#x27;, &#x27;Sex&#x27;, &#x27;Embarked&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"625428c4-a9e8-4d13-9368-006960e46e16\" type=\"checkbox\" ><label for=\"625428c4-a9e8-4d13-9368-006960e46e16\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">pipeline</label><div class=\"sk-toggleable__content\"><pre>[&#x27;Age&#x27;, &#x27;SibSp&#x27;, &#x27;Parch&#x27;, &#x27;Fare&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"6314900e-99c4-4060-a8ca-cc2cc2b8352e\" type=\"checkbox\" ><label for=\"6314900e-99c4-4060-a8ca-cc2cc2b8352e\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;median&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"2d83a64f-e4da-4790-8494-44a880462cdc\" type=\"checkbox\" ><label for=\"2d83a64f-e4da-4790-8494-44a880462cdc\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">StandardScaler</label><div class=\"sk-toggleable__content\"><pre>StandardScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"4074c4fd-2d52-4ec1-8b15-0a35bb7adbd6\" type=\"checkbox\" ><label for=\"4074c4fd-2d52-4ec1-8b15-0a35bb7adbd6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">onehotencoder</label><div 
class=\"sk-toggleable__content\"><pre>[&#x27;Pclass&#x27;, &#x27;Sex&#x27;, &#x27;Embarked&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"91bea2d4-8523-4b67-a1d3-6c95d5ee861b\" type=\"checkbox\" ><label for=\"91bea2d4-8523-4b67-a1d3-6c95d5ee861b\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"0090d5c7-9a1c-48de-83d6-951d59aba8be\" type=\"checkbox\" ><label for=\"0090d5c7-9a1c-48de-83d6-951d59aba8be\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('columntransformer',\n",
" ColumnTransformer(transformers=[('pipeline',\n",
" Pipeline(steps=[('imputer',\n",
" SimpleImputer(strategy='median')),\n",
" ('scaler',\n",
" StandardScaler())]),\n",
" ['Age', 'SibSp', 'Parch',\n",
" 'Fare']),\n",
" ('onehotencoder',\n",
" OneHotEncoder(handle_unknown='ignore'),\n",
" ['Pclass', 'Sex',\n",
" 'Embarked'])])),\n",
" ('svc', SVC())])"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "c1482a42",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.8100558659217877"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe.fit(X_train, y_train)\n",
"y_pred = pipe.predict(X_test)\n",
"accuracy_score(y_pred, y_test)"
]
},
{
"cell_type": "markdown",
"id": "55aa153e",
"metadata": {},
"source": [
"## make_column_selector"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "dc63facd",
"metadata": {},
"outputs": [],
"source": [
"preprocessor = make_column_transformer(\n",
" (numeric_transformer, make_column_selector(dtype_include=np.number)),\n",
" (categorical_transformer, make_column_selector(dtype_include=\"object\"))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "0c6dee08",
"metadata": {},
"outputs": [],
"source": [
"pipe = make_pipeline(preprocessor, \n",
" SVC())"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "17263712",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae {color: black;background-color: white;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae pre{padding: 0;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-toggleable {background-color: white;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-estimator:hover {background-color: #d4ebff;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-item {z-index: 1;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-parallel-item:only-child::after {width: 0;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-5c086a25-8d96-4007-9dc8-3be92f2455ae div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-5c086a25-8d96-4007-9dc8-3be92f2455ae\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;pipeline&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" &lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C369310&gt;),\n",
" (&#x27;onehotencoder&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" &lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C134E80&gt;)])),\n",
" (&#x27;svc&#x27;, SVC())])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"283bcae6-f76e-4b38-bbc5-b22b0b7169f0\" type=\"checkbox\" ><label for=\"283bcae6-f76e-4b38-bbc5-b22b0b7169f0\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;pipeline&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" &lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C369310&gt;),\n",
" (&#x27;onehotencoder&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" &lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C134E80&gt;)])),\n",
" (&#x27;svc&#x27;, SVC())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"8a13b493-fc33-4561-84ad-8ed6f895d270\" type=\"checkbox\" ><label for=\"8a13b493-fc33-4561-84ad-8ed6f895d270\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">columntransformer: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;pipeline&#x27;,\n",
" Pipeline(steps=[(&#x27;imputer&#x27;,\n",
" SimpleImputer(strategy=&#x27;median&#x27;)),\n",
" (&#x27;scaler&#x27;, StandardScaler())]),\n",
" &lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C369310&gt;),\n",
" (&#x27;onehotencoder&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;),\n",
" &lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C134E80&gt;)])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"b3c8ff65-e689-419c-a924-a12e9dd5e250\" type=\"checkbox\" ><label for=\"b3c8ff65-e689-419c-a924-a12e9dd5e250\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">pipeline</label><div class=\"sk-toggleable__content\"><pre>&lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C369310&gt;</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"71bf3800-c1a0-4447-819f-2f22744e61d5\" type=\"checkbox\" ><label for=\"71bf3800-c1a0-4447-819f-2f22744e61d5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;median&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"a94bc936-bfb8-4a15-bf3c-ee2d3c891e31\" type=\"checkbox\" ><label for=\"a94bc936-bfb8-4a15-bf3c-ee2d3c891e31\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">StandardScaler</label><div class=\"sk-toggleable__content\"><pre>StandardScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"51246ca3-d5f1-4f9e-b071-1d73e7109499\" type=\"checkbox\" ><label for=\"51246ca3-d5f1-4f9e-b071-1d73e7109499\" class=\"sk-toggleable__label 
sk-toggleable__label-arrow\">onehotencoder</label><div class=\"sk-toggleable__content\"><pre>&lt;sklearn.compose._column_transformer.make_column_selector object at 0x000002678C134E80&gt;</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"0a1deb44-5c8e-4126-b09a-fc5e64b1e21c\" type=\"checkbox\" ><label for=\"0a1deb44-5c8e-4126-b09a-fc5e64b1e21c\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"88ff1ac4-7fbe-499e-b0b3-8f3b12679e8b\" type=\"checkbox\" ><label for=\"88ff1ac4-7fbe-499e-b0b3-8f3b12679e8b\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('columntransformer',\n",
" ColumnTransformer(transformers=[('pipeline',\n",
" Pipeline(steps=[('imputer',\n",
" SimpleImputer(strategy='median')),\n",
" ('scaler',\n",
" StandardScaler())]),\n",
" <sklearn.compose._column_transformer.make_column_selector object at 0x000002678C369310>),\n",
" ('onehotencoder',\n",
" OneHotEncoder(handle_unknown='ignore'),\n",
" <sklearn.compose._column_transformer.make_column_selector object at 0x000002678C134E80>)])),\n",
" ('svc', SVC())])"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "33614385",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.8100558659217877"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe.fit(X_train, y_train)\n",
"y_pred = pipe.predict(X_test)\n",
"accuracy_score(y_pred, y_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment