Skip to content

Instantly share code, notes, and snippets.

@nelvadas
Created March 14, 2024 18:03
Show Gist options
  • Save nelvadas/57240b26573b439ceeeffa38415b17f4 to your computer and use it in GitHub Desktop.
Save nelvadas/57240b26573b439ceeeffa38415b17f4 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f93ce335",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd \n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4a516c8d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>BodyMassIndex</th>\n",
" <th>Childrens</th>\n",
" <th>Smoke</th>\n",
" <th>Department</th>\n",
" <th>InsurancePremium</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18</td>\n",
" <td>1.0</td>\n",
" <td>33.770</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1725.55230</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>28</td>\n",
" <td>1.0</td>\n",
" <td>33.000</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>4449.46200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>33</td>\n",
" <td>1.0</td>\n",
" <td>22.705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>21984.47061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>32</td>\n",
" <td>1.0</td>\n",
" <td>28.880</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>3866.85520</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>31</td>\n",
" <td>0.0</td>\n",
" <td>25.740</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>3756.62160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1332</th>\n",
" <td>50</td>\n",
" <td>1.0</td>\n",
" <td>30.970</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>10600.54830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1333</th>\n",
" <td>18</td>\n",
" <td>0.0</td>\n",
" <td>31.920</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>2205.98080</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1334</th>\n",
" <td>18</td>\n",
" <td>0.0</td>\n",
" <td>36.850</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1629.83350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1335</th>\n",
" <td>21</td>\n",
" <td>0.0</td>\n",
" <td>25.800</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2007.94500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1336</th>\n",
" <td>61</td>\n",
" <td>0.0</td>\n",
" <td>29.070</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>29141.36030</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1337 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" Age Gender BodyMassIndex Childrens Smoke Department \\\n",
"0 18 1.0 33.770 1.0 0.0 1.0 \n",
"1 28 1.0 33.000 3.0 0.0 1.0 \n",
"2 33 1.0 22.705 0.0 0.0 3.0 \n",
"3 32 1.0 28.880 0.0 0.0 3.0 \n",
"4 31 0.0 25.740 0.0 0.0 1.0 \n",
"... ... ... ... ... ... ... \n",
"1332 50 1.0 30.970 3.0 0.0 3.0 \n",
"1333 18 0.0 31.920 0.0 0.0 2.0 \n",
"1334 18 0.0 36.850 0.0 0.0 1.0 \n",
"1335 21 0.0 25.800 0.0 0.0 0.0 \n",
"1336 61 0.0 29.070 0.0 1.0 3.0 \n",
"\n",
" InsurancePremium \n",
"0 1725.55230 \n",
"1 4449.46200 \n",
"2 21984.47061 \n",
"3 3866.85520 \n",
"4 3756.62160 \n",
"... ... \n",
"1332 10600.54830 \n",
"1333 2205.98080 \n",
"1334 1629.83350 \n",
"1335 2007.94500 \n",
"1336 29141.36030 \n",
"\n",
"[1337 rows x 7 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
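"# NOTE(review): read_csv takes the first line of the file as the header by default, and df.columns then overwrites it.\n",
"# If insurance.csv has no header line, its first record is silently dropped -- confirm, and pass header=None, names=headers if so.\n",
"headers=[\"Age\",\"Gender\",\"BodyMassIndex\",\"Childrens\",\"Smoke\",\"Department\",\"InsurancePremium\"]\n",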
"df = pd.read_csv(\"../data/insurance.csv\")\n",
"df.columns=headers\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "54ff43c5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Age Gender BodyMassIndex Childrens Smoke Department\n",
"0 18 1.0 33.770 1.0 0.0 1.0\n",
"1 28 1.0 33.000 3.0 0.0 1.0\n",
"2 33 1.0 22.705 0.0 0.0 3.0\n",
"3 32 1.0 28.880 0.0 0.0 3.0\n",
"4 31 0.0 25.740 0.0 0.0 1.0\n",
"... ... ... ... ... ... ...\n",
"1332 50 1.0 30.970 3.0 0.0 3.0\n",
"1333 18 0.0 31.920 0.0 0.0 2.0\n",
"1334 18 0.0 36.850 0.0 0.0 1.0\n",
"1335 21 0.0 25.800 0.0 0.0 0.0\n",
"1336 61 0.0 29.070 0.0 1.0 3.0\n",
"\n",
"[1337 rows x 6 columns]\n",
"0 1725.55230\n",
"1 4449.46200\n",
"2 21984.47061\n",
"3 3866.85520\n",
"4 3756.62160\n",
" ... \n",
"1332 10600.54830\n",
"1333 2205.98080\n",
"1334 1629.83350\n",
"1335 2007.94500\n",
"1336 29141.36030\n",
"Name: InsurancePremium, Length: 1337, dtype: float64\n"
]
}
],
"source": [
"# Feature matrix X (every column except the last) and target Y (InsurancePremium)\n",
"\n",
"X = df[headers[:-1]]\n",
"Y = df['InsurancePremium']\n",
"print(X)\n",
"print(Y)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "35e32b65",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
],
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
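"# Train/test split and linear regression model\n",
"# NOTE(review): test_size=0.95 holds out 95% of the rows, so the model is fit on only ~5% of the data -- confirm this is intended (a value such as 0.2 is more usual)\n",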
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"\n",
"X_train, X_test, Y_train,Y_test= train_test_split(X,Y,test_size=0.95,random_state=77)\n",
"\n",
"insurancePriceModel = LinearRegression() \n",
"insurancePriceModel.fit(X_train,Y_train)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "cf4fd7cc",
"metadata": {},
"outputs": [],
"source": [
"# Predict premiums for the held-out test rows\n",
"Y_pred = insurancePriceModel.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "51df0f70",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"27 2775.19215\n",
"38 48173.36100\n",
"441 1149.39590\n",
"1067 14349.85440\n",
"1309 6940.90985\n",
" ... \n",
"1122 18903.49141\n",
"1217 41661.60200\n",
"737 36189.10170\n",
"1316 1163.46270\n",
"287 47896.79135\n",
"Name: InsurancePremium, Length: 1271, dtype: float64\n",
"[ 612.8220659 33939.39784935 4227.92465756 ... 26509.33288263\n",
" 6622.85779678 36819.5920642 ]\n"
]
}
],
"source": [
"print(Y_test)\n",
"print(Y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f5513fbf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean Squared Root (RMSE): 6670.751445826781\n",
"R-squared (R2): 0.699989995369985\n"
]
}
],
"source": [
"# Model evaluation: R2 and RMSE (root mean squared error, i.e. the square root of the MSE)\n",
"\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"\n",
"mse = mean_squared_error(Y_test, Y_pred)\n",
"r2 = r2_score(Y_test, Y_pred)\n",
"\n",
"print(f\"Mean Squared Root (RMSE): {np.sqrt(mse)}\")\n",
"print(f\"R-squared (R2): {r2}\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8d29a94a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>BodyMassIndex</th>\n",
" <th>Childrens</th>\n",
" <th>Smoke</th>\n",
" <th>Department</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>25</td>\n",
" <td>1.0</td>\n",
" <td>33</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Gender BodyMassIndex Childrens Smoke Department\n",
"0 25 1.0 33 0 0 3"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build a one-row DataFrame with the same feature columns as X\n",
"items = {\"Age\": [25],\"Gender\":[1.0],\"BodyMassIndex\":[33],\"Childrens\":[0],\"Smoke\":[0],\"Department\":[3] }\n",
"X_item= pd.DataFrame(items)\n",
"X_item"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f260fae8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3291.38988939]\n"
]
}
],
"source": [
"# Predict the premium for the single-row DataFrame X_item\n",
"Y_item = insurancePriceModel.predict(X_item)\n",
"print(Y_item)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "62d41c6f",
"metadata": {},
"outputs": [],
"source": [
"# Export the fitted model to PMML\n",
"\n",
"from sklearn2pmml import PMMLPipeline, sklearn2pmml\n",
"\n",
"\n",
"# Create a PMMLPipeline with your Insurance model\n",
"pipeline = PMMLPipeline([\n",
" (\"regressor\", insurancePriceModel)\n",
"])\n",
"\n",
"# Export the model to a PMML file\n",
"sklearn2pmml(pipeline, \"insurancePriceModel.pmml\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment