Skip to content

Instantly share code, notes, and snippets.

@fhiyo
Last active February 16, 2018 09:58
Show Gist options
  • Save fhiyo/58efcfd245c238e00d0cd6eace911893 to your computer and use it in GitHub Desktop.
Save fhiyo/58efcfd245c238e00d0cd6eace911893 to your computer and use it in GitHub Desktop.
KaggleのTitanicチュートリアルを試してみた結果のjupyter notebook
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# titanicのチュートリアル問題\n",
"\n",
"18/01/21\n",
"今回はfeature engineeringではなく,モデル構築を色々やっていく."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- [Titanic Top 4% with ensemble modeling | Kaggle](https://www.kaggle.com/yassineghouzam/titanic-top-4-with-ensemble-modeling)\n",
"\n",
"を参考にしながら書いている."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"from collections import Counter\n",
"\n",
"sns.set(style='white', context='notebook', palette='deep')\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"source": [
"# modelingの関数をimport\n",
"\n",
"import xgboost as xgb\n",
"\n",
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier, VotingClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neural_network import MLPClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold, learning_curve"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load training and test data\n",
"train_df = pd.read_csv(\"../input/train.csv\", header=0)\n",
"test_df = pd.read_csv(\"../input/test.csv\", header=0)\n",
"# trainとtestで同じ処理するのが煩わしくなったからまとめて,学習のときに分ける\n",
"test_df_ids = test_df.PassengerId\n",
"train_count = len(train_df)\n",
"dataset_df = pd.concat(objs=[train_df, test_df], axis=0).reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" <th>Fare</th>\n",
" <th>Name</th>\n",
" <th>Parch</th>\n",
" <th>PassengerId</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>SibSp</th>\n",
" <th>Survived</th>\n",
" <th>Ticket</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>22.0</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>7.2500</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>A/5 21171</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>38.0</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" <td>71.2833</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>PC 17599</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>26.0</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>7.9250</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>35.0</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" <td>53.1000</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>113803</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>35.0</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>8.0500</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>373450</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Cabin Embarked Fare \\\n",
"0 22.0 NaN S 7.2500 \n",
"1 38.0 C85 C 71.2833 \n",
"2 26.0 NaN S 7.9250 \n",
"3 35.0 C123 S 53.1000 \n",
"4 35.0 NaN S 8.0500 \n",
"\n",
" Name Parch PassengerId \\\n",
"0 Braund, Mr. Owen Harris 0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... 0 2 \n",
"2 Heikkinen, Miss. Laina 0 3 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 0 4 \n",
"4 Allen, Mr. William Henry 0 5 \n",
"\n",
" Pclass Sex SibSp Survived Ticket \n",
"0 3 male 1 0.0 A/5 21171 \n",
"1 1 female 1 1.0 PC 17599 \n",
"2 3 female 0 1.0 STON/O2. 3101282 \n",
"3 1 female 1 1.0 113803 \n",
"4 3 male 0 0.0 373450 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Age 263\n",
"Cabin 1014\n",
"Embarked 2\n",
"Fare 1\n",
"Name 0\n",
"Parch 0\n",
"PassengerId 0\n",
"Pclass 0\n",
"Sex 0\n",
"SibSp 0\n",
"Survived 418\n",
"Ticket 0\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 欠損値が各属性にどれだけあるかを見る.\n",
"# Survivedが欠損しているのはtest_df由来のデータ.\n",
"dataset_df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>891.000000</td>\n",
" <td>891.000000</td>\n",
" <td>891.000000</td>\n",
" <td>714.000000</td>\n",
" <td>891.000000</td>\n",
" <td>891.000000</td>\n",
" <td>891.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>446.000000</td>\n",
" <td>0.383838</td>\n",
" <td>2.308642</td>\n",
" <td>29.699118</td>\n",
" <td>0.523008</td>\n",
" <td>0.381594</td>\n",
" <td>32.204208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>257.353842</td>\n",
" <td>0.486592</td>\n",
" <td>0.836071</td>\n",
" <td>14.526497</td>\n",
" <td>1.102743</td>\n",
" <td>0.806057</td>\n",
" <td>49.693429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.420000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>223.500000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>20.125000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>7.910400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>446.000000</td>\n",
" <td>0.000000</td>\n",
" <td>3.000000</td>\n",
" <td>28.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>14.454200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>668.500000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>38.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>31.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>891.000000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>80.000000</td>\n",
" <td>8.000000</td>\n",
" <td>6.000000</td>\n",
" <td>512.329200</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass Age SibSp \\\n",
"count 891.000000 891.000000 891.000000 714.000000 891.000000 \n",
"mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n",
"std 257.353842 0.486592 0.836071 14.526497 1.102743 \n",
"min 1.000000 0.000000 1.000000 0.420000 0.000000 \n",
"25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n",
"50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n",
"75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n",
"max 891.000000 1.000000 3.000000 80.000000 8.000000 \n",
"\n",
" Parch Fare \n",
"count 891.000000 891.000000 \n",
"mean 0.381594 32.204208 \n",
"std 0.806057 49.693429 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 7.910400 \n",
"50% 0.000000 14.454200 \n",
"75% 0.000000 31.000000 \n",
"max 6.000000 512.329200 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"### Summarize data\n",
"# Summary statistics\n",
"train_df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# feature analysisは後回し."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Encode \"Sex\" as a binary indicator (female = 0, male = 1).\n",
"sex_codes = {\"female\": 0, \"male\": 1}\n",
"dataset_df[\"Sex\"] = dataset_df[\"Sex\"].map(sex_codes).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Mr\n",
"1 Mrs\n",
"2 Miss\n",
"3 Mrs\n",
"4 Mr\n",
"Name: Title, dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# As before, the honorific (title) will be used to estimate missing Age values.\n",
"# Names in the data look like \"Surname, Title. Given names\", so the title is pulled\n",
"# out by following that pattern.\n",
"def extract_title(full_name):\n",
"    \"\"\"Take the second comma-separated field and keep what precedes its first period, stripped.\"\"\"\n",
"    after_surname = full_name.split(\",\")[1]\n",
"    return after_surname.split(\".\")[0].strip()\n",
"\n",
"\n",
"dataset_df_title = list(map(extract_title, dataset_df[\"Name\"]))\n",
"dataset_df[\"Title\"] = pd.Series(dataset_df_title)\n",
"dataset_df[\"Title\"].head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x111766828>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot how often each title occurs, tilting the tick labels by 45 degrees.\n",
"g = sns.countplot(x=\"Title\", data=dataset_df)\n",
"title_tick_labels = g.get_xticklabels()\n",
"g = plt.setp(title_tick_labels, rotation=45)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# 敬称をカテゴリデータに変換する.本当はonehot-vectorの形にしたほうがよさそう\n",
"dataset_df[\"Title\"] = dataset_df[\"Title\"].replace(['Lady', 'the Countess','Countess','Capt', 'Col','Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')\n",
"dataset_df[\"Title\"] = dataset_df[\"Title\"].map({\"Master\":0, \"Miss\":1, \"Mme\":1, \"Mlle\":1, \"Ms\" : 2 , \"Mrs\":2, \"Mr\":3, \"Rare\":4})\n",
"dataset_df[\"Title\"] = dataset_df[\"Title\"].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEFCAYAAADuT+DpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFRdJREFUeJzt3XuQXGWZx/HvTCCgkOCiaHABQdx9LF2Lu4kIZMBguKhRLFcEV4mFLGtQULfwFougsrUKYgHCwnKLl4ByUVQEjCUkhnBV2RUKeJC7KJQEDImwEJPM/nHOmM4wGXpCTncm7/dTleL0e053P30407/znsvbPf39/UiSytXb7QIkSd1lEEhS4QwCSSqcQSBJhduo2wWMVERsAuwBPAqs6HI5kjRajAG2Bm7NzOdaZ4y6IKAKgQXdLkKSRqm9getbG0ZjEDwKMGfOHCZMmNDtWiRpVHjsscc4/PDDof4ObTUag2AFwIQJE9hmm226XYskjTbPO6TuyWJJKpxBIEmFMwgkqXAGgSQVziCQpMIZBJJUOINAkgpnEEhS4UbjDWWS1tJ/fOHSbpfQiM+f9L5ulzCq2SOQpMIZBJJUOINAkgpnEEhS4QwCSSqcQSBJhTMIJKlwBoEkFc4gkKTCGQSSVDiDQJIKZxBIUuEaG3QuIo4AjqgfbgrsDPQBpwHLgbmZeWJE9AJnATsBzwFHZua9TdUlSVpdY0GQmbOB2QARcSZwAXA28F7gfuCnEbELsAOwaWa+JSImAV8HpjVVlyRpdY0fGoqI3YE3At8DNsnM+zKzH/gZMAXYC7gGIDNvAnZvuiZJ0iqdOEfweeBEYDywpKV9KbBF3f5US/uKiPB3EiSpQxoNgoh4GRCZeR1VCIxrmT0OWDxEe29mLm+yLknSKk33CPYBfgGQmUuAZRGxY0T0AFOBBcBC4CCA+hzB7Q3XJElq0fQhmKA6MTzgaGAOMIbqqqGbI+JWYP+IuAHoAaY3XJMkqUWjQZCZJw96fBMwaVDbSqqAkCR1gTeUSVLhDAJJKpxBIEmFMwgkqXAGgSQVziCQpMIZBJJUOINAkgpnEEhS4QwCSSqcQSBJhTMIJKlwBoEkFc4gkKTCGQSSVDiDQJIKZxBIUuEMAkkqnEEgSYVr9DeLI+JzwLuAscBZwHxgNtAP3AHMyMyVEXECcDCwHDguM29psi5J0iqN9Qgiog/YE3grMBnYFjgVmJmZewM9wLSI2LWePxE4FDizqZokSc/X5KGhqcDtwA+BnwBXArtR9QoArgamAHsBczOzPzMfBjaKiK0arEuS1KLJQ0OvAF4DvAPYAfgx0JuZ/fX8pcAWwHjgiZbnDbQ/3mBtkqRak0HwBHB3Zi4DMiKepTo8NGAcsBhYUk8PbpckdUCTh4auBw6IiJ6IeDWwGfCL+twBwIHAAmAhMDUieiNiO6pew6IG65IktWisR5CZV0bEPsAtVIEzA3gAODcixgJ3AZdl5oqIWADc2LKcJKlDGr18NDOPH6J58hDLzQJmNVmLJGlo3lAmSYUzCCSpcAaBJBXOIJCkwhkEklQ4g0CSCmcQSFLhDAJJKpxBIEmFMwgkqXAGgSQVziCQpMIZBJJUOINAkgpnEEhS4QwCSSqcQSBJhTMIJKlwBoEkFa7R3yyOiN8AS+qHDwDnAKcBy4G5mXliRPQCZwE7Ac8BR2bmvU3WJUlapbEgiIhNgZ7M7Gtp+x/gvcD9wE8jYhdgB2DTzHxLREwCvg5Ma6ouSdLqmuwR7AS8NCLm1u8zC9gkM+8DiIifAVOArYFrADLzpojYvcGaJEmDNHmO4BngFGAqcDRwYd02YCmwBTAeeKqlfUVENHrISpK0SpNfuPcA92ZmP3BPRDwFbNkyfxywGHhpPT2gNzOXN1iXJKlFkz2Cj1Ad7yciXk31hf90ROwYET1UPYUFwELgoHq5ScDtDdYkSRqkyR7B+cDsiLge6KcKhp
XAHGAM1VVDN0fErcD+EXED0ANMb7AmSdIgjQVBZi4DDhti1qRBy62kOocgSeoCbyiTpMIZBJJUOINAkgpnEEhS4QwCSSqcQSBJhTMIJKlwBoEkFc4gkKTCGQSSVDiDQJIKZxBIUuHaCoKIOGOItm+t+3IkSZ027OijEXEe8Fpg94h4Y8usjal+XUySNMq90DDUXwG2B04DTmxpXw7c1VBNkqQOGjYIMvNB4EFgp4gYT9UL6Klnbw482WRxkqTmtfXDNBHxOeBzwBMtzf1Uh40kSaNYu79QdiSwY2Y+3mQxkqTOa/fy0YfxMJAkbZDa7RH8Drg+Iq4Dnh1ozMwvNVKVJKlj2g2CP9T/YNXJ4hcUEa8Efg3sT3Wl0Wyqcwt3ADMyc2VEnAAcXM8/LjNvaff1JUkvXltBkJknvvBSq4uIjYFzgP+rm04FZmbmvIg4G5gWEQ8Bk4GJwLbA5cAeI30vSdLaa/eqoZVUe/Kt/piZ2w7ztFOAs6muNgLYDZhfT18NvB1IYG5m9gMPR8RGEbGVJ6UlqXPaOlmcmb2ZOSYzxwCbAocCl65p+Yg4Ang8M3/W0txTf+EDLKW6J2E88FTLMgPtkqQOafccwd9k5l+BSyPiC8Ms9hGgPyKmADsD3wZe2TJ/HLAYWFJPD26XJHVIu4eGPtTysAd4I7BsTctn5j4tz50HHA2cHBF9mTkPOBC4DrgX+FpEnAJsA/Rm5qIRfgZJ0ovQbo9g35bpfmAR8P4RvtengXMjYizVOEWXZeaKiFgA3Eh1mGrGCF9TkvQitXvV0PT6KqCon3NHZi5v87l9LQ8nDzF/FjCrndeSJK177f4ewW5UN5V9C7iQ6gqfiU0WJknqjHYPDZ0OvD8zbwaIiEnAGcCbmypMktQZ7Y41tPlACABk5k1Ul5FKkka5doPgyYiYNvAgIt7N6kNSS5JGqXYPDR0FXBkR51NdPtoP7NlYVZKkjmm3R3Ag8AzwGqpLSR8H+hqqSZLUQe0GwVHAWzPz6cz8LdW4QR9vrixJUqe0GwQbs/qdxMt4/iB0kqRRqN1zBFcA10bEJfXjQ4AfNVOSJKmT2h199DNU9xIE1Q/Wn56ZX2yyMElSZ7Q9+mhmXgZc1mAtkqQuaPccgSRpA2UQSFLhDAJJKpxBIEmFMwgkqXAGgSQVziCQpMIZBJJUuLZvKBupiBgDnEt1N3I/cDTwLDC7fnwHMCMzV0bECcDBwHLguMy8pam6JEmra7JH8E6AzHwrMBM4CTgVmJmZe1P9rsG0iNiV6kftJwKHAmc2WJMkaZDGgiAzr6Aavhqq3zFYTDV89fy67WpgCrAXMDcz+zPzYWCjiNiqqbokSatr9BxBZi6PiG9R/dD9HKAnMweGr14KbAGMB55qedpAuySpAxo/WZyZHwb+kep8wUtaZo2j6iUsqacHt0uSOqCxIIiIf4mIz9UPnwFWAr+KiL667UBgAbAQmBoRvRGxHdCbmYuaqkuStLrGrhoCfgBcGBG/pPqFs+OAu4BzI2JsPX1ZZq6IiAXAjVTBNKPBmiRJgzQWBJn5NPDPQ8yaPMSys4BZTdUiSVozbyiTpMIZBJJUOINAkgpnEEhS4QwCSSpck5ePqouOuPDYbpfQiNnTT+t2CdIGxx6BJBXOIJCkwhkEklQ4g0CSCmcQSFLhDAJJKpxBIEmFMwgkqXAGgSQVziCQpMIZBJJUOINAkgpnEEhS4RoZfTQiNgYuALYHNgG+AtwJzAb6gTuAGZm5MiJOAA4GlgPHZeYtTdQkSRpaUz2CDwJPZObewAHAN4FTgZl1Ww8wLSJ2pfox+4nAocCZDdUjSVqDpoLgUuCL9XQP1d7+bsD8uu1qYAqwFzA3M/sz82Fgo4jYqqGaJElDaOTQUGb+BSAixgGXATOBUzKzv15kKbAFMB54ouWpA+2PN1GXynTVh6Z3u4RGHPTtC7tdgjYQjZ0sjohtgeuA72TmRcDKltnjgMXAknp6cLskqUMaCYKIeBUwF/
hMZl5QN98WEX319IHAAmAhMDUieiNiO6A3Mxc1UZMkaWhN/Wbx54G/A74YEQPnCo4FTo+IscBdwGWZuSIiFgA3UoXSjIbqkSStQVPnCI6l+uIfbPIQy84CZjVRhyTphXlDmSQVziCQpMIZBJJUOINAkgpnEEhS4QwCSSqcQSBJhTMIJKlwBoEkFc4gkKTCGQSSVDiDQJIKZxBIUuEMAkkqnEEgSYUzCCSpcAaBJBXOIJCkwhkEklQ4g0CSCtfIj9cPiIiJwFczsy8iXgfMBvqBO4AZmbkyIk4ADgaWA8dl5i1N1iRJWl1jPYKIOB44D9i0bjoVmJmZewM9wLSI2BWYDEwEDgXObKoeSdLQmjw0dB9wSMvj3YD59fTVwBRgL2BuZvZn5sPARhGxVYM1SZIGaSwIMvNy4K8tTT2Z2V9PLwW2AMYDT7UsM9AuSeqQTp4sXtkyPQ5YDCyppwe3S5I6pJNBcFtE9NXTBwILgIXA1IjojYjtgN7MXNTBmiSpeI1eNTTIp4FzI2IscBdwWWauiIgFwI1UoTSjg/VIkmg4CDLzQWBSPX0P1RVCg5eZBcxqsg5J0pp5Q5kkFc4gkKTCGQSSVDiDQJIKZxBIUuE6efloRxx2/Jxul7DOXfS1w7tdgqQNmD0CSSqcQSBJhTMIJKlwBoEkFc4gkKTCGQSSVDiDQJIKZxBIUuE2uBvKJKkdv7xyVrdLaMQ+75g14ufYI5CkwhkEklQ4g0CSCmcQSFLh1ouTxRHRC5wF7AQ8BxyZmfd2typJKsP60iN4N7BpZr4F+Czw9S7XI0nFWC96BMBewDUAmXlTROw+zLJjAB577LEhZz73zOJ1Xly3PfLIIyN+zrOLn2mgku5bm3Xx5HPPNlBJ963NuvjL039uoJLuW5t1sejJvzRQSfetaV20fGeOGTyvp7+/v8GS2hMR5wGXZ+bV9eOHgddm5vIhlt0LWNDhEiVpQ7F3Zl7f2rC+9AiWAONaHvcOFQK1W4G9gUeBFU0XJkkbiDHA1lTfoatZX4JgIfBO4JKImATcvqYFM/M54Po1zZckrdF9QzWuL0HwQ2D/iLgB6AGmd7keSSrGenGOQJLUPevL5aOSpC4xCCSpcAaBJBVufTlZ3DUR0QdcB3wgM7/X0v5b4DeZecQIXuuYzPzmOi/yhd+3j2E+AzA+Mw8Z4WvuSnXH93b1f19VX7E1MO/XwL6ZOe9F1N3WewDbA68Hzga+l5mT1vY926ipj7XYHiLiNOA0qqsyzsnMo1vmnQ68KzO3b6rubluXf0ejSf25LwHuBPqB8cD9wOGZuayLpY2IPYLK3cChAw8i4k3AZmvxOjPXWUUjt8bPMNIQqL0DuLKefhQ4sGXe4VQb+4vVifdYG2uzPbw2M+8HngD2iYiN6ueOAfZoqtD1zLr6Oxptrs3MvszcNzN3A/4KvKvbRY1E8T2C2v8CERFbZOZTwAeBOcB2EXEMcAjVBr0IeA/VHuqFwHKqMD0M+BCwZUScBRxLtff6D/X8mZk5LyLuAO4BlmXmoaxbw32GxzJzQkR8DPgwsBK4NTM/ERGHAJ+h2nj/CByamSuB3YEv1699MfAB4Ip6gMBdqW9KiYgjqO4BeQnVzSqnAdOAfwL+PTN/FBHvAz5FdQPg9Zn52fp123qPoUTEZOCk+jXvA/41M/+6dqvueYZblxcCr6s/72mZ+Z2IeANwV/3c5cA8YH/gauDtwM+ptg8iYh7wJ2BLYAZwAS3bUWb+fh19hm4Ybr09RBUUd2bmJ7tZZJMiYizV38Gf6xETtq0f/zgzZ0bEbODl9b+DgeOpbpAdA5yamZd2o257BKtcDhwSET3Am4EbqNbPy4EpmTmRKjj3oPojvwWYApwAbJGZJwFPZubHgCOBRZm5D9WX4pn1e2wOfLmBEBjuM7SaDhxTD+53V73X+gHg5Mzci2rvfHxEvAr4U2YOXFt8C/D6iNgM2I/qEECrcZl5EPBV4N+ogvMoYHpEbAmcCLytfo
+/j4j91+I9/qb+fOcCh2TmZOAPwBHtr6a2DLUuxwH71J/vAFbd2d7aswG4iFV7xodRfRm2ujgzp1BtP6ttR+v4M3TDmrbBbamCbkMMgf0iYl5E3El1KPaHVDsnN2XmVKr1cHTL8tdm5p7AJGCH+u9iX+ALEfGyDtcOGAStBv5492HVWEYrgWXAxRFxPrANsDFwPrCYaqC8Y6j26Fq9CTio3vu7HNgoIl5Rz8sOf4ZW04EZETEfeA3VzXufotqQ5wN7Un3mg4GrBj33R1Shdhjw3UHzbqv/uxi4q/5y/zOwKdXe81bAVfX6eAOw41q8R6utqPayLqlf8+3151mXhlqXS4HjgP8Gvg9sUrfvSXV3/ICFwC4RMbDn99Cg1x7YBl5oOxqN1rQNLsrMJ7pTUuOuzcw+qj37ZcADwJPAHhExB/gGq7YVWPX//03AbvU2fA3Vd8v2nSl5dQZBrT6+uxnwCVZ9CY0H3p2Z7wc+TrW+eqi+rBZk5tuAS6kOrVDPg6oLfHG9cRxYL/NkPW9lhz9Dq48CR9d70btQfYEdBcyq23qoDn3tD8wd9NyLqA5vbF2/T6vh7kp8APg9sH+9Ps4AblqL92i1CHgEmFa/5knAtcMsP2JrWJdbA7tl5nuoguxrEbEVsCQzV7Q8t58q5P4LuGKIlx/YBta0HY1aw2yDjW3364s66D4InAd8ElicmYdTDav/0rqXBKvWxd3AdfU2vB/VSechh4BomkGwuu8D22bmPfXj5cDTEbGQ6jjvo8CrgV8BX4qIa6m6fGfUy98ZEd8FzqE6zDGfqmv8UH3cvRufodXtwIK67j8BN1MdmrgyIn4BTKDaMxmbmauN0ZuZd1Ptif9kJMVk5uPAqcD8iLiZKhjvfzHvUa/LY4Gf1sOSfAy4YyR1tWnwunwMmFC/58+BU6gC7ZohnjuH6oThcMd817QdjXbDbYMbtMy8Ezid6hzZARHxS6odgt9RfXe0+gnwl4hYQHWFXH9mLu1kvQMcYkKSCmePQJIKZxBIUuEMAkkqnEEgSYUzCCSpcA4xIbUhIs4E3gqMpbpJ7s561jlUl/2dXQ8/MSszH4qIB4G+zHywC+VKI2IQSG3IzBkAEbE9MC8zdx5isX2phtOQRhWDQHoRImJWPfks1Q1DV0XE3i3zxwAnA31UA4vNzsxvdLhMaVieI5DWgcz8T6rRWw8aNKbOR+v5u1INPjatNSik9YE9AqlZU4CdI2K/+vHmVIONDTUooNQVBoHUrDHA8Zn5A4B6FNqnu1uStDoPDUnrznKev3N1LfDRiNg4IjYHrgcmdrwyaRj2CKR150qqk8VTW9oGfqnuNqq/twtfzO88S01w9FFJKpyHhiSpcAaBJBXOIJCkwhkEklQ4g0CSCmcQSFLhDAJJKtz/AwaQAyuo0lj3AAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1125d4fd0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count passengers per encoded title and label each bar with the titles that code covers.\n",
"g = sns.countplot(x=dataset_df[\"Title\"])\n",
"g = g.set_xticklabels([\"Master\",\"Miss/Mme/Mlle\", \"Ms/Mrs\",\"Mr\",\"Rare\"])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAAEYCAYAAACHjumMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFIpJREFUeJzt3XmUXGWZx/FvdychkRAx4LiCwKiPjEJQEIODSmhwQTHIMCMSNwaCqIAHPKOojIIKztFhcFpFXMEFUVw4JKi4sBsMMq4wyINRQBlFRXbopEm6549brUXT6b7p7rerq/h+zsnpukvdem5S/ct7l/e9XUNDQ0hSCd2tLkBS5zJgJBVjwEgqxoCRVMysVhdQR0TMAp4I3JKZ61tdj6R62iJgqMLlxosuuqjVdUgaXddoMz1EklSMASOpGANGUjEGjKRiDBhJxRgwkooxYCQVY8BIKsaAkVSMASOpGANG066vr4/e3l76+vpaXYoKM2A0rfr7+1mxYgUAK1eupL+/v8UVqSQDRtNqYGCA4WFaBwcHGRgYaHFFKsmAkVSMASOpGANGUjEGjKRiDBhJxRgwkooxYCQVY8BIKsaAkVSMASOpGANGUjEGjKRiij3ZMSK6gdOBRcA64PDMXNO0/K3AIcAgcEpmnleqFkmtUbIFcwAwNzP3AI4HTh1eEBFbAm8B9gBeCHy4YB2SWqRkwOwJXAiQmauB3ZqW3QfcDGze+DNYsA5JLVIyYBYAdzVNb4iI5kOy3wHXAT8BHNpM6kDFzsEAdwNbNE13Z+b6xuuXAI8Dtm9MfyciVmXmjwrWo0n61msPnfQ2+jdseND09990NPN6eia8vf0+f+ZkS1JBJVswq4D9ACJiMXBN07I7gH5gXWauBe4EtixYi6QWKNmCOQ/YNyKuBLqAQyPiOGBNZq6IiH2A1RExCPwA+F7BWiS1QLGAycxB4MgRs69vWv4e4D2lPl9S63mjnaRiDJg25vOFNNMZMG3K5wupHRgwbcrnC6kdGDCSijFgJBVjwEgqxoCRVIwBI6kYA0ZSMQaMpGIMGEnFGDCSinnYB4z9eaRyHtYBY38eqayHdcDYn0cq62EdMJLKMmA0rXq6uv76umvEtDqPAaNpNae7m102nw/Aos3nM6fbr2AnKznotzSq3i0X0rvlwlaXoWlgwLTA6898y6S3sWHd+gdNH3XOO+nZbOL/nGcd+t+TLUl6CNunkooxYCQVY8BIKsaAkVSMASOpGANGUjEGjKRiDBhJxRgwkooxYCQVY8BIKsaAkVSMASOpGANGUjEGjKRiDBhJxRgwkooxYNpUV3fTYNldI6alGcKAaVPds3uY/9RqXNv5T1lI9+yeFlckPZRj8raxR+3+eB61++NbXYa0UeMGTERcC3wO+EJm3lq+JEmdos4h0kuBucAlEfHNiDgoImYXrktSBxg3YDLz5sx8X2buCHwaOA34Q0R8OCK2Kl6hpLZV5xBpPnAQ8BrgCcDHgS8DLwa+A+y2kfd1A6cDi4B1wOGZuaZp+UuA91A9QfTHwJszc2gyOyNpZqlziHQj8ALgpMx8Wmaekpm/oQqa343xvgOAuZm5B3A8cOrwgojYAvgQ8LLMfA5wE7D1xHZB0kxV5yrSYZm5onlGRByYmd8AXjHG+/YELgTIzNUR0dzSeS5wDXBqROwAfDoz/7xppUua6TYaMBHxSmAz4L0RsWXTotnAO4BvjLPtBcBdTdMbImJWZq6naq0sAXYB7gWuiIgfZuYNE9gHSTPUWC2YBVQtjS2owmDYeuBdNbZ9d+O9w7ob4QLwF+Dq4cveEXE5VdgYMFIH2WjAZOangE9FRG9mXjSBba8C9gfOjYjFVIdEw34CPCMitgbuBBYDn5rAZ0iawcY6RPpkZh4BnBARD2mxZObe42z7PGDfiLiS6krRoRFxHLAmM1dExDuorkIBnJuZ105sFyTNVGMdIn2i8fPEiW
w4MweBI0fMvr5p+ZepLndL6lBjBczmEfF8wHtTJE3IWAFz0hjLhoDxDpGKO+RtZ0/q/YPr1z5o+g0nfY3uWXMntc0vfXDZpN4vdZKxTvIu2dgySapj3JO8EXEJoxwm1TjJK2kG6Ovr4/zzz2fp0qUcc8wx0/rZxU7ySmq9/v5+VqyobsRfuXIly5cvZ968edP2+Rvti5SZP278vAy4A9gZ2BG4tTFP0gw3MDDA0FB1ADI4OMjAwMC0fv64nR0j4hjga8B2wFOBlRHxusJ1SeoAdTo7Lgd2zcx7ACLifcDlVKPcSdJG1Rmu4T7ggRHTazeyriT91VhXkd7dePkXYFVEfJmqo+NBwK+moTZJbW6sQ6ThB+38qPHzEY2f3y1XjqROMtaNdqPeyRsRXcD2xSqS1DHqjMl7FHAKsHnT7BuBJ5cqSlJnqHOS961UA3d/Bfh74DDgqpJFSeoMdQLmT5l5I/ALYKfMPAuIolVJ6gi1LlNHxBKqgNk/Ih4LPKpsWZI6QZ2AORp4OdUTAraiGjTqIyWLktQZxj3Jm5n/CxwbEQuAZZnpTXaSaqlzFWknqm4BTwKGIuJ64HWZ+evSxUlqb3UOkc4A3pWZW2Xm1lRPaPxs2bIkdYI6ATMvM789PJGZ51E9M0mSxjRWX6RtGy9/HhHHA5+h6ou0DLhiGmqT1ObGOgdzGdVQmV3AXsAbmpYNAdM79p6ktjNWXyT7G0malDpXkR4NfBTobax/MfDGzPxj4doktbk6J3k/AVwN7EA1bOZqqvMxkjSmOkNm7pCZBzZNfzAiXlOqIEmdo04LZigithmeaFxdemCM9SUJqNeC+XfghxFxFdUVpecARxStSlJHqBMwvwWeCexO1eI5MjP/VLQqSR2hTsB8JTN3BL5ZuhhJnaVOwFzXeMLAVUD/8MzMvLxYVZI6Qp2AWQgsafwZNgTsXaQiSR2jzngwSwAiYiGwITPvKl6VpI5Q507eRcDngScA3RHxS+C1jgcjaTx17oP5LNV4MFtn5kLgP4GzilYlqSPUCZiuzLxgeKIxHsz8ciVJ6hR1TvJeHhEnAJ+iGg/mYOCXw+PFZOZvC9YnqY3VCZiljZ+HjZg/PF7MDlNakaSOUecqkuPCSJqQOudgOldXT/PEiGlJk/WwDpjuntnMe/SOAMx79NPo7pnd4oqkzlLnHExHW7DtHizYdo9WlyF1pLGeKnAm1UncUWXmvxapSFLHGKsFc+lkNhwR3cDpwCJgHXB4Zq4ZZZ1vAudn5hmT+TxJM89YTxX43PDrRj+kzakGnOoB6lxZOgCYm5l7RMRiqidCLh2xzvuBR21q0ZLaw7gneSPiFOBGIIEfAGuAD9TY9p7AhQCZuRrYbcR2DwIGh9eR1HnqXEV6FbAN8BWqIRv2Af5c430LgOae1xsiYhZARDwDOAR49yZVK6mt1AmYP2Tm3cC1wKLMvAR4TI333Q1s0fxZmbm+8fq1VL2zLwZeDxwXES+uXbWktlDnMvVdjceU/Bg4OiJ+T73zJquA/YFzG+dgrhlekJlvG34dEScCt2amh0pSh6nTgjkM+LvMvBS4iepBbCfUeN95wNqIuBI4DTg2Io6LiJdPsFZJbaZOC+ZfgC8CZOZb6244MweBI0fMvn6U9U6su01J7aVOwDwBWB0RSRU038jM+8uWJakTjHuIlJn/1uhRfTKwGPhZRHyheGWS2l6tzo4R0QXMBuZQ3buyrmRRkjpDnUG/P0J1V+5PgbOBYzJzbenCJLW/OudgbgCelZl1bq6TpL8aqzf1EZn5SaoHr70xIh60PDPfW7g2SW1urBZM10ZeS1ItY/Wm/kTj5V3AOZn5x+kpSVKn8D4YaQa7/IITJ/X++/sfeND0D7/7QR4xb3JDwz7/ZSfWXtf7YCQV430wkoqpex/MUuBnVIdI3gcjqZY652D+COzqfTCSNlWdQ6RlhoukiajTgrkuIt4NXAX0D8/MzMuLVSWpI9QJmIVUY/EuaZo3BOxdpC
JJHWPcgMnMJeOtI0mjqXMV6RJGecJjZtqCkTSmOodIJza9nk11yfqOItVI6ih1DpEuGzHr+xFxFT7TSNI46hwibds02QU8HdiqWEWSOkadQ6TmFswQcBtwdJlyJHWSOodIdR50L0kPUecQaXeqB9l/FLgAeCZwZGZ+vXBtktpcna4CfVSPjT2I6k7eXYHjSxYlqTPUCZjuxpWklwJfy8zfUu/cjaSHuToBc39EvJWqa8AFEfEW4J6yZUnqBLV6UwObA/+UmXcAjwcOKVqVpI5Q5yrS/wHvbZp+e9GKJHWMWkNmStJEGDCSijFgJBVjwEgqxoCRVIwBI6kYA0ZSMQaMpGIMGEnFGDCSijFgJBVjwEgqxoCRVIwBI6kYA0ZSMcWGvoyIbuB0YBGwDjg8M9c0LT8WOLgx+a3MPKlULZJao2QL5gBgbmbuQTVI+KnDCyJiB6qR8p4LLAZeGBE7F6xFUguUDJg9gQsBMnM1sFvTst8BL87MDZk5RPXM67UFa5HUAiWfDrAAuKtpekNEzMrM9Zn5AHBbRHQBHwJ+mpk3FKxFUguUbMHcDWzR/FmZuX54IiLmAmc31nlTwToktUjJgFkF7AcQEYuBa4YXNFou5wM/z8w3ZOaGgnVIU6Kvr4/e3l76+vpaXUrbKHmIdB6wb0RcCXQBh0bEccAaoAd4AbBZRLyksf47MvOHBeuRJqy/v58VK1YAsHLlSpYvX868efNaXNXMVyxgMnMQOHLE7OubXs8t9dnSVBsYGGBoaAiAwcFBBgYGDJgavNFOUjEGjKRiDBipg83q+duveFfXg6engwEjdbA5c3p49qLHAbDbzo9jzpyeaf38kleRJM0A++39ZPbb+8kt+WxbMJKKsQWjh4VT3vXVSb3/gQce3FXuwyevYPbsyd1p8c6T/3lS728HtmAkFWPASCrGgJFUjAEjqRgDRlIxBoykYgwYScUYMFINXd3Nt9h3jZjWxhgwUg2zembzxMc8HYAnPuYfmNUzu8UVtQfv5JVqiu2fR2z/vFaX0VZswUgqxoCRVIwBI6kYA0ZSMQaMpGIMGEnFGDCSijFgJBVjwEgqxoCRVIwBI6kYA0ZSMQaMpGIMGEnFGDCSijFgJBVjwEgqxoCRVIwBI6kYA0ZSMQaMpGIMGEnFGDCSijFgJBVjwEgqxoCRVIwBI6mYYs+mjohu4HRgEbAOODwz1zQtXw68AVgPvD8zLyhVi6TWKNmCOQCYm5l7AMcDpw4viIjHAscA/wi8CPhARGxWsBZJLVCsBQPsCVwIkJmrI2K3pmW7A6sycx2wLiLWADsDV29kWz0At95664Nmrrv/zqmuedJuueWWcddZe+f901DJpqlT9+3r1k5DJZumTt0A9953R+FKNl2d2m+7/d5pqGTTjFZ3b2/vdsAtmbm+eX7JgFkA3NU0vSEiZjUKGLnsHuCRY2zrcQDLli2b8iKnWu/3+lpdwoT0ntHb6hIm5H297Vk3wNe/fer4K81EJ68cbe6NwPbATc0zSwbM3cAWTdPdTek2ctkWwFjNkauB5wF/ADZMZZGSpsxDmjYlA2YVsD9wbkQsBq5pWvYj4OSImAtsBuwIXLuxDTUOpX5QsFZJBXQNDQ0V2XDTVaSdgS7gUGA/YE1mrmhcRTqC6kTzKZn59SKFSGqZYgEjSd5oJ6kYA0ZSMQaMpGJKXkWadhGxF3AJ8KrM/HLT/F8AP8nM12/Cto7KzI9OeZH1P38vxtgXYEFmHtii8sY1lf8WrdIO+9Co8VzgOmCI6h6z3wDLMnOghaUBndmCuR44eHgiInYCNp/Adk6YsoombqP7MpPDpclU/Vu0Ujvsw8WZuVdmLsnMXYEHgJe3uijosBZMw8+BiIhHZuZdwKuBs4FtI+Io4ECqL8htwCuA7YAzqTpddgOHAK8FFkbE6cBbgDOApzSWn5CZl0bEtcANwEBmHkwZY+3LrZn52Ih4E/A6YBC4OjOPiYgDgbdTfdF+DxycmYOFapxo/T
dT/fJeB1wxQ+odTa19yMxjW1nksIiYQ3Xn+x0R8Wlgm8b0isw8ISLOArZq/Hkp8Daqm1h7gP/KzK9OZT2d2IIB+DpwYER0UfV7upJqX7cC9snM51CF67OBfalu/NsHeA/wyMw8Gbg9M98EHA7clpnPB5YCH2t8xnzgfQXDZax9aXYocFSjU+kvI2IW8CrgQ5m5J3ABVbO5VTZW/zbAIY1fzJlU72jq7EMr7R0Rl0bEdVSHz+cBvwZWZ+aLqGo+smn9izPzucBiYPvG3/sS4F0RseVUFtapAfMlqmbt86n+d4Tqf/gB4JyI+AzwRGA28BmqbgoXAkdRtWSa7QTsFxGXUn3RZkXE1o1lWXAfho22L80OBd4cEZcBT6K6qfE4qi/dZcBzqfa9VTZW/22Z+ZfG65lU72jq7EMrXZyZe1G1RAao+gXdDjw7Is4GTqO6Y37Y8Pd2J2DXxnf7Qqrfh+2msrCODJjM/A3VYdAxwBcbsxcAB2TmK4Gjqfa9i6pVckVm9gJfpWqq01gGVRP4nMY/4Esa69zeWFb8F2Ej+9JsOXBkZr4AeCbVL+gRwImNeV1Uh4ItMUb9zX93M6be0dTch5ZrhN2rgU8DxwJ3ZuYyqqFSHtFogcHf6r4euKTx3d6b6mTxr6eypo4MmIavANtk5g2N6fXAfRGxCvgeVcfJxwP/A7w3Ii6makZ+pLH+dRHxReATwNMa/7teCdzcgvMDI/el2TXAFY36/wRcRXXId0FEXAQ8luqwo5XGqh9mXr2jGW8fZoTMvA7oA54BvDgiLgc+DvyK6vvebCVwb0RcAfwYGMrMe6ayHrsKSCqmk1swklrMgJFUjAEjqRgDRlIxBoykYjqxq4BaKCI+RvU4mjnAk6m6AkB1uX8oM8+IiDOp7nu5OSJuAvbKzJtaUK4KM2A0pTLzzQARsR1waWbuMspqS4CTprMutYYBo2kRESc2Xq6luuHrWxHxvKblPcCHgL2oOt6dlZmnTXOZmmKeg9G0ysz/oOoxvd+IfjzLG8ufRdU5b2lzAKk92YLRTLEPsEtE7N2Ynk/VGW+0Dp5qEwaMZooe4G2Z+Q2ARo/1+1pbkibLQyS1wnoe+p/bxcDyiJgdEfOpHrT3nGmvTFPKFoxa4QKqk7wvapo3PGrgT6m+l2dm5qUtqE1TyN7UkorxEElSMQaMpGIMGEnFGDCSijFgJBVjwEgqxoCRVMz/A/hX9EXpLU+dAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x112b16eb8>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the mean of Survived for each encoded title.\n",
"g = (\n",
"    sns.factorplot(x=\"Title\", y=\"Survived\", data=dataset_df, kind=\"bar\")\n",
"    .set_xticklabels([\"Master\",\"Miss\",\"Mrs\",\"Mr\",\"Rare\"])\n",
"    .set_ylabels(\"survival probability\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# The title has been extracted, so the Name column is dropped.\n",
"dataset_df = dataset_df.drop(\"Name\", axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Fill each missing Age with the mean Age of the passengers who share the same Title.\n",
"# The earlier row-by-row loop tested math.isnan(Title) the wrong way round, so every\n",
"# missing Age received the overall mean instead of its title's mean.\n",
"title_mean_age = dataset_df.groupby(\"Title\")[\"Age\"].transform(\"mean\")\n",
"# Computed from the observed ages only, before any value is filled in.\n",
"overall_mean_age = dataset_df[\"Age\"].mean()\n",
"# The overall mean is only a fallback for a title whose ages are all missing.\n",
"dataset_df[\"Age\"] = dataset_df[\"Age\"].fillna(title_mean_age).fillna(overall_mean_age)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Age 0\n",
"Cabin 1014\n",
"Embarked 2\n",
"Fare 1\n",
"Parch 0\n",
"PassengerId 0\n",
"Pclass 0\n",
"Sex 0\n",
"SibSp 0\n",
"Survived 418\n",
"Ticket 0\n",
"Title 0\n",
"dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# XXX: for this round, simply throw away most of the attributes.\n",
"unused_columns = [\"Ticket\", \"SibSp\", \"Parch\", \"Fare\", \"Cabin\", \"Embarked\"]\n",
"dataset_df = dataset_df.drop(unused_columns, axis=1)\n",
"\n",
"# Split the combined frame back into its training rows (the first train_count) and test rows.\n",
"non_feature_columns = ['PassengerId', 'Survived']\n",
"train_rows = dataset_df.iloc[:train_count]\n",
"test_rows = dataset_df.iloc[train_count:]\n",
"X_train = train_rows.drop(non_feature_columns, axis=1)\n",
"y_train = train_rows.Survived\n",
"X_test = test_rows.drop(non_feature_columns, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"# from sklearn.model_selection import KFold\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"様々なアルゴリズムで学習をさせてみる.\n",
"\n",
"[Titanic Top 4% with ensemble modeling | Kaggle](https://www.kaggle.com/yassineghouzam/titanic-top-4-with-ensemble-modeling)を参考に,以下のアルゴリズムで学習を行う.\n",
"\n",
"- SVC\n",
"- Decision Tree\n",
"- AdaBoost\n",
"- Random Forest\n",
"- Extra Trees\n",
"- Gradient Boosting\n",
"- Multi-layer perceptron (neural network)\n",
"- KNN\n",
"- Logistic regression\n",
"- Linear Discriminant Analysis\n",
"- xgboost"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x11294cf28>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Model comparison: score a range of classifiers with stratified k-fold cross-validation.\n",
"N_SPLITS = 4\n",
"kfold = StratifiedKFold(n_splits=N_SPLITS)\n",
"\n",
"random_state = 2  # seed passed to every estimator below that takes one\n",
"\n",
"# Each plot label sits next to its estimator so the two lists cannot drift out of sync.\n",
"named_classifiers = [\n",
"    (\"SVC\", SVC(random_state=random_state)),\n",
"    (\"DecisionTree\", DecisionTreeClassifier(random_state=random_state)),\n",
"    (\"AdaBoost\", AdaBoostClassifier(DecisionTreeClassifier(random_state=random_state), random_state=random_state, learning_rate=0.1)),\n",
"    (\"RandomForest\", RandomForestClassifier(random_state=random_state)),\n",
"    (\"ExtraTrees\", ExtraTreesClassifier(random_state=random_state)),\n",
"    (\"GradientBoosting\", GradientBoostingClassifier(random_state=random_state)),\n",
"    (\"MultipleLayerPerceptron\", MLPClassifier(random_state=random_state)),\n",
"    (\"KNeighbors\", KNeighborsClassifier()),\n",
"    (\"LogisticRegression\", LogisticRegression(random_state=random_state)),\n",
"    (\"LinearDiscriminantAnalysis\", LinearDiscriminantAnalysis()),\n",
"    (\"XGBClassifier\", xgb.XGBClassifier(seed=random_state)),\n",
"]\n",
"classifiers = [estimator for _label, estimator in named_classifiers]\n",
"\n",
"# One array of per-fold accuracies per classifier.\n",
"cv_results = [\n",
"    cross_val_score(estimator, X_train, y=y_train, scoring=\"accuracy\", cv=kfold, n_jobs=4)\n",
"    for estimator in classifiers\n",
"]\n",
"cv_means = [fold_scores.mean() for fold_scores in cv_results]\n",
"cv_std = [fold_scores.std() for fold_scores in cv_results]\n",
"\n",
"cv_res = pd.DataFrame({\n",
"    \"CrossValMeans\": cv_means,\n",
"    \"CrossValerrors\": cv_std,\n",
"    \"Algorithm\": [label for label, _estimator in named_classifiers],\n",
"})\n",
"\n",
"# Horizontal bars of mean accuracy with the per-fold standard deviation as error bars.\n",
"# x/y are passed by keyword because newer seaborn releases reject them positionally.\n",
"g = sns.barplot(x=\"CrossValMeans\", y=\"Algorithm\", data=cv_res, palette=\"Set3\", orient=\"h\", xerr=cv_std)\n",
"g.set_xlabel(\"Mean Accuracy\")\n",
"g = g.set_title(\"Cross validation scores\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"hyper parameter tuningを行う.この辺りは中のアルゴリズムを知ってないと少し難しそう \n",
"> grid search optimization for AdaBoost, ExtraTrees , RandomForest, GradientBoosting and SVC classifiers "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 4 folds for each of 112 candidates, totalling 448 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Done 448 out of 448 | elapsed: 1.9s finished\n"
]
},
{
"data": {
"text/plain": [
"0.8125701459034792"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# AdaBoost over decision trees: tune the tree and the booster together.\n",
"adaDTC = AdaBoostClassifier(DecisionTreeClassifier(), random_state=7)\n",
"\n",
"# `base_estimator__*` keys are routed to the wrapped decision tree.\n",
"ada_param_grid = {\n",
"    \"base_estimator__criterion\": [\"gini\", \"entropy\"],\n",
"    \"base_estimator__splitter\": [\"best\", \"random\"],\n",
"    \"algorithm\": [\"SAMME\", \"SAMME.R\"],\n",
"    \"n_estimators\": [1, 2],\n",
"    \"learning_rate\": [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 1.5],\n",
"}\n",
"\n",
"gsadaDTC = GridSearchCV(\n",
"    adaDTC,\n",
"    param_grid=ada_param_grid,\n",
"    scoring=\"accuracy\",\n",
"    cv=kfold,\n",
"    n_jobs=4,\n",
"    verbose=1,\n",
")\n",
"gsadaDTC.fit(X_train, y_train)\n",
"\n",
"ada_best = gsadaDTC.best_estimator_\n",
"\n",
"# Best cross-validated accuracy\n",
"gsadaDTC.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 4 folds for each of 54 candidates, totalling 216 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 6.8s\n",
"[Parallel(n_jobs=4)]: Done 192 tasks | elapsed: 32.1s\n",
"[Parallel(n_jobs=4)]: Done 216 out of 216 | elapsed: 37.6s finished\n"
]
},
{
"data": {
"text/plain": [
"0.81144781144781142"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ExtraTrees hyper-parameter tuning\n",
"# Seeded with the notebook-wide random_state so the search is reproducible\n",
"# after a kernel restart (the estimator is randomized).\n",
"ExtC = ExtraTreesClassifier(random_state=random_state)\n",
"\n",
"# Search grid for optimal parameters\n",
"ex_param_grid = {\"max_depth\": [None],\n",
"                 \"max_features\": [1, 3, 4],\n",
"                 \"min_samples_split\": [2, 3, 10],\n",
"                 \"min_samples_leaf\": [1, 3, 10],\n",
"                 \"bootstrap\": [False],\n",
"                 \"n_estimators\": [100, 300],\n",
"                 \"criterion\": [\"gini\"]}\n",
"\n",
"gsExtC = GridSearchCV(ExtC, param_grid=ex_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1)\n",
"\n",
"gsExtC.fit(X_train, y_train)\n",
"\n",
"ExtC_best = gsExtC.best_estimator_\n",
"\n",
"# Best cross-validated accuracy\n",
"gsExtC.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 4 folds for each of 54 candidates, totalling 216 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.3s\n",
"[Parallel(n_jobs=4)]: Done 192 tasks | elapsed: 40.5s\n",
"[Parallel(n_jobs=4)]: Done 216 out of 216 | elapsed: 45.8s finished\n"
]
},
{
"data": {
"text/plain": [
"0.8125701459034792"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# RandomForest hyper-parameter tuning\n",
"# Seeded with the notebook-wide random_state so the search is reproducible\n",
"# after a kernel restart (the estimator is randomized).\n",
"RFC = RandomForestClassifier(random_state=random_state)\n",
"\n",
"# Search grid for optimal parameters\n",
"rf_param_grid = {\"max_depth\": [None],\n",
"                 \"max_features\": [1, 3, 4],\n",
"                 \"min_samples_split\": [2, 3, 10],\n",
"                 \"min_samples_leaf\": [1, 3, 10],\n",
"                 \"bootstrap\": [False],\n",
"                 \"n_estimators\": [100, 300],\n",
"                 \"criterion\": [\"gini\"]}\n",
"\n",
"gsRFC = GridSearchCV(RFC, param_grid=rf_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1)\n",
"\n",
"gsRFC.fit(X_train, y_train)\n",
"\n",
"RFC_best = gsRFC.best_estimator_\n",
"\n",
"# Best cross-validated accuracy\n",
"gsRFC.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 4 folds for each of 72 candidates, totalling 288 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Done 76 tasks | elapsed: 3.7s\n",
"[Parallel(n_jobs=4)]: Done 288 out of 288 | elapsed: 15.5s finished\n"
]
},
{
"data": {
"text/plain": [
"0.81369248035914699"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Gradient boosting hyper-parameter tuning\n",
"# Seeded with the notebook-wide random_state: with max_features < 1.0 the\n",
"# split search is randomized, so an unseeded run is not reproducible.\n",
"GBC = GradientBoostingClassifier(random_state=random_state)\n",
"gb_param_grid = {'loss': [\"deviance\"],\n",
"                 'n_estimators': [100, 200, 300],\n",
"                 'learning_rate': [0.1, 0.05, 0.01],\n",
"                 'max_depth': [4, 8],\n",
"                 'min_samples_leaf': [100, 150],\n",
"                 'max_features': [0.3, 0.1]}\n",
"\n",
"gsGBC = GridSearchCV(GBC, param_grid=gb_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1)\n",
"\n",
"gsGBC.fit(X_train, y_train)\n",
"\n",
"GBC_best = gsGBC.best_estimator_\n",
"\n",
"# Best cross-validated accuracy\n",
"gsGBC.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 4 folds for each of 28 candidates, totalling 112 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Done 76 tasks | elapsed: 8.3s\n",
"[Parallel(n_jobs=4)]: Done 112 out of 112 | elapsed: 22.6s finished\n"
]
},
{
"data": {
"text/plain": [
"0.80808080808080807"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# SVC hyper-parameter tuning\n",
"# probability=True is needed for soft voting later; the probability\n",
"# calibration is randomized, so seed it for reproducible results.\n",
"SVMC = SVC(probability=True, random_state=random_state)\n",
"svc_param_grid = {'kernel': ['rbf'],\n",
"                  'gamma': [0.001, 0.01, 0.1, 1],\n",
"                  'C': [1, 10, 50, 100, 200, 300, 1000]}\n",
"\n",
"gsSVMC = GridSearchCV(SVMC, param_grid=svc_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1)\n",
"\n",
"gsSVMC.fit(X_train, y_train)\n",
"\n",
"SVMC_best = gsSVMC.best_estimator_\n",
"\n",
"# Best cross-validated accuracy\n",
"gsSVMC.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 4 folds for each of 6 candidates, totalling 24 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Done 24 out of 24 | elapsed: 1.3s finished\n"
]
},
{
"data": {
"text/plain": [
"0.86025492508120771"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# xgboost ref: https://www.kaggle.com/phunter/xgboost-with-gridsearchcv\n",
"XgbC = xgb.XGBClassifier()\n",
"\n",
"xgb_param_grid = {'learning_rate': [0.1, 0.05, 0.01],  # so called `eta` value\n",
"                  'max_depth': [4, 8],\n",
"                  'min_child_weight': [11],\n",
"                  'silent': [1],\n",
"                  'subsample': [0.8],\n",
"                  'colsample_bytree': [0.7],\n",
"                  'n_estimators': [100],  # number of trees, change it to 1000 for better results\n",
"                  'seed': [random_state]}\n",
"\n",
"# Scored with accuracy like every other grid search in this notebook, so that\n",
"# best_score_ is directly comparable across models (ROC AUC is on a different scale).\n",
"gsXgbC = GridSearchCV(XgbC, param_grid=xgb_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1, refit=True)\n",
"gsXgbC.fit(X_train, y_train)\n",
"\n",
"Xgb_best = gsXgbC.best_estimator_\n",
"\n",
"# Best cross-validated accuracy\n",
"gsXgbC.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1129ff4a8>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1129ffcf8>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x112bee6a0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1129cf240>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x11303a3c8>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot learning curve\n",
"def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,\n",
"                        n_jobs=-1, train_sizes=np.linspace(.1, 1.0, 5)):\n",
"    \"\"\"Draw training and cross-validation score curves for an estimator.\n",
"\n",
"    Opens a new pyplot figure, runs ``learning_curve`` for ``estimator`` on\n",
"    ``(X, y)`` and plots, for each training-set size, the mean score as a\n",
"    line with a shaded band of +/- one standard deviation (both taken along\n",
"    axis 1 of the score arrays). Red is the training score, green the\n",
"    cross-validation score.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    estimator : estimator handed to ``learning_curve``.\n",
"    title : title of the figure.\n",
"    X, y : data and targets handed to ``learning_curve``.\n",
"    ylim : optional ``(ymin, ymax)`` pair applied to the y axis.\n",
"    cv, n_jobs, train_sizes : forwarded unchanged to ``learning_curve``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The ``matplotlib.pyplot`` module (the new figure is the current one).\n",
"    \"\"\"\n",
"    plt.figure()\n",
"    plt.title(title)\n",
"    if ylim is not None:\n",
"        plt.ylim(*ylim)\n",
"    plt.xlabel(\"Training examples\")\n",
"    plt.ylabel(\"Score\")\n",
"\n",
"    sizes, fit_scores, val_scores = learning_curve(\n",
"        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)\n",
"    plt.grid()\n",
"\n",
"    # (mean per size, std per size, colour, legend label) for each curve\n",
"    curves = [\n",
"        (np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1), \"r\", \"Training score\"),\n",
"        (np.mean(val_scores, axis=1), np.std(val_scores, axis=1), \"g\", \"Cross-validation score\"),\n",
"    ]\n",
"    # Shaded bands first, then the lines, in the same order for both curves.\n",
"    for centre, spread, colour, _ in curves:\n",
"        plt.fill_between(sizes, centre - spread, centre + spread, alpha=0.1, color=colour)\n",
"    for centre, _, colour, legend_label in curves:\n",
"        plt.plot(sizes, centre, 'o-', color=colour, label=legend_label)\n",
"\n",
"    plt.legend(loc=\"best\")\n",
"    return plt\n",
"\n",
"# One learning-curve figure per tuned model (fitted grid search, figure title).\n",
"learning_curve_targets = [\n",
"    (gsRFC, \"RF learning curves\"),\n",
"    (gsExtC, \"ExtraTrees learning curves\"),\n",
"    (gsSVMC, \"SVC learning curves\"),\n",
"    (gsadaDTC, \"AdaBoost learning curves\"),\n",
"    (gsGBC, \"GradientBoosting learning curves\"),\n",
"]\n",
"for grid_search, curve_title in learning_curve_targets:\n",
"    g = plot_learning_curve(grid_search.best_estimator_, curve_title, X_train, y_train, cv=kfold)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x112b3b198>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"nrows = ncols = 2\n",
"fig, axes = plt.subplots(nrows=nrows, ncols=ncols, sharex=\"all\", figsize=(15, 15))\n",
"\n",
"names_classifiers = [\n",
"    (\"AdaBoosting\", ada_best),\n",
"    (\"ExtraTrees\", ExtC_best),\n",
"    (\"RandomForest\", RFC_best),\n",
"    (\"GradientBoosting\", GBC_best),\n",
"]\n",
"\n",
"# Fill the grid row by row, one tuned model per panel.\n",
"for ax, (name, classifier) in zip(axes.ravel(), names_classifiers):\n",
"    importances = classifier.feature_importances_\n",
"    # positions of the (at most) 40 largest importances, largest first\n",
"    indices = np.argsort(importances)[::-1][:40]\n",
"    g = sns.barplot(y=X_train.columns[indices], x=importances[indices], orient='h', ax=ax)\n",
"    g.set_xlabel(\"Relative importance\", fontsize=12)\n",
"    g.set_ylabel(\"Features\", fontsize=12)\n",
"    g.tick_params(labelsize=9)\n",
"    g.set_title(name + \" feature importance\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1131a0c50>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Test-set predictions of every tuned model, one column per model.\n",
"tuned_models = [\n",
"    (\"RFC\", RFC_best),\n",
"    (\"ExtC\", ExtC_best),\n",
"    (\"Ada\", ada_best),\n",
"    (\"GBC\", GBC_best),\n",
"    (\"SVC\", SVMC_best),\n",
"    (\"Xgb\", Xgb_best),\n",
"]\n",
"ensemble_results = pd.concat(\n",
"    [pd.Series(model.predict(X_test), name=label) for label, model in tuned_models],\n",
"    axis=1,\n",
")\n",
"\n",
"# Pairwise correlation between the models' predictions\n",
"g = sns.heatmap(ensemble_results.corr(), annot=True)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# Ensemble modeling: soft voting over the tuned classifiers.\n",
"# Xgb_best is not part of this ensemble; add ('xgb', Xgb_best) to the list to include it.\n",
"voting_members = [\n",
"    ('rfc', RFC_best),\n",
"    ('extc', ExtC_best),\n",
"    ('svc', SVMC_best),\n",
"    ('adac', ada_best),\n",
"    ('gbc', GBC_best),\n",
"]\n",
"votingC = VotingClassifier(estimators=voting_members, voting='soft', n_jobs=4)\n",
"\n",
"votingC = votingC.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Prediction\n",
"# The predictions get test_df's index explicitly: pd.DataFrame aligns Series on\n",
"# their index, so a default 0..n-1 index would yield NaN rows (and a float\n",
"# \"Survived\" column) whenever test_df is not indexed from 0.\n",
"y_pred = pd.Series(votingC.predict(X_test), index=test_df.index, name=\"Survived\").astype(int)\n",
"\n",
"submission = pd.DataFrame({\n",
"    \"PassengerId\": test_df[\"PassengerId\"],\n",
"    \"Survived\": y_pred,\n",
"})\n",
"submission.to_csv('../output/submission.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# score: 0.72 (xgboostをvotingClassifierで未使用の場合)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment