vimaloctavius/titanic.ipynb

## titanic.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Titanic.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "mount_file_id": "1hrRubWz4nfYtQIhFLqm3YRYh6SK-5QSM",
      "authorship_tag": "ABX9TyPm3V+PgeBayXjlcX1B1jrS",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/vimaloctavius/598496314325e3e820451a4a9a02610e/titanic.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Ir2FPypJx-Fs"
      },
      "source": [
        "**Machine Learning** Algorithms to predict who survived the Titanic based on independent Xs like Age, Gender, Parent or Child etc...\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zPRyfLeucVSL"
      },
      "source": [
        "# Not all libraries maybe used...copied as a standard template for my use, not quite efficient for sure ;-)\n",
        "\n",
        "# Pandas is a software library written for the Python programming language for data manipulation and analysis.\n",
        "import pandas as pd\n",
        "# NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays\n",
        "import numpy as np\n",
        "# Matplotlib is a plotting library for python and pyplot gives us a MatLab like plotting framework. We will use this in our plotter function to plot data.\n",
        "import matplotlib.pyplot as plt\n",
        "#Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics\n",
        "import seaborn as sns\n",
        "# Preprocessing allows us to standarsize our data\n",
        "from sklearn import preprocessing\n",
        "# Allows us to split our data into training and testing data\n",
        "from sklearn.model_selection import train_test_split\n",
        "# Allows us to test parameters of classification algorithms and find the best one\n",
        "from sklearn.model_selection import GridSearchCV\n",
        "# Logistic Regression classification algorithm\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "# Support Vector Machine classification algorithm\n",
        "from sklearn.svm import SVC\n",
        "# Decision Tree classification algorithm\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "# K Nearest Neighbors classification algorithm\n",
        "from sklearn.neighbors import KNeighborsClassifier"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Vf8hqw3Aj70f"
      },
      "source": [
        "import itertools\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "from matplotlib.ticker import NullFormatter\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.ticker as ticker\n",
        "from sklearn import preprocessing\n",
        "%matplotlib inline\n",
        "from sklearn.metrics import jaccard_score\n",
        "from sklearn.metrics import log_loss\n",
        "from sklearn.metrics import classification_report\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.neighbors import KNeighborsClassifier\n",
        "from sklearn import metrics\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "from sklearn.metrics import accuracy_score\n",
        "import sklearn.metrics as metrics\n",
        "from sklearn import svm\n",
        "import pylab as pl\n",
        "import scipy.optimize as opt\n",
        "from sklearn import preprocessing\n",
        "from sklearn.svm import SVC\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.metrics import confusion_matrix\n",
        "from sklearn.metrics import jaccard_score\n",
        "from sklearn.metrics import f1_score\n",
        "from sklearn.metrics import log_loss"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "BuafV5SLkXou"
      },
      "source": [
        "# Data Prep, feature engineering"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 496
        },
        "id": "Kz_K8E_gq9vK",
        "outputId": "cc63b445-d814-4447-9562-32fc8bf33211"
      },
      "source": [
        "train_data = pd.read_csv('/content/drive/MyDrive/Books/Kaggle/train.csv')\n",
        "train_data.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>PassengerId</th>\n",
              "      <th>Survived</th>\n",
              "      <th>Pclass</th>\n",
              "      <th>Name</th>\n",
              "      <th>Sex</th>\n",
              "      <th>Age</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Ticket</th>\n",
              "      <th>Fare</th>\n",
              "      <th>Cabin</th>\n",
              "      <th>Embarked</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>Braund, Mr. Owen Harris</td>\n",
              "      <td>male</td>\n",
              "      <td>22.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>A/5 21171</td>\n",
              "      <td>7.2500</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
              "      <td>female</td>\n",
              "      <td>38.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>PC 17599</td>\n",
              "      <td>71.2833</td>\n",
              "      <td>C85</td>\n",
              "      <td>C</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>3</td>\n",
              "      <td>Heikkinen, Miss. Laina</td>\n",
              "      <td>female</td>\n",
              "      <td>26.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>STON/O2. 3101282</td>\n",
              "      <td>7.9250</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
              "      <td>female</td>\n",
              "      <td>35.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>113803</td>\n",
              "      <td>53.1000</td>\n",
              "      <td>C123</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5</td>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>Allen, Mr. William Henry</td>\n",
              "      <td>male</td>\n",
              "      <td>35.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>373450</td>\n",
              "      <td>8.0500</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   PassengerId  Survived  Pclass  ...     Fare Cabin  Embarked\n",
              "0            1         0       3  ...   7.2500   NaN         S\n",
              "1            2         1       1  ...  71.2833   C85         C\n",
              "2            3         1       3  ...   7.9250   NaN         S\n",
              "3            4         1       1  ...  53.1000  C123         S\n",
              "4            5         0       3  ...   8.0500   NaN         S\n",
              "\n",
              "[5 rows x 12 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8Bvye_8r0Cm4"
      },
      "source": [
        "test_data = pd.read_csv('/content/drive/MyDrive/Books/Kaggle/test.csv')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 445
        },
        "id": "4-GE0AMn0Zr7",
        "outputId": "066a9bd5-a2bb-480a-c08c-c8cc5f93c86f"
      },
      "source": [
        "test_data.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>PassengerId</th>\n",
              "      <th>Pclass</th>\n",
              "      <th>Name</th>\n",
              "      <th>Sex</th>\n",
              "      <th>Age</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Ticket</th>\n",
              "      <th>Fare</th>\n",
              "      <th>Cabin</th>\n",
              "      <th>Embarked</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>892</td>\n",
              "      <td>3</td>\n",
              "      <td>Kelly, Mr. James</td>\n",
              "      <td>male</td>\n",
              "      <td>34.5</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>330911</td>\n",
              "      <td>7.8292</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Q</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>893</td>\n",
              "      <td>3</td>\n",
              "      <td>Wilkes, Mrs. James (Ellen Needs)</td>\n",
              "      <td>female</td>\n",
              "      <td>47.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>363272</td>\n",
              "      <td>7.0000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>894</td>\n",
              "      <td>2</td>\n",
              "      <td>Myles, Mr. Thomas Francis</td>\n",
              "      <td>male</td>\n",
              "      <td>62.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>240276</td>\n",
              "      <td>9.6875</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Q</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>895</td>\n",
              "      <td>3</td>\n",
              "      <td>Wirz, Mr. Albert</td>\n",
              "      <td>male</td>\n",
              "      <td>27.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>315154</td>\n",
              "      <td>8.6625</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>896</td>\n",
              "      <td>3</td>\n",
              "      <td>Hirvonen, Mrs. Alexander (Helga E Lindqvist)</td>\n",
              "      <td>female</td>\n",
              "      <td>22.0</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>3101298</td>\n",
              "      <td>12.2875</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   PassengerId  Pclass  ... Cabin Embarked\n",
              "0          892       3  ...   NaN        Q\n",
              "1          893       3  ...   NaN        S\n",
              "2          894       2  ...   NaN        Q\n",
              "3          895       3  ...   NaN        S\n",
              "4          896       3  ...   NaN        S\n",
              "\n",
              "[5 rows x 11 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "w5JKIG1d2OD8",
        "outputId": "fb4f699a-e766-4043-e581-6d7f52698a52"
      },
      "source": [
        "features = [\"Pclass\",\"Sex\",\"SibSp\",\"Parch\"]\n",
        "X = pd.get_dummies(train_data[features])\n",
        "X"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Pclass</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Sex_female</th>\n",
              "      <th>Sex_male</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>886</th>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>887</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>888</th>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>889</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>890</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>891 rows × 5 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "     Pclass  SibSp  Parch  Sex_female  Sex_male\n",
              "0         3      1      0           0         1\n",
              "1         1      1      0           1         0\n",
              "2         3      0      0           1         0\n",
              "3         1      1      0           1         0\n",
              "4         3      0      0           0         1\n",
              "..      ...    ...    ...         ...       ...\n",
              "886       2      0      0           0         1\n",
              "887       1      0      0           1         0\n",
              "888       3      1      2           1         0\n",
              "889       1      0      0           0         1\n",
              "890       3      0      0           0         1\n",
              "\n",
              "[891 rows x 5 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 16
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Fz8gjEfC3TJA"
      },
      "source": [
        "y = train_data[\"Survived\"]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mIUeIzUkvpsK",
        "outputId": "8acfa7a7-d531-4f02-86e1-53f61fb12387"
      },
      "source": [
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)\n",
        "print(\"Train set:\", X_train.shape, y_train.shape)\n",
        "print(\"Test set:\", X_test.shape, y_test.shape)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Train set: (712, 5) (712,)\n",
            "Test set: (179, 5) (179,)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "sgMkyY8lfejD",
        "outputId": "accc2782-6c5c-4a90-85af-9ebc8b0a2d20"
      },
      "source": [
        "X_train"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Pclass</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Sex_female</th>\n",
              "      <th>Sex_male</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>790</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>467</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>431</th>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>710</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>608</th>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>536</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>198</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>876</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>3</td>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>558</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>712 rows × 5 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "     Pclass  SibSp  Parch  Sex_female  Sex_male\n",
              "790       3      0      0           0         1\n",
              "467       1      0      0           0         1\n",
              "431       3      1      0           1         0\n",
              "710       1      0      0           1         0\n",
              "608       2      1      2           1         0\n",
              "..      ...    ...    ...         ...       ...\n",
              "536       1      0      0           0         1\n",
              "198       3      0      0           1         0\n",
              "876       3      0      0           0         1\n",
              "7         3      3      1           0         1\n",
              "558       1      1      1           1         0\n",
              "\n",
              "[712 rows x 5 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "buLIdU0gfj4s",
        "outputId": "00ee284f-6191-4f2a-ef96-f998cbb4b0b3"
      },
      "source": [
        "y_train"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "790    0\n",
              "467    0\n",
              "431    1\n",
              "710    1\n",
              "608    1\n",
              "      ..\n",
              "536    0\n",
              "198    1\n",
              "876    0\n",
              "7      0\n",
              "558    1\n",
              "Name: Survived, Length: 712, dtype: int64"
            ]
          },
          "metadata": {},
          "execution_count": 20
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "Rc_LaOgTfulO",
        "outputId": "839e33f5-ce19-49d0-888b-c415150e350c"
      },
      "source": [
        "X_test"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Pclass</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Sex_female</th>\n",
              "      <th>Sex_male</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>177</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>846</th>\n",
              "      <td>3</td>\n",
              "      <td>8</td>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>69</th>\n",
              "      <td>3</td>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>682</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>569</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>797</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>91</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>312</th>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>540</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>135</th>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>179 rows × 5 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "     Pclass  SibSp  Parch  Sex_female  Sex_male\n",
              "177       1      0      0           1         0\n",
              "846       3      8      2           0         1\n",
              "69        3      2      0           0         1\n",
              "682       3      0      0           0         1\n",
              "569       3      0      0           0         1\n",
              "..      ...    ...    ...         ...       ...\n",
              "797       3      0      0           1         0\n",
              "91        3      0      0           0         1\n",
              "312       2      1      1           1         0\n",
              "540       1      0      2           1         0\n",
              "135       2      0      0           0         1\n",
              "\n",
              "[179 rows x 5 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "0WXYNTkQf5Ed",
        "outputId": "f6d45b4b-c55e-4531-cb61-984b4caef909"
      },
      "source": [
        "y_test"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "177    0\n",
              "846    0\n",
              "69     0\n",
              "682    0\n",
              "569    1\n",
              "      ..\n",
              "797    1\n",
              "91     0\n",
              "312    0\n",
              "540    1\n",
              "135    0\n",
              "Name: Survived, Length: 179, dtype: int64"
            ]
          },
          "metadata": {},
          "execution_count": 39
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hBx-ubkJiqRU"
      },
      "source": [
        "# KNN"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "yvJUFFc4kKMe",
        "outputId": "d3b6c3e8-af69-419b-9b31-87114f576df6"
      },
      "source": [
        "  j = 10\n",
        "  for i in range(1,j):\n",
        "      \n",
        "    KNN_model = KNeighborsClassifier(n_neighbors = i).fit(X_train,y_train)\n",
        "    yhat_Knn = KNN_model.predict(X_test)\n",
        "    acc_sc= (metrics.accuracy_score(y_test, yhat_Knn))\n",
        "    print('Neighbour {} with acuracy {}'. format(i, acc_sc))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Neighbour 1 with acuracy 0.8268156424581006\n",
            "Neighbour 2 with acuracy 0.8212290502793296\n",
            "Neighbour 3 with acuracy 0.8491620111731844\n",
            "Neighbour 4 with acuracy 0.7988826815642458\n",
            "Neighbour 5 with acuracy 0.8156424581005587\n",
            "Neighbour 6 with acuracy 0.7988826815642458\n",
            "Neighbour 7 with acuracy 0.7988826815642458\n",
            "Neighbour 8 with acuracy 0.770949720670391\n",
            "Neighbour 9 with acuracy 0.776536312849162\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "QIa1stDYkPXN",
        "outputId": "0141ca07-8d3d-40a8-804e-7dd8240f5090"
      },
      "source": [
        "k=3\n",
        "neigh = KNeighborsClassifier(n_neighbors = k).fit(X_train, y_train)\n",
        "yhat_knn2 = neigh.predict(X_test)\n",
        "jc_knn = jaccard_score(y_test,yhat_knn2,pos_label=1)\n",
        "print(classification_report(y_test,yhat_knn2))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.86      0.91      0.89       114\n",
            "           1       0.83      0.74      0.78        65\n",
            "\n",
            "    accuracy                           0.85       179\n",
            "   macro avg       0.84      0.83      0.83       179\n",
            "weighted avg       0.85      0.85      0.85       179\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1rzG-Lur5LyA",
        "outputId": "7bd17f43-d14a-48f4-ea6a-4574623e9f71"
      },
      "source": [
        "yhat_knn2"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n",
              "       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n",
              "       0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0,\n",
              "       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
              "       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,\n",
              "       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n",
              "       1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0,\n",
              "       0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n",
              "       1, 1, 0])"
            ]
          },
          "metadata": {},
          "execution_count": 25
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xzEYpgMYki7e"
      },
      "source": [
        "# Decision Tree"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "634bo44Pkpbe"
      },
      "source": [
        "dtree = DecisionTreeClassifier(criterion = \"entropy\", max_depth = 4)\n",
        "dtree.fit(X_train, y_train)\n",
        "predtree = dtree.predict(X_test)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TvUGfAgxktGz",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a8608f0b-efa2-4853-d0fb-65420439ef65"
      },
      "source": [
        "print(\"Decision tree accuracy :\",metrics.accuracy_score(y_test,predtree) )"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Decision tree accuracy : 0.7988826815642458\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "DuwbjSWNkvi8",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1803af04-d187-4be3-b539-877b1d3ebc1b"
      },
      "source": [
        "jc_dtree = jaccard_score(y_test,predtree,pos_label=1)\n",
        "jc_dtree"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.5135135135135135"
            ]
          },
          "metadata": {},
          "execution_count": 29
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "KKPtCry6k1aI"
      },
      "source": [
        "# Support Vector Machine"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PiqrHW3Pk8HB",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "7917b010-6d47-4cb4-c3a9-546fddf76a9c"
      },
      "source": [
        "clf = svm.SVC(kernel = \"rbf\")\n",
        "clf.fit(X_train,y_train)\n",
        "SVC()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "SVC()"
            ]
          },
          "metadata": {},
          "execution_count": 30
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "U2sCf3Sfk_w8"
      },
      "source": [
        "yhat_svm = clf.predict(X_test)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_A1n_j4rlCEK",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d4a6e626-9977-4132-f627-23d99ebda77f"
      },
      "source": [
        "jc_svm = jaccard_score(y_test,yhat_svm,pos_label=1)\n",
        "jc_svm"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.56"
            ]
          },
          "metadata": {},
          "execution_count": 33
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ecs67HO3lPpK",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6aa0f19a-e486-4e90-c95e-17d8dd7f9cea"
      },
      "source": [
        "print(classification_report(y_test,yhat_svm))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.82      0.91      0.86       114\n",
            "           1       0.81      0.65      0.72        65\n",
            "\n",
            "    accuracy                           0.82       179\n",
            "   macro avg       0.81      0.78      0.79       179\n",
            "weighted avg       0.81      0.82      0.81       179\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ofRd5X0Llpme"
      },
      "source": [
        "# Logistic Regression"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gn_e0j9AlxFw"
      },
      "source": [
        "LR = LogisticRegression(C = 0.01, solver = \"liblinear\").fit(X_train, y_train)\n",
        "yhat_LR = LR.predict(X_test)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zSqqIz68lyAJ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c079f8ed-b43f-49e8-8536-d1fb43df374f"
      },
      "source": [
        "jc_logreg = jaccard_score(y_test,yhat_LR,pos_label=1)\n",
        "jc_logreg"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.4444444444444444"
            ]
          },
          "metadata": {},
          "execution_count": 37
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wHL-dK2fl1sT",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "fdf6a948-486b-4fee-bde3-487d4685e565"
      },
      "source": [
        "print(classification_report(y_test,yhat_LR))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.76      0.94      0.84       114\n",
            "           1       0.82      0.49      0.62        65\n",
            "\n",
            "    accuracy                           0.78       179\n",
            "   macro avg       0.79      0.72      0.73       179\n",
            "weighted avg       0.78      0.78      0.76       179\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "v7XCkutSl9KZ"
      },
      "source": [
        "# Prediction, Output"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "J9-stXcJk9kc",
        "outputId": "4e8bdeab-3780-4a2e-e8a4-73cd0f4dff57"
      },
      "source": [
        "X2 = pd.get_dummies(test_data[features])\n",
        "X2"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Pclass</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Sex_female</th>\n",
              "      <th>Sex_male</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>413</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>414</th>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>415</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>416</th>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>417</th>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>418 rows × 5 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "     Pclass  SibSp  Parch  Sex_female  Sex_male\n",
              "0         3      0      0           0         1\n",
              "1         3      1      0           1         0\n",
              "2         2      0      0           0         1\n",
              "3         3      0      0           0         1\n",
              "4         3      1      1           1         0\n",
              "..      ...    ...    ...         ...       ...\n",
              "413       3      0      0           0         1\n",
              "414       1      0      0           1         0\n",
              "415       3      0      0           0         1\n",
              "416       3      0      0           0         1\n",
              "417       3      1      1           0         1\n",
              "\n",
              "[418 rows x 5 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 42
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jE54Sw8alaoJ",
        "outputId": "5d8f1e2b-c649-4e16-9290-6a45e03e21a2"
      },
      "source": [
        "yhat_Knn3 = neigh.predict(X2)\n",
        "yhat_Knn3"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,\n",
              "       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,\n",
              "       1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,\n",
              "       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,\n",
              "       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
              "       0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n",
              "       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,\n",
              "       0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,\n",
              "       1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,\n",
              "       0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,\n",
              "       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,\n",
              "       0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,\n",
              "       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,\n",
              "       0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,\n",
              "       0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,\n",
              "       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,\n",
              "       0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,\n",
              "       1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,\n",
              "       0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0])"
            ]
          },
          "metadata": {},
          "execution_count": 44
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cQjxHSTmmHb6"
      },
      "source": [
        "#yhat_knn2 seems best with neighbor3 with .8491 accuracy\n",
        "output = pd.DataFrame\\\n",
        "({'PassengeId':test_data.PassengerId,'Survivor':yhat_Knn3})\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "-D5As-ipmQ2Y",
        "outputId": "b8698673-9248-4b29-ae19-efe2e14db7ea"
      },
      "source": [
        "output"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>PassengeId</th>\n",
              "      <th>Survivor</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>892</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>893</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>894</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>895</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>896</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>413</th>\n",
              "      <td>1305</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>414</th>\n",
              "      <td>1306</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>415</th>\n",
              "      <td>1307</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>416</th>\n",
              "      <td>1308</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>417</th>\n",
              "      <td>1309</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>418 rows × 2 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "     PassengeId  Survivor\n",
              "0           892         0\n",
              "1           893         1\n",
              "2           894         0\n",
              "3           895         0\n",
              "4           896         0\n",
              "..          ...       ...\n",
              "413        1305         0\n",
              "414        1306         1\n",
              "415        1307         0\n",
              "416        1308         0\n",
              "417        1309         0\n",
              "\n",
              "[418 rows x 2 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 47
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "z4IH1qe8mVgB"
      },
      "source": [
        "output.to_csv('/content/drive/MyDrive/Books/Kaggle/prediction.csv',index=False)"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}