Skip to content

Instantly share code, notes, and snippets.

@kungfoomanchu
Created July 16, 2019 05:37
Show Gist options
  • Save kungfoomanchu/4e952268569e7780314e9e9a414061af to your computer and use it in GitHub Desktop.
Save kungfoomanchu/4e952268569e7780314e9e9a414061af to your computer and use it in GitHub Desktop.
Cuisine For Google Colab
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Cuisine For Google Colab",
"version": "0.3.2",
"provenance": [],
"include_colab_link": true
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kungfoomanchu/4e952268569e7780314e9e9a414061af/cusine-for-google-colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "yDrQBMtlPqSF",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "74ba2214-bc24-48b5-d55d-b2b933ae07f6"
},
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from keras.utils import to_categorical\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.datasets import make_classification\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense\n",
"from keras.models import model_from_json\n",
"from tensorflow.keras.models import load_model\n",
"\n",
"from google.colab import files"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "SyaoVEjpRM5w",
"colab_type": "code",
"colab": {
"resources": {
"http://localhost:8080/nbextensions/google.colab/files.js": {
"data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF
0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQ
uYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJ
lYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=",
"ok": true,
"headers": [
[
"content-type",
"application/javascript"
]
],
"status": 200,
"status_text": ""
}
},
"base_uri": "https://localhost:8080/",
"height": 74
},
"outputId": "12e88b74-5d56-494e-bae3-642110acbd52"
},
"source": [
"uploaded = files.upload()"
],
"execution_count": 2,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <input type=\"file\" id=\"files-5781108a-e573-46d1-a25c-87d581b622d1\" name=\"files[]\" multiple disabled />\n",
" <output id=\"result-5781108a-e573-46d1-a25c-87d581b622d1\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script src=\"/nbextensions/google.colab/files.js\"></script> "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"Saving train.json to train.json\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lWIOsFvhRNCQ",
"colab_type": "code",
"colab": {
"resources": {
"http://localhost:8080/nbextensions/google.colab/files.js": {
"data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF
0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQ
uYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJ
lYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=",
"ok": true,
"headers": [
[
"content-type",
"application/javascript"
]
],
"status": 200,
"status_text": ""
}
},
"base_uri": "https://localhost:8080/",
"height": 74
},
"outputId": "cd4cd35b-98f4-47b5-dffb-4cbf34e2a214"
},
"source": [
"uploaded = files.upload()"
],
"execution_count": 3,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <input type=\"file\" id=\"files-2369b3eb-eabf-40bd-b9c5-c2ddb6dccee7\" name=\"files[]\" multiple disabled />\n",
" <output id=\"result-2369b3eb-eabf-40bd-b9c5-c2ddb6dccee7\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script src=\"/nbextensions/google.colab/files.js\"></script> "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"Saving test.json to test.json\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "WRmVLaIWPqSL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "ab904300-556c-46fc-c7b8-c650dd27b71d"
},
"source": [
"data = pd.read_json(\"train.json\")\n",
"data.head()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cuisine</th>\n",
" <th>id</th>\n",
" <th>ingredients</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>greek</td>\n",
" <td>10259</td>\n",
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>southern_us</td>\n",
" <td>25693</td>\n",
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>filipino</td>\n",
" <td>20130</td>\n",
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indian</td>\n",
" <td>22213</td>\n",
" <td>[water, vegetable oil, wheat, salt]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indian</td>\n",
" <td>13162</td>\n",
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cuisine id ingredients\n",
"0 greek 10259 [romaine lettuce, black olives, grape tomatoes...\n",
"1 southern_us 25693 [plain flour, ground pepper, salt, tomatoes, g...\n",
"2 filipino 20130 [eggs, pepper, salt, mayonaise, cooking oil, g...\n",
"3 indian 22213 [water, vegetable oil, wheat, salt]\n",
"4 indian 13162 [black pepper, shallots, cornflour, cayenne pe..."
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "-O_AYsCyPqSO",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 357
},
"outputId": "8674b73c-f942-4e75-becf-a3d95f73522d"
},
"source": [
"# Create list of unique cuisine types\n",
"cuisine_list = data['cuisine']\n",
"# Plain-Python copy of the label column (one entry per row)\n",
"cuisine_compilation = cuisine_list.tolist()\n",
"\n",
"# Sort the distinct labels: a bare set() has no guaranteed order, so the list\n",
"# could differ between runs. Sorted order is stable and matches the order of\n",
"# LabelEncoder.classes_, which is also sorted.\n",
"cuis_unique = sorted(set(cuisine_compilation))\n",
"cuis_unique"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['italian',\n",
" 'russian',\n",
" 'thai',\n",
" 'filipino',\n",
" 'mexican',\n",
" 'greek',\n",
" 'spanish',\n",
" 'british',\n",
" 'cajun_creole',\n",
" 'french',\n",
" 'irish',\n",
" 'jamaican',\n",
" 'vietnamese',\n",
" 'korean',\n",
" 'chinese',\n",
" 'indian',\n",
" 'moroccan',\n",
" 'southern_us',\n",
" 'japanese',\n",
" 'brazilian']"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "C26VncKVPqSQ",
"colab_type": "code",
"colab": {}
},
"source": [
"# One-hot encode the cuisine labels.\n",
"# Step 1: learn the label -> integer class id mapping (fit() returns the encoder itself).\n",
"label_encoder_cuis = LabelEncoder().fit(cuis_unique)\n",
"# Step 2: turn every row's cuisine name into its class id.\n",
"encoded_cuis = label_encoder_cuis.transform(data['cuisine'])\n",
"# Step 3: expand each class id into a 0/1 indicator vector.\n",
"one_hot_cuis = to_categorical(encoded_cuis)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "F0nrC-egPqSS",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "1a05e367-b61f-4045-c8af-41651db20ae0"
},
"source": [
"# Check first encoded item\n",
"one_hot_cuis[0]"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0.], dtype=float32)"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "huUtx052PqSV",
"colab_type": "code",
"colab": {}
},
"source": [
"# Add one hot cuisine to dataframe\n",
"data[\"one_hot_cuisine\"] = list(one_hot_cuis)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "bFpBKxoyPqSX",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "d4b22761-a910-4758-886d-d17cc1ea3307"
},
"source": [
"data.head()"
],
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cuisine</th>\n",
" <th>id</th>\n",
" <th>ingredients</th>\n",
" <th>one_hot_cuisine</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>greek</td>\n",
" <td>10259</td>\n",
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>southern_us</td>\n",
" <td>25693</td>\n",
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>filipino</td>\n",
" <td>20130</td>\n",
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indian</td>\n",
" <td>22213</td>\n",
" <td>[water, vegetable oil, wheat, salt]</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indian</td>\n",
" <td>13162</td>\n",
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cuisine ... one_hot_cuisine\n",
"0 greek ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...\n",
"1 southern_us ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
"2 filipino ... [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
"3 indian ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
"4 indian ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
"\n",
"[5 rows x 4 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xgqQCZ7FPqSZ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "5377d002-1fcb-447b-cafe-977966c8fa45"
},
"source": [
"# Organize data frame: keep the label, its one-hot vector and the ingredients,\n",
"# in that order (the id column is not selected, so it is dropped here).\n",
"kept_columns = ['cuisine', 'one_hot_cuisine', 'ingredients']\n",
"data = data[kept_columns]\n",
"data.head()"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cuisine</th>\n",
" <th>one_hot_cuisine</th>\n",
" <th>ingredients</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>greek</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n",
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>southern_us</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>filipino</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indian</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" <td>[water, vegetable oil, wheat, salt]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indian</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cuisine ... ingredients\n",
"0 greek ... [romaine lettuce, black olives, grape tomatoes...\n",
"1 southern_us ... [plain flour, ground pepper, salt, tomatoes, g...\n",
"2 filipino ... [eggs, pepper, salt, mayonaise, cooking oil, g...\n",
"3 indian ... [water, vegetable oil, wheat, salt]\n",
"4 indian ... [black pepper, shallots, cornflour, cayenne pe...\n",
"\n",
"[5 rows x 3 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "n61Xvfu6PqSc",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create one hot encoding for ingredients that are in lists!\n",
"# -Create dictionary: every distinct ingredient gets the next free integer index\n",
"ingredients = data.loc[:, 'ingredients']\n",
"\n",
"i_map = {}   # ingredient name -> index\n",
"i_list = []  # index -> ingredient name, in first-seen order\n",
"for ingredient_list in ingredients:\n",
"    for name in ingredient_list:\n",
"        if name not in i_map:\n",
"            # len(i_list) is the next unused index, so map and list stay in sync\n",
"            i_map[name] = len(i_list)\n",
"            i_list.append(name)\n",
"\n",
"# Number of distinct ingredients seen (kept under its original name)\n",
"counter = len(i_list)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "2tYwfaBpPqSf",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
},
"outputId": "7f2984d8-1454-41c0-85ef-182d3ea08197"
},
"source": [
"# Preview the first five ingredient -> index pairs, in first-seen order\n",
"{name: i_map[name] for name in i_list[:5]}"
],
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'black olives': 1,\n",
" 'garlic': 3,\n",
" 'grape tomatoes': 2,\n",
" 'pepper': 4,\n",
" 'romaine lettuce': 0}"
]
},
"metadata": {
"tags": []
},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "aSlEak3BPqSi",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create one hot encoding for ingredients that are in lists!\n",
"# -Create encoded data: one 0/1 vector per row, with a slot for every key in i_map\n",
"n_ingredients = len(i_map)  # does not change inside the loop, so compute it once\n",
"\n",
"ingredients_encodings = []\n",
"for ingredient_list in ingredients:\n",
"    encoding = [0] * n_ingredients\n",
"    for name in ingredient_list:\n",
"        # flag this ingredient as present in the current row\n",
"        encoding[i_map[name]] = 1\n",
"    ingredients_encodings.append(encoding)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "s0kuDddyPqSl",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "5e99ff7b-2808-4487-f44e-8437e1d3cc4b"
},
"source": [
"ingredients_encodings[0]"
],
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" ...]"
]
},
"metadata": {
"tags": []
},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "jIKfh8BbPqSo",
"colab_type": "code",
"colab": {}
},
"source": [
"# Add encoded ingredients to data frame\n",
"data[\"one_hot_ingredients\"] = ingredients_encodings"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "WI5Xty1-PqSr",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "c759c5ef-6eda-4585-a13c-2c858a64b5e7"
},
"source": [
"data.head()"
],
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cuisine</th>\n",
" <th>one_hot_cuisine</th>\n",
" <th>ingredients</th>\n",
" <th>one_hot_ingredients</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>greek</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n",
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n",
" <td>[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>southern_us</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n",
" <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>filipino</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n",
" <td>[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indian</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" <td>[water, vegetable oil, wheat, salt]</td>\n",
" <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indian</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n",
" <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cuisine ... one_hot_ingredients\n",
"0 greek ... [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...\n",
"1 southern_us ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ...\n",
"2 filipino ... [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n",
"3 indian ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n",
"4 indian ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n",
"\n",
"[5 rows x 4 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 16
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "uR1Q_O8APqSu",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create variables for train test split\n",
"one_hot_cuisine = data['one_hot_cuisine']\n",
"one_hot_ingredients = data['one_hot_ingredients']"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "_MLWC7B1PqSw",
"colab_type": "code",
"colab": {}
},
"source": [
"# Split data\n",
"ing_train, ing_test,cuis_train, cuis_test = train_test_split(one_hot_ingredients, one_hot_cuisine, random_state=1)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "4NzPDfSUPqSx",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 119
},
"outputId": "f1c503ea-c43e-4067-870f-1521e8c821c5"
},
"source": [
"ing_train.head()"
],
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"15470 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"24599 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n",
"4712 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n",
"8761 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"22503 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, ...\n",
"Name: one_hot_ingredients, dtype: object"
]
},
"metadata": {
"tags": []
},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "m4L9LTZUPqSz",
"colab_type": "code",
"colab": {}
},
"source": [
"# Convert object into numpy array \n",
"cuis_train = np.array(cuis_train.tolist())\n",
"cuis_test = np.array(cuis_test.tolist())\n",
"ing_train = np.array(ing_train.tolist())\n",
"ing_test = np.array(ing_test.tolist())"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "3YzeFYakPqS0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"outputId": "7c49e5ec-d98b-46c6-d77b-57983e05bcde"
},
"source": [
"ing_train"
],
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" ...,\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0]])"
]
},
"metadata": {
"tags": []
},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "COq5jmxWPqS2",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"outputId": "80ad3ec2-4f82-4cc8-eb8d-9066a454f5be"
},
"source": [
"# Create model structure\n",
"deep_model = Sequential()\n",
"deep_model.add(Dense(units=20, activation='relu', input_dim=6714))\n",
"deep_model.add(Dense(units=15, activation='relu'))\n",
"deep_model.add(Dense(units=10, activation='relu'))\n",
"deep_model.add(Dense(units=20, activation='softmax'))"
],
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING: Logging before flag parsing goes to stderr.\n",
"W0716 05:36:04.674324 140608603793280 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Call initializer instance with the dtype argument instead of passing it to the constructor\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KNc1lbefPqS5",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 289
},
"outputId": "4eb1ca14-2ff4-43c3-b896-6538c5246fa4"
},
"source": [
"deep_model.summary()"
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"dense (Dense) (None, 20) 134300 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 15) 315 \n",
"_________________________________________________________________\n",
"dense_2 (Dense) (None, 10) 160 \n",
"_________________________________________________________________\n",
"dense_3 (Dense) (None, 20) 220 \n",
"=================================================================\n",
"Total params: 134,995\n",
"Trainable params: 134,995\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5P5SVUIAPqS9",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 374
},
"outputId": "c4e0db74-3c7f-4d01-f936-da1729088610"
},
"source": [
"# Compile and fit model\n",
"deep_model.compile(optimizer='adam',\n",
" loss='categorical_crossentropy',\n",
" metrics=['accuracy'])\n",
"\n",
"deep_model.fit(\n",
" ing_train,\n",
" cuis_train,\n",
" epochs=10,\n",
" shuffle=True,\n",
" verbose=2\n",
")"
],
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"29830/29830 - 4s - loss: 1.6802 - acc: 0.5114\n",
"Epoch 2/10\n",
"29830/29830 - 3s - loss: 0.9572 - acc: 0.7250\n",
"Epoch 3/10\n",
"29830/29830 - 3s - loss: 0.7762 - acc: 0.7728\n",
"Epoch 4/10\n",
"29830/29830 - 3s - loss: 0.6661 - acc: 0.8040\n",
"Epoch 5/10\n",
"29830/29830 - 3s - loss: 0.5822 - acc: 0.8275\n",
"Epoch 6/10\n",
"29830/29830 - 3s - loss: 0.5137 - acc: 0.8491\n",
"Epoch 7/10\n",
"29830/29830 - 3s - loss: 0.4582 - acc: 0.8665\n",
"Epoch 8/10\n",
"29830/29830 - 3s - loss: 0.4123 - acc: 0.8784\n",
"Epoch 9/10\n",
"29830/29830 - 3s - loss: 0.3715 - acc: 0.8912\n",
"Epoch 10/10\n",
"29830/29830 - 3s - loss: 0.3372 - acc: 0.9018\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7fe1dabc6cf8>"
]
},
"metadata": {
"tags": []
},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "3QBZAwZAPqTA",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "59302994-1c54-4aab-999d-32aad3df7a0e"
},
"source": [
"# Test the model. Get loss and accuracy.\n",
"deep_model_loss, deep_model_accuracy = deep_model.evaluate(\n",
" ing_test, cuis_test, verbose=2)\n",
"print(\n",
" f\"Deep Neural Network - Loss: {deep_model_loss}, Accuracy: {deep_model_accuracy}\")"
],
"execution_count": 25,
"outputs": [
{
"output_type": "stream",
"text": [
"9944/9944 - 1s - loss: 1.2339 - acc: 0.7273\n",
"Deep Neural Network - Loss: 1.2339258739186791, Accuracy: 0.7272727489471436\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "e151jDlVPqTD",
"colab_type": "code",
"colab": {}
},
"source": [
"# Save the model weights\n",
"deep_model.save(\"cuisine_deep_model_trained.h5\")"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "KAeVInQkPqTG",
"colab_type": "code",
"colab": {}
},
"source": [
"# Save the model structure\n",
"with open('deep_model_architecture.json', 'w') as f:\n",
" f.write(deep_model.to_json())"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "GUo8U3BtPqTI",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "dyyRbG82PqTN",
"colab_type": "text"
},
"source": [
""
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment