sofia100/Capstone_Coursera.ipynb

## Capstone_Coursera.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h1>\n",
    "    Capstone Project</h1>\n",
    "    <h3 > - provided by Coursera</h3>\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This project shall see how to use location API to find a new location to live with good neighbours"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Hello Capstone Project Course!\n"
     ]
    }
   ],
   "source": [
    "print (\" Hello Capstone Project Course!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Explore and cluster the neighborhoods in Toronto"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_url=\"https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting BeautifulSoup4\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/66/25/ff030e2437265616a1e9b25ccc864e0371a0bc3adb7c5a404fd661c6f4f6/beautifulsoup4-4.9.1-py3-none-any.whl (115kB)\n",
      "\u001b[K     |████████████████████████████████| 122kB 25.0MB/s eta 0:00:01\n",
      "\u001b[?25hCollecting soupsieve>1.2 (from BeautifulSoup4)\n",
      "  Downloading https://files.pythonhosted.org/packages/6f/8f/457f4a5390eeae1cc3aeab89deb7724c965be841ffca6cfca9197482e470/soupsieve-2.0.1-py3-none-any.whl\n",
      "Installing collected packages: soupsieve, BeautifulSoup4\n",
      "Successfully installed BeautifulSoup4-4.9.1 soupsieve-2.0.1\n",
      "Requirement already satisfied: requests in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (2.23.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (2020.4.5.1)\n",
      "Requirement already satisfied: chardet<4,>=3.0.2 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (3.0.4)\n",
      "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (1.25.9)\n",
      "Requirement already satisfied: idna<3,>=2.5 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (2.9)\n"
     ]
    }
   ],
   "source": [
    "#install Beautiful Soup and requests for Web Scaping\n",
    "!pip install BeautifulSoup4\n",
    "!pip install requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting package metadata (current_repodata.json): done\n",
      "Solving environment: done\n",
      "\n",
      "# All requested packages already installed.\n",
      "\n",
      "Libraries imported.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np # library to handle data in a vectorized manner\n",
    "\n",
    "import pandas as pd # library for data analsysis\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.max_rows', None)\n",
    "\n",
    "import json # library to handle JSON files\n",
    "\n",
    "!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab\n",
    "from geopy.geocoders import Nominatim # convert an address into latitude and longitude values\n",
    "\n",
    "import requests # library to handle requests\n",
    "from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe\n",
    "\n",
    "# Matplotlib and associated plotting modules\n",
    "import matplotlib.cm as cm\n",
    "import matplotlib.colors as colors\n",
    "\n",
    "# import k-means from clustering stage\n",
    "from sklearn.cluster import KMeans\n",
    "\n",
    "#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab\n",
    "import folium # map rendering library\n",
    "\n",
    "print('Libraries imported.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "#imports\n",
    "from bs4 import BeautifulSoup\n",
    "import requests\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#get html from wiki page and create soup object\n",
    "source = requests.get(data_url)\n",
    "soup = BeautifulSoup(source.text, 'html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "#using soup object, iterate the .wikitable to get the data from the HTML page and store it into a list\n",
    "data = []\n",
    "columns = []\n",
    "table = soup.find(class_='wikitable')\n",
    "for index, tr in enumerate(table.find_all('tr')):\n",
    "    section = []\n",
    "    for td in tr.find_all(['th','td']):\n",
    "        section.append(td.text.rstrip())\n",
    "    \n",
    "    #First row of data is the header\n",
    "    if (index == 0):\n",
    "        columns = section\n",
    "    else:\n",
    "        data.append(section)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M2A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M3A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M4A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park, Harbourfront</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postal Code           Borough               Neighborhood\n",
       "0         M1A      Not assigned                           \n",
       "1         M2A      Not assigned                           \n",
       "2         M3A        North York                  Parkwoods\n",
       "3         M4A        North York           Victoria Village\n",
       "4         M5A  Downtown Toronto  Regent Park, Harbourfront"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#convert list into Pandas DataFrame\n",
    "canada_df = pd.DataFrame(data = data,columns = columns)\n",
    "canada_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M3A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M4A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park, Harbourfront</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Manor, Lawrence Heights</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>M7A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Queen's Park, Ontario Provincial Government</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postal Code           Borough                                 Neighborhood\n",
       "2         M3A        North York                                    Parkwoods\n",
       "3         M4A        North York                             Victoria Village\n",
       "4         M5A  Downtown Toronto                    Regent Park, Harbourfront\n",
       "5         M6A        North York             Lawrence Manor, Lawrence Heights\n",
       "6         M7A  Downtown Toronto  Queen's Park, Ontario Provincial Government"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Remove Boroughs that are 'Not assigned'\n",
    "canada_df = canada_df[canada_df['Borough'] != 'Not assigned']\n",
    "canada_df.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Postal Code</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>M3A</th>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M4A</th>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M5A</th>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park, Harbourfront</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M6A</th>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Manor, Lawrence Heights</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M7A</th>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Queen's Park, Ontario Provincial Government</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Borough                                 Neighborhood\n",
       "Postal Code                                                               \n",
       "M3A                North York                                    Parkwoods\n",
       "M4A                North York                             Victoria Village\n",
       "M5A          Downtown Toronto                    Regent Park, Harbourfront\n",
       "M6A                North York             Lawrence Manor, Lawrence Heights\n",
       "M7A          Downtown Toronto  Queen's Park, Ontario Provincial Government"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# More than one neighborhood can exist in one postal code area, combined these into one row with the neighborhoods separated with a comma\n",
    "canada_df[\"Neighborhood\"] = canada_df.groupby(\"Postal Code\")[\"Neighborhood\"].transform(lambda neigh: ', '.join(neigh))\n",
    "\n",
    "#remove duplicates\n",
    "canada_df = canada_df.drop_duplicates()\n",
    "\n",
    "#update index to be postcode if it isn't already\n",
    "if(canada_df.index.name != 'Postal Code'):\n",
    "    canada_df = canada_df.set_index('Postal Code')\n",
    "    \n",
    "canada_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Postal Code</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>M3A</th>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M4A</th>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M5A</th>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park, Harbourfront</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M6A</th>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Manor, Lawrence Heights</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M7A</th>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Queen's Park, Ontario Provincial Government</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Borough                                 Neighborhood\n",
       "Postal Code                                                               \n",
       "M3A                North York                                    Parkwoods\n",
       "M4A                North York                             Victoria Village\n",
       "M5A          Downtown Toronto                    Regent Park, Harbourfront\n",
       "M6A                North York             Lawrence Manor, Lawrence Heights\n",
       "M7A          Downtown Toronto  Queen's Park, Ontario Provincial Government"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough\n",
    "canada_df['Neighborhood'].replace(\"Not assigned\", canada_df[\"Borough\"],inplace=True)\n",
    "canada_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(103, 2)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "canada_df.shape\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python",
   "language": "python",
   "name": "conda-env-python-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<h1>\n",
	" Capstone Project</h1>\n",
	" <h3 > - provided by Coursera</h3>\n",
	" "
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This project shall see how to use location API to find a new location to live with good neighbours"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" Hello Capstone Project Course!\n"
	]
	}
	],
	"source": [
	"print (\" Hello Capstone Project Course!\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Explore and cluster the neighborhoods in Toronto"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"#### build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"data_url=\"https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Collecting BeautifulSoup4\n",
	"\u001b[?25l Downloading https://files.pythonhosted.org/packages/66/25/ff030e2437265616a1e9b25ccc864e0371a0bc3adb7c5a404fd661c6f4f6/beautifulsoup4-4.9.1-py3-none-any.whl (115kB)\n",
	"\u001b[K \|████████████████████████████████\| 122kB 25.0MB/s eta 0:00:01\n",
	"\u001b[?25hCollecting soupsieve>1.2 (from BeautifulSoup4)\n",
	" Downloading https://files.pythonhosted.org/packages/6f/8f/457f4a5390eeae1cc3aeab89deb7724c965be841ffca6cfca9197482e470/soupsieve-2.0.1-py3-none-any.whl\n",
	"Installing collected packages: soupsieve, BeautifulSoup4\n",
	"Successfully installed BeautifulSoup4-4.9.1 soupsieve-2.0.1\n",
	"Requirement already satisfied: requests in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (2.23.0)\n",
	"Requirement already satisfied: certifi>=2017.4.17 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (2020.4.5.1)\n",
	"Requirement already satisfied: chardet<4,>=3.0.2 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (3.0.4)\n",
	"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (1.25.9)\n",
	"Requirement already satisfied: idna<3,>=2.5 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests) (2.9)\n"
	]
	}
	],
	"source": [
	"#install Beautiful Soup and requests for Web Scaping\n",
	"!pip install BeautifulSoup4\n",
	"!pip install requests"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false,
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Collecting package metadata (current_repodata.json): done\n",
	"Solving environment: done\n",
	"\n",
	"# All requested packages already installed.\n",
	"\n",
	"Libraries imported.\n"
	]
	}
	],
	"source": [
	"import numpy as np # library to handle data in a vectorized manner\n",
	"\n",
	"import pandas as pd # library for data analsysis\n",
	"pd.set_option('display.max_columns', None)\n",
	"pd.set_option('display.max_rows', None)\n",
	"\n",
	"import json # library to handle JSON files\n",
	"\n",
	"!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab\n",
	"from geopy.geocoders import Nominatim # convert an address into latitude and longitude values\n",
	"\n",
	"import requests # library to handle requests\n",
	"from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe\n",
	"\n",
	"# Matplotlib and associated plotting modules\n",
	"import matplotlib.cm as cm\n",
	"import matplotlib.colors as colors\n",
	"\n",
	"# import k-means from clustering stage\n",
	"from sklearn.cluster import KMeans\n",
	"\n",
	"#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab\n",
	"import folium # map rendering library\n",
	"\n",
	"print('Libraries imported.')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"#imports\n",
	"from bs4 import BeautifulSoup\n",
	"import requests\n",
	"import pandas as pd\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"#get html from wiki page and create soup object\n",
	"source = requests.get(data_url)\n",
	"soup = BeautifulSoup(source.text, 'html')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"#using soup object, iterate the .wikitable to get the data from the HTML page and store it into a list\n",
	"data = []\n",
	"columns = []\n",
	"table = soup.find(class_='wikitable')\n",
	"for index, tr in enumerate(table.find_all('tr')):\n",
	" section = []\n",
	" for td in tr.find_all(['th','td']):\n",
	" section.append(td.text.rstrip())\n",
	" \n",
	" #First row of data is the header\n",
	" if (index == 0):\n",
	" columns = section\n",
	" else:\n",
	" data.append(section)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Postal Code</th>\n",
	" <th>Borough</th>\n",
	" <th>Neighborhood</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>M1A</td>\n",
	" <td>Not assigned</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>M2A</td>\n",
	" <td>Not assigned</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>M3A</td>\n",
	" <td>North York</td>\n",
	" <td>Parkwoods</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>M4A</td>\n",
	" <td>North York</td>\n",
	" <td>Victoria Village</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>M5A</td>\n",
	" <td>Downtown Toronto</td>\n",
	" <td>Regent Park, Harbourfront</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" Postal Code Borough Neighborhood\n",
	"0 M1A Not assigned \n",
	"1 M2A Not assigned \n",
	"2 M3A North York Parkwoods\n",
	"3 M4A North York Victoria Village\n",
	"4 M5A Downtown Toronto Regent Park, Harbourfront"
	]
	},
	"execution_count": 22,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"#convert list into Pandas DataFrame\n",
	"canada_df = pd.DataFrame(data = data,columns = columns)\n",
	"canada_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Postal Code</th>\n",
	" <th>Borough</th>\n",
	" <th>Neighborhood</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>M3A</td>\n",
	" <td>North York</td>\n",
	" <td>Parkwoods</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>M4A</td>\n",
	" <td>North York</td>\n",
	" <td>Victoria Village</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>M5A</td>\n",
	" <td>Downtown Toronto</td>\n",
	" <td>Regent Park, Harbourfront</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>M6A</td>\n",
	" <td>North York</td>\n",
	" <td>Lawrence Manor, Lawrence Heights</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>M7A</td>\n",
	" <td>Downtown Toronto</td>\n",
	" <td>Queen's Park, Ontario Provincial Government</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" Postal Code Borough Neighborhood\n",
	"2 M3A North York Parkwoods\n",
	"3 M4A North York Victoria Village\n",
	"4 M5A Downtown Toronto Regent Park, Harbourfront\n",
	"5 M6A North York Lawrence Manor, Lawrence Heights\n",
	"6 M7A Downtown Toronto Queen's Park, Ontario Provincial Government"
	]
	},
	"execution_count": 23,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"#Remove Boroughs that are 'Not assigned'\n",
	"canada_df = canada_df[canada_df['Borough'] != 'Not assigned']\n",
	"canada_df.head()\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Borough</th>\n",
	" <th>Neighborhood</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>Postal Code</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>M3A</th>\n",
	" <td>North York</td>\n",
	" <td>Parkwoods</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M4A</th>\n",
	" <td>North York</td>\n",
	" <td>Victoria Village</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M5A</th>\n",
	" <td>Downtown Toronto</td>\n",
	" <td>Regent Park, Harbourfront</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M6A</th>\n",
	" <td>North York</td>\n",
	" <td>Lawrence Manor, Lawrence Heights</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M7A</th>\n",
	" <td>Downtown Toronto</td>\n",
	" <td>Queen's Park, Ontario Provincial Government</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" Borough Neighborhood\n",
	"Postal Code \n",
	"M3A North York Parkwoods\n",
	"M4A North York Victoria Village\n",
	"M5A Downtown Toronto Regent Park, Harbourfront\n",
	"M6A North York Lawrence Manor, Lawrence Heights\n",
	"M7A Downtown Toronto Queen's Park, Ontario Provincial Government"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# More than one neighborhood can exist in one postal code area, combined these into one row with the neighborhoods separated with a comma\n",
	"canada_df[\"Neighborhood\"] = canada_df.groupby(\"Postal Code\")[\"Neighborhood\"].transform(lambda neigh: ', '.join(neigh))\n",
	"\n",
	"#remove duplicates\n",
	"canada_df = canada_df.drop_duplicates()\n",
	"\n",
	"#update index to be postcode if it isn't already\n",
	"if(canada_df.index.name != 'Postal Code'):\n",
	" canada_df = canada_df.set_index('Postal Code')\n",
	" \n",
	"canada_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Borough</th>\n",
	" <th>Neighborhood</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>Postal Code</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>M3A</th>\n",
	" <td>North York</td>\n",
	" <td>Parkwoods</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M4A</th>\n",
	" <td>North York</td>\n",
	" <td>Victoria Village</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M5A</th>\n",
	" <td>Downtown Toronto</td>\n",
	" <td>Regent Park, Harbourfront</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M6A</th>\n",
	" <td>North York</td>\n",
	" <td>Lawrence Manor, Lawrence Heights</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>M7A</th>\n",
	" <td>Downtown Toronto</td>\n",
	" <td>Queen's Park, Ontario Provincial Government</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" Borough Neighborhood\n",
	"Postal Code \n",
	"M3A North York Parkwoods\n",
	"M4A North York Victoria Village\n",
	"M5A Downtown Toronto Regent Park, Harbourfront\n",
	"M6A North York Lawrence Manor, Lawrence Heights\n",
	"M7A Downtown Toronto Queen's Park, Ontario Provincial Government"
	]
	},
	"execution_count": 26,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough\n",
	"canada_df['Neighborhood'].replace(\"Not assigned\", canada_df[\"Borough\"],inplace=True)\n",
	"canada_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(103, 2)"
	]
	},
	"execution_count": 27,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"canada_df.shape\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python",
	"language": "python",
	"name": "conda-env-python-py"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.10"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}