Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save javigonzalez6/6df43635aa24ae5295116473c02dc44d to your computer and use it in GitHub Desktop.
Save javigonzalez6/6df43635aa24ae5295116473c02dc44d to your computer and use it in GitHub Desktop.
Created on Skills Network Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# In this notebook we will be clustering Neighborhoods of Toronto"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Retrieve the data and create a Dataframe"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Scraping the Wikipedia page and creating the Dataframe"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np # library to handle data in a vectorized manner\n",
"\n",
"import pandas as pd # library for data analysis\n",
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)\n",
"\n",
"import json # library to handle JSON files\n",
"\n",
"#!conda install -c conda-forge geopy --yes \n",
"from geopy.geocoders import Nominatim # convert an address into latitude and longitude values\n",
"\n",
"import requests # library to handle requests\n",
"from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe\n",
"\n",
"# Matplotlib and associated plotting modules\n",
"import matplotlib.cm as cm\n",
"import matplotlib.colors as colors\n",
"\n",
"# import k-means from clustering stage\n",
"from sklearn.cluster import KMeans\n",
"\n",
"#!conda install -c conda-forge folium=0.5.0 --yes\n",
"import folium # map rendering library"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PostalCode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [PostalCode, Borough, Neighborhood]\n",
"Index: []"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# define the dataframe columns\n",
"column_names = ['PostalCode', 'Borough', 'Neighborhood'] \n",
"\n",
"# instantiate the dataframe\n",
"df = pd.DataFrame(columns=column_names)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# import the library we use to open URLs\n",
"import urllib.request\n",
"\n",
"# specify which URL/web page we are going to be scraping\n",
"url = \"https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M\"\n",
"\n",
"# download the HTML inside a `with` block so the connection is closed as soon as\n",
"# the body has been read; `page` holds the raw bytes, which BeautifulSoup accepts directly\n",
"with urllib.request.urlopen(url) as response:\n",
"    page = response.read()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# install the BeautifulSoup package (bs4) if it is not already available\n",
"#!pip install bs4"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# import the BeautifulSoup library so we can parse HTML and XML documents\n",
"from bs4 import BeautifulSoup\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# parse the HTML from our URL into the BeautifulSoup parse tree format;\n",
"# the parser is named explicitly so the result does not depend on which\n",
"# optional parsers happen to be installed (and no parser warning is emitted)\n",
"soup = BeautifulSoup(page, 'html.parser')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#print(soup.prettify())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# gather every <table> element on the page (kept around for inspection)\n",
"all_tables = soup.find_all('table')\n",
"\n",
"# pick the first table matching the class string 'wikitable sortable'\n",
"right_table = soup.find(name='table', class_='wikitable sortable')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# one list per column of the scraped table\n",
"A = []  # postal codes\n",
"B = []  # boroughs\n",
"C = []  # neighborhoods\n",
"\n",
"for row in right_table.find_all('tr'):\n",
"    cells = row.find_all('td')\n",
"    # skip rows that do not have exactly three <td> cells\n",
"    if len(cells) == 3:\n",
"        # keep the first text node of each cell as-is; newlines are cleaned up\n",
"        # once the columns are loaded into the dataframe\n",
"        A.append(cells[0].find(string=True))\n",
"        B.append(cells[1].find(string=True))\n",
"        C.append(cells[2].find(string=True))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# build the frame directly from the scraped columns so this cell can be re-run\n",
"# (assigning full-length lists into an already-filtered frame raises a length error)\n",
"df = pd.DataFrame({'PostalCode': A, 'Borough': B, 'Neighborhood': C},\n",
"                  columns=['PostalCode', 'Borough', 'Neighborhood'], dtype=object)\n",
"\n",
"# replace embedded newlines with spaces, then trim the padding this leaves at the ends\n",
"# so values compare cleanly (e.g. 'Not assigned' rather than 'Not assigned ')\n",
"df = df.replace(r'\\n', ' ', regex=True).apply(lambda col: col.str.strip())\n",
"\n",
"# drop postal codes that have no borough assigned and renumber the remaining rows\n",
"df = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PostalCode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M3A</td>\n",
" <td>North York</td>\n",
" <td>Parkwoods</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M4A</td>\n",
" <td>North York</td>\n",
" <td>Victoria Village</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Regent Park, Harbourfront</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Manor, Lawrence Heights</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M7A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Queen's Park, Ontario Provincial Government</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>M9A</td>\n",
" <td>Etobicoke</td>\n",
" <td>Islington Avenue, Humber Valley Village</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>M1B</td>\n",
" <td>Scarborough</td>\n",
" <td>Malvern, Rouge</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>M3B</td>\n",
" <td>North York</td>\n",
" <td>Don Mills</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>M4B</td>\n",
" <td>East York</td>\n",
" <td>Parkview Hill, Woodbine Gardens</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>M5B</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Garden District, Ryerson</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>M6B</td>\n",
" <td>North York</td>\n",
" <td>Glencairn</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>M9B</td>\n",
" <td>Etobicoke</td>\n",
" <td>West Deane Park, Princess Gardens, Martin Grov...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>M1C</td>\n",
" <td>Scarborough</td>\n",
" <td>Rouge Hill, Port Union, Highland Creek</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>M3C</td>\n",
" <td>North York</td>\n",
" <td>Don Mills</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>M4C</td>\n",
" <td>East York</td>\n",
" <td>Woodbine Heights</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>M5C</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>St. James Town</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>M6C</td>\n",
" <td>York</td>\n",
" <td>Humewood-Cedarvale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>M9C</td>\n",
" <td>Etobicoke</td>\n",
" <td>Eringate, Bloordale Gardens, Old Burnhamthorpe...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>M1E</td>\n",
" <td>Scarborough</td>\n",
" <td>Guildwood, Morningside, West Hill</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>M4E</td>\n",
" <td>East Toronto</td>\n",
" <td>The Beaches</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>M5E</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Berczy Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>M6E</td>\n",
" <td>York</td>\n",
" <td>Caledonia-Fairbanks</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>M1G</td>\n",
" <td>Scarborough</td>\n",
" <td>Woburn</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>M4G</td>\n",
" <td>East York</td>\n",
" <td>Leaside</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>M5G</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Central Bay Street</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>M6G</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Christie</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>M1H</td>\n",
" <td>Scarborough</td>\n",
" <td>Cedarbrae</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>M2H</td>\n",
" <td>North York</td>\n",
" <td>Hillcrest Village</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>M3H</td>\n",
" <td>North York</td>\n",
" <td>Bathurst Manor, Wilson Heights, Downsview North</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>M4H</td>\n",
" <td>East York</td>\n",
" <td>Thorncliffe Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>M5H</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Richmond, Adelaide, King</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>M6H</td>\n",
" <td>West Toronto</td>\n",
" <td>Dufferin, Dovercourt Village</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>M1J</td>\n",
" <td>Scarborough</td>\n",
" <td>Scarborough Village</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>M2J</td>\n",
" <td>North York</td>\n",
" <td>Fairview, Henry Farm, Oriole</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>M3J</td>\n",
" <td>North York</td>\n",
" <td>Northwood Park, York University</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>M4J</td>\n",
" <td>East York</td>\n",
" <td>East Toronto, Broadview North (Old East York)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>M5J</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbourfront East, Union Station, Toronto Isla...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>M6J</td>\n",
" <td>West Toronto</td>\n",
" <td>Little Portugal, Trinity</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>M1K</td>\n",
" <td>Scarborough</td>\n",
" <td>Kennedy Park, Ionview, East Birchmount Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>M2K</td>\n",
" <td>North York</td>\n",
" <td>Bayview Village</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>M3K</td>\n",
" <td>North York</td>\n",
" <td>Downsview</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>M4K</td>\n",
" <td>East Toronto</td>\n",
" <td>The Danforth West, Riverdale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>M5K</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Toronto Dominion Centre, Design Exchange</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>M6K</td>\n",
" <td>West Toronto</td>\n",
" <td>Brockton, Parkdale Village, Exhibition Place</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>M1L</td>\n",
" <td>Scarborough</td>\n",
" <td>Golden Mile, Clairlea, Oakridge</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>M2L</td>\n",
" <td>North York</td>\n",
" <td>York Mills, Silver Hills</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>M3L</td>\n",
" <td>North York</td>\n",
" <td>Downsview</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>M4L</td>\n",
" <td>East Toronto</td>\n",
" <td>India Bazaar, The Beaches West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>M5L</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Commerce Court, Victoria Hotel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>M6L</td>\n",
" <td>North York</td>\n",
" <td>North Park, Maple Leaf Park, Upwood Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>M9L</td>\n",
" <td>North York</td>\n",
" <td>Humber Summit</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>M1M</td>\n",
" <td>Scarborough</td>\n",
" <td>Cliffside, Cliffcrest, Scarborough Village West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>M2M</td>\n",
" <td>North York</td>\n",
" <td>Willowdale, Newtonbrook</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>M3M</td>\n",
" <td>North York</td>\n",
" <td>Downsview</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>M4M</td>\n",
" <td>East Toronto</td>\n",
" <td>Studio District</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>M5M</td>\n",
" <td>North York</td>\n",
" <td>Bedford Park, Lawrence Manor East</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>M6M</td>\n",
" <td>York</td>\n",
" <td>Del Ray, Mount Dennis, Keelsdale and Silverthorn</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>M9M</td>\n",
" <td>North York</td>\n",
" <td>Humberlea, Emery</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>M1N</td>\n",
" <td>Scarborough</td>\n",
" <td>Birch Cliff, Cliffside West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>M2N</td>\n",
" <td>North York</td>\n",
" <td>Willowdale, Willowdale East</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>M3N</td>\n",
" <td>North York</td>\n",
" <td>Downsview</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>M4N</td>\n",
" <td>Central Toronto</td>\n",
" <td>Lawrence Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>M5N</td>\n",
" <td>Central Toronto</td>\n",
" <td>Roselawn</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>M6N</td>\n",
" <td>York</td>\n",
" <td>Runnymede, The Junction North</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>M9N</td>\n",
" <td>York</td>\n",
" <td>Weston</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>M1P</td>\n",
" <td>Scarborough</td>\n",
" <td>Dorset Park, Wexford Heights, Scarborough Town...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>M2P</td>\n",
" <td>North York</td>\n",
" <td>York Mills West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>M4P</td>\n",
" <td>Central Toronto</td>\n",
" <td>Davisville North</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>M5P</td>\n",
" <td>Central Toronto</td>\n",
" <td>Forest Hill North &amp; West, Forest Hill Road Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>M6P</td>\n",
" <td>West Toronto</td>\n",
" <td>High Park, The Junction South</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>M9P</td>\n",
" <td>Etobicoke</td>\n",
" <td>Westmount</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>M1R</td>\n",
" <td>Scarborough</td>\n",
" <td>Wexford, Maryvale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>M2R</td>\n",
" <td>North York</td>\n",
" <td>Willowdale, Willowdale West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>M4R</td>\n",
" <td>Central Toronto</td>\n",
" <td>North Toronto West, Lawrence Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>M5R</td>\n",
" <td>Central Toronto</td>\n",
" <td>The Annex, North Midtown, Yorkville</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>M6R</td>\n",
" <td>West Toronto</td>\n",
" <td>Parkdale, Roncesvalles</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>M7R</td>\n",
" <td>Mississauga</td>\n",
" <td>Canada Post Gateway Processing Centre</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>M9R</td>\n",
" <td>Etobicoke</td>\n",
" <td>Kingsview Village, St. Phillips, Martin Grove ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>M1S</td>\n",
" <td>Scarborough</td>\n",
" <td>Agincourt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>M4S</td>\n",
" <td>Central Toronto</td>\n",
" <td>Davisville</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>M5S</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>University of Toronto, Harbord</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>M6S</td>\n",
" <td>West Toronto</td>\n",
" <td>Runnymede, Swansea</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>M1T</td>\n",
" <td>Scarborough</td>\n",
" <td>Clarks Corners, Tam O'Shanter, Sullivan</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>M4T</td>\n",
" <td>Central Toronto</td>\n",
" <td>Moore Park, Summerhill East</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>M5T</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Kensington Market, Chinatown, Grange Park</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>M1V</td>\n",
" <td>Scarborough</td>\n",
" <td>Milliken, Agincourt North, Steeles East, L'Amo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>M4V</td>\n",
" <td>Central Toronto</td>\n",
" <td>Summerhill West, Rathnelly, South Hill, Forest...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>M5V</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>CN Tower, King and Spadina, Railway Lands, Har...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>M8V</td>\n",
" <td>Etobicoke</td>\n",
" <td>New Toronto, Mimico South, Humber Bay Shores</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>M9V</td>\n",
" <td>Etobicoke</td>\n",
" <td>South Steeles, Silverstone, Humbergate, Jamest...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>90</th>\n",
" <td>M1W</td>\n",
" <td>Scarborough</td>\n",
" <td>Steeles West, L'Amoreaux West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91</th>\n",
" <td>M4W</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Rosedale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>92</th>\n",
" <td>M5W</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Stn A PO Boxes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93</th>\n",
" <td>M8W</td>\n",
" <td>Etobicoke</td>\n",
" <td>Alderwood, Long Branch</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>M9W</td>\n",
" <td>Etobicoke</td>\n",
" <td>Northwest, West Humber - Clairville</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>M1X</td>\n",
" <td>Scarborough</td>\n",
" <td>Upper Rouge</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>M4X</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>St. James Town, Cabbagetown</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>M5X</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>First Canadian Place, Underground city</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>M8X</td>\n",
" <td>Etobicoke</td>\n",
" <td>The Kingsway, Montgomery Road, Old Mill North</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>M4Y</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Church and Wellesley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>M7Y</td>\n",
" <td>East Toronto</td>\n",
" <td>Business reply mail Processing Centre, South C...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101</th>\n",
" <td>M8Y</td>\n",
" <td>Etobicoke</td>\n",
" <td>Old Mill South, King's Mill Park, Sunnylea, Hu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>M8Z</td>\n",
" <td>Etobicoke</td>\n",
" <td>Mimico NW, The Queensway West, South of Bloor,...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PostalCode Borough \\\n",
"0 M3A North York \n",
"1 M4A North York \n",
"2 M5A Downtown Toronto \n",
"3 M6A North York \n",
"4 M7A Downtown Toronto \n",
"5 M9A Etobicoke \n",
"6 M1B Scarborough \n",
"7 M3B North York \n",
"8 M4B East York \n",
"9 M5B Downtown Toronto \n",
"10 M6B North York \n",
"11 M9B Etobicoke \n",
"12 M1C Scarborough \n",
"13 M3C North York \n",
"14 M4C East York \n",
"15 M5C Downtown Toronto \n",
"16 M6C York \n",
"17 M9C Etobicoke \n",
"18 M1E Scarborough \n",
"19 M4E East Toronto \n",
"20 M5E Downtown Toronto \n",
"21 M6E York \n",
"22 M1G Scarborough \n",
"23 M4G East York \n",
"24 M5G Downtown Toronto \n",
"25 M6G Downtown Toronto \n",
"26 M1H Scarborough \n",
"27 M2H North York \n",
"28 M3H North York \n",
"29 M4H East York \n",
"30 M5H Downtown Toronto \n",
"31 M6H West Toronto \n",
"32 M1J Scarborough \n",
"33 M2J North York \n",
"34 M3J North York \n",
"35 M4J East York \n",
"36 M5J Downtown Toronto \n",
"37 M6J West Toronto \n",
"38 M1K Scarborough \n",
"39 M2K North York \n",
"40 M3K North York \n",
"41 M4K East Toronto \n",
"42 M5K Downtown Toronto \n",
"43 M6K West Toronto \n",
"44 M1L Scarborough \n",
"45 M2L North York \n",
"46 M3L North York \n",
"47 M4L East Toronto \n",
"48 M5L Downtown Toronto \n",
"49 M6L North York \n",
"50 M9L North York \n",
"51 M1M Scarborough \n",
"52 M2M North York \n",
"53 M3M North York \n",
"54 M4M East Toronto \n",
"55 M5M North York \n",
"56 M6M York \n",
"57 M9M North York \n",
"58 M1N Scarborough \n",
"59 M2N North York \n",
"60 M3N North York \n",
"61 M4N Central Toronto \n",
"62 M5N Central Toronto \n",
"63 M6N York \n",
"64 M9N York \n",
"65 M1P Scarborough \n",
"66 M2P North York \n",
"67 M4P Central Toronto \n",
"68 M5P Central Toronto \n",
"69 M6P West Toronto \n",
"70 M9P Etobicoke \n",
"71 M1R Scarborough \n",
"72 M2R North York \n",
"73 M4R Central Toronto \n",
"74 M5R Central Toronto \n",
"75 M6R West Toronto \n",
"76 M7R Mississauga \n",
"77 M9R Etobicoke \n",
"78 M1S Scarborough \n",
"79 M4S Central Toronto \n",
"80 M5S Downtown Toronto \n",
"81 M6S West Toronto \n",
"82 M1T Scarborough \n",
"83 M4T Central Toronto \n",
"84 M5T Downtown Toronto \n",
"85 M1V Scarborough \n",
"86 M4V Central Toronto \n",
"87 M5V Downtown Toronto \n",
"88 M8V Etobicoke \n",
"89 M9V Etobicoke \n",
"90 M1W Scarborough \n",
"91 M4W Downtown Toronto \n",
"92 M5W Downtown Toronto \n",
"93 M8W Etobicoke \n",
"94 M9W Etobicoke \n",
"95 M1X Scarborough \n",
"96 M4X Downtown Toronto \n",
"97 M5X Downtown Toronto \n",
"98 M8X Etobicoke \n",
"99 M4Y Downtown Toronto \n",
"100 M7Y East Toronto \n",
"101 M8Y Etobicoke \n",
"102 M8Z Etobicoke \n",
"\n",
" Neighborhood \n",
"0 Parkwoods \n",
"1 Victoria Village \n",
"2 Regent Park, Harbourfront \n",
"3 Lawrence Manor, Lawrence Heights \n",
"4 Queen's Park, Ontario Provincial Government \n",
"5 Islington Avenue, Humber Valley Village \n",
"6 Malvern, Rouge \n",
"7 Don Mills \n",
"8 Parkview Hill, Woodbine Gardens \n",
"9 Garden District, Ryerson \n",
"10 Glencairn \n",
"11 West Deane Park, Princess Gardens, Martin Grov... \n",
"12 Rouge Hill, Port Union, Highland Creek \n",
"13 Don Mills \n",
"14 Woodbine Heights \n",
"15 St. James Town \n",
"16 Humewood-Cedarvale \n",
"17 Eringate, Bloordale Gardens, Old Burnhamthorpe... \n",
"18 Guildwood, Morningside, West Hill \n",
"19 The Beaches \n",
"20 Berczy Park \n",
"21 Caledonia-Fairbanks \n",
"22 Woburn \n",
"23 Leaside \n",
"24 Central Bay Street \n",
"25 Christie \n",
"26 Cedarbrae \n",
"27 Hillcrest Village \n",
"28 Bathurst Manor, Wilson Heights, Downsview North \n",
"29 Thorncliffe Park \n",
"30 Richmond, Adelaide, King \n",
"31 Dufferin, Dovercourt Village \n",
"32 Scarborough Village \n",
"33 Fairview, Henry Farm, Oriole \n",
"34 Northwood Park, York University \n",
"35 East Toronto, Broadview North (Old East York) \n",
"36 Harbourfront East, Union Station, Toronto Isla... \n",
"37 Little Portugal, Trinity \n",
"38 Kennedy Park, Ionview, East Birchmount Park \n",
"39 Bayview Village \n",
"40 Downsview \n",
"41 The Danforth West, Riverdale \n",
"42 Toronto Dominion Centre, Design Exchange \n",
"43 Brockton, Parkdale Village, Exhibition Place \n",
"44 Golden Mile, Clairlea, Oakridge \n",
"45 York Mills, Silver Hills \n",
"46 Downsview \n",
"47 India Bazaar, The Beaches West \n",
"48 Commerce Court, Victoria Hotel \n",
"49 North Park, Maple Leaf Park, Upwood Park \n",
"50 Humber Summit \n",
"51 Cliffside, Cliffcrest, Scarborough Village West \n",
"52 Willowdale, Newtonbrook \n",
"53 Downsview \n",
"54 Studio District \n",
"55 Bedford Park, Lawrence Manor East \n",
"56 Del Ray, Mount Dennis, Keelsdale and Silverthorn \n",
"57 Humberlea, Emery \n",
"58 Birch Cliff, Cliffside West \n",
"59 Willowdale, Willowdale East \n",
"60 Downsview \n",
"61 Lawrence Park \n",
"62 Roselawn \n",
"63 Runnymede, The Junction North \n",
"64 Weston \n",
"65 Dorset Park, Wexford Heights, Scarborough Town... \n",
"66 York Mills West \n",
"67 Davisville North \n",
"68 Forest Hill North & West, Forest Hill Road Park \n",
"69 High Park, The Junction South \n",
"70 Westmount \n",
"71 Wexford, Maryvale \n",
"72 Willowdale, Willowdale West \n",
"73 North Toronto West, Lawrence Park \n",
"74 The Annex, North Midtown, Yorkville \n",
"75 Parkdale, Roncesvalles \n",
"76 Canada Post Gateway Processing Centre \n",
"77 Kingsview Village, St. Phillips, Martin Grove ... \n",
"78 Agincourt \n",
"79 Davisville \n",
"80 University of Toronto, Harbord \n",
"81 Runnymede, Swansea \n",
"82 Clarks Corners, Tam O'Shanter, Sullivan \n",
"83 Moore Park, Summerhill East \n",
"84 Kensington Market, Chinatown, Grange Park \n",
"85 Milliken, Agincourt North, Steeles East, L'Amo... \n",
"86 Summerhill West, Rathnelly, South Hill, Forest... \n",
"87 CN Tower, King and Spadina, Railway Lands, Har... \n",
"88 New Toronto, Mimico South, Humber Bay Shores \n",
"89 South Steeles, Silverstone, Humbergate, Jamest... \n",
"90 Steeles West, L'Amoreaux West \n",
"91 Rosedale \n",
"92 Stn A PO Boxes \n",
"93 Alderwood, Long Branch \n",
"94 Northwest, West Humber - Clairville \n",
"95 Upper Rouge \n",
"96 St. James Town, Cabbagetown \n",
"97 First Canadian Place, Underground city \n",
"98 The Kingsway, Montgomery Road, Old Mill North \n",
"99 Church and Wellesley \n",
"100 Business reply mail Processing Centre, South C... \n",
"101 Old Mill South, King's Mill Park, Sunnylea, Hu... \n",
"102 Mimico NW, The Queensway West, South of Bloor,... "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reset_index returns a new frame, so assign it back before displaying it\n",
"df = df.reset_index(drop=True)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# one row per postal code: keep the first borough seen and merge the\n",
"# neighborhood names into a single comma-separated string\n",
"aggregations = {'Borough': 'first', 'Neighborhood': ', '.join}\n",
"df = df.groupby('PostalCode', as_index=False).agg(aggregations)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(103, 3)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Getting the latitude and longitude coordinates of each neighborhood"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"#! pip install geocoder\n",
"import geocoder # import geocoder"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PostalCode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1B</td>\n",
" <td>Scarborough</td>\n",
" <td>Malvern, Rouge</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M1C</td>\n",
" <td>Scarborough</td>\n",
" <td>Rouge Hill, Port Union, Highland Creek</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M1E</td>\n",
" <td>Scarborough</td>\n",
" <td>Guildwood, Morningside, West Hill</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M1G</td>\n",
" <td>Scarborough</td>\n",
" <td>Woburn</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M1H</td>\n",
" <td>Scarborough</td>\n",
" <td>Cedarbrae</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" <td>&lt;built-in function zeros&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PostalCode Borough Neighborhood \\\n",
"0 M1B Scarborough Malvern, Rouge \n",
"1 M1C Scarborough Rouge Hill, Port Union, Highland Creek \n",
"2 M1E Scarborough Guildwood, Morningside, West Hill \n",
"3 M1G Scarborough Woburn \n",
"4 M1H Scarborough Cedarbrae \n",
"\n",
" latitude longitude \n",
"0 <built-in function zeros> <built-in function zeros> \n",
"1 <built-in function zeros> <built-in function zeros> \n",
"2 <built-in function zeros> <built-in function zeros> \n",
"3 <built-in function zeros> <built-in function zeros> \n",
"4 <built-in function zeros> <built-in function zeros> "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# initialize your variable to None\n",
"lat_lng_coords = None\n",
"\n",
"# placeholder coordinate columns: NaN marks 'not looked up yet'\n",
"# (assigning np.zeros itself would store the function object in every row)\n",
"df['latitude'] = np.nan\n",
"df['longitude'] = np.nan\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postal Code</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1B</td>\n",
" <td>43.806686</td>\n",
" <td>-79.194353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M1C</td>\n",
" <td>43.784535</td>\n",
" <td>-79.160497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M1E</td>\n",
" <td>43.763573</td>\n",
" <td>-79.188711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M1G</td>\n",
" <td>43.770992</td>\n",
" <td>-79.216917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M1H</td>\n",
" <td>43.773136</td>\n",
" <td>-79.239476</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postal Code Latitude Longitude\n",
"0 M1B 43.806686 -79.194353\n",
"1 M1C 43.784535 -79.160497\n",
"2 M1E 43.763573 -79.188711\n",
"3 M1G 43.770992 -79.216917\n",
"4 M1H 43.773136 -79.239476"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url='https://cocl.us/Geospatial_data'\n",
"lonlog_df=pd.read_csv(url)\n",
"lonlog_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# look the coordinates up by postal code instead of relying on both frames\n",
"# happening to share the same row order and length\n",
"coords_by_code = lonlog_df.set_index('Postal Code')\n",
"postal_codes = df['PostalCode'].str.strip()  # scraped codes may carry stray whitespace\n",
"df['latitude'] = postal_codes.map(coords_by_code['Latitude'])\n",
"df['longitude'] = postal_codes.map(coords_by_code['Longitude'])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PostalCode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1B</td>\n",
" <td>Scarborough</td>\n",
" <td>Malvern, Rouge</td>\n",
" <td>43.806686</td>\n",
" <td>-79.194353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M1C</td>\n",
" <td>Scarborough</td>\n",
" <td>Rouge Hill, Port Union, Highland Creek</td>\n",
" <td>43.784535</td>\n",
" <td>-79.160497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M1E</td>\n",
" <td>Scarborough</td>\n",
" <td>Guildwood, Morningside, West Hill</td>\n",
" <td>43.763573</td>\n",
" <td>-79.188711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M1G</td>\n",
" <td>Scarborough</td>\n",
" <td>Woburn</td>\n",
" <td>43.770992</td>\n",
" <td>-79.216917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M1H</td>\n",
" <td>Scarborough</td>\n",
" <td>Cedarbrae</td>\n",
" <td>43.773136</td>\n",
" <td>-79.239476</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PostalCode Borough Neighborhood \\\n",
"0 M1B Scarborough Malvern, Rouge \n",
"1 M1C Scarborough Rouge Hill, Port Union, Highland Creek \n",
"2 M1E Scarborough Guildwood, Morningside, West Hill \n",
"3 M1G Scarborough Woburn \n",
"4 M1H Scarborough Cedarbrae \n",
"\n",
" latitude longitude \n",
"0 43.806686 -79.194353 \n",
"1 43.784535 -79.160497 \n",
"2 43.763573 -79.188711 \n",
"3 43.770992 -79.216917 \n",
"4 43.773136 -79.239476 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows to check that every postal code now carries\n",
"# its latitude/longitude pair\n",
"df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clustering of Toronto neighborhoods"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the boroughs whose name contains 'Toronto'.\n",
"# The boolean mask goes through label-based .loc: feeding index labels to the\n",
"# positional .iloc would pick the wrong rows whenever df's index is not 0..n-1.\n",
"# .copy() makes df_toronto an independent frame, so later column assignments\n",
"# do not trigger SettingWithCopyWarning.\n",
"is_toronto = df['Borough'].str.contains('Toronto')\n",
"df_toronto = df.loc[is_toronto].copy()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PostalCode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>M4E</td>\n",
" <td>East Toronto</td>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>M4K</td>\n",
" <td>East Toronto</td>\n",
" <td>The Danforth West, Riverdale</td>\n",
" <td>43.679557</td>\n",
" <td>-79.352188</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>M4L</td>\n",
" <td>East Toronto</td>\n",
" <td>India Bazaar, The Beaches West</td>\n",
" <td>43.668999</td>\n",
" <td>-79.315572</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>M4M</td>\n",
" <td>East Toronto</td>\n",
" <td>Studio District</td>\n",
" <td>43.659526</td>\n",
" <td>-79.340923</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>M4N</td>\n",
" <td>Central Toronto</td>\n",
" <td>Lawrence Park</td>\n",
" <td>43.728020</td>\n",
" <td>-79.388790</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PostalCode Borough Neighborhood latitude \\\n",
"37 M4E East Toronto The Beaches 43.676357 \n",
"41 M4K East Toronto The Danforth West, Riverdale 43.679557 \n",
"42 M4L East Toronto India Bazaar, The Beaches West 43.668999 \n",
"43 M4M East Toronto Studio District 43.659526 \n",
"44 M4N Central Toronto Lawrence Park 43.728020 \n",
"\n",
" longitude \n",
"37 -79.293031 \n",
"41 -79.352188 \n",
"42 -79.315572 \n",
"43 -79.340923 \n",
"44 -79.388790 "
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the Toronto-only subset; the row labels are\n",
"# the ones inherited from df\n",
"df_toronto.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run *k*-means to cluster the neighborhoods into 5 clusters."
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# set number of clusters\n",
"kclusters = 5\n",
"\n",
"# k-means accepts numeric features only; PostalCode, Borough and Neighborhood\n",
"# are text columns, so cluster on the geographic coordinates alone\n",
"toronto_grouped_clustering = df_toronto[['latitude', 'longitude']]\n",
"\n",
"# run k-means clustering (random_state is fixed so the labels are reproducible)\n",
"kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)\n",
"\n",
"# check cluster labels generated for the first ten rows of the dataframe\n",
"kmeans.labels_[0:10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python",
"language": "python",
"name": "conda-env-python-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment