Skip to content

Instantly share code, notes, and snippets.

@rquintel
Created July 14, 2019 23:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rquintel/ed5c60c10a750edf6fd0b538cbb46085 to your computer and use it in GitHub Desktop.
Save rquintel/ed5c60c10a750edf6fd0b538cbb46085 to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Import data\n",
"link = \"https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M\"\n",
"html_table = pd.read_html(link,header=0)\n",
"postal_codes = pd.DataFrame(html_table[0])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# Cleaning\n",
"postal_codes.replace(\"Not assigned\", np.nan, inplace = True)\n",
"postal_codes.dropna(subset=[\"Borough\"], axis=0, inplace = True)\n",
"postal_codes.Neighbourhood.fillna(postal_codes.Borough, inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Grouping\n",
"postal_codes = postal_codes.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda x: ', '.join(x)).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(103, 3)"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"postal_codes.shape"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Import Geo Spatial Data\n",
"geo_spatial_data = pd.read_csv(\"http://cocl.us/Geospatial_data\")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"toronto_locations = postal_codes\n",
"\n",
"# merge postal_codes with geo_spatial_data to add latitude/longitude for each postal code\n",
"toronto_locations = toronto_locations.join(geo_spatial_data.set_index('Postal Code'), on='Postcode')"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"# Import necessary libraries\n",
"import json # library to handle JSON files\n",
"import requests # library to handle requests\n",
"from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe\n",
"\n",
"# import k-means from clustering stage\n",
"from sklearn.cluster import KMeans\n",
"\n",
"#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab\n",
"from geopy.geocoders import Nominatim # convert an address into latitude and longitude values\n",
"\n",
"#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab\n",
"import folium # map rendering library\n",
"\n",
"# Matplotlib and associated plotting modules\n",
"import matplotlib.cm as cm\n",
"import matplotlib.colors as colors"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
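"# Foursquare API parameters (NOTE: the credentials below are hard-coded; prefer loading them from environment variables and never publish real keys)\n",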
"CLIENT_ID = 'HKB1ULVBQPKLEH30AUOZ2V2I4YXF5EDOL1WIWT0EMECCQ1CW' # your Foursquare ID\n",
"CLIENT_SECRET = 'FFIUIMUUBC42AIDQ5UG4OYH54234PDCT4OANAHB1SNCJE0HB' # your Foursquare Secret\n",
"VERSION = '20180604'\n",
"\n",
"LIMIT = 100"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# Function to retrieve nearby venues for each postal code area\n",
"def getNearbyVenues(names, latitudes, longitudes, radius=500):\n",
" \n",
" venues_list=[]\n",
" for name, lat, lng in zip(names, latitudes, longitudes):\n",
" print(name)\n",
" \n",
" # create the API request URL\n",
" url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(\n",
" CLIENT_ID, \n",
" CLIENT_SECRET, \n",
" VERSION, \n",
" lat, \n",
" lng, \n",
" radius, \n",
" LIMIT)\n",
" \n",
" # make the GET request\n",
" results = requests.get(url).json()['response']['groups'][0]['items']\n",
" \n",
" # return only relevant information for each nearby venue\n",
" venues_list.append([(\n",
" name, \n",
" lat, \n",
" lng, \n",
" v['venue']['name'], \n",
" v['venue']['location']['lat'], \n",
" v['venue']['location']['lng'], \n",
" v['venue']['categories'][0]['name']) for v in results])\n",
"\n",
" nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])\n",
" nearby_venues.columns = ['Neighborhood', \n",
" 'Neighborhood Latitude', \n",
" 'Neighborhood Longitude', \n",
" 'Venue', \n",
" 'Venue Latitude', \n",
" 'Venue Longitude', \n",
" 'Venue Category']\n",
" \n",
" return(nearby_venues)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighbourhood</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>M4E</td>\n",
" <td>East Toronto</td>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>M4K</td>\n",
" <td>East Toronto</td>\n",
" <td>The Danforth West, Riverdale</td>\n",
" <td>43.679557</td>\n",
" <td>-79.352188</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>M4L</td>\n",
" <td>East Toronto</td>\n",
" <td>The Beaches West, India Bazaar</td>\n",
" <td>43.668999</td>\n",
" <td>-79.315572</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>M4M</td>\n",
" <td>East Toronto</td>\n",
" <td>Studio District</td>\n",
" <td>43.659526</td>\n",
" <td>-79.340923</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>M4N</td>\n",
" <td>Central Toronto</td>\n",
" <td>Lawrence Park</td>\n",
" <td>43.728020</td>\n",
" <td>-79.388790</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>M4P</td>\n",
" <td>Central Toronto</td>\n",
" <td>Davisville North</td>\n",
" <td>43.712751</td>\n",
" <td>-79.390197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>M4R</td>\n",
" <td>Central Toronto</td>\n",
" <td>North Toronto West</td>\n",
" <td>43.715383</td>\n",
" <td>-79.405678</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>M4S</td>\n",
" <td>Central Toronto</td>\n",
" <td>Davisville</td>\n",
" <td>43.704324</td>\n",
" <td>-79.388790</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>M4T</td>\n",
" <td>Central Toronto</td>\n",
" <td>Moore Park, Summerhill East</td>\n",
" <td>43.689574</td>\n",
" <td>-79.383160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>M4V</td>\n",
" <td>Central Toronto</td>\n",
" <td>Deer Park, Forest Hill SE, Rathnelly, South Hi...</td>\n",
" <td>43.686412</td>\n",
" <td>-79.400049</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>M4W</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Rosedale</td>\n",
" <td>43.679563</td>\n",
" <td>-79.377529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>M4X</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Cabbagetown, St. James Town</td>\n",
" <td>43.667967</td>\n",
" <td>-79.367675</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>M4Y</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Church and Wellesley</td>\n",
" <td>43.665860</td>\n",
" <td>-79.383160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbourfront, Regent Park</td>\n",
" <td>43.654260</td>\n",
" <td>-79.360636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>M5B</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Ryerson, Garden District</td>\n",
" <td>43.657162</td>\n",
" <td>-79.378937</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>M5C</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>St. James Town</td>\n",
" <td>43.651494</td>\n",
" <td>-79.375418</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>M5E</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Berczy Park</td>\n",
" <td>43.644771</td>\n",
" <td>-79.373306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>M5G</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Central Bay Street</td>\n",
" <td>43.657952</td>\n",
" <td>-79.387383</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>M5H</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Adelaide, King, Richmond</td>\n",
" <td>43.650571</td>\n",
" <td>-79.384568</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>M5J</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbourfront East, Toronto Islands, Union Station</td>\n",
" <td>43.640816</td>\n",
" <td>-79.381752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>M5K</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Design Exchange, Toronto Dominion Centre</td>\n",
" <td>43.647177</td>\n",
" <td>-79.381576</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>M5L</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Commerce Court, Victoria Hotel</td>\n",
" <td>43.648198</td>\n",
" <td>-79.379817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>M5N</td>\n",
" <td>Central Toronto</td>\n",
" <td>Roselawn</td>\n",
" <td>43.711695</td>\n",
" <td>-79.416936</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>M5P</td>\n",
" <td>Central Toronto</td>\n",
" <td>Forest Hill North, Forest Hill West</td>\n",
" <td>43.696948</td>\n",
" <td>-79.411307</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>M5R</td>\n",
" <td>Central Toronto</td>\n",
" <td>The Annex, North Midtown, Yorkville</td>\n",
" <td>43.672710</td>\n",
" <td>-79.405678</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>M5S</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbord, University of Toronto</td>\n",
" <td>43.662696</td>\n",
" <td>-79.400049</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>M5T</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Chinatown, Grange Park, Kensington Market</td>\n",
" <td>43.653206</td>\n",
" <td>-79.400049</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>M5V</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>CN Tower, Bathurst Quay, Island airport, Harbo...</td>\n",
" <td>43.628947</td>\n",
" <td>-79.394420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>M5W</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Stn A PO Boxes 25 The Esplanade</td>\n",
" <td>43.646435</td>\n",
" <td>-79.374846</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>M5X</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>First Canadian Place, Underground city</td>\n",
" <td>43.648429</td>\n",
" <td>-79.382280</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>M6G</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Christie</td>\n",
" <td>43.669542</td>\n",
" <td>-79.422564</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>M6H</td>\n",
" <td>West Toronto</td>\n",
" <td>Dovercourt Village, Dufferin</td>\n",
" <td>43.669005</td>\n",
" <td>-79.442259</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>M6J</td>\n",
" <td>West Toronto</td>\n",
" <td>Little Portugal, Trinity</td>\n",
" <td>43.647927</td>\n",
" <td>-79.419750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>M6K</td>\n",
" <td>West Toronto</td>\n",
" <td>Brockton, Exhibition Place, Parkdale Village</td>\n",
" <td>43.636847</td>\n",
" <td>-79.428191</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>M6P</td>\n",
" <td>West Toronto</td>\n",
" <td>High Park, The Junction South</td>\n",
" <td>43.661608</td>\n",
" <td>-79.464763</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>M6R</td>\n",
" <td>West Toronto</td>\n",
" <td>Parkdale, Roncesvalles</td>\n",
" <td>43.648960</td>\n",
" <td>-79.456325</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>M6S</td>\n",
" <td>West Toronto</td>\n",
" <td>Runnymede, Swansea</td>\n",
" <td>43.651571</td>\n",
" <td>-79.484450</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>M7Y</td>\n",
" <td>East Toronto</td>\n",
" <td>Business Reply Mail Processing Centre 969 Eastern</td>\n",
" <td>43.662744</td>\n",
" <td>-79.321558</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postcode Borough \\\n",
"37 M4E East Toronto \n",
"41 M4K East Toronto \n",
"42 M4L East Toronto \n",
"43 M4M East Toronto \n",
"44 M4N Central Toronto \n",
"45 M4P Central Toronto \n",
"46 M4R Central Toronto \n",
"47 M4S Central Toronto \n",
"48 M4T Central Toronto \n",
"49 M4V Central Toronto \n",
"50 M4W Downtown Toronto \n",
"51 M4X Downtown Toronto \n",
"52 M4Y Downtown Toronto \n",
"53 M5A Downtown Toronto \n",
"54 M5B Downtown Toronto \n",
"55 M5C Downtown Toronto \n",
"56 M5E Downtown Toronto \n",
"57 M5G Downtown Toronto \n",
"58 M5H Downtown Toronto \n",
"59 M5J Downtown Toronto \n",
"60 M5K Downtown Toronto \n",
"61 M5L Downtown Toronto \n",
"63 M5N Central Toronto \n",
"64 M5P Central Toronto \n",
"65 M5R Central Toronto \n",
"66 M5S Downtown Toronto \n",
"67 M5T Downtown Toronto \n",
"68 M5V Downtown Toronto \n",
"69 M5W Downtown Toronto \n",
"70 M5X Downtown Toronto \n",
"75 M6G Downtown Toronto \n",
"76 M6H West Toronto \n",
"77 M6J West Toronto \n",
"78 M6K West Toronto \n",
"82 M6P West Toronto \n",
"83 M6R West Toronto \n",
"84 M6S West Toronto \n",
"87 M7Y East Toronto \n",
"\n",
" Neighbourhood Latitude Longitude \n",
"37 The Beaches 43.676357 -79.293031 \n",
"41 The Danforth West, Riverdale 43.679557 -79.352188 \n",
"42 The Beaches West, India Bazaar 43.668999 -79.315572 \n",
"43 Studio District 43.659526 -79.340923 \n",
"44 Lawrence Park 43.728020 -79.388790 \n",
"45 Davisville North 43.712751 -79.390197 \n",
"46 North Toronto West 43.715383 -79.405678 \n",
"47 Davisville 43.704324 -79.388790 \n",
"48 Moore Park, Summerhill East 43.689574 -79.383160 \n",
"49 Deer Park, Forest Hill SE, Rathnelly, South Hi... 43.686412 -79.400049 \n",
"50 Rosedale 43.679563 -79.377529 \n",
"51 Cabbagetown, St. James Town 43.667967 -79.367675 \n",
"52 Church and Wellesley 43.665860 -79.383160 \n",
"53 Harbourfront, Regent Park 43.654260 -79.360636 \n",
"54 Ryerson, Garden District 43.657162 -79.378937 \n",
"55 St. James Town 43.651494 -79.375418 \n",
"56 Berczy Park 43.644771 -79.373306 \n",
"57 Central Bay Street 43.657952 -79.387383 \n",
"58 Adelaide, King, Richmond 43.650571 -79.384568 \n",
"59 Harbourfront East, Toronto Islands, Union Station 43.640816 -79.381752 \n",
"60 Design Exchange, Toronto Dominion Centre 43.647177 -79.381576 \n",
"61 Commerce Court, Victoria Hotel 43.648198 -79.379817 \n",
"63 Roselawn 43.711695 -79.416936 \n",
"64 Forest Hill North, Forest Hill West 43.696948 -79.411307 \n",
"65 The Annex, North Midtown, Yorkville 43.672710 -79.405678 \n",
"66 Harbord, University of Toronto 43.662696 -79.400049 \n",
"67 Chinatown, Grange Park, Kensington Market 43.653206 -79.400049 \n",
"68 CN Tower, Bathurst Quay, Island airport, Harbo... 43.628947 -79.394420 \n",
"69 Stn A PO Boxes 25 The Esplanade 43.646435 -79.374846 \n",
"70 First Canadian Place, Underground city 43.648429 -79.382280 \n",
"75 Christie 43.669542 -79.422564 \n",
"76 Dovercourt Village, Dufferin 43.669005 -79.442259 \n",
"77 Little Portugal, Trinity 43.647927 -79.419750 \n",
"78 Brockton, Exhibition Place, Parkdale Village 43.636847 -79.428191 \n",
"82 High Park, The Junction South 43.661608 -79.464763 \n",
"83 Parkdale, Roncesvalles 43.648960 -79.456325 \n",
"84 Runnymede, Swansea 43.651571 -79.484450 \n",
"87 Business Reply Mail Processing Centre 969 Eastern 43.662744 -79.321558 "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Working with boroughs that contain the word \"Toronto\"\n",
"toronto_data = toronto_locations[toronto_locations['Borough'].str.contains(\"Toronto\")]\n",
"toronto_data"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The Beaches\n",
"The Danforth West, Riverdale\n",
"The Beaches West, India Bazaar\n",
"Studio District\n",
"Lawrence Park\n",
"Davisville North\n",
"North Toronto West\n",
"Davisville\n",
"Moore Park, Summerhill East\n",
"Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West\n",
"Rosedale\n",
"Cabbagetown, St. James Town\n",
"Church and Wellesley\n",
"Harbourfront, Regent Park\n",
"Ryerson, Garden District\n",
"St. James Town\n",
"Berczy Park\n",
"Central Bay Street\n",
"Adelaide, King, Richmond\n",
"Harbourfront East, Toronto Islands, Union Station\n",
"Design Exchange, Toronto Dominion Centre\n",
"Commerce Court, Victoria Hotel\n",
"Roselawn\n",
"Forest Hill North, Forest Hill West\n",
"The Annex, North Midtown, Yorkville\n",
"Harbord, University of Toronto\n",
"Chinatown, Grange Park, Kensington Market\n",
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara\n",
"Stn A PO Boxes 25 The Esplanade\n",
"First Canadian Place, Underground city\n",
"Christie\n",
"Dovercourt Village, Dufferin\n",
"Little Portugal, Trinity\n",
"Brockton, Exhibition Place, Parkdale Village\n",
"High Park, The Junction South\n",
"Parkdale, Roncesvalles\n",
"Runnymede, Swansea\n",
"Business Reply Mail Processing Centre 969 Eastern\n",
"(1707, 7)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Neighborhood</th>\n",
" <th>Neighborhood Latitude</th>\n",
" <th>Neighborhood Longitude</th>\n",
" <th>Venue</th>\n",
" <th>Venue Latitude</th>\n",
" <th>Venue Longitude</th>\n",
" <th>Venue Category</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" <td>Glen Manor Ravine</td>\n",
" <td>43.676821</td>\n",
" <td>-79.293942</td>\n",
" <td>Trail</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" <td>The Big Carrot Natural Food Market</td>\n",
" <td>43.678879</td>\n",
" <td>-79.297734</td>\n",
" <td>Health Food Store</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" <td>Grover Pub and Grub</td>\n",
" <td>43.679181</td>\n",
" <td>-79.297215</td>\n",
" <td>Pub</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" <td>Glen Stewart Ravine</td>\n",
" <td>43.676300</td>\n",
" <td>-79.294784</td>\n",
" <td>Other Great Outdoors</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" <td>Upper Beaches</td>\n",
" <td>43.680563</td>\n",
" <td>-79.292869</td>\n",
" <td>Neighborhood</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Neighborhood Neighborhood Latitude Neighborhood Longitude \\\n",
"0 The Beaches 43.676357 -79.293031 \n",
"1 The Beaches 43.676357 -79.293031 \n",
"2 The Beaches 43.676357 -79.293031 \n",
"3 The Beaches 43.676357 -79.293031 \n",
"4 The Beaches 43.676357 -79.293031 \n",
"\n",
" Venue Venue Latitude Venue Longitude \\\n",
"0 Glen Manor Ravine 43.676821 -79.293942 \n",
"1 The Big Carrot Natural Food Market 43.678879 -79.297734 \n",
"2 Grover Pub and Grub 43.679181 -79.297215 \n",
"3 Glen Stewart Ravine 43.676300 -79.294784 \n",
"4 Upper Beaches 43.680563 -79.292869 \n",
"\n",
" Venue Category \n",
"0 Trail \n",
"1 Health Food Store \n",
"2 Pub \n",
"3 Other Great Outdoors \n",
"4 Neighborhood "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# getNearbyVenues parameters\n",
"names=toronto_data['Neighbourhood']\n",
"latitudes=toronto_data['Latitude']\n",
"longitudes=toronto_data['Longitude']\n",
"\n",
"toronto_venues = getNearbyVenues(names,latitudes,longitudes)\n",
"\n",
"print(toronto_venues.shape)\n",
"toronto_venues.head()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 238 uniques categories.\n"
]
}
],
"source": [
"# Counting unique categories\n",
"toronto_venues.groupby('Neighborhood').count()\n",
"\n",
"print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Yoga Studio</th>\n",
" <th>Afghan Restaurant</th>\n",
" <th>Airport</th>\n",
" <th>Airport Food Court</th>\n",
" <th>Airport Gate</th>\n",
" <th>Airport Lounge</th>\n",
" <th>Airport Service</th>\n",
" <th>Airport Terminal</th>\n",
" <th>American Restaurant</th>\n",
" <th>Antique Shop</th>\n",
" <th>...</th>\n",
" <th>Theme Restaurant</th>\n",
" <th>Thrift / Vintage Store</th>\n",
" <th>Toy / Game Store</th>\n",
" <th>Trail</th>\n",
" <th>Train Station</th>\n",
" <th>Vegetarian / Vegan Restaurant</th>\n",
" <th>Video Game Store</th>\n",
" <th>Vietnamese Restaurant</th>\n",
" <th>Wine Bar</th>\n",
" <th>Wings Joint</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 238 columns</p>\n",
"</div>"
],
"text/plain": [
" Yoga Studio Afghan Restaurant Airport Airport Food Court Airport Gate \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"\n",
" Airport Lounge Airport Service Airport Terminal American Restaurant \\\n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
" Antique Shop ... Theme Restaurant Thrift / Vintage Store \\\n",
"0 0 ... 0 0 \n",
"1 0 ... 0 0 \n",
"2 0 ... 0 0 \n",
"3 0 ... 0 0 \n",
"4 0 ... 0 0 \n",
"\n",
" Toy / Game Store Trail Train Station Vegetarian / Vegan Restaurant \\\n",
"0 0 1 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
" Video Game Store Vietnamese Restaurant Wine Bar Wings Joint \n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
"[5 rows x 238 columns]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# one hot encoding\n",
"toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix=\"\", prefix_sep=\"\")\n",
"\n",
"# add neighborhood column back to dataframe\n",
"toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] \n",
"\n",
"# move neighborhood column to the first column\n",
"fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])\n",
"toronto_onehot = toronto_onehot[fixed_columns]\n",
"\n",
"toronto_onehot.head()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"----Adelaide, King, Richmond----\n",
" venue freq\n",
"0 Coffee Shop 0.07\n",
"1 Café 0.05\n",
"2 Steakhouse 0.04\n",
"3 Bar 0.04\n",
"4 Thai Restaurant 0.04\n",
"\n",
"\n",
"----Berczy Park----\n",
" venue freq\n",
"0 Coffee Shop 0.09\n",
"1 Cocktail Bar 0.05\n",
"2 Seafood Restaurant 0.03\n",
"3 Steakhouse 0.03\n",
"4 Beer Bar 0.03\n",
"\n",
"\n",
"----Brockton, Exhibition Place, Parkdale Village----\n",
" venue freq\n",
"0 Breakfast Spot 0.10\n",
"1 Café 0.10\n",
"2 Coffee Shop 0.10\n",
"3 Intersection 0.05\n",
"4 Burrito Place 0.05\n",
"\n",
"\n",
"----Business Reply Mail Processing Centre 969 Eastern----\n",
" venue freq\n",
"0 Light Rail Station 0.11\n",
"1 Park 0.06\n",
"2 Recording Studio 0.06\n",
"3 Skate Park 0.06\n",
"4 Fast Food Restaurant 0.06\n",
"\n",
"\n",
"----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----\n",
" venue freq\n",
"0 Airport Service 0.18\n",
"1 Airport Lounge 0.12\n",
"2 Airport Terminal 0.12\n",
"3 Boutique 0.06\n",
"4 Harbor / Marina 0.06\n",
"\n",
"\n",
"----Cabbagetown, St. James Town----\n",
" venue freq\n",
"0 Coffee Shop 0.07\n",
"1 Restaurant 0.05\n",
"2 Pizza Place 0.05\n",
"3 Park 0.05\n",
"4 Bakery 0.05\n",
"\n",
"\n",
"----Central Bay Street----\n",
" venue freq\n",
"0 Coffee Shop 0.15\n",
"1 Café 0.05\n",
"2 Italian Restaurant 0.05\n",
"3 Ice Cream Shop 0.04\n",
"4 Sandwich Place 0.04\n",
"\n",
"\n",
"----Chinatown, Grange Park, Kensington Market----\n",
" venue freq\n",
"0 Café 0.07\n",
"1 Vegetarian / Vegan Restaurant 0.06\n",
"2 Bar 0.05\n",
"3 Vietnamese Restaurant 0.05\n",
"4 Chinese Restaurant 0.04\n",
"\n",
"\n",
"----Christie----\n",
" venue freq\n",
"0 Grocery Store 0.19\n",
"1 Café 0.19\n",
"2 Park 0.12\n",
"3 Nightclub 0.06\n",
"4 Restaurant 0.06\n",
"\n",
"\n",
"----Church and Wellesley----\n",
" venue freq\n",
"0 Coffee Shop 0.08\n",
"1 Japanese Restaurant 0.06\n",
"2 Gay Bar 0.05\n",
"3 Sushi Restaurant 0.05\n",
"4 Restaurant 0.03\n",
"\n",
"\n",
"----Commerce Court, Victoria Hotel----\n",
" venue freq\n",
"0 Coffee Shop 0.10\n",
"1 Hotel 0.06\n",
"2 Café 0.06\n",
"3 Restaurant 0.04\n",
"4 American Restaurant 0.04\n",
"\n",
"\n",
"----Davisville----\n",
" venue freq\n",
"0 Pizza Place 0.09\n",
"1 Dessert Shop 0.09\n",
"2 Sandwich Place 0.09\n",
"3 Italian Restaurant 0.06\n",
"4 Coffee Shop 0.06\n",
"\n",
"\n",
"----Davisville North----\n",
" venue freq\n",
"0 Food & Drink Shop 0.12\n",
"1 Gym 0.12\n",
"2 Sandwich Place 0.12\n",
"3 Clothing Store 0.12\n",
"4 Hotel 0.12\n",
"\n",
"\n",
"----Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West----\n",
" venue freq\n",
"0 Pub 0.13\n",
"1 Coffee Shop 0.13\n",
"2 Liquor Store 0.07\n",
"3 Light Rail Station 0.07\n",
"4 Bagel Shop 0.07\n",
"\n",
"\n",
"----Design Exchange, Toronto Dominion Centre----\n",
" venue freq\n",
"0 Coffee Shop 0.15\n",
"1 Café 0.07\n",
"2 Hotel 0.06\n",
"3 Restaurant 0.04\n",
"4 Italian Restaurant 0.04\n",
"\n",
"\n",
"----Dovercourt Village, Dufferin----\n",
" venue freq\n",
"0 Bakery 0.10\n",
"1 Pharmacy 0.10\n",
"2 Supermarket 0.10\n",
"3 Portuguese Restaurant 0.05\n",
"4 Park 0.05\n",
"\n",
"\n",
"----First Canadian Place, Underground city----\n",
" venue freq\n",
"0 Coffee Shop 0.09\n",
"1 Café 0.07\n",
"2 Hotel 0.04\n",
"3 Steakhouse 0.04\n",
"4 Asian Restaurant 0.03\n",
"\n",
"\n",
"----Forest Hill North, Forest Hill West----\n",
" venue freq\n",
"0 Sushi Restaurant 0.2\n",
"1 Jewelry Store 0.2\n",
"2 Home Service 0.2\n",
"3 Park 0.2\n",
"4 Trail 0.2\n",
"\n",
"\n",
"----Harbord, University of Toronto----\n",
" venue freq\n",
"0 Café 0.12\n",
"1 Bookstore 0.09\n",
"2 Bakery 0.06\n",
"3 Bar 0.06\n",
"4 Japanese Restaurant 0.06\n",
"\n",
"\n",
"----Harbourfront East, Toronto Islands, Union Station----\n",
" venue freq\n",
"0 Coffee Shop 0.11\n",
"1 Hotel 0.05\n",
"2 Aquarium 0.05\n",
"3 Italian Restaurant 0.04\n",
"4 Café 0.04\n",
"\n",
"\n",
"----Harbourfront, Regent Park----\n",
" venue freq\n",
"0 Coffee Shop 0.18\n",
"1 Bakery 0.06\n",
"2 Park 0.06\n",
"3 Theater 0.04\n",
"4 Breakfast Spot 0.04\n",
"\n",
"\n",
"----High Park, The Junction South----\n",
" venue freq\n",
"0 Café 0.09\n",
"1 Bar 0.09\n",
"2 Mexican Restaurant 0.09\n",
"3 Speakeasy 0.04\n",
"4 Arts & Crafts Store 0.04\n",
"\n",
"\n",
"----Lawrence Park----\n",
" venue freq\n",
"0 Park 0.33\n",
"1 Swim School 0.33\n",
"2 Bus Line 0.33\n",
"3 Yoga Studio 0.00\n",
"4 Museum 0.00\n",
"\n",
"\n",
"----Little Portugal, Trinity----\n",
" venue freq\n",
"0 Bar 0.14\n",
"1 Coffee Shop 0.06\n",
"2 Asian Restaurant 0.05\n",
"3 Boutique 0.03\n",
"4 Restaurant 0.03\n",
"\n",
"\n",
"----Moore Park, Summerhill East----\n",
" venue freq\n",
"0 Playground 0.33\n",
"1 Trail 0.33\n",
"2 Restaurant 0.33\n",
"3 Yoga Studio 0.00\n",
"4 Music Store 0.00\n",
"\n",
"\n",
"----North Toronto West----\n",
" venue freq\n",
"0 Coffee Shop 0.13\n",
"1 Yoga Studio 0.07\n",
"2 Chinese Restaurant 0.07\n",
"3 Sporting Goods Shop 0.07\n",
"4 Spa 0.07\n",
"\n",
"\n",
"----Parkdale, Roncesvalles----\n",
" venue freq\n",
"0 Breakfast Spot 0.13\n",
"1 Gift Shop 0.13\n",
"2 Bookstore 0.07\n",
"3 Italian Restaurant 0.07\n",
"4 Movie Theater 0.07\n",
"\n",
"\n",
"----Rosedale----\n",
" venue freq\n",
"0 Park 0.4\n",
"1 Playground 0.2\n",
"2 Trail 0.2\n",
"3 Building 0.2\n",
"4 Music Store 0.0\n",
"\n",
"\n",
"----Roselawn----\n",
" venue freq\n",
"0 Garden 0.5\n",
"1 Pool 0.5\n",
"2 Music Venue 0.0\n",
"3 Martial Arts Dojo 0.0\n",
"4 Mediterranean Restaurant 0.0\n",
"\n",
"\n",
"----Runnymede, Swansea----\n",
" venue freq\n",
"0 Café 0.08\n",
"1 Coffee Shop 0.08\n",
"2 Pizza Place 0.08\n",
"3 Sushi Restaurant 0.05\n",
"4 Italian Restaurant 0.05\n",
"\n",
"\n",
"----Ryerson, Garden District----\n",
" venue freq\n",
"0 Coffee Shop 0.09\n",
"1 Clothing Store 0.07\n",
"2 Cosmetics Shop 0.04\n",
"3 Café 0.03\n",
"4 Fast Food Restaurant 0.03\n",
"\n",
"\n",
"----St. James Town----\n",
" venue freq\n",
"0 Coffee Shop 0.08\n",
"1 Café 0.05\n",
"2 Hotel 0.05\n",
"3 Restaurant 0.05\n",
"4 Italian Restaurant 0.04\n",
"\n",
"\n",
"----Stn A PO Boxes 25 The Esplanade----\n",
" venue freq\n",
"0 Coffee Shop 0.10\n",
"1 Restaurant 0.04\n",
"2 Café 0.04\n",
"3 Hotel 0.03\n",
"4 Beer Bar 0.03\n",
"\n",
"\n",
"----Studio District----\n",
" venue freq\n",
"0 Café 0.10\n",
"1 Coffee Shop 0.07\n",
"2 Italian Restaurant 0.05\n",
"3 Gastropub 0.05\n",
"4 American Restaurant 0.05\n",
"\n",
"\n",
"----The Annex, North Midtown, Yorkville----\n",
" venue freq\n",
"0 Coffee Shop 0.12\n",
"1 Sandwich Place 0.12\n",
"2 Café 0.12\n",
"3 Pizza Place 0.08\n",
"4 BBQ Joint 0.04\n",
"\n",
"\n",
"----The Beaches----\n",
" venue freq\n",
"0 Health Food Store 0.2\n",
"1 Other Great Outdoors 0.2\n",
"2 Trail 0.2\n",
"3 Pub 0.2\n",
"4 Music Store 0.0\n",
"\n",
"\n",
"----The Beaches West, India Bazaar----\n",
" venue freq\n",
"0 Pizza Place 0.05\n",
"1 Italian Restaurant 0.05\n",
"2 Fish & Chips Shop 0.05\n",
"3 Burger Joint 0.05\n",
"4 Burrito Place 0.05\n",
"\n",
"\n",
"----The Danforth West, Riverdale----\n",
" venue freq\n",
"0 Greek Restaurant 0.22\n",
"1 Coffee Shop 0.10\n",
"2 Italian Restaurant 0.07\n",
"3 Furniture / Home Store 0.05\n",
"4 Ice Cream Shop 0.05\n",
"\n",
"\n"
]
}
],
"source": [
"# Top 5 venues per Neighborhood\n",
"toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()\n",
"\n",
"num_top_venues = 5\n",
"\n",
"for hood in toronto_grouped['Neighborhood']:\n",
" print(\"----\"+hood+\"----\")\n",
" temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()\n",
" temp.columns = ['venue','freq']\n",
" temp = temp.iloc[1:]\n",
" temp['freq'] = temp['freq'].astype(float)\n",
" temp = temp.round({'freq': 2})\n",
" print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))\n",
" print('\\n')"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Neighborhood</th>\n",
" <th>1st Most Common Venue</th>\n",
" <th>2nd Most Common Venue</th>\n",
" <th>3rd Most Common Venue</th>\n",
" <th>4th Most Common Venue</th>\n",
" <th>5th Most Common Venue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Adelaide, King, Richmond</td>\n",
" <td>Coffee Shop</td>\n",
" <td>Café</td>\n",
" <td>American Restaurant</td>\n",
" <td>Bar</td>\n",
" <td>Steakhouse</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Berczy Park</td>\n",
" <td>Coffee Shop</td>\n",
" <td>Cocktail Bar</td>\n",
" <td>Seafood Restaurant</td>\n",
" <td>Beer Bar</td>\n",
" <td>Bakery</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Brockton, Exhibition Place, Parkdale Village</td>\n",
" <td>Coffee Shop</td>\n",
" <td>Café</td>\n",
" <td>Breakfast Spot</td>\n",
" <td>Yoga Studio</td>\n",
" <td>Bar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Business Reply Mail Processing Centre 969 Eastern</td>\n",
" <td>Light Rail Station</td>\n",
" <td>Auto Workshop</td>\n",
" <td>Smoke Shop</td>\n",
" <td>Brewery</td>\n",
" <td>Spa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CN Tower, Bathurst Quay, Island airport, Harbo...</td>\n",
" <td>Airport Service</td>\n",
" <td>Airport Terminal</td>\n",
" <td>Airport Lounge</td>\n",
" <td>Harbor / Marina</td>\n",
" <td>Plane</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Neighborhood 1st Most Common Venue \\\n",
"0 Adelaide, King, Richmond Coffee Shop \n",
"1 Berczy Park Coffee Shop \n",
"2 Brockton, Exhibition Place, Parkdale Village Coffee Shop \n",
"3 Business Reply Mail Processing Centre 969 Eastern Light Rail Station \n",
"4 CN Tower, Bathurst Quay, Island airport, Harbo... Airport Service \n",
"\n",
" 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n",
"0 Café American Restaurant Bar \n",
"1 Cocktail Bar Seafood Restaurant Beer Bar \n",
"2 Café Breakfast Spot Yoga Studio \n",
"3 Auto Workshop Smoke Shop Brewery \n",
"4 Airport Terminal Airport Lounge Harbor / Marina \n",
"\n",
" 5th Most Common Venue \n",
"0 Steakhouse \n",
"1 Bakery \n",
"2 Bar \n",
"3 Spa \n",
"4 Plane "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Most common venues per Neighborhood\n",
"def return_most_common_venues(row, num_top_venues):\n",
" row_categories = row.iloc[1:]\n",
" row_categories_sorted = row_categories.sort_values(ascending=False)\n",
" \n",
" return row_categories_sorted.index.values[0:num_top_venues]\n",
"\n",
"num_top_venues = 5\n",
"\n",
"indicators = ['st', 'nd', 'rd']\n",
"\n",
"# create columns according to number of top venues\n",
"columns = ['Neighborhood']\n",
"for ind in np.arange(num_top_venues):\n",
" try:\n",
" columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))\n",
" except:\n",
" columns.append('{}th Most Common Venue'.format(ind+1))\n",
"\n",
"# create a new dataframe\n",
"neighborhoods_venues_sorted = pd.DataFrame(columns=columns)\n",
"neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']\n",
"\n",
"for ind in np.arange(toronto_grouped.shape[0]):\n",
" neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)\n",
"\n",
"neighborhoods_venues_sorted.head()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2,\n",
" 4, 2, 3, 2, 2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 2, 2], dtype=int32)"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# set number of clusters\n",
"kclusters = 5\n",
"\n",
"toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)\n",
"\n",
"# run k-means clustering\n",
"kmeans = KMeans(n_clusters=kclusters, random_state=1).fit(toronto_grouped_clustering)\n",
"\n",
"# check cluster labels generated for each row in the dataframe\n",
"kmeans.labels_"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighbourhood</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" <th>Cluster Labels</th>\n",
" <th>1st Most Common Venue</th>\n",
" <th>2nd Most Common Venue</th>\n",
" <th>3rd Most Common Venue</th>\n",
" <th>4th Most Common Venue</th>\n",
" <th>5th Most Common Venue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>M4E</td>\n",
" <td>East Toronto</td>\n",
" <td>The Beaches</td>\n",
" <td>43.676357</td>\n",
" <td>-79.293031</td>\n",
" <td>0</td>\n",
" <td>Health Food Store</td>\n",
" <td>Other Great Outdoors</td>\n",
" <td>Trail</td>\n",
" <td>Pub</td>\n",
" <td>Wings Joint</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>M4K</td>\n",
" <td>East Toronto</td>\n",
" <td>The Danforth West, Riverdale</td>\n",
" <td>43.679557</td>\n",
" <td>-79.352188</td>\n",
" <td>0</td>\n",
" <td>Greek Restaurant</td>\n",
" <td>Coffee Shop</td>\n",
" <td>Italian Restaurant</td>\n",
" <td>Ice Cream Shop</td>\n",
" <td>Furniture / Home Store</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>M4L</td>\n",
" <td>East Toronto</td>\n",
" <td>The Beaches West, India Bazaar</td>\n",
" <td>43.668999</td>\n",
" <td>-79.315572</td>\n",
" <td>0</td>\n",
" <td>Sushi Restaurant</td>\n",
" <td>Movie Theater</td>\n",
" <td>Brewery</td>\n",
" <td>Sandwich Place</td>\n",
" <td>Board Shop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>M4M</td>\n",
" <td>East Toronto</td>\n",
" <td>Studio District</td>\n",
" <td>43.659526</td>\n",
" <td>-79.340923</td>\n",
" <td>0</td>\n",
" <td>Café</td>\n",
" <td>Coffee Shop</td>\n",
" <td>Bakery</td>\n",
" <td>Gastropub</td>\n",
" <td>Italian Restaurant</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>M4N</td>\n",
" <td>Central Toronto</td>\n",
" <td>Lawrence Park</td>\n",
" <td>43.728020</td>\n",
" <td>-79.388790</td>\n",
" <td>1</td>\n",
" <td>Bus Line</td>\n",
" <td>Park</td>\n",
" <td>Swim School</td>\n",
" <td>Wings Joint</td>\n",
" <td>Ethiopian Restaurant</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postcode Borough Neighbourhood Latitude \\\n",
"37 M4E East Toronto The Beaches 43.676357 \n",
"41 M4K East Toronto The Danforth West, Riverdale 43.679557 \n",
"42 M4L East Toronto The Beaches West, India Bazaar 43.668999 \n",
"43 M4M East Toronto Studio District 43.659526 \n",
"44 M4N Central Toronto Lawrence Park 43.728020 \n",
"\n",
" Longitude Cluster Labels 1st Most Common Venue 2nd Most Common Venue \\\n",
"37 -79.293031 0 Health Food Store Other Great Outdoors \n",
"41 -79.352188 0 Greek Restaurant Coffee Shop \n",
"42 -79.315572 0 Sushi Restaurant Movie Theater \n",
"43 -79.340923 0 Café Coffee Shop \n",
"44 -79.388790 1 Bus Line Park \n",
"\n",
" 3rd Most Common Venue 4th Most Common Venue 5th Most Common Venue \n",
"37 Trail Pub Wings Joint \n",
"41 Italian Restaurant Ice Cream Shop Furniture / Home Store \n",
"42 Brewery Sandwich Place Board Shop \n",
"43 Bakery Gastropub Italian Restaurant \n",
"44 Swim School Wings Joint Ethiopian Restaurant "
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# add clustering labels\n",
"neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)\n",
"\n",
"toronto_merged = toronto_data\n",
"\n",
"# join neighborhoods_venues_sorted onto toronto_data to add the cluster label and top venues for each neighborhood\n",
"toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')\n",
"\n",
"toronto_merged.head()"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The geograpical coordinates of Toronto are 43.653963, -79.387207.\n"
]
}
],
"source": [
"# Defining location\n",
"address = 'Toronto, Ontario'\n",
"\n",
"geolocator = Nominatim(user_agent=\"can_explorer\")\n",
"location = geolocator.geocode(address)\n",
"latitude = location.latitude\n",
"longitude = location.longitude\n",
"print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"width:100%;\"><div style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><iframe src=\"data:text/html;charset=utf-8;base64,\" style=\"position:absolute;width:100%;height:100%;left:0;top:0;border:none !important;\" allowfullscreen webkitallowfullscreen mozallowfullscreen></iframe></div></div>"
],
"text/plain": [
"<folium.folium.Map at 0x7fc2b4aea860>"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create map\n",
"map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)\n",
"\n",
"# set color scheme for the clusters\n",
"x = np.arange(kclusters)\n",
"ys = [i + x + (i*x)**2 for i in range(kclusters)]\n",
"colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))\n",
"rainbow = [colors.rgb2hex(i) for i in colors_array]\n",
"\n",
"# add markers to the map\n",
"markers_colors = []\n",
"for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):\n",
" label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)\n",
" folium.CircleMarker(\n",
" [lat, lon],\n",
" radius=5,\n",
" popup=label,\n",
" color=rainbow[cluster-1],\n",
" fill=True,\n",
" fill_color=rainbow[cluster-1],\n",
" fill_opacity=0.7).add_to(map_clusters)\n",
" \n",
"map_clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment