This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape the Wikipedia list of LA districts/neighbourhoods and collect the
# <li> elements that link to a neighbourhood article.
url = requests.get('https://en.wikipedia.org/wiki/List_of_districts_and_neighbourhoods_of_Los_Angeles').text
soup = BeautifulSoup(url, "html.parser")

lis = []
for li in soup.findAll('li'):
    # The "Portal: Los Angeles" link marks the end of the neighbourhood
    # list on the page: stop scanning there.
    if li.find(href="/wiki/Portal:Los_Angeles"):
        break
    if li.find(href=re.compile("^/wiki/")):
        lis.append(li)
    elif li.text == 'Pico Robertson[34]':
        # Pico Robertson is the only list item without a hyperlink reference,
        # so the href check above misses it; keep it explicitly.
        # NOTE(review): the original snippet is truncated at this branch —
        # the append is the behaviour implied by its comment; confirm.
        lis.append(li)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Normalise the scraped neighbourhood names and drop duplicate entries.
df['Neighbourhood'] = df.Neighbourhood.str.partition('[')[0]  # strip citation brackets, e.g. 'Name[12]'
df['Neighbourhood'] = df.Neighbourhood.str.partition(',')[0]  # strip the alternatives listed for 'Bel Air'
df = df[df.Neighbourhood != 'Baldwin Hills/Crenshaw']  # redundant: 'Baldwin Hills' and 'Crenshaw' already exist
df = df[df.Neighbourhood != 'Hollywood Hills West']    # redundant: same coordinates as 'Hollywood Hills'
df = df[df.Neighbourhood != 'Brentwood Circle']        # redundant: same coordinates as 'Brentwood'
df = df[df.Neighbourhood != 'Wilshire Park']           # redundant: same coordinates as 'Wilshire Center'
df.reset_index(inplace=True, drop=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# define the data frame columns for the per-neighbourhood coordinates
column_names = ['Neighbourhood', 'Latitude', 'Longitude']

# instantiate the (initially empty) data frame
nhoods = pd.DataFrame(columns=column_names)

# One geocoder instance reused for every lookup; 5 s timeout avoids hangs.
geolocator = Nominatim(user_agent="la_explorer", timeout=5)

for i in range(len(df)):
    # Qualify each name with the city so Nominatim resolves it within LA.
    address = df.Neighbourhood[i] + ', Los Angeles'
    # NOTE(review): the remainder of the loop body (geocoding `address` and
    # appending the result to `nhoods`) is truncated in this snippet.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Geocode the city centre and build the base folium map of Los Angeles.
address = 'Los Angeles, USA'
geolocator = Nominatim(user_agent="la_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# Typo fixed: 'geograpical' -> 'geographical'.
print('The geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))

# create map of LA using latitude and longitude values
map_la = folium.Map(location=[latitude, longitude], zoom_start=10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Foursquare API credentials (fill in before running the venue queries).
CLIENT_ID = ''      # Foursquare ID
CLIENT_SECRET = ''  # Foursquare Secret
VERSION = ''        # Foursquare API version, e.g. '20180605'

# Typo fixed: 'credentails' -> 'credentials'.
print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
# NOTE(review): printing the secret is unsafe once real credentials are set.
print('CLIENT_SECRET: ' + CLIENT_SECRET)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sanity check: inspect the first neighbourhood's name and coordinates.
neighbourhood_latitude = nhoods.loc[0, 'Latitude']    # neighbourhood latitude value
neighbourhood_longitude = nhoods.loc[0, 'Longitude']  # neighbourhood longitude value
neighbourhood_name = nhoods.loc[0, 'Neighbourhood']   # neighbourhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name,
                                                               neighbourhood_latitude,
                                                               neighbourhood_longitude))

LIMIT = 100  # limit of number of venues returned by the Foursquare API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): this snippet is truncated mid-expression, and the scrape
# stripped its indentation and appended '| |' table artifacts; code is left
# byte-identical pending the full source. Intent (from the visible lines):
# for each (name, lat, lng) triple, build a Foursquare 'explore' request URL
# using the module-level CLIENT_ID / CLIENT_SECRET credentials.
def getNearbyVenues(names, latitudes, longitudes, radius=500): | |
venues_list=[] | |
for name, lat, lng in zip(names, latitudes, longitudes): | |
# create the API request URL | |
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format( | |
CLIENT_ID, | |
CLIENT_SECRET, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode each venue's category (one indicator column per category).
la_onehot = pd.get_dummies(la_venues[['Venue Category']], prefix="", prefix_sep="")

# add the neighbourhood column back to the data frame
la_onehot['Neighbourhood'] = la_venues['Neighbourhood']

# move the neighbourhood column (currently last) to the first position
fixed_columns = [la_onehot.columns[-1]] + list(la_onehot.columns[:-1])
la_onehot = la_onehot[fixed_columns]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def return_most_common_venues(row, num_top_venues):
    """Return the names of the `num_top_venues` highest-valued categories.

    `row` is a Series whose first entry is the neighbourhood name and whose
    remaining entries are per-category frequencies; the name is skipped.
    """
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]


num_top_venues = 12              # how many top venue categories to report
indicators = ['st', 'nd', 'rd']  # ordinal suffixes for '1st', '2nd', '3rd' column labels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import silhouette_score

# Sweep k = 2..11 and print the silhouette coefficient for each, to choose
# the number of K-means clusters.
# Fix: positional `axis` (drop('Neighbourhood', 1)) was removed in pandas 2.0;
# use the explicit keyword form.
la_grouped_clustering = la_grouped.drop('Neighbourhood', axis=1)
for n_cluster in range(2, 12):
    kmeans = KMeans(n_clusters=n_cluster).fit(la_grouped_clustering)
    labels = kmeans.labels_
    sil_coeff = silhouette_score(la_grouped_clustering, labels, metric='euclidean')
    print("For n_clusters={}, The Silhouette Coefficient is {}".format(n_cluster, sil_coeff))
OlderNewer