Skip to content

Instantly share code, notes, and snippets.

View ShayanRiyaz's full-sized avatar
🎯
Focusing

Shayan Riyaz ShayanRiyaz

🎯
Focusing
View GitHub Profile
# Scrape the Wikipedia list of Los Angeles districts and neighbourhoods.
# NOTE(review): indentation was flattened in the captured source; restored below.
url = requests.get('https://en.wikipedia.org/wiki/List_of_districts_and_neighbourhoods_of_Los_Angeles').text
soup = BeautifulSoup(url, "html.parser")
lis = []
for li in soup.findAll('li'):
    # The 'Portal:Los_Angeles' link marks the end of the neighbourhood list.
    if li.find(href="/wiki/Portal:Los_Angeles"):
        break
    # Keep list items that link to a wiki article.
    if li.find(href=re.compile("^/wiki/")):
        lis.append(li)
    # Pico Robertson is the only item on the list that does not have a
    # hyperlink reference, so match it by text and keep it as well.
    if li.text == 'Pico Robertson[34]':
        lis.append(li)
# Strip citation/reference brackets (e.g. '[12]') from neighbourhood names.
df['Neighbourhood'] = df.Neighbourhood.str.partition('[')[0]
# Drop alternative names listed after a comma (e.g. for 'Bel Air').
df['Neighbourhood'] = df.Neighbourhood.str.partition(',')[0]
# Remove entries that duplicate neighbourhoods already present.
duplicates = [
    'Baldwin Hills/Crenshaw',  # 'Baldwin Hills' and 'Crenshaw' exist already
    'Hollywood Hills West',    # same coordinates as 'Hollywood Hills'
    'Brentwood Circle',        # same coordinates as 'Brentwood'
    'Wilshire Park',           # same coordinates as 'Wilshire Center'
]
df = df[~df.Neighbourhood.isin(duplicates)]
df.reset_index(inplace=True, drop=True)
# Empty data frame that will collect each neighbourhood with its coordinates.
column_names = ['Neighbourhood', 'Latitude', 'Longitude']
nhoods = pd.DataFrame(columns=column_names)
# Nominatim geocoder; 5-second timeout to avoid hanging on slow responses.
geolocator = Nominatim(user_agent="la_explorer", timeout=5)
# Qualify each neighbourhood name with the city, for geocoding.
# NOTE(review): the loop body was flattened in the captured source and appears
# truncated — the geocoding/append steps that presumably follow are not
# visible here; only the address construction is restored.
for i in range(0, len(df)):
    address = df.Neighbourhood[i] + ', Los Angeles'
# Geocode the centre of Los Angeles and build the base folium map.
address = 'Los Angeles, USA'
geolocator = Nominatim(user_agent="la_explorer", timeout=5)  # timeout consistent with earlier setup
location = geolocator.geocode(address)
# geopy returns None when the service cannot resolve the address; fail with a
# clear error instead of an opaque AttributeError on the next line.
if location is None:
    raise ValueError('could not geocode {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))  # fixed typo: 'geograpical'
# create map of LA using the latitude and longitude values
map_la = folium.Map(location=[latitude, longitude], zoom_start=10)
# Foursquare API credentials — fill these in before calling the API.
CLIENT_ID = ''      # Foursquare ID
CLIENT_SECRET = ''  # Foursquare Secret
VERSION = ''        # Foursquare API version
# NOTE(review): echoing secrets to stdout leaks credentials in shared
# notebooks; consider removing these prints once real values are set.
print('Your credentials:')  # fixed typo: 'credentails'
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET: ' + CLIENT_SECRET)
# Sanity check: show the coordinates of the first geocoded neighbourhood.
first_row = nhoods.loc[0]
neighbourhood_name = first_row['Neighbourhood']    # neighbourhood name
neighbourhood_latitude = first_row['Latitude']     # neighbourhood latitude value
neighbourhood_longitude = first_row['Longitude']   # neighbourhood longitude value
print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name,
                                                               neighbourhood_latitude,
                                                               neighbourhood_longitude))
LIMIT = 100  # limit of number of venues returned by Foursquare API
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect nearby venues from the Foursquare 'explore' endpoint.

    For each (name, latitude, longitude) triple, builds an API request URL
    using the module-level CLIENT_ID / CLIENT_SECRET / VERSION credentials.

    NOTE(review): this definition is truncated in the captured source — the
    .format() call below is missing its remaining arguments, and the rest of
    the function body (request, response parsing, return) is not visible here.
    """
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
# One-hot encode the venue categories.
la_onehot = pd.get_dummies(la_venues[['Venue Category']], prefix="", prefix_sep="")
# Re-attach the neighbourhood label to each encoded row.
la_onehot['Neighbourhood'] = la_venues['Neighbourhood']
# Move the 'Neighbourhood' column (currently last) to the front.
cols = list(la_onehot.columns)
la_onehot = la_onehot[cols[-1:] + cols[:-1]]
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the num_top_venues largest values in row.

    The first entry of row (the neighbourhood name) is skipped; the remaining
    values are ranked in descending order and their index labels returned.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]
num_top_venues = 12  # number of top venue categories to report per neighbourhood
indicators = ['st', 'nd', 'rd']  # ordinal suffixes (1st, 2nd, 3rd) — presumably for ranked column labels; confirm against caller
from sklearn.metrics import silhouette_score

# Choose the number of clusters by silhouette analysis: fit K-means for
# k = 2..11 and report the silhouette coefficient for each clustering.
# The positional `axis` argument to DataFrame.drop was deprecated in
# pandas 1.1 and removed in pandas 2.0 — use the explicit keyword form.
la_grouped_clustering = la_grouped.drop('Neighbourhood', axis=1)  # features only
for n_cluster in range(2, 12):
    kmeans = KMeans(n_clusters=n_cluster).fit(la_grouped_clustering)
    label = kmeans.labels_
    sil_coeff = silhouette_score(la_grouped_clustering, label, metric='euclidean')
    print("For n_clusters={}, The Silhouette Coefficient is {}".format(n_cluster, sil_coeff))