Skip to content

Instantly share code, notes, and snippets.

# The list is spread over four pages; `domain` holds the web link of each page.
# User-Agent header describing a desktop Chrome browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
}
domain = [
    "https://en.wikipedia.org/wiki/Category:Suburbs_of_Sydney",
    "https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Sydney&pagefrom=Dharruk%2C+New+South+Wales#mw-pages",
    "https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Sydney&pagefrom=Macgraths+Hill%0AMcGraths+Hill%2C+New+South+Wales#mw-pages",
    "https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Sydney&pagefrom=Singletons+Mill%2C+New+South+Wales#mw-pages",
]
# Empty list that will store the content
suburb_list = []
for i in range(len(domain)):
@Perishleaf
Perishleaf / GET_BS4_compiling.py
Created December 15, 2019 21:27
Define a function for compiling GET request and BeautifulSoup info into an array
# Define a function for compiling info into an array
def getDemography(suburb_names, postcode_list, state='NSW'):
Demography_list=[]
for suburb, postcode in zip(suburb_names, postcode_list):
print(suburb)
suburb = suburb.replace(' ', '-')
# create the API request URL
headers = ({'User-Agent':
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
url = 'https://www.domain.com.au/suburb-profile/{}-nsw-{}'.format(
@Perishleaf
Perishleaf / choropleth_map.py
Created December 15, 2019 21:29
Create choropleth map
# Coordinates used as the centre of the Sydney map
latitude = -33.892319
longitude = 151.146167
# Plain base map of Sydney
population_map = folium.Map(
    location=[latitude, longitude],
    zoom_start=11,
    tiles='cartodbpositron',
)
# Population scale: 0 up to 50000 in steps of 10000
bins = [0, 10000, 20000, 30000, 40000, 50000]
# generate choropleth map using the total population of each suburb in Sydney
population_map.choropleth(
geo_data=NSW_data,
@Perishleaf
Perishleaf / foursquare_venues.py
Created December 15, 2019 21:30
Get nearby venues using Foursquare API
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
venues_list=[]
for name, lat, lng in zip(names, latitudes, longitudes):
print(name)
# create the API request URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
CLIENT_ID,
CLIENT_SECRET,
VERSION,
lat,
@Perishleaf
Perishleaf / k_mean_cluster.py
Created December 15, 2019 21:37
Classification using K-means clustering
# set number of clusters
kclusters = 4
# Remove the 'Suburb' column so only the remaining columns are passed to
# k-means. The column is named by keyword because pandas 2.0 no longer accepts
# the axis as a positional argument (`drop('Suburb', 1)` raises TypeError).
sydney_grouped_clustering = sydney_grouped.drop(columns='Suburb')
# run k-means clustering (random_state=0 is passed as the seed)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(sydney_grouped_clustering)
# check cluster labels generated for the first 10 rows in the dataframe
kmeans.labels_[0:10]
# show the rows labelled as cluster 2, keeping column 0 and columns 4 onwards
sydney_merged.loc[sydney_merged['Cluster Labels'] == 2, sydney_merged.columns[[0] + list(range(4, sydney_merged.shape[1]))]]
@Perishleaf
Perishleaf / fill_0_with_predict.py
Created December 15, 2019 21:43
Value imputation
# Fill in one price per row using the fitted linear model
#     House_rent = coef * House_buy + intercept
# Each result is stored as a one-element list holding the tuple
# (Suburb, Postcode, House_rent, House_buy).
new_data = []
rows = zip(
    data_for_pred.House_buy,
    data_for_pred.House_rent,
    data_for_pred.Suburb,
    data_for_pred.Postcode,
)
for buy_price, rent_price, suburb, postcode in rows:
    if rent_price != 0:
        # Rent is available: invert the model to estimate the buy price.
        buy_estimate = (rent_price - regr.intercept_[0]) / regr.coef_[0][0]
        record = (suburb, postcode, rent_price, buy_estimate)
    else:
        # Rent is recorded as 0: estimate it from the buy price.
        rent_estimate = regr.coef_[0][0] * buy_price + regr.intercept_[0]
        record = (suburb, postcode, rent_estimate, buy_price)
    new_data.append([record])
@Perishleaf
Perishleaf / top_20_suburbs.py
Created December 15, 2019 21:45
List the top 20 suburbs with the largest number of venues
# Count the rows per suburb, discard the coordinate and category count
# columns, and order the suburbs by their 'Venue' count, largest first.
sydney_venues_num = (
    sydney_venues.groupby('Suburb')
    .count()
    .drop(columns=[
        'Suburb Latitude',
        'Suburb Longitude',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category',
    ])
    .sort_values(['Venue'], ascending=False)
    .reset_index(drop=False)
)
# Title-case the suburb names
sydney_venues_num['Suburb'] = sydney_venues_num['Suburb'].str.title()
# Number of suburbs in the table
print(sydney_venues_num.shape[0])
# Show the 20 suburbs with the highest counts
sydney_venues_num.head(20)
# Create bar plot for trace2
trace2 = []
for q in Types:
trace2.append(go.Bar(
x=median_price_new.sort_values([q], ascending=False).head(10)[q],
y=median_price_new.sort_values([q], ascending=False).head(10)['Suburb_name_geojson'].str.title().tolist(),
xaxis='x2',
yaxis='y2',
marker=dict(
color='rgba(91, 207, 135, 0.3)',
# add a dropdown menu in the layout
layout.update(updatemenus=list([
dict(x=0,
y=1,
xanchor='left',
yanchor='middle',
buttons=list([
dict(
args=['visible', [True, False, False, False]],
label='Property type: Unit buy/M',
# Suburbs order should be the same as "id" passed to location
for q in Types:
trace1.append(go.Choroplethmapbox(
geojson = Sydney_data,
locations = median_price_new['id'].tolist(),
z = median_price_new[q].tolist(),
colorscale = pl_deep,
text = suburbs,
colorbar = dict(thickness=20, ticklen=3),
marker_line_width=0, marker_opacity=0.7,