Skip to content

Instantly share code, notes, and snippets.

@Perishleaf
Created December 15, 2019 21:27
Show Gist options
  • Save Perishleaf/e8427b1217ff1bfbcc43fc95847c7a0e to your computer and use it in GitHub Desktop.
Save Perishleaf/e8427b1217ff1bfbcc43fc95847c7a0e to your computer and use it in GitHub Desktop.
Define a function for compiling GET request and BeautifulSoup info into array
# Define a function for compiling info into array
def getDemography(suburb_names, postcode_list, state='NSW'):
Demography_list=[]
for suburb, postcode in zip(suburb_names, postcode_list):
print(suburb)
suburb = suburb.replace(' ', '-')
# create the API request URL
headers = ({'User-Agent':
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
url = 'https://www.domain.com.au/suburb-profile/{}-nsw-{}'.format(
suburb,
postcode)
# make the GET request
response = get(url, headers=headers)
# Parse the html
html_soup = BeautifulSoup(response.text, 'html.parser')
info_containers = html_soup.find_all('div', class_="suburb-profile__row")
try:
if info_containers != []:
demography = info_containers[0].find_all('div', class_="css-jkrtif")[0].find_all('div', class_="css-54bw0x")
if demography != []:
population = demography[0].text
population = population.replace(',', '')
age = demography[1].text
else:
# sometime there will be a promotion section on the result site, hence demography info locates in different section.
demography = info_containers[1].find_all('div', class_="css-jkrtif")[0].find_all('div', class_="css-54bw0x")
if demography != []:
population = demography[0].text
population = population.replace(',', '')
age = demography[1].text
else:
# sometimes there will be no infomation.
population = "NA"
age = "NA"
else:
# sometimes there is no infomation
population = "NA"
age = "NA"
except:
pass
# return only relevant information for suburb
Demography_list.append([(
suburb,
postcode,
population,
age)])
# Wait a given time bewteen 5 to 15 seconds for scraping the next website to mimic a humanbeing search.
time.sleep(random.randint(5,15))
return(Demography_list)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment