Skip to content

Instantly share code, notes, and snippets.

@lafftar
Created August 30, 2020 12:04
Show Gist options
  • Save lafftar/7b0313b39b8077e4f27c68ae1ae8d6c9 to your computer and use it in GitHub Desktop.
Save lafftar/7b0313b39b8077e4f27c68ae1ae8d6c9 to your computer and use it in GitHub Desktop.
GTA Cities Real Estate - 27 Cities - Quick Data Study - 5
from requests import Session
from pandas import DataFrame
from bs4 import BeautifulSoup as bs
from time import time
# Scrape per-city real-estate figures from listing.ca for the places linked in
# the first sortable table of Wikipedia's "Greater Toronto and Hamilton Area"
# article, then write them to an Excel workbook and print the elapsed seconds.

t1 = time()  # start of the wall-clock timer reported at the end of the run

# Bare listing.ca landing page; a city request whose final URL is this page is
# treated as "no such city subdomain" and skipped.
LISTING_CA_HOME = 'https://listing.ca'

# Selector for the "average property price" box on the price-history page.
AVG_PRICE_SELECTOR = '#right > div.mt30 > div:nth-child(3) > div.rbox > div:nth-child(1)'

# Selector template for the rate-of-change cell in a given row of the
# price-history table.
RATE_OF_CHANGE_SELECTOR = ('#right > div.mt30 > div:nth-child(4) > div.rbox > table '
                           '> tr:nth-child({row}) > td:nth-child(3) > span')

# href of the link whose following node holds the 3-bedroom detached price.
THREE_BED_HREF = '/3-bedroom-detached-home-prices-by-community.htm'


def _stripped_text(node):
    """Return ``node.text`` stripped of surrounding whitespace, or None if ``node`` is None.

    BeautifulSoup lookups return None when nothing matches; mapping that to
    None here lets one missing figure become an empty cell instead of an
    AttributeError that aborts the whole run.
    """
    if node is None:
        return None
    return node.text.strip()


dump = []  # one dict per successfully scraped city; becomes the DataFrame rows

# Use the session as a context manager so its pooled connections are released.
with Session() as main_session:
    page = main_session.get('https://en.wikipedia.org/wiki/Greater_Toronto_and_Hamilton_Area').content
    page = bs(page, 'lxml')

    # Every link with an href in the first 'wikitable sortable' table, minus
    # the first link.
    first_table = page.find_all('table', class_='wikitable sortable')[0]
    city_links = first_table.find_all('a', attrs={'href': True})[1:]

    for element in city_links:
        city = element.text
        # Subdomain is the lower-cased link text with whitespace runs
        # replaced by hyphens.
        base_url = f"https://{'-'.join(city.split()).lower()}.listing.ca"

        # --- price history page -------------------------------------------
        url = f"{base_url}/real-estate-price-history.htm"
        print(f"Scraping {city}")
        resp = main_session.get(url)
        # Compare without a trailing slash so both 'https://listing.ca' and
        # 'https://listing.ca/' count as the landing page.
        if resp.url.rstrip('/') == LISTING_CA_HOME:
            print(f"{city} - {url} got redirected, skipping")
            continue

        listing_ca_page = bs(resp.content, 'lxml')
        avg_prop_price = _stripped_text(listing_ca_page.select_one(AVG_PRICE_SELECTOR))
        # Table rows 10, 9 and 7 are read as the 10-, 5- and 1-year figures.
        ten_years_roc = _stripped_text(
            listing_ca_page.select_one(RATE_OF_CHANGE_SELECTOR.format(row=10)))
        five_years_roc = _stripped_text(
            listing_ca_page.select_one(RATE_OF_CHANGE_SELECTOR.format(row=9)))
        one_year_roc = _stripped_text(
            listing_ca_page.select_one(RATE_OF_CHANGE_SELECTOR.format(row=7)))

        # --- prices by community page -------------------------------------
        url = f"{base_url}/real-estate-prices-by-community.htm"
        print(url)
        resp = main_session.get(url)
        print(resp.url)
        listing_ca_page = bs(resp.content, 'lxml')
        anchor = listing_ca_page.find('a', attrs={'href': THREE_BED_HREF})
        # The value is taken from the second node after the link in document
        # order (the first is normally the link's own text).
        price_node = None
        if anchor is not None and anchor.next_element is not None:
            price_node = anchor.next_element.next_element
        three_bed_home = _stripped_text(price_node)

        dump.append({
            "City": city,
            "Average Property Price": avg_prop_price,
            "10 Year Rate of Change": ten_years_roc,
            "5 Year Rate of Change": five_years_roc,
            "1 Year Rate of Change": one_year_roc,
            "3 Bedroom Home Price": three_bed_home,
            # NOTE: this is the prices-by-community URL, because `url` was
            # reassigned above.
            "Url": url,
        })
        print(f"Done scraping {city}")
        print('========================================')

data_frame = DataFrame(dump)
data_frame.to_excel('Relevant Real Estate Info - GTA Communities.xlsx', index=False)
t2 = time()
print(t2 - t1)  # total run time in seconds
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment