Created December 15, 2019 21:20
# Imports used below
from requests import get
from bs4 import BeautifulSoup

# The category list spans four pages, so we store the web link for each page in `domain`
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}
domain = []
domain.append("https://en.wikipedia.org/wiki/Category:Suburbs_of_Sydney")
domain.append("https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Sydney&pagefrom=Dharruk%2C+New+South+Wales#mw-pages")
domain.append("https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Sydney&pagefrom=Macgraths+Hill%0AMcGraths+Hill%2C+New+South+Wales#mw-pages")
domain.append("https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Sydney&pagefrom=Singletons+Mill%2C+New+South+Wales#mw-pages")
# Create an empty list to store the content
suburb_list = []
for i in range(len(domain)):
    response = get(domain[i], headers=headers)
    # Check whether we got information from the target website; "200" denotes OK
    print(response)
    html_soup = BeautifulSoup(response.text, 'html.parser')
    # After inspecting the first "find_all", the list we need is at index [1]; the result is a list of lists
    suburb_list.append(html_soup.find_all('div', class_="mw-category")[1].find_all('div', class_="mw-category-group"))
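After the loop, `suburb_list` holds one sub-list of `mw-category-group` elements per scraped page. A minimal sketch of flattening that nested result into a single flat list, using only the standard library (the sample data here is illustrative placeholder strings, not real scraped `Tag` objects):

```python
from itertools import chain

# Stand-in for suburb_list: one sub-list of group elements per scraped page
# (real entries would be BeautifulSoup Tag objects, not strings)
suburb_list = [["group-A", "group-B"], ["group-C"], ["group-D", "group-E"]]

# Flatten the list of lists into one flat list of groups
all_groups = list(chain.from_iterable(suburb_list))
print(all_groups)  # ['group-A', 'group-B', 'group-C', 'group-D', 'group-E']
```

From the flat list, the individual suburb links could then be pulled out of each group element in a second pass.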