Skip to content

Instantly share code, notes, and snippets.

@vb100
Created July 11, 2017 19:54
Show Gist options
  • Save vb100/c92a543b8030f252cb8201df10d19a51 to your computer and use it in GitHub Desktop.
Save vb100/c92a543b8030f252cb8201df10d19a51 to your computer and use it in GitHub Desktop.
This Python application reads the data for every real-estate listing directly from the web page, writes it into a pandas DataFrame, prints the DataFrame to the terminal, and saves it to Output.csv.
import requests
from bs4 import BeautifulSoup as bc
import pandas

# Scrape every result page of the listing site, collect one record per
# property row, then print the records as a DataFrame and save them as CSV.

l = []  # One dict per scraped property row; these become the DataFrame rows.
base_url = "http://www.pythonhow.com/real-estate/rock-springs-wy/LCWYROCKSPRINGS/"

# requests applies no timeout by default, so a stalled server would hang the
# script forever; fail after this many seconds instead.
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Download *url* and return ``(response, parsed_document)``.

    The response is returned alongside the parsed HTML so the caller can
    still print it. The HTTP status is deliberately not checked: the page is
    parsed whatever the server answered.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            exceeds ``REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return response, bc(response.content, "html.parser")


def _info_value(item, css_class):
    """Return the text of the ``<b>`` inside the first ``span.css_class``.

    Returns None when either the span or its ``<b>`` child is missing.
    """
    span = item.find("span", {"class": css_class})
    bold = span.find("b") if span is not None else None
    return bold.text if bold is not None else None


def _parse_property(item):
    """Build the record dict for one ``div.propertyRow`` element.

    Keys "Address", "Locality", "Price", "Beds", "Area", "Full Baths" and
    "Half Baths" are always present and hold None when the matching element
    is absent. "Lot Size" is only added when a feature group mentioning it
    is found.
    """
    d = {}

    # The first address span is stored as the address and the second, when
    # present, as the locality.
    address_spans = item.find_all("span", {"class": "propAddressCollapse"})
    d["Address"] = address_spans[0].text if address_spans else None
    d["Locality"] = address_spans[1].text if len(address_spans) > 1 else None

    price_tag = item.find("h4", {"class": "propPrice"})
    if price_tag is not None:
        # Strip the newlines and spaces surrounding the price text.
        d["Price"] = price_tag.text.replace("\n", "").replace(" ", "")
    else:
        d["Price"] = None

    d["Beds"] = _info_value(item, "infoBed")
    d["Area"] = _info_value(item, "infoSqFt")
    d["Full Baths"] = _info_value(item, "infoValueFullBath")
    d["Half Baths"] = _info_value(item, "infoValueHalfBath")

    # Feature labels and values are sibling spans paired by position within
    # each column group.
    for column_group in item.find_all("div", {"class": "columnGroup"}):
        feature_groups = column_group.find_all("span", {"class": "featureGroup"})
        feature_names = column_group.find_all("span", {"class": "featureName"})
        for feature_group, feature_name in zip(feature_groups, feature_names):
            if "Lot Size" in feature_group.text:
                d["Lot Size"] = feature_name.text

    return d


_, soup = _fetch_soup(base_url)
# The text of the last pagination link is used as the number of result pages.
page_nr = soup.find_all("a", {"class": "Page"})[-1].text
print(page_nr)

# Result pages are addressed by the "s=" offset in steps of 10
# (presumably 10 listings per page - confirm against the site).
for page in range(0, int(page_nr) * 10, 10):
    page_url = base_url + "t=0&s=" + str(page) + ".html"
    # Printed before the request so a failing URL is visible in the output.
    print(page_url)
    r, soup = _fetch_soup(page_url)
    print(r)
    for item in soup.find_all("div", {"class": "propertyRow"}):
        l.append(_parse_property(item))

df = pandas.DataFrame(l)
print(df)
df.to_csv("Output.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment