Skip to content

Instantly share code, notes, and snippets.

@nubela
Created November 20, 2014 16:13
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nubela/f4452e34a4adfd38ecf6 to your computer and use it in GitHub Desktop.
Save nubela/f4452e34a4adfd38ecf6 to your computer and use it in GitHub Desktop.
PropertyGuru scraper because why the f*ck do websites block copy paste?
import json
from BeautifulSoup import BeautifulSoup
import requests
# Output file that receives the scraped listings as JSON.
FILE_NAME = "result.txt"

# Site root; listing hrefs are appended to this to form absolute URLs.
BASE_URL = "http://www.propertyguru.com.sg/"

# Search-results URL template. The single ``%d`` placeholder is the page
# number; the query string restricts results to HDB rentals with
# minprice=1500, maxprice=2500, minsize=1000 and distance=0.5 around the
# given latitude/longitude (units are whatever the site defines).
URL = (
    "http://www.propertyguru.com.sg/singapore-property-listing/property-for-rent/%d?property_type=H"
    "&property_type_code[]=HDB&minprice=1500&maxprice=2500&minsize=1000&distance=0.5&center_lat=1"
    ".39126455055&center_long=103.89543056488&latitude=1.39126455055&longitude=103.89543056488"
)
def get_listings(page=1):
    """Return the absolute URLs of the listings on one search-results page.

    Args:
        page: Page number substituted into the ``%d`` placeholder of ``URL``.

    Returns:
        A list of strings, each being ``BASE_URL`` concatenated with the
        ``href`` of a matching listing anchor. The list is empty when no
        anchor on the page matches.
    """
    response = requests.get(URL % page)
    soup = BeautifulSoup(response.text)
    # The class string (trailing space included) is kept byte-for-byte;
    # changing it could alter which anchors are matched.
    anchors = soup.findAll("a", {"class": "infotitle listing_action clearfix "})
    # Build a concrete list so callers get the same reusable sequence on
    # both Python 2 and Python 3 (where ``map`` would be a one-shot iterator).
    return [BASE_URL + anchor["href"] for anchor in anchors]
def process_listings(url):
    """Fetch one listing page and extract its key details.

    Args:
        url: Absolute URL of a single listing page.

    Returns:
        A dict with the keys ``agent_name``, ``no``, ``price``, ``size`` and
        ``address``. ``price`` and ``size`` are ints; the others are strings.

    Raises:
        AttributeError, IndexError: if an expected element is missing from
            the page (the lookups are not guarded).
        ValueError: if the price or size text is not a plain integer after
            the surrounding text is removed.
    """
    import re  # local import: only the address clean-up below needs it

    response = requests.get(url)
    soup = BeautifulSoup(response.text)

    agent_info = soup.find("div", {"class": "agent_info"})
    name = agent_info.h3.string.strip()
    # Keep only the text before the first carriage return
    # (presumably the agent's contact number -- verify against the page).
    no = agent_info.div.string.strip().split("\r")[0]

    info1 = soup.find("div", {"class": "info1"})
    info_all = info1.findAll("p")

    # First paragraph: strip the "S$ " prefix, " / month" suffix and commas.
    price_text = info_all[0].span.string.strip()
    price = int(price_text.replace("S$ ", "").replace(" / month", "").replace(",", ""))

    # Third paragraph: the leading token is the size; drop its commas.
    size = int(info_all[2].string.strip().split(" ")[0].replace(",", ""))

    # Fourth paragraph: flatten tabs and CRLF line breaks into spaces, then
    # collapse every run of spaces into one. The previous
    # ``while " " in address`` loop replaced a space with a space and so
    # never terminated for any address containing a space.
    address = info_all[3].string
    address = address.replace("\t", " ")
    address = address.replace("\r\n", " ")
    address = re.sub(" {2,}", " ", address)

    return {
        "agent_name": name,
        "no": no,
        "price": price,
        "size": size,
        "address": address,
    }
# Scrape five result pages and write every listing found to FILE_NAME as JSON.
#
# The accumulator lives outside the page loop: it used to be reset at the
# top of every iteration, so only the last page's listings were ever saved.
results = []
for page in range(5):
    # NOTE(review): this requests pages 0-4 while get_listings() defaults to
    # page=1 -- confirm whether the site's page numbers start at 0 or 1.
    for listing_url in get_listings(page):
        results.append(process_listings(listing_url))

# Write once, after all pages are collected; ``with`` closes the file even
# if serialisation fails.
with open(FILE_NAME, "w") as f:
    f.write(json.dumps(results))
@ganeshraj
Copy link

Thanks for this code. I'm trying to filter PropertyGuru for places with no live-in landlords, but it won't let me do it.

@Ismail-Ishak
Copy link

@ganeshraj is the script still working?

@maxxbw54
Copy link

I tested the code; unfortunately, it does not work because the site does not allow access through the requests library.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment