Skip to content

Instantly share code, notes, and snippets.

@nubela
Created November 20, 2014 16:13
Show Gist options
  • Save nubela/f4452e34a4adfd38ecf6 to your computer and use it in GitHub Desktop.
Save nubela/f4452e34a4adfd38ecf6 to your computer and use it in GitHub Desktop.
PropertyGuru scraper because why the f*ck do websites block copy paste?
import json
from BeautifulSoup import BeautifulSoup
import requests
# Output file: a JSON array of scraped listings.
FILE_NAME = "result.txt"
# Site root; listing hrefs are prefixed with this to form absolute URLs.
BASE_URL = "http://www.propertyguru.com.sg/"
# Search-results URL template; %d is the results page number.
# NOTE(review): search filters (price, size, coordinates) are hard-coded here.
URL = "http://www.propertyguru.com.sg/singapore-property-listing/property-for-rent/%d?property_type=H" \
"&property_type_code[]=HDB&minprice=1500&maxprice=2500&minsize=1000&distance=0.5&center_lat=1" \
".39126455055&center_long=103.89543056488&latitude=1.39126455055&longitude=103.89543056488"
def get_listings(page=1):
    """Fetch one search-results page and return the listing URLs on it.

    :param page: 1-based results page number interpolated into URL.
    :return: list of absolute listing-detail URLs.
    """
    r = requests.get(URL % page)
    soup = BeautifulSoup(r.text)
    # The class string ends with a trailing space on purpose: it must match
    # the site's markup exactly for this BeautifulSoup attribute filter.
    all_listings = soup.findAll("a", {"class": "infotitle listing_action clearfix "})
    # List comprehension instead of map(lambda ...): identical result under
    # Python 2, and avoids a one-shot map iterator under Python 3.
    return [BASE_URL + anchor["href"] for anchor in all_listings]
def process_listings(url):
    """Fetch a single listing page and extract its key fields.

    :param url: absolute URL of a listing-detail page.
    :return: dict with keys ``agent_name``, ``no`` (contact number),
        ``price`` (int, S$ per month), ``size`` (int, first token of the
        size field), ``address`` (whitespace-normalized string).
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
    # Agent name and contact number live in the "agent_info" sidebar block.
    agent_info = soup.find("div", {"class": "agent_info"})
    name = agent_info.h3.string.strip()
    no = agent_info.div.string.strip().split("\r")[0]
    info1 = soup.find("div", {"class": "info1"})
    info_all = info1.findAll("p")
    # e.g. "S$ 2,000 / month" -> 2000
    price = int(info_all[0].span.string.strip().replace("S$ ", "").replace(" / month", "").replace(",", ""))
    # e.g. "1,200 sqft" -> 1200 (first whitespace-separated token)
    size = int(info_all[2].string.strip().split(" ")[0].replace(",", ""))
    address = info_all[3].string
    address = address.replace("\t", " ")
    address = address.replace("\r\n", " ")
    # Collapse runs of spaces. BUG FIX: the original replaced a single space
    # with a single space, which never terminates once any space is present;
    # the intended operation is collapsing double spaces until none remain.
    while "  " in address:
        address = address.replace("  ", " ")
    return {
        "agent_name": name,
        "no": no,
        "price": price,
        "size": size,
        "address": address,
    }
# Scrape the first 5 result pages and dump all listings to FILE_NAME as JSON.
# BUG FIXES vs. the original:
#   * pages are 1-based (see get_listings default), so iterate 1..5, not
#     range(5) which requested a bogus page 0;
#   * results were reset and the file reopened in "w" mode on every page,
#     so only the final page's listings survived — accumulate everything
#     and write the file exactly once, via a context manager.
results = []
for page in range(1, 6):
    for listing_url in get_listings(page):
        results.append(process_listings(listing_url))
with open(FILE_NAME, "w") as f:
    f.write(json.dumps(results))
@maxxbw54
Copy link

I tested the code; unfortunately, it no longer works because the site rejects requests made with the `requests` library.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment