Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import requests
import urllib
import json
def get_rect_from_zip(session, zip_code):
    """
    Given a zipcode, retrieve a coordinate bounding box for use with Zillow Search.

    The page for ``zip_code`` is requested and the bounding box is read from
    the ``rect=`` entry of the first cookie in the response's ``Set-Cookie``
    header.

    Args:
        session: Object with a ``get(url)`` method whose response exposes a
            ``headers`` mapping (a ``requests.Session`` in this file).
        zip_code: Zip code to look up; it is converted with ``str``.

    Returns:
        The first four coordinates, each scaled by 1e6 and rounded to an
        integer, in reverse order and joined with commas.

    Raises:
        KeyError: If the response has no ``Set-Cookie`` header.
        ValueError: If the cookie has no ``rect=`` entry, has fewer than four
            coordinates, or a coordinate is not a number.
    """
    # Local import so the function works on both Python 2 and Python 3.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    # NOTE(review): the URL prefix is an empty string in this file, so only
    # the bare zip code is requested -- confirm the intended search URL.
    url = "" + str(zip_code)
    response = session.get(url)

    cookie = response.headers['Set-Cookie'].split(';')[0]
    decoded = unquote(cookie)
    if isinstance(decoded, bytes):
        # Python 2's unquote hands back a byte string.
        decoded = decoded.decode('utf8')

    if 'rect=' not in decoded:
        raise ValueError(
            "no 'rect=' entry in cookie for zip code {}".format(zip_code))

    # The rect value is unquoted a second time, then cut at the next '&'.
    raw_rect = unquote(decoded.split('rect=')[-1]).split('&')[0]
    coords = raw_rect.split(',')
    if len(coords) < 4:
        raise ValueError(
            "expected 4 coordinates in rect, got {!r}".format(raw_rect))

    # Round rather than truncate the string form: a product such as
    # 47123455.99999999 must not lose a micro-degree.
    scaled = [str(int(round(float(value) * 1e6))) for value in coords]
    return ",".join([scaled[3], scaled[2], scaled[1], scaled[0]])
def scrape_rect(session, page, rect):
    """
    Given a coordinate bounding box, get rental results.

    ``rect`` and ``page`` are appended to the fixed query string as the
    ``rect`` and ``p`` parameters, the resulting URL is fetched through
    ``session`` and the response body is returned parsed as JSON.
    """
    fixed_query = ",&mp=,&bd=0%2C&ba=0%2C&sf=,&lot=0%2C&yr=,&singlestory=0&hoa=0%2C&pho=0&pets=0&parking=0&laundry=0&income-restricted=0&pnd=0&red=0&zso=0&days=any&ds=all&pmf=0&pf=0&sch=100111&sort=days&search=maplist&disp=1&rid=62022&rt=7&listright=true&photoCardsEnabled=true&isMapSearch=true&zoom=15"
    paging = "&rect={rect}&p={page}".format(rect=rect, page=page)
    response = session.get(fixed_query + paging)
    return response.json()
def get_num_pages_from_rect_data(data):
    """
    Determine number of pages in result set for a given coordinate bounding box.

    Reads ``data['list']['numPages']`` and converts it to ``int``.
    """
    listing = data['list']
    return int(listing['numPages'])
def parse_rect_data(data):
    """
    Fetch list of buildings in result set.

    This is the value stored at ``data['map']['buildings']``, returned as-is.
    """
    map_section = data['map']
    return map_section['buildings']
def scrape_zip_code(session, zip_code, max_pages=None):
    """
    Scrape the rental results for one zip code.

    1. Fetch bounding box for a zip code.
    2. Fetch first page of results.
    3. Paginate through all results.
    4. Return data as a list of buildings.

    Args:
        session: HTTP session handed to the fetch helpers.
        zip_code: Zip code to scrape.
        max_pages: Optional cap on the number of pages fetched (useful for
            testing). ``None`` fetches every page reported by the first
            response. The first page is always fetched.

    Returns:
        The buildings of every fetched page, combined in page order.
    """
    rect = get_rect_from_zip(session, zip_code)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Fetching page 1 for zip code {}".format(zip_code))
    first_page = scrape_rect(session, 1, rect)
    num_pages = get_num_pages_from_rect_data(first_page)
    print("Found {} pages for zip code {}".format(num_pages, zip_code))

    if max_pages is not None:
        num_pages = min(num_pages, max_pages)

    # Copy so that extending the result never mutates the parsed response.
    buildings = list(parse_rect_data(first_page))

    # Fetch the remaining pages and accumulate their buildings.
    for page in range(2, num_pages + 1):
        print("Fetching page {} for zip code {}".format(page, zip_code))
        buildings.extend(parse_rect_data(scrape_rect(session, page, rect)))
    return buildings
def scrape_zip_codes(zip_codes):
    """
    Scrape multiple zip codes.

    One ``requests.Session`` is shared by all requests and closed once the
    last zip code has been processed, including when scraping raises.

    Args:
        zip_codes: Iterable of zip codes, scraped in iteration order.

    Returns:
        A single list with the buildings of every zip code, in the order the
        zip codes were given.
    """
    data = []
    # The context manager closes the session instead of leaking it.
    with requests.Session() as session:
        for zc in zip_codes:
            data.extend(scrape_zip_code(session, zc))
    return data
if __name__ == '__main__':
    # Run the scraper and write the results to a json file.
    # NOTE(review): ZIP_CODES is not defined in the visible part of this
    # file -- it must exist at module level before this block runs.
    output = scrape_zip_codes(ZIP_CODES)
    print("Outputting {} records to output.json".format(len(output)))
    # Text mode: json.dump writes str, which a binary-mode file rejects on
    # Python 3.
    with open('output.json', 'w') as f:
        json.dump(output, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.