Created
November 8, 2016 20:01
-
-
Save AlJohri/dc51918a65752099b2a8f4df5dba7f93 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from urllib.parse import urlencode | |
from datetime import datetime, timezone | |
def jsonsearch(city, sort="date", **kwargs):
    """Lazily yield postings from a city's ``jsonsearch`` endpoint.

    The start URL is built with ``get_query_url(city, "jsonsearch", ...)``.
    Each response body is expected to decode to a two-element JSON value
    that unpacks as ``(items, meta)``. Items with a truthy ``'GeoCluster'``
    entry are treated as links to further result pages and are followed
    depth-first; every other item is converted to a flat record and yielded.

    Args:
        city: Craigslist subdomain to query.
        sort: Sort order forwarded to ``get_query_url``.
        **kwargs: Extra query parameters forwarded to ``get_query_url``.

    Yields:
        dict: One record per posting with the keys ``id``, ``title``,
        ``url``, ``longitude``, ``latitude``, ``price``, ``bedrooms``,
        ``date``, ``thumbnail`` and ``category_id``.
    """

    def to_record(raw):
        """Translate one raw posting dict into the record format yielded."""
        return {
            'id': raw['PostingID'],
            'title': raw['PostingTitle'],
            'url': cdn_url_to_http(raw['PostingURL']),
            'longitude': raw['Longitude'],
            'latitude': raw['Latitude'],
            'price': raw['Ask'],
            'bedrooms': raw['Bedrooms'],
            # PostedDate is parsed as a Unix timestamp and rendered as an
            # ISO-8601 string in UTC.
            'date': datetime.fromtimestamp(
                float(raw['PostedDate']), tz=timezone.utc
            ).isoformat(),
            # The thumbnail is the only optional field; missing -> None.
            'thumbnail': raw.get('ImageThumb'),
            'category_id': raw['CategoryID'],
        }

    def walk(page_url):
        """Yield the records of ``page_url``, then those of its clusters."""
        entries, info = requests.get(page_url).json()
        prefix = cdn_url_to_http(info['baseurl'])

        # Split the page into plain postings and cluster links. Records are
        # built eagerly so a malformed posting fails before anything from
        # this page is yielded.
        records = []
        pending_clusters = []
        for entry in entries:
            if entry.get('GeoCluster'):
                pending_clusters.append(entry)
            else:
                records.append(to_record(entry))

        yield from records

        # Cluster pages are fetched only once the consumer reaches them.
        for group in pending_clusters:
            yield from walk(prefix + group['url'])

    yield from walk(get_query_url(city, "jsonsearch", sort=sort, **kwargs))
def get_url_base(city):
    """Return the HTTPS root URL of the Craigslist site for ``city``.

    Args:
        city: Subdomain to insert in front of ``.craigslist.org``.

    Returns:
        str: ``https://<city>.craigslist.org`` with no trailing slash.
    """
    return f"https://{city}.craigslist.org"
def get_query_url(city, search_type, offset=0, sort="date", **kwargs):
    """Build a search URL under the ``apa`` path of a city's site.

    Args:
        city: Subdomain passed to ``get_url_base``.
        search_type: First path segment of the URL (e.g. ``"jsonsearch"``).
        offset: Value sent as the ``s`` query parameter.
        sort: Value sent as the ``sort`` query parameter.
        **kwargs: Additional query parameters; an ``s`` key here overrides
            ``offset``.

    Returns:
        str: ``<base>/<search_type>/apa?<query>``. Parameters whose value
        is ``None`` are left out of the query string.
    """
    query = {}
    for key, value in {"s": offset, "sort": sort, **kwargs}.items():
        # None means "not set"; other falsy values such as 0 are kept.
        if value is not None:
            query[key] = value
    return f"{get_url_base(city)}/{search_type}/apa?{urlencode(query)}"
def cdn_url_to_http(url):
    """Turn a protocol-relative URL (``//host/path``) into an ``http://`` URL.

    Args:
        url: A URL string, typically starting with ``//``.

    Returns:
        str: ``url`` unchanged when it already starts with ``http://`` or
        ``https://``; otherwise ``url`` with all leading slashes removed
        and ``http://`` prepended.
    """
    # An already-absolute URL must not get a second scheme glued on
    # (which would produce "http://http://...").
    if url.startswith(("http://", "https://")):
        return url
    # str.lstrip takes a set of characters, not a prefix, so "/" strips
    # every leading slash.
    return "http://" + url.lstrip("/")
if __name__ == '__main__':
    # Demo run: print every posting returned for one hard-coded search.
    # The filters are passed straight through as URL query parameters.
    search_filters = {
        "postal": 20071,
        "search_distance": 2,
        "min_price": 900,
        "max_price": 1600,
        "hasPic": 1,
        "availabilityMode": 1,
    }
    for listing in jsonsearch('washingtondc', **search_filters):
        print(listing)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment