Skip to content

Instantly share code, notes, and snippets.

@pawanjay176
Created March 4, 2023 15:13
Show Gist options
  • Save pawanjay176/c93e82d2e31688e19c62ec67270974d1 to your computer and use it in GitHub Desktop.
Save pawanjay176/c93e82d2e31688e19c62ec67270974d1 to your computer and use it in GitHub Desktop.
Scrapes apartment listings on craigslist
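# Usage: python3 apartment.py
# NOTE: nothing in this file reads a command-line <RADIUS-FROM-YALETOWN> argument;
# the search radius comes from the RADIUS constant below.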
import requests
# Path of the text file that persist() overwrites and load() reads back,
# one listing URL per line.
DB_FILE = "db.txt"
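# The listing name is not at a fixed position in a craigslist result item, so
# probe several candidate fields for the one shaped like [6, <name>].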
def get_name(a, b, c):
    """Return the payload of the first argument shaped like ``[6, value]``.

    The arguments are examined in the order ``a``, ``b``, ``c``. A candidate
    matches when it is a ``list`` of exactly two elements whose first element
    equals ``6``; the second element of that list is returned.

    Returns the string ``"ohh noo"`` when none of the three arguments match.
    """
    for candidate in (a, b, c):
        if not isinstance(candidate, list):
            continue
        if len(candidate) == 2 and candidate[0] == 6:
            return candidate[1]
    # Sentinel used when no candidate carries the expected tag.
    return "ohh noo"
# Prefix of a listing page URL; get_listings() appends "<name>/<id>.html".
BASE_LISTING_URL = "https://vancouver.craigslist.org/van/apa/d/"
# Search endpoint queried by get_listings(); `params` supplies the query string.
BASE_API_URL = "https://sapi.craigslist.org/web/v7/postings/search/full?"

# Search filters fed into `params` below.
RADIUS = 4
MAX_PRICE = 2500

# Example of a raw query string for the endpoint above. Not referenced
# anywhere else in the visible file; kept as-is.
b = "batch=16-0-360-0-0&cc=US&lang=en&max_price=2500&postal=V6B5P2&searchPath=apa&search_distance=3.125"

# Query parameters sent with every search request (key order is preserved).
params = dict(
    batch="16-0-360-0-0",
    cc="US",
    lang="en",
    max_price=str(MAX_PRICE),
    postal="V6B5P2",  # Yaletown
    searchPath="apa",
    search_distance=RADIUS,
)
def get_listings():
    """Query the search API and return up to 20 listing page URLs.

    Sends a GET request to ``BASE_API_URL`` with the module-level ``params``,
    then builds one URL per result item as
    ``BASE_LISTING_URL + <name> + "/" + <listing id> + ".html"``.

    Items that are too short or hold values of an unexpected type are
    reported on stdout and skipped instead of aborting the whole run.

    Returns:
        A list of at most 20 URL strings, in the order the API returned them.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        KeyError: if the response JSON lacks the expected ``data`` fields.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(BASE_API_URL, params=params, timeout=30)
    # Fail on an HTTP error status rather than trying to parse an error body.
    r.raise_for_status()
    data = r.json()["data"]

    # This is the base of the unique id for each listing
    min_posting_id = data["decode"]["minPostingId"]

    listings = []
    for item in data["items"]:
        try:
            # item[0] is an offset that is added to the base id.
            listing_uid = item[0] + min_posting_id
            name = get_name(item[6], item[7], item[5])
            listing_url = BASE_LISTING_URL + name + "/" + str(listing_uid) + ".html"
        except (IndexError, KeyError, TypeError) as e:
            # Malformed item: report it and carry on with the rest.
            print(e)
            print(item)
            continue
        listings.append(listing_url)
    return listings[:20]
def send_on_channel(listings_to_send):
    """Post each listing as its own message to the Telegram channel.

    Args:
        listings_to_send: iterable of strings; each one becomes the text of
            one ``sendMessage`` call.

    Sending is best-effort per message: a network failure or a non-2xx
    response is reported on stdout and the remaining listings are still
    attempted.
    """
    import os  # local import, same pattern main() uses in this file

    # SECURITY: a bot token committed to source is a leaked credential. It
    # should be revoked and supplied only through the environment. The literal
    # is kept as a fallback so existing deployments keep working.
    bot_token = os.environ.get(
        "TELEGRAM_BOT_TOKEN", "6255577679:AAHhEW-FREcAnjv65qvKkayO33CfS0Qp4U0"
    )
    channel_id = "-1001566731041"
    url = "https://api.telegram.org/bot" + bot_token + "/sendMessage"

    for listing in listings_to_send:
        try:
            # Passing the text via `params` URL-encodes it, so characters such
            # as "&", "#", "+" or spaces in a listing URL cannot truncate or
            # corrupt the message the way raw concatenation would.
            r = requests.get(
                url,
                params={"chat_id": channel_id, "text": listing},
                timeout=30,
            )
        except requests.RequestException as e:
            # Print only the exception type: the full message embeds the
            # request URL, which contains the bot token.
            print("failed to send", listing, type(e).__name__)
            continue
        if not r.ok:
            print("failed to send", listing, r.status_code, r.text)
def persist(listings):
    """Overwrite ``DB_FILE`` with the given listings, one per line.

    Args:
        listings: iterable of strings; each is written followed by a newline.
    """
    with open(DB_FILE, "w") as db:
        for url in listings:
            db.write(url + '\n')
def load():
    """Read ``DB_FILE`` and return its lines with surrounding whitespace removed.

    Returns:
        A list with one stripped string per line of the file.
    """
    with open(DB_FILE, "r") as db:
        return [row.strip() for row in db]
def main():
    """Fetch listings, record them in ``DB_FILE`` and announce the unseen ones.

    On the first run (no ``DB_FILE`` yet) every fetched listing is stored and
    sent. On later runs only listings absent from the stored file are sent,
    and the file is rewritten with the union of stored and fetched listings.
    """
    import os

    first_run = not os.path.isfile(DB_FILE)
    if first_run:
        fetched = get_listings()
        persist(fetched)
        send_on_channel(fetched)
        return

    # Load the stored listings before fetching, as the original flow does.
    seen = load()
    current = set(get_listings())

    unseen = current.difference(seen)
    print(unseen)

    # The stored file is updated before the messages go out.
    persist(current.union(seen))
    send_on_channel(unseen)
# Entry point. Called unconditionally at module load: there is no
# `if __name__ == "__main__"` guard, so importing this file also runs it.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment