Skip to content

Instantly share code, notes, and snippets.

@loisaidasam
Last active June 16, 2023 14:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save loisaidasam/8734712 to your computer and use it in GitHub Desktop.
Save loisaidasam/8734712 to your computer and use it in GitHub Desktop.
Hack of the day: How to scour craigslist for a 62cm-64cm bicycle
import time
from bs4 import BeautifulSoup
import requests
BASE_URL = "http://newyork.craigslist.org"
URL = "http://newyork.craigslist.org/search/?sort=rel&areaID=3&subAreaID=&query=bicycle&catAbb=sss"
# Delay between page fetches, to be polite to craigslist's servers.
SLEEP_BETWEEN_REQS_SECS = 5

# Set search items to be the lowercased search strings you're looking for
# (for example, these bike frame sizes that I'm looking for).
# Both spaced ("62 cm") and unspaced ("62cm") spellings are generated
# for each size, since listings use either form.
SEARCH_ITEMS = [
    fmt % size
    for size in (62, 63, 64)
    for fmt in ('%s cm', '%scm')
]
def scour_link(link):
    """Fetch a single listing page and report any matching search terms.

    Prints the page title, then a FOUND line for each SEARCH_ITEMS entry
    present in the page body (case-insensitive match), and finally sleeps
    SLEEP_BETWEEN_REQS_SECS to avoid hammering the server.

    Raises requests.HTTPError on a non-2xx response.
    """
    print("scour_link(%s)" % link)
    response = requests.get(link)
    response.raise_for_status()
    # Name the parser explicitly so BeautifulSoup doesn't warn (and so
    # results don't depend on which parser happens to be installed).
    soup = BeautifulSoup(response.content, "html.parser")
    print(soup.title.string)
    body = response.text.lower()
    for search_item in SEARCH_ITEMS:
        if search_item in body:
            # Bug fix: the original printed the literal "%s" because it
            # never interpolated search_item into the format string.
            print("\tFOUND %s!" % search_item)
    print("")
    time.sleep(SLEEP_BETWEEN_REQS_SECS)
def scour():
    """Fetch the craigslist search results page and scour each listing.

    Collects the unique hrefs containing '.html' from the results page's
    <div class="content">, resolves relative links against BASE_URL, and
    hands each one to scour_link().

    Raises requests.HTTPError on a non-2xx response.
    """
    response = requests.get(URL)
    response.raise_for_status()
    # Name the parser explicitly so BeautifulSoup doesn't warn (and so
    # results don't depend on which parser happens to be installed).
    soup = BeautifulSoup(response.content, "html.parser")
    content = soup.find('div', attrs={'class': 'content'})
    links = set()
    for anchor in content.find_all('a'):
        url = anchor.get('href')
        # Bug fix: anchors without an href attribute yield None, and
        # "'.html' in None" raises TypeError -- skip those anchors.
        if url and '.html' in url:
            links.add(url)
    print("Found %s links to scour" % len(links))
    for link in links:
        if not link.startswith('http'):
            # Relative listing URL: make it absolute.
            link = "%s%s" % (BASE_URL, link)
        scour_link(link)
def main():
    """Entry point: run one full scour of the search results."""
    scour()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment