Skip to content

Instantly share code, notes, and snippets.

@sayhiben
Last active April 6, 2021 17:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sayhiben/f1c85c4e319ceb0cd452332be4a5f4b9 to your computer and use it in GitHub Desktop.
Save sayhiben/f1c85c4e319ceb0cd452332be4a5f4b9 to your computer and use it in GitHub Desktop.
Save a list of available puppies on GoodDog.com in specific breeds to CSV - GoodDog.com's UX is based on navigating from breed -> breeder -> puppies. There are no options to browse available puppies across breeds or even to identify which breeders may soon have another litter. Good Dog expects its users to dig through its listings, page by page, even though their API returns a far more useful and filterable dataset.
import csv
import logging
import requests
import time
# Delay, in seconds, handed to the scraper as its pause between requests.
RATE_LIMIT = 2

# Configure the root logger with level NOTSET (no level filtering requested),
# then create this module's own named logger for the scraper's messages.
logging.basicConfig(level=logging.NOTSET)
logger = logging.getLogger(__name__)
class GooddogScraper:
    """
    Gather the available puppies of selected breeds from GoodDog.com into a CSV.

    GoodDog.com's UX is based on navigating from breed -> breeder -> puppies. There
    are no options to browse available puppies across breeds or even to identify
    which breeders may soon have another litter. Good Dog expects its users to
    dig through its listings, page by page, even though their API returns a far
    more useful and filterable dataset.

    This scraper accepts a list of breeds and a rate limit, then it gathers all of the
    currently available puppies of those respective breeds on Good Dog and outputs
    to a CSV.

    Also - Hey Good Dog Engineers; you're gonna be tempted to limit requests to your
    API in a way that doesn't let me do this. Consider talking to your UX folx
    instead.
    """

    API_BASE = "https://www.gooddog.com/api"
    DEFAULT_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
    }
    # CSV column order. Kept identical to the key order of the rows built by
    # _puppy_rows so the header can be written even when no puppy was found.
    FIELDNAMES = (
        'breed',
        'name',
        'url',
        'distance',
        'health_tier',
        'breeder_name',
        'breeder_wait',
        'breeder_delivery_methods',
    )
    # Seconds to wait for the API before giving up on a single request.
    REQUEST_TIMEOUT = 30

    def __init__(self, postal_code, breeds, rate_limit):
        """
        Store the search parameters.

        Args:
            postal_code: Postal code inserted into the breeder search URL.
            breeds: Iterable of breed identifiers inserted into the API path.
            rate_limit: Seconds to sleep between two consecutive requests.
        """
        self.postal_code = postal_code
        self.breeds = breeds
        self.rate_limit = rate_limit

    def scrape(self, output_path="puppies.csv"):
        """
        Fetch the puppies of every configured breed and write them to a CSV file.

        Breeds whose request fails are logged and skipped. The file is always
        written; when no puppy was found it contains only the header row.

        Args:
            output_path: Destination CSV path. Defaults to "puppies.csv".
        """
        logger.info(f"Scraping {len(self.breeds)} breeds")
        puppies = []
        for index, breed in enumerate(self.breeds):
            # Only pause between requests; nothing precedes the first one.
            if index:
                time.sleep(self.rate_limit)
            logger.info(f" Requesting info for {breed}")
            breed_puppies = self._get_puppies(breed)
            logger.info(f" {len(breed_puppies)} puppies available")
            puppies.extend(breed_puppies)

        # newline="" is required by the csv module to avoid blank rows on
        # platforms that translate line endings; utf-8 keeps non-ASCII names
        # writable regardless of the locale's default encoding.
        with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
            logger.info(f" Writing {len(puppies)} to csv...")
            writer = csv.DictWriter(csvfile, fieldnames=self.FIELDNAMES)
            writer.writeheader()
            writer.writerows(puppies)
        logger.info(f" {len(puppies)} puppies saved to CSV")

    def _get_puppies(self, breed):
        """
        Request the breeder profiles for one breed and flatten them to puppy rows.

        Args:
            breed: Breed identifier inserted into the API path.

        Returns:
            A list of row dicts (see FIELDNAMES). Empty when the request fails,
            the response is not successful, or the body is not usable JSON.
        """
        url = f"{self.API_BASE}/breeds/{breed}/breeder_profiles.json?city=&distance_in_miles=100&non_standard=true&page=1&per_page=500&postal_code={self.postal_code}&state="
        logger.info(f" Requesting {url}...")
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(
                url,
                headers=self.DEFAULT_HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException:
            logger.exception(f" Request for {breed} failed; skipping")
            return []
        logger.info(f" Response code: {response.status_code}")
        if not response.ok:
            logger.warning(f" Unsuccessful response for {breed}; skipping")
            return []

        try:
            payload = response.json()
        except ValueError:
            # requests raises a ValueError subclass when the body is not JSON.
            logger.warning(f" Response for {breed} was not valid JSON; skipping")
            return []
        if not isinstance(payload, dict):
            logger.warning(f" Unexpected response shape for {breed}; skipping")
            return []

        # `or []` also covers an explicit null under 'data'.
        puppies = self._puppy_rows(breed, payload.get('data') or [])
        for puppy in puppies:
            logger.info(f" Found puppy: {puppy}")
        return puppies

    @staticmethod
    def _puppy_rows(breed, breeders):
        """
        Build one CSV row per puppy from a list of breeder records.

        Missing or null fields never raise: absent nested objects are treated
        as empty and a puppy without a 'url' gets an empty url column.

        Args:
            breed: Breed identifier copied into every row.
            breeders: Iterable of breeder dicts as found under the response's
                'data' key.

        Returns:
            A list of dicts keyed in FIELDNAMES order.
        """
        rows = []
        for breeder in breeders:
            # Breeder-level values are computed once, not once per puppy.
            method_names = [
                method.get('name')
                for method in breeder.get('delivery_methods') or []
            ]
            delivery_methods_string = ', '.join(
                name for name in method_names if name
            )
            health_tier = breeder.get('health_tier') or {}

            for puppy in breeder.get('puppies') or []:
                path = puppy.get('url')
                rows.append({
                    'breed': breed,
                    'name': puppy.get('name'),
                    'url': f"https://www.gooddog.com{path}" if path else '',
                    'distance': breeder.get('distance_in_miles'),
                    'health_tier': health_tier.get('title'),
                    'breeder_name': breeder.get('name'),
                    'breeder_wait': breeder.get('estimated_wait_description'),
                    'breeder_delivery_methods': delivery_methods_string,
                })
        return rows
# Breed identifiers handed to GooddogScraper; they are requested in list order.
breeds = [
    'alaskan-klee-kai',
    'australian-terrier',
    'biewer-terrier',
    'cairn-terrier',
    'cavapoo',
    'glen-of-imaal-terrier',
    'havanese',
    'havapoo',
    'italian-greyhound',
    'jack-russell-terrier',
    'maltipoo',
    'mi-ki',
    'miniature-pinscher',
    'miniature-schnauzer',
    'morkie',
    'norfolk-terrier',
    'norwich-terrier',
    'papillon',
    'parson-russell-terrier',
    'pomeranian',
    'pomsky',
    'rat-terrier',
    'russell-terrier',
    'schipperke',
    'schnoodle',
    'scottish-terrier',
    'shiba-inu',
    'shichon',
    'shihpoo',
    'shorkie',
    'soft-coated-wheaten-terrier',
    'swedish-vallhund',
    'tibetan-terrier',
    'toy-fox-terrier',
    'welsh-terrier',
    'west-highland-white-terrier',
    'wire-fox-terrier',
    'yorkipoo',
    'yorkshire-terrier',
    'silky-terrier',
]

if __name__ == "__main__":
    # Only scrape when executed as a script, so importing this module does not
    # trigger network requests or overwrite the CSV as a side effect.
    scraper = GooddogScraper("98101", breeds, RATE_LIMIT)
    scraper.scrape()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment