Skip to content

Instantly share code, notes, and snippets.

@reinderien
Created January 28, 2016 21:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reinderien/2261b6e57d17ab11564e to your computer and use it in GitHub Desktop.
Save reinderien/2261b6e57d17ab11564e to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
from requests import Session
import csv
def ref(head):
    """Return the spreadsheet cell reference for column *head* on the current row.

    The column letter comes from the position of *head* in the module-level
    ``theads`` tuple (first header -> ``A``, second -> ``B``, ...), and the
    row number is whatever the module-level ``irow`` counter holds at call
    time. For example, the third header on row 2 yields ``'C2'``.

    Raises ``ValueError`` (from ``tuple.index``) if *head* is not in
    ``theads``.
    """
    # Single-letter columns only: offsets past 25 would run beyond 'Z'.
    column_offset = theads.index(head)
    column_letter = chr(ord('A') + column_offset)
    return '%s%d' % (column_letter, irow)
# Scrape the ATTS breed-statistics pages (1 through 8) into dogs.csv.
#
# 'Breed Name', 'Passed' and 'Failed' are copied verbatim from each table row;
# 'Tested' and 'Percent' are written as spreadsheet formulas that refer to the
# other cells of the same row (built with ref()), so the spreadsheet
# application computes them when the CSV is opened.
theads = ('Breed Name', 'Tested', 'Passed', 'Failed', 'Percent')
# Columns whose text is taken directly from the scraped table.
copied_heads = ('Breed Name', 'Passed', 'Failed')

# Both the HTTP session and the output file are released on exit, including
# when an exception propagates.
with Session() as sess, open('dogs.csv', 'w', newline='') as f:
    tab = csv.DictWriter(f, fieldnames=theads)
    tab.writeheader()
    # Spreadsheet row of the next data line; row 1 is the header. ref() reads
    # this module-level counter, so it must stay in step with writerow().
    irow = 2
    reached_totals = False
    for page in range(1, 9):
        resp = sess.get(
            'http://atts.org/breed-statistics/statistics-page%d' % page,
            timeout=30,  # fail instead of hanging forever on a stalled server
        )
        # Stop on HTTP errors rather than trying to parse an error page.
        resp.raise_for_status()
        bs = BeautifulSoup(resp.text, 'html.parser')

        table_rows = bs.find_all('tr')
        if not table_rows:
            raise RuntimeError(
                'no table rows found on statistics page %d' % page)

        # The first <tr> carries the column titles (in <span> elements);
        # resolve the positions of the copied columns once per page.
        heads = [s.text for s in table_rows[0].find_all('span')]
        columns = {h: heads.index(h) for h in copied_heads}

        for row in table_rows[1:]:
            # Data cells are read from the <strong> elements of each row.
            cells = [s.text for s in row.find_all('strong')]
            if cells[columns['Breed Name']] == 'TOTALS':
                # The TOTALS row marks the end of the data; nothing after it
                # (on this or later pages) is written.
                reached_totals = True
                break
            trow = {h: cells[i] for h, i in columns.items()}
            trow['Tested'] = '=%s+%s' % (ref('Passed'), ref('Failed'))
            trow['Percent'] = '=%s/%s' % (ref('Passed'), ref('Tested'))
            tab.writerow(trow)
            irow += 1
        if reached_totals:
            break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment