Skip to content

Instantly share code, notes, and snippets.

@streeter
Last active July 24, 2016 03:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save streeter/8fedecdeec3d0c7f960a to your computer and use it in GitHub Desktop.
Scrape the backprint.com event photo site and download the largest sized images from guessable URLs
#!/usr/bin/env python
"""Scrape the backprint.com event photo site and download the largest
available images for a set of race bib numbers.

Each bib's gallery page is fetched, thumbnail URLs ending in 't.jpg'
are collected, rewritten to the full-size 'h.jpg' variant, and saved
into a per-bib directory named 'photos_<bib>'.
"""
import os
import re

import requests

# Event and bib numbers to scrape; edit these to target a different race.
event = 138191
bibs = (496, )
url = 'http://www.backprint.com/facchinophotography/{event}/{bib}'
# Thumbnails are served from webres.backprint.com and end in 't.jpg'.
thumb_pattern = re.compile(r'http\:\/\/webres.backprint.com/.*?t\.jpg')

for bib in bibs:
    try:
        res = requests.get(url.format(event=event, bib=bib))
        res.raise_for_status()
    except requests.exceptions.RequestException:
        # RequestException covers connection/timeout failures as well as
        # HTTP error statuses, so one bad bib can't crash the whole run.
        print('[ERROR] Unable to get info for bib {}'.format(bib))
        continue
    # Get all the thumbnail links. Use res.text (str) rather than
    # res.content (bytes) so the str-pattern regex works on Python 3.
    strings = thumb_pattern.findall(res.text)
    if not strings:
        print('[ERROR] Unable to find any matches for the thumbnail pattern!')
        continue
    # Create a list of all the large images. There are images at
    # 'f.jpg', 'h.jpg', and 't.jpg'; the size specifier is case-insensitive.
    images = [thumb.replace('t.jpg', 'h.jpg') for thumb in strings]
    # Create a directory to store the downloads in.
    photo_dir = 'photos_{}'.format(bib)
    if not os.path.isdir(photo_dir):
        os.makedirs(photo_dir)
    for image_url in images:
        filename = os.path.basename(image_url)
        try:
            # stream=True defers the body download so it can be written
            # to disk in chunks instead of held in memory.
            r = requests.get(image_url, stream=True)
            r.raise_for_status()
        except requests.exceptions.RequestException:
            print('[ERROR] Unable to download the image: {}'.format(image_url))
            continue
        with open(os.path.join(photo_dir, filename), 'wb') as fd:
            for chunk in r.iter_content(512):
                fd.write(chunk)
        print('Downloaded ' + filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment