Skip to content

Instantly share code, notes, and snippets.

@streeter
Last active July 24, 2016 03:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save streeter/8fedecdeec3d0c7f960a to your computer and use it in GitHub Desktop.
Scrape the backprint.com event photo site and download the largest sized images from guessable URLs
#!/usr/bin/env python
"""Scrape the backprint.com event photo site and download the largest
available images for a set of race bib numbers.

Each bib's gallery page is fetched, thumbnail URLs ending in 't.jpg'
are collected, rewritten to the full-size 'h.jpg' variant, and saved
into a per-bib directory named 'photos_<bib>'.
"""
import os
import re

import requests

# Event and bib numbers to scrape; edit these to target a different race.
event = 138191
bibs = (496, )
url = 'http://www.backprint.com/facchinophotography/{event}/{bib}'
# Thumbnails are served from webres.backprint.com and end in 't.jpg'.
thumb_pattern = re.compile(r'http\:\/\/webres.backprint.com/.*?t\.jpg')

for bib in bibs:
    try:
        res = requests.get(url.format(event=event, bib=bib))
        res.raise_for_status()
    except requests.exceptions.RequestException:
        # RequestException covers connection/timeout failures as well as
        # HTTP error statuses, so one bad bib can't crash the whole run.
        print('[ERROR] Unable to get info for bib {}'.format(bib))
        continue
    # Get all the thumbnail links. Use res.text (str) rather than
    # res.content (bytes) so the str-pattern regex works on Python 3.
    strings = thumb_pattern.findall(res.text)
    if not strings:
        print('[ERROR] Unable to find any matches for the thumbnail pattern!')
        continue
    # Create a list of all the large images. There are images at
    # 'f.jpg', 'h.jpg', and 't.jpg'; the size specifier is case-insensitive.
    images = [thumb.replace('t.jpg', 'h.jpg') for thumb in strings]
    # Create a directory to store the downloads in.
    photo_dir = 'photos_{}'.format(bib)
    if not os.path.isdir(photo_dir):
        os.makedirs(photo_dir)
    for image_url in images:
        filename = os.path.basename(image_url)
        try:
            # stream=True defers the body download so it can be written
            # to disk in chunks instead of held in memory.
            r = requests.get(image_url, stream=True)
            r.raise_for_status()
        except requests.exceptions.RequestException:
            print('[ERROR] Unable to download the image: {}'.format(image_url))
            continue
        with open(os.path.join(photo_dir, filename), 'wb') as fd:
            for chunk in r.iter_content(512):
                fd.write(chunk)
        print('Downloaded ' + filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment