Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Scrape the CDN for Runway Shows
import requests, os
import multiprocessing
from PIL import Image
from StringIO import StringIO
from BeautifulSoup import BeautifulSoup
# VogueGallery: fetches a gallery index page and builds a Show for each listed entry.
# NOTE(review): this block was damaged during extraction -- indentation is gone and
# several lines are truncated; the notes below mark the visible breakage.
class VogueGallery(object):
def __init__(self, url):
# NOTE(review): two statements appear fused here (a base_url string and a
# requests.get(url) page fetch); the name that received the page content is
# missing and a string literal cannot be an assignment target.
self.base_url = 'HTTP://' = requests.get(url).content
# Collected shows; the __main__ block iterates this list.
self.designers = []
def populate_shows(self):
# NOTE(review): as written the BeautifulSoup constructor receives 'li' as its
# first argument rather than page markup; presumably this was a search of the
# fetched page for 'li' elements of class 'directoryListItem' -- confirm.
shows = BeautifulSoup('li', 'directoryListItem')
# NOTE(review): the lambda's dict braces and the map() iterable / closing
# parenthesis are missing.
show_urls = map(
lambda tag:
'image': tag['data-directoryimageurl'],
'collection': self.base_url+tag['data-showurl']
for data in show_urls:
# NOTE(review): the `try:` matching the except below is missing, as is whatever
# stored `show` (presumably into self.designers) -- confirm.
show = Show(data['image'], data['collection'])
except Exception as e:
print e
class ImageURLParser(object):
    """Split a runway image URL into season, year, city, type and store name.

    The URL is split on '/' and the first five pieces (scheme, empty piece,
    host and two leading path segments) are discarded. The next four
    segments are read positionally::

        <season><year> / <city> / <collection type> / <store name>

    where the season is the first two characters of the first segment and
    the year is the remainder (kept as a string; callers convert it).

    NOTE(review): the extracted source read
    ``self.year = vals[0][2:] = vals[1]`` -- a chained assignment whose
    slice target fails on a str. It is restored here as two assignments.
    The attribute name ``city`` is inferred from the ``%(city)s`` component
    used when the image path is rebuilt elsewhere in this file -- confirm
    against the original gist.

    Raises:
        IndexError: if the URL has fewer than four segments after the prefix.
    """

    def __init__(self, url):
        vals = url.split('/')[5:]
        season_and_year = vals[0]
        self.season = season_and_year[:2]
        self.year = season_and_year[2:]
        self.city = vals[1]
        self.collection_type = vals[2]
        self.store_name = vals[3]
# Show: one runway collection, described by an image URL (parsed with
# ImageURLParser) and the URL of its collection page.
# NOTE(review): indentation was lost in extraction and two lines below are truncated.
class Show(object):
def __init__(self, image_url, collection_url):
# NOTE(review): two statements are fused here; the attribute that received the
# human-readable name from _toHumanName is missing.
parsedURL = ImageURLParser(image_url) = self._toHumanName(parsedURL.store_name)
self.store_name = parsedURL.store_name
self.season = parsedURL.season
self.show_type = parsedURL.collection_type
# NOTE(review): trailing `=` -- a following assignment was lost (possibly the
# city, which images_url formats with %(city)s) -- confirm.
self.year = int(parsedURL.year) =
self.base_url = collection_url
# Stays None until get_image_count() assigns it.
self.image_count = None
# Builds the path prefix for this show's images from season, year, city,
# show type and store name.
# NOTE(review): the leading string literal is empty (a host prefix was apparently
# stripped), the mapping has no 'city' key although the format uses %(city)s, and
# the closing `}))` is missing.
# NOTE(review): download_show interpolates `self.images_url` without calling it, so
# this was presumably decorated with @property -- confirm.
def images_url(self):
return (""
"%(season)s%(year)d/%(city)s/%(show_type)s/%(designer)s/" % ({
'season': self.season,
'year': self.year,
'show_type': self.show_type,
'designer': self.store_name
def get_image_count(self):
    """Fetch the first slide page and record how many images the show has.

    Requests ``<base_url>/image/1``, looks up the element whose id is
    ``SlideNumbering`` and reads the integer after the last ' of ' in its
    first child. The result is stored on ``self.image_count``; it is None
    when the element is missing, empty, non-numeric or reports zero.

    Returns:
        True, always.
    """
    def parse_total(page):
        counter = page.find(id="SlideNumbering")
        # The original dereferenced the lookup unconditionally, so a page
        # without the counter raised AttributeError instead of yielding None.
        if counter is None or not counter.contents:
            return None
        sentence = counter.contents[0].strip()
        if not sentence:
            return None
        try:
            total = int(sentence.split(' of ')[-1])
        except ValueError:
            # Text without a numeric total is treated as "unknown".
            return None
        return total or None

    page = requests.get(self.base_url + '/image/1').content
    self.image_count = parse_total(BeautifulSoup(page))
    return True
# Downloads the show's images into a local directory named after the designer,
# show type, season and year.
# NOTE(review): this method is heavily truncated by extraction; notes below mark
# each visible gap.
def download_show(self):
designer = self.store_name
season = self.season
show_type = self.show_type
year = self.year
# NOTE(review): right-hand side lost in extraction.
city =
def prepare_path(designer, season, show_type, year):
# NOTE(review): right-hand side lost in extraction.
designer =
# Fills the directory template from the local names via locals().
return "%(designer)s/%(show_type)s/%(season)s%(year)d/" % locals()
path = prepare_path(designer, season, show_type, year)
print path
# Remote files are requested in steps of 10 (00010big.jpg, 00020big.jpg, ...);
# the local name is the step divided by 10 plus '.jpg'.
for i in xrange(10, 2000000, 10):
img = str(i).zfill(5)
name = str(i/10)+'.jpg'
url = "%s%s" % (self.images_url, ("%sbig.jpg" % img))
# NOTE(review): the body of this check (presumably creating the directory) is missing.
if not os.path.exists(path):
# Presumably only fetches when the target file is not already on disk -- the
# original nesting is not recoverable from this extract.
if not os.path.exists(path+name):
r = requests.get(url)
# NOTE(review): whatever followed the failure message (presumably leaving the
# loop, since the range has no natural end) is missing.
if not (r.status_code >= 200 and r.status_code <= 301):
print "Failed to fetch %s " % self
# NOTE(review): truncated -- the image decode/save call and the `try:` that the
# except below belongs to are missing.
im =, 'JPEG')
except IOError as e:
print "Failed to write out " + path+name
return True
# NOTE(review): the body is missing in this extract. The __main__ block creates a
# multiprocessing pool, so instances were presumably submitted to it as callables
# -- restore from the original gist.
def __call__(self):
def _toHumanName(self, name):
    """Return *name* with every underscore replaced by a space.

    Example: 'Acne_Studios' -> 'Acne Studios'. Does not read instance state.
    """
    return name.replace('_', ' ')
# Text representation built from four fields: a missing first field, then show
# type, then season immediately followed by year.
# NOTE(review): the first tuple element was lost in extraction (the tuple starts
# with a comma), leaving three values for four %s placeholders.
def __repr__(self):
return "%s %s %s%s" % (, self.show_type, self.season, self.year)
# Script entry point: builds a gallery, creates an 8-process pool and waits on
# the collected results.
# NOTE(review): the gallery URL literal is empty (stripped in extraction) and the
# body of the for loop -- presumably submitting each show to the pool and
# appending the async result to `res` -- is missing.
if __name__ == '__main__':
ss2013 = VogueGallery('')
res = []
pool = multiprocessing.Pool(processes=8)
for designer in ss2013.designers:
# Blocks until every submitted task has finished.
[r.get() for r in res]

This comment has been minimized.

Copy link
Owner Author

commented Jul 19, 2012

Still a bit rough and poorly tested; it needs to be refactored into either a proper command-line script or a library of some sort.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.