Scrape the CDN for Runway Shows
import requests, os
import multiprocessing
from PIL import Image
from StringIO import StringIO
from BeautifulSoup import BeautifulSoup
class VogueGallery(object):
# Represents one gallery index page: the constructor fetches `url` with
# requests and starts with an empty self.designers list.
# NOTE(review): indentation is missing throughout this class and several
# statements look truncated; the notes below describe only what is visible.
def __init__(self, url):
# NOTE(review): not valid Python as written -- a string literal sits in the
# middle of a chained assignment. Two statements appear to have been fused:
# one setting self.base_url to an 'HTTP://...' prefix and one storing
# requests.get(url).content on some attribute. Confirm both.
self.base_url = 'HTTP://' = requests.get(url).content
self.designers = []
def populate_shows(self):
# NOTE(review): as written this passes the strings 'li' and
# 'directoryListItem' straight to the BeautifulSoup constructor; presumably
# it was meant to search the fetched page for matching <li> tags -- confirm.
shows = BeautifulSoup('li', 'directoryListItem')
# Intended to map each tag to an 'image' / 'collection' pair taken from its
# data-directoryimageurl and data-showurl attributes, with the collection
# URL prefixed by self.base_url.
# NOTE(review): the dict braces, the iterable argument and the closing
# parenthesis of map(...) are missing from the lines below.
show_urls = map(
lambda tag:
'image': tag['data-directoryimageurl'],
'collection': self.base_url+tag['data-showurl']
# Builds a Show from each image/collection pair.
# NOTE(review): the `try:` that pairs with the `except` below is missing,
# and nothing visible adds `show` to self.designers.
for data in show_urls:
show = Show(data['image'], data['collection'])
except Exception as e:
# Python 2 print statement: reports the error and carries on.
print e
class ImageURLParser(object):
    """Split a show image URL into the fields encoded in its path.

    The URL is split on '/' and the segments from index 5 onward are read as
    ``<season><year>/<city>/<collection_type>/<store_name>/...``. This is the
    same segment order that ``Show.images_url`` formats.

    Attributes:
        season: first two characters of the season/year segment (e.g. 'SS').
        year: remainder of that segment, kept as a string (e.g. '2013').
        city: the segment following season/year.
        collection_type: the segment following the city.
        store_name: the designer's underscore-separated name segment.

    Raises:
        IndexError: if the URL has fewer than nine '/'-separated segments.
    """

    def __init__(self, url):
        vals = url.split('/')[5:]
        season_year = vals[0]
        self.season = season_year[:2]
        # The original line read `self.year = vals[0][2:] = vals[1]`, which
        # set year to the wrong segment and then raised TypeError on the
        # string slice assignment. Year and city are separate fields.
        self.year = season_year[2:]
        self.city = vals[1]
        self.collection_type = vals[2]
        self.store_name = vals[3]
class Show(object):
# One runway show, built from an image URL (parsed for season, year, show
# type and store name) plus the URL of the show's collection page.
# NOTE(review): indentation is missing throughout this class and several
# lines are truncated; the notes below describe only what is visible.
def __init__(self, image_url, collection_url):
# NOTE(review): two statements appear fused on the next line. The right-hand
# part builds a display name with _toHumanName(); it was presumably stored
# on an attribute that __repr__ prints first -- confirm its name.
parsedURL = ImageURLParser(image_url) = self._toHumanName(parsedURL.store_name)
self.store_name = parsedURL.store_name
self.season = parsedURL.season
self.show_type = parsedURL.collection_type
# NOTE(review): dangling '=' -- a further assignment seems to have been cut
# off here (images_url formats a 'city' key that is never supplied).
self.year = int(parsedURL.year) =
self.base_url = collection_url
# Stays None until get_image_count() stores a value.
self.image_count = None
def images_url(self):
# Formats the image directory path
# "<season><year>/<city>/<show_type>/<designer>/" from this show's fields.
# NOTE(review): the leading URL literal is empty, the mapping has no 'city'
# entry although the format string requires one, and the closing "})" is
# missing. download_show reads self.images_url without calling it, so this
# was presumably decorated with @property -- confirm.
return (""
"%(season)s%(year)d/%(city)s/%(show_type)s/%(designer)s/" % ({
'season': self.season,
'year': self.year,
'show_type': self.show_type,
'designer': self.store_name
def get_image_count(self):
# Fetches "<base_url>/image/1", parses the slide total out of the page and
# stores it in self.image_count; returns True.
def parse_total(page):
# Reads the first content node of the element with id "SlideNumbering" and
# returns the integer after the last ' of ' (presumably text such as
# "1 of 42" -- confirm). Appears to return None when the text is empty or
# the total is 0; the nesting cannot be confirmed without indentation.
sentence = page.find(id="SlideNumbering").contents[0].strip()
if sentence:
total = int(sentence.split(' of ')[-1])
if total:
return total
return None
page = requests.get(self.base_url+'/image/1').content
soup = BeautifulSoup(page)
self.image_count = parse_total(soup)
return True
def download_show(self):
# Downloads the show's images into a local
# "<designer>/<show_type>/<season><year>/" directory, skipping files that
# already exist; returns True.
designer = self.store_name
season = self.season
show_type = self.show_type
year = self.year
# NOTE(review): truncated assignment -- the right-hand side is missing.
city =
def prepare_path(designer, season, show_type, year):
# Builds the relative output directory from the local variables.
# NOTE(review): the assignment below is truncated; whatever normalisation
# was applied to `designer` is not visible.
designer =
return "%(designer)s/%(show_type)s/%(season)s%(year)d/" % locals()
path = prepare_path(designer, season, show_type, year)
print path
# Image files are numbered in steps of 10 and zero-padded to five digits
# ("00010big.jpg", "00020big.jpg", ...); each is saved locally as
# "<i/10>.jpg" (Python 2 integer division).
# NOTE(review): no break/return is visible inside the loop, so as written it
# would run to the 2000000 bound -- presumably a failed fetch ended it.
for i in xrange(10, 2000000, 10):
img = str(i).zfill(5)
name = str(i/10)+'.jpg'
url = "%s%s" % (self.images_url, ("%sbig.jpg" % img))
# NOTE(review): the body of this check is missing; presumably it created
# the output directory.
if not os.path.exists(path):
# Only fetch images that are not already on disk.
if not os.path.exists(path+name):
r = requests.get(url)
# Any status outside 200..301 inclusive is reported as a failure.
if not (r.status_code >= 200 and r.status_code <= 301):
print "Failed to fetch %s " % self
# NOTE(review): truncated -- only the tail of what looks like a call saving
# the image as JPEG is left, and the `try:` for the `except` below is
# missing.
im =, 'JPEG')
except IOError as e:
print "Failed to write out " + path+name
return True
# NOTE(review): the body of __call__ is missing.
def __call__(self):
def _toHumanName(self, name):
# Turns an underscore-separated store name into a space-separated one.
return name.replace('_', ' ')
def __repr__(self):
# NOTE(review): the first tuple element is missing; the format string has
# four slots but only three values are supplied.
return "%s %s %s%s" % (, self.show_type, self.season, self.year)
if __name__ == '__main__':
# Script entry point: builds a gallery, creates an 8-process pool and waits
# on the collected results.
# NOTE(review): the gallery URL literal is empty, and populate_shows() is
# never called in the visible code, so ss2013.designers would stay empty.
ss2013 = VogueGallery('')
res = []
pool = multiprocessing.Pool(processes=8)
# NOTE(review): the loop body is missing; presumably each designer was
# submitted to `pool` and the async result appended to `res` -- confirm.
for designer in ss2013.designers:
# Blocks until every collected result is ready.
[r.get() for r in res]

Still a bit rough and poorly tested; it needs to be refactored into either a proper command-line script or a library of some sort.

