@danielcaraway
Created March 5, 2020 22:43
Get photos of JCrew items with Beautiful Soup
import re
import urllib.request
from bs4 import BeautifulSoup
## STEP 1: Get product ids
## Scrape the specific category page and get ids
## (CSS as of 3/5/20)
def get_shirts(category):
    url = "https://www.jcrew.com/c/womens_category/" + category + "?Npge=1&Nrpp=1000"
    html = urllib.request.urlopen(url).read()
    soup = BeautifulSoup(html, 'html.parser')
    tiles = soup.find_all("div", {"class": "product-tile"})
    items = []
    for num, t in enumerate(tiles):
        if t.get('data-product') is not None:
            # data-product holds a dict-like string with the product id and color code
            d = eval(t.get('data-product'))
            items.append(str(d['id']) + '_' + str(d['color']))
        else:
            print(t)
    return items
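
## ---- Optional alternative: if the data-product attribute is valid JSON,
## ---- json.loads is a safer way to parse it than eval. This helper is a
## ---- sketch, not part of the original gist; it assumes the attribute has
## ---- 'id' and 'color' keys, as the code above implies.
import json

def parse_product(tile):
    raw = tile.get('data-product')
    if raw is None:
        return None
    d = json.loads(raw)
    return str(d['id']) + '_' + str(d['color'])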
## ---- for full loop
## categories = ['shirts_tops','pants','denim_jeans','dressesandjumpsuits','shoes']
## Run the thing!!
items = get_shirts('shoes')
## ---- Steps 1 and 2 are separated here because I was testing in between;
## ---- see the combined loop sketch after Step 2 for a full run
## STEP 2: Download the images
## Use the ids from step 1 to download the images
import urllib.request

category = 'shoes'
for item in items:
    url = 'https://www.jcrew.com/s7-img-facade/' + item + '?fmt=jpeg&qlt=90,0&resMode=sharp&op_usm=.1,0,0,0&crop=0,0,0,0&wid=160&hei=160'
    filename = category + '_' + item + '.jpeg'
    urllib.request.urlretrieve(url, filename)
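
## ---- Full run: loop over all the categories listed above, combining Step 1
## ---- and Step 2. A sketch, not part of the original gist; it reuses
## ---- get_shirts and the same image URL / filename pattern.
categories = ['shirts_tops', 'pants', 'denim_jeans', 'dressesandjumpsuits', 'shoes']
for category in categories:
    for item in get_shirts(category):
        url = ('https://www.jcrew.com/s7-img-facade/' + item +
               '?fmt=jpeg&qlt=90,0&resMode=sharp&op_usm=.1,0,0,0&crop=0,0,0,0&wid=160&hei=160')
        filename = category + '_' + item + '.jpeg'
        urllib.request.urlretrieve(url, filename)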