## danielcaraway/getJCrewItems.py

## getJCrewItems.py
import re
import urllib
from bs4 import BeautifulSoup

## STEP 1: Get product ids
## Scrape the specific category page and get ids
## (CSS as of 3/5/20)
def get_shirts(category):
    """Return the product/color ids listed on a J.Crew women's category page.

    Downloads the listing page for ``category``, finds every
    ``div.product-tile`` element and, for each one carrying a
    ``data-product`` attribute, builds an ``"<id>_<color>"`` string from the
    ``id`` and ``color`` entries of that attribute. Tiles without the
    attribute are printed for inspection and skipped.

    Despite the name, this works for any category slug, not only shirts.

    Args:
        category: Category slug appended to the listing URL, e.g. ``'shoes'``.
            The query string requests ``Npge=1&Nrpp=1000`` (presumably page 1
            with up to 1000 results; confirm against the site).

    Returns:
        A list of ``"<id>_<color>"`` strings in document order.

    Raises:
        urllib.error.URLError: If the listing page cannot be fetched.
        ValueError, SyntaxError: If a ``data-product`` attribute is neither
            valid JSON nor a Python literal.
        KeyError: If the parsed product data lacks ``id`` or ``color``.
    """
    # Local imports: a bare ``import urllib`` does not load the
    # ``urllib.request`` submodule, so it is imported explicitly here.
    import ast
    import json
    import urllib.request

    url = "https://www.jcrew.com/c/womens_category/" + category + "?Npge=1&Nrpp=1000"
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    items = []
    for tile in soup.findAll("div", {"class": "product-tile"}):
        raw = tile.get('data-product')
        if raw is None:
            # Keep the diagnostic dump of tiles that carry no product data.
            print(tile)
            continue
        # SECURITY: this attribute is untrusted remote content. It used to be
        # passed to eval(), which would execute arbitrary code served by the
        # page. Parse it as data instead: JSON first, then a Python literal
        # as a fallback for payloads that are not strict JSON.
        try:
            product = json.loads(raw)
        except ValueError:
            product = ast.literal_eval(raw)
        items.append(str(product['id']) + '_' + str(product['color']))
    return items

## ---- for full loop
## categories = ['shirts_tops','pants','denim_jeans','dressesandjumpsuits','shoes']

## Run the thing!!
## ``import urllib`` alone does not load the ``urllib.request`` submodule,
## so import it explicitly before any request is made.
import urllib.request

## One category name drives both the scrape and the saved filenames, so the
## two can never disagree.
category = 'shoes'
items = get_shirts(category)

## ---- I separated these because I was testing in between, can loop for full file
## STEP TWO: Download the images
## Use the ids from step 1 to download the images
## The URL pieces around the item id never change, so build them once.
_IMAGE_URL_PREFIX = 'https://www.jcrew.com/s7-img-facade/'
_IMAGE_URL_QUERY = '?fmt=jpeg&qlt=90,0&resMode=sharp&op_usm=.1,0,0,0&crop=0,0,0,0&wid=160&hei=160'

for item in items:
    url = _IMAGE_URL_PREFIX + item + _IMAGE_URL_QUERY
    ## Saved into the current working directory as <category>_<id>_<color>.jpeg
    filename = category + '_' + item + '.jpeg'
    urllib.request.urlretrieve(url, filename)
	import re
	import urllib
	from bs4 import BeautifulSoup

	## STEP 1: Get product ids
	## Scrape the specific category page and get ids
	## (CSS as of 3/5/20)
	def get_shirts(category):
		"""Return the product/color ids listed on a J.Crew women's category page.

		Downloads the listing page for ``category``, finds every
		``div.product-tile`` element and, for each one carrying a
		``data-product`` attribute, builds an ``"<id>_<color>"`` string from
		the ``id`` and ``color`` entries of that attribute. Tiles without the
		attribute are printed for inspection and skipped.

		Despite the name, this works for any category slug, not only shirts.

		Args:
			category: Category slug appended to the listing URL, e.g.
				``'shoes'``. The query string requests ``Npge=1&Nrpp=1000``
				(presumably page 1 with up to 1000 results; confirm
				against the site).

		Returns:
			A list of ``"<id>_<color>"`` strings in document order.

		Raises:
			urllib.error.URLError: If the listing page cannot be fetched.
			ValueError, SyntaxError: If a ``data-product`` attribute is
				neither valid JSON nor a Python literal.
			KeyError: If the parsed product data lacks ``id`` or ``color``.
		"""
		# Local imports: a bare ``import urllib`` does not load the
		# ``urllib.request`` submodule, so it is imported explicitly here.
		import ast
		import json
		import urllib.request

		url = "https://www.jcrew.com/c/womens_category/" + category + "?Npge=1&Nrpp=1000"
		# The context manager closes the HTTP response once the body is read.
		with urllib.request.urlopen(url) as response:
			html = response.read()
		soup = BeautifulSoup(html, 'html.parser')

		items = []
		for tile in soup.findAll("div", {"class": "product-tile"}):
			raw = tile.get('data-product')
			if raw is None:
				# Keep the diagnostic dump of tiles that carry no product data.
				print(tile)
				continue
			# SECURITY: this attribute is untrusted remote content. It used
			# to be passed to eval(), which would execute arbitrary code
			# served by the page. Parse it as data instead: JSON first, then
			# a Python literal as a fallback for non-strict-JSON payloads.
			try:
				product = json.loads(raw)
			except ValueError:
				product = ast.literal_eval(raw)
			items.append(str(product['id']) + '_' + str(product['color']))
		return items

	## ---- for full loop
	## categories = ['shirts_tops','pants','denim_jeans','dressesandjumpsuits','shoes']

	## Run the thing!!
	## ``import urllib`` alone does not load the ``urllib.request`` submodule,
	## so import it explicitly before any request is made.
	import urllib.request

	## One category name drives both the scrape and the saved filenames, so
	## the two can never disagree.
	category = 'shoes'
	items = get_shirts(category)

	## ---- I separated these because I was testing in between, can loop for full file
	## STEP TWO: Download the images
	## Use the ids from step 1 to download the images
	## The URL pieces around the item id never change, so build them once.
	_IMAGE_URL_PREFIX = 'https://www.jcrew.com/s7-img-facade/'
	_IMAGE_URL_QUERY = '?fmt=jpeg&qlt=90,0&resMode=sharp&op_usm=.1,0,0,0&crop=0,0,0,0&wid=160&hei=160'

	for item in items:
		url = _IMAGE_URL_PREFIX + item + _IMAGE_URL_QUERY
		## Saved into the current working directory as <category>_<id>_<color>.jpeg
		filename = category + '_' + item + '.jpeg'
		urllib.request.urlretrieve(url, filename)