Skip to content

Instantly share code, notes, and snippets.

@fvaletk
Created February 22, 2017 02:56
Show Gist options
  • Save fvaletk/66dcafe5bd0bbcfeb63297f8bb217cbb to your computer and use it in GitHub Desktop.
Save fvaletk/66dcafe5bd0bbcfeb63297f8bb217cbb to your computer and use it in GitHub Desktop.
jcpenny_scraper.rb
require 'HTTParty'
require 'Nokogiri'
require 'Pry'
# Base URL that an image file name, followed by one of the size query strings
# below, is appended to in order to form a full image URL.
@image_base_url = 'http://zoom.jcpenney.com/is/image/'
# Query-string tail shared by all three sizes (kept verbatim, repeated keys included).
image_sharpen_params = '&op_usm=.4,.8,0,0&resmode=sharp2&op_usm=1.5,.8,0,0&resmode=sharp'
# Size-specific query strings: 640x640, 96x96 and 2000x2000 pixels.
@image_medium = "?wid=640&hei=640#{image_sharpen_params}"
@image_small = "?wid=96&hei=96#{image_sharpen_params}"
@image_large = "?wid=2000&hei=2000#{image_sharpen_params}"
# Converts a scraped price into a Float.
#
# String input may carry a dollar sign and thousands separators
# (e.g. "$1,299.99"). Those characters are removed before conversion because
# String#to_f stops at the first character it cannot parse, which would turn
# "1,299.99" into 1.0 and "$5" into 0.0. Any other input is converted with its
# own #to_f.
#
# @param price [String, Numeric, nil] the raw price
# @return [Float] the numeric price; 0.0 when the input holds no leading number
def price_to_float(price)
  return price.to_f unless price.is_a?(String)

  price.delete('$,').to_f
end
# Download the product page and parse it into a Nokogiri document.
page_dom = HTTParty.get('http://www.jcpenney.com/dale-tiffany-rose-floor-lamp/prod.jump?ppId=pp5004610106&catId=SearchResults&searchTerm=lamp')
page = Nokogiri::HTML(page_dom)
# Result hash that the rest of the script fills in.
product = {}
# Container node that the option and swatch lookups further down are run against.
# at_css returns nil when nothing matches, so fail here with a clear message
# instead of with a NoMethodError on nil much later in the script.
item = page.at_css('.pdp_details')
raise "product details container '.pdp_details' not found on the page" unless item
product_id_node = page.at_css('#ppIdorLotId')
raise "product id element '#ppIdorLotId' not found on the page" unless product_id_node
product_id = product_id_node.text
# Digits following "selectedSKUId=" inside the loginModelParentUrl input tag;
# only used for the debug line below.
ar = page_dom.match(/<input id=\"loginModelParentUrl\".*?selectedSKUId=([0-9]*?)\&/i)
puts "1***************************************"
puts(ar ? "AR #{ar.inspect}" : "AR Nil")
puts "2***************************************"
# Prices
# Pulls the first number out of a price label and converts it to a Float;
# yields nil when the text contains no number at all. A number written with
# thousands separators ("1,299.00") is matched as a whole and its commas are
# dropped before conversion, so it is read as 1299.0 rather than 1.0.
parse_price = lambda do |text|
  number = text[/\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+\.?\d*/]
  number && price_to_float(number.delete(','))
end

# Sale price: taken from the first matching node. It is nil both when the node
# is absent and when its text holds no number, so a non-numeric label can never
# end up in product['price'].
sale_price_node = page.at_css('span.gallery_page_price.flt_wdt')
sale_price = sale_price_node && parse_price.call(sale_price_node.text.strip)
puts "SALE PRICE #{sale_price.inspect}"
puts "3***************************************"
# List price: parsed from the combined text of every matching node
# (the text is an empty string when nothing matches, which parses to nil).
list_price = parse_price.call(page.css('span.pp_page_price.flt_wdt').text.strip)
puts "LIST PRICE #{list_price.inspect}"
puts "4***************************************"
# A sale price wins; the list price is then recorded as the initial price.
# Without a sale price the list price, if any, is the price.
if sale_price
  product['price'] = sale_price
  product['initial_price'] = list_price if list_price
elsif list_price
  product['price'] = list_price
end
puts "PRODUCT #{product.inspect}"
# Bookkeeping shared by every option section below:
#   options - option values, keyed by a 1-based running index
#   names   - at that same index, the key the chosen value is later stored under
#   nSkus   - number of non-empty option groups found so far
options = {}
names = []
nSkus = 0
puts "5****************************************"
# products (?)
# Distinct, whitespace-trimmed link texts of the product list entries.
subproducts = item.css('li#product a').map { |anchor| anchor.text.strip }.uniq
unless subproducts.empty?
  nSkus += 1
  options[nSkus] = subproducts
  names[nSkus] = 'selectedLotValue'
end
puts "OPTIONS 1 -> #{options.inspect}",
     "SUBPRODUCTS -> 1 #{subproducts.inspect}",
     "nSkus 1 -> #{nSkus.inspect}",
     "NAMES 1 -> #{names.inspect}"
puts "6*****************************************"
# size_range
# Distinct, trimmed link texts directly under the <li id="size range"> element.
# NOTE(review): the XPath begins with `//`, so it is evaluated against the whole
# document rather than only inside `item` — confirm that is intended.
size_range = item.xpath("//li[@id='size range']/a").map { |anchor| anchor.text.strip }.uniq
unless size_range.empty?
  nSkus += 1
  options[nSkus] = size_range
  names[nSkus] = 'selectedLotValue'
end
puts "SIZE RANGE -> #{size_range.inspect}",
     "OPTIONS 2 -> #{options.inspect}",
     "SUBPRODUCTS 2 -> #{subproducts.inspect}",
     "nSkus 2 -> #{nSkus.inspect}",
     "NAMES 2 -> #{names.inspect}"
puts "7*****************************************"
# sizes
# Distinct, trimmed link texts of the size option list inside `item`.
sizes = item.css('div#skuOptions_size li#size a').map { |anchor| anchor.text.strip }.uniq
unless sizes.empty?
  nSkus += 1
  options[nSkus] = sizes
  names[nSkus] = 'skuSelectionMap.SIZE'
end
puts "SIZE -> #{sizes.inspect}",
     "OPTIONS 3 -> #{options.inspect}",
     "SUBPRODUCTS 3 -> #{subproducts.inspect}",
     "nSkus 3 -> #{nSkus.inspect}",
     "NAMES 3 -> #{names.inspect}"
puts "8*****************************************"
# waist
# Distinct, trimmed link texts of the waist option list inside `item`.
waist = item.css('div#skuOptions_waist li#waist a').map { |anchor| anchor.text.strip }.uniq
unless waist.empty?
  nSkus += 1
  options[nSkus] = waist
  names[nSkus] = 'skuSelectionMap.WAIST'
end
puts "WAIST -> #{waist.inspect}",
     "OPTIONS 4 -> #{options.inspect}",
     "SUBPRODUCTS 4 -> #{subproducts.inspect}",
     "nSkus 4 -> #{nSkus.inspect}",
     "NAMES 4 -> #{names.inspect}"
puts "9*****************************************"
# inseam
# Distinct, trimmed link texts of the inseam option list inside `item`.
inseam = item.css('div#skuOptions_inseam li#inseam a').map { |anchor| anchor.text.strip }.uniq
unless inseam.empty?
  nSkus += 1
  options[nSkus] = inseam
  names[nSkus] = 'skuSelectionMap.INSEAM'
end
puts "INSEAM -> #{inseam.inspect}",
     "OPTIONS 5 -> #{options.inspect}",
     "SUBPRODUCTS 5 -> #{subproducts.inspect}",
     "nSkus 5 -> #{nSkus.inspect}",
     "NAMES 5 -> #{names.inspect}"
puts "10****************************************"
# neck size
# Distinct, trimmed link texts directly under the <li id="neck size"> element.
# NOTE(review): the XPath begins with `//`, so it is evaluated against the whole
# document rather than only inside `item` — confirm that is intended.
neck_size = item.xpath("//li[@id='neck size']/a").map { |anchor| anchor.text.strip }.uniq
unless neck_size.empty?
  nSkus += 1
  options[nSkus] = neck_size
  names[nSkus] = 'skuSelectionMap.NECK_SIZE'
end
puts "NECK SIZE -> #{neck_size.inspect}",
     "OPTIONS 6 -> #{options.inspect}",
     "SUBPRODUCTS 6 -> #{subproducts.inspect}",
     "nSkus 6 -> #{nSkus.inspect}",
     "NAMES 6 -> #{names.inspect}"
puts "11****************************************"
# sleeves
# Distinct, trimmed link texts of the sleeve option list inside `item`.
sleeves = item.css('div#skuOptions_sleeve li#sleeve a').map { |anchor| anchor.text.strip }.uniq
unless sleeves.empty?
  nSkus += 1
  options[nSkus] = sleeves
  names[nSkus] = 'skuSelectionMap.SLEEVE'
end
puts "SLEEVE -> #{sleeves.inspect}",
     "OPTIONS 7 -> #{options.inspect}",
     "SUBPRODUCTS 7 -> #{subproducts.inspect}",
     "nSkus 7 -> #{nSkus.inspect}",
     "NAMES 7 -> #{names.inspect}"
puts "12****************************************"
# chest
# Distinct, trimmed link texts of the chest option list inside `item`.
chest = item.css('div#skuOptions_chest li#chest a').map { |anchor| anchor.text.strip }.uniq
unless chest.empty?
  nSkus += 1
  options[nSkus] = chest
  names[nSkus] = 'skuSelectionMap.CHEST'
end
puts "CHEST -> #{chest.inspect}",
     "OPTIONS 8 -> #{options.inspect}",
     "SUBPRODUCTS 8 -> #{subproducts.inspect}",
     "nSkus 8 -> #{nSkus.inspect}",
     "NAMES 8 -> #{names.inspect}"
puts "13****************************************"
# length
# Distinct, trimmed link texts of the length option list inside `item`.
length = item.css('div#skuOptions_length li#length a').map { |anchor| anchor.text.strip }.uniq
unless length.empty?
  nSkus += 1
  options[nSkus] = length
  names[nSkus] = 'skuSelectionMap.LENGTH'
end
puts "LENGTH -> #{length.inspect}",
     "OPTIONS 9 -> #{options.inspect}",
     "SUBPRODUCTS 9 -> #{subproducts.inspect}",
     "nSkus 9 -> #{nSkus.inspect}",
     "NAMES 9 -> #{names.inspect}"
puts "14****************************************"
# width
# Distinct, trimmed link texts of the width option list inside `item`.
width = item.css('div#skuOptions_width li#width a').map { |anchor| anchor.text.strip }.uniq
unless width.empty?
  nSkus += 1
  options[nSkus] = width
  names[nSkus] = 'skuSelectionMap.WIDTH'
end
puts "WIDTH -> #{width.inspect}",
     "OPTIONS 10 -> #{options.inspect}",
     "SUBPRODUCTS 10 -> #{subproducts.inspect}",
     "nSkus 10 -> #{nSkus.inspect}",
     "NAMES 10 -> #{names.inspect}"
puts "15****************************************"
# color names
# Distinct `name` attributes of the swatch images; swatches whose image has no
# name attribute are left out so that no nil ends up in the color list.
colors = item.css('ul.small_swatches a.swatch img').map { |img| img['name'] }.compact.uniq
images = {} # image file base name by color name
# the default image name of the product page, we use it for colors that have no image of their own.
# It is the .tif file name found in the og:image meta content, or nil when the
# meta tag, its content attribute, or a .tif name in it is missing — it is never
# left holding the meta node itself.
default_image_meta = page.at_css('meta[property="og:image"]')
default_image = default_image_meta && default_image_meta['content'].to_s[/[^\/?]+\.tif/]
# we search for the other image names
item.css('ul.small_swatches a.swatch').each do |a|
  color_name = a.xpath('img/@name').to_s
  # The swatch's own image is the quoted .tif name inside its onclick handler.
  # to_s guards against a swatch without an onclick attribute.
  own_image = a['onclick'].to_s[/'([^']+\.tif)'/i, 1]
  # this should have worked, otherwise we have no particular image, so we use the default
  images[color_name] = own_image || default_image
end
puts "COLORS -> #{colors.inspect}"
puts "OPTIONS 11 -> #{options.inspect}"
puts "SUBPRODUCTS 11 -> #{subproducts.inspect}"
puts "nSkus 11 -> #{nSkus.inspect}"
puts "NAMES 11 -> #{names.inspect}"
puts "16****************************************"
# colors and sizes
# Size name used for a color that has no size options. ONE_SIZE is not defined
# in the visible part of this script, so it is defined here unless something
# loaded earlier already provides it.
# NOTE(review): the text 'ONE SIZE' is an assumption — confirm the expected label.
ONE_SIZE = 'ONE SIZE' unless defined?(ONE_SIZE)

product['colors'] = []
product['style_id'] = product_id

# Every combination of one value per option group, in the order of `options`
# (the first group varies slowest). The combinations depend only on `options`,
# so they are built once rather than once per color. With no options there is a
# single empty combination, i.e. one size per color.
option_values = options.values
size_combos =
  if option_values.empty?
    [[]]
  else
    option_values.first.product(*option_values.drop(1))
  end

colors.each do |color|
  # A URL can only be built from a String file name; when the color has no
  # usable image name the three image entries are nil.
  image_name = images[color]
  image_url = lambda do |size_query|
    image_name.is_a?(String) ? @image_base_url + image_name + size_query : nil
  end

  # first, add the color in product.colors; its 'sizes' array is filled below
  color_size_dicts = []
  product['colors'] << {
    'name' => color.strip,
    'sizes' => color_size_dicts,
    'image' => image_url.call(@image_small),
    'medium_image' => image_url.call(@image_medium),
    'large_image' => image_url.call(@image_large),
  }

  # create the color_size_dict for every size_combo
  size_combos.each do |size_combo|
    # the sizes hash for get_size_info_params: option key => chosen value.
    # options is keyed from 1, while size_combo is a 0-based array in the same order.
    sizes_dict = {}
    options.each_key do |name_index|
      sizes_dict[names[name_index]] = size_combo[name_index - 1]
    end

    # Display name: the chosen values, upper-cased and space separated.
    size_name = sizes_dict.values.map { |value| " #{value.strip.upcase}" }.join
    puts "SIZE NAME 11 -> #{size_name.inspect}"
    # size_name is blank when there are no options, i.e. one size only
    size_name = ONE_SIZE if size_name.empty?

    color_size_dicts << {
      'name' => size_name.strip,
      'get_size_info_params' => {
        'style_id' => product_id,
        'color' => color.strip,
        'sizes' => sizes_dict,
      }
    }
  end
  puts "SIZE COMBOS 11 -> #{size_combos.inspect}"
end
# Final debug dump of the accumulated state.
puts "OPTIONS 11 -> #{options.inspect}",
     "SUBPRODUCTS 11 -> #{subproducts.inspect}",
     "nSkus 11 -> #{nSkus.inspect}",
     "NAMES 11 -> #{names.inspect}",
     "PRODUCTS -> #{product.inspect}"
# Open an interactive Pry session on the current binding so the values can be inspected.
Pry.start(binding)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment