Skip to content

Instantly share code, notes, and snippets.

@fractalatcarf
Created April 4, 2017 16:11
Show Gist options
  • Save fractalatcarf/8966c679abe1d85b8211a8b388716003 to your computer and use it in GitHub Desktop.
Save fractalatcarf/8966c679abe1d85b8211a8b388716003 to your computer and use it in GitHub Desktop.
Zalando shoes scraping
require 'open-uri'
require 'nokogiri'
require 'csv'
require 'json'
# Scrapes the first +pages+ result pages of a catalogue listing.
#
# Each page is fetched from "<url>/?p=<n>" (n starting at 1) and every
# '.catalogArticlesList_infoPrice' node found on it becomes one shoe hash.
#
# @param url [String] base URL of the listing; a trailing slash is tolerated
# @param pages [Integer] number of pages to fetch (defaults to 10)
# @return [Array<Hash>] hashes with :brand, :model and :price keys; :price is
#   the first "digits,digits" run in the price text, or nil when there is none
def scrap_zalendo(url, pages: 10)
  # Drop a trailing slash so the page URL does not contain "//?p=".
  base_url = url.chomp('/')

  (1..pages).flat_map do |page|
    puts "scrapping page #{page} ..."
    # URI.open is required: Kernel#open no longer opens URLs on Ruby 3.0+.
    # The block form closes the downloaded IO once the document is parsed.
    html_doc = URI.open("#{base_url}/?p=#{page}") { |io| Nokogiri::HTML(io) }

    html_doc.search('.catalogArticlesList_infoPrice').map do |element|
      {
        brand: element.search('.catalogArticlesList_brandName').text,
        model: element.search('.catalogArticlesList_articleName').text,
        # String#[] yields nil (instead of raising) when no price is present.
        price: element.search('.catalogArticlesList_price').text[/\d*,\d*/]
      }
    end
  end
end
# Scrape the women's shoes listing, then export the result as CSV and JSON.
shoes = scrap_zalendo("https://www.zalando.fr/chaussures-femme/")
puts "site scrapped, #{shoes.size} items found"

# store into csv
csv_columns = %i[brand model price]
csv_options = { col_sep: ',', force_quotes: true, quote_char: '"' }
csv_filepath = 'shoes.csv'
# CSV.open takes its options as keyword arguments; passing the Hash
# positionally raises ArgumentError on Ruby 3+, hence the double splat.
CSV.open(csv_filepath, 'wb', **csv_options) do |csv|
  csv << csv_columns.map(&:to_s)
  # values_at keeps every cell under its header whatever the hash key order.
  shoes.each { |shoe| csv << shoe.values_at(*csv_columns) }
end
puts "file #{csv_filepath} created"

# store into json
json_filepath = "shoes.json"
File.open(json_filepath, 'wb') { |file| file.write(JSON.pretty_generate(shoes)) }
puts "file #{json_filepath} created"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment