Skip to content

Instantly share code, notes, and snippets.

Created November 5, 2014 02:16
Show Gist options
  • Save anonymous/68f2291ac04b3ab2023f to your computer and use it in GitHub Desktop.
Save anonymous/68f2291ac04b3ab2023f to your computer and use it in GitHub Desktop.
require 'nokogiri'
require 'open-uri'
require 'mechanize'
# Scrapes one Tristar product page and prints a single semicolon-separated
# line for it, in this field order:
#
#   1;1000;Tristar;<name>;<code>;<short description>;<description>;<weight>;<EAN>;;<image URLs, comma-separated>;<main category>,<sub category>;
#
# Export notes carried over from the original author:
#   * the specs (list items) must all be exported on one line, as the "description";
#   * desc (paragraphs) must be exported as the "short description".
#
# To process URLs passed on the command line instead of the hard-coded one,
# wrap everything below in `ARGV.each do |url| ... end`.
url = "http://www.tristar.eu/pt/Electronica/Colunas_de_som/Speakers_Bluetooth/SK-1512/3/6193"
site_root = "http://www.tristar.eu"
mechanize = Mechanize.new

# URI.open is used instead of Kernel#open: since Ruby 3.0 open-uri no longer
# makes Kernel#open accept URLs. The block form closes the response stream
# as soon as Nokogiri has parsed it.
doc = URI.open(url) { |io| Nokogiri::HTML(io) }

# Product header: the first 7 characters are taken as the product code,
# everything after them as the product name.
header = doc.at_css('#_c3890_uxHeaderBox')
abort "Product header #_c3890_uxHeaderBox not found on #{url}" unless header
header_text = header.text.strip
codprod = header_text[0..6]
nomeprod = header_text[7..-1]

# Paragraphs and list items inside the element with id "Gegevens".
desc = doc.xpath('//*[(@id = "Gegevens")]//p').map { |node| node.text.strip }
specs = doc.xpath('//*[(@id = "Gegevens")]//li').map { |node| node.text.strip }

# peso (weight): 4th cell of a 5th table row; only the first match is used.
# `first.to_s` yields an empty field instead of raising when nothing matches.
peso = doc.xpath('//tr[(((count(preceding-sibling::*) + 1) = 5) and parent::*)]//td[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]')
          .map { |node| node.text.strip }
          .first.to_s

# EAN: 3rd cell of a 3rd table row.
ean = doc.xpath('//tr[(((count(preceding-sibling::*) + 1) = 3) and parent::*)]//td[(((count(preceding-sibling::*) + 1) = 3) and parent::*)]')
         .map { |node| node.text.strip }

# Categories.
# NOTE(review): the [2..-3] and [2..-5] offsets below operate on Array#inspect
# output and look hand-tuned to this page's markup, so this part is kept
# exactly as the original computed it. Confirm against the live page before
# simplifying.
catmaintemp = doc.xpath('//*[(@id = "AlgNavigatie")]//h2').map { |node| node.text.strip }
catmain = catmaintemp.inspect[2..-3]
catsubtemp = doc.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "active", " " ))]')
                .map { |node| node.text.strip }
catsub = catsubtemp.inspect.gsub(catmain, " ").delete('"')[2..-5]

# Images: up to the first five links inside elements with class "image".
# Each href is prefixed with the site root and fetched through Mechanize so
# that the final URI (after any redirect) is what gets exported. Pages with
# fewer than five images no longer raise on a nil node.
image_hrefs = doc.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "image", " " ))]//a')
                 .first(5)
                 .map { |link| link["href"] }
                 .compact
image_urls = image_hrefs.map { |href| mechanize.get("#{site_root}#{href}").uri.to_s }

# Multi-valued fields are joined explicitly rather than by slicing the
# Array#inspect string, which leaked `", "` separators and escape sequences
# into the output whenever there was more than one element.
fields = [
  "1",
  "1000",
  "Tristar",
  nomeprod,
  codprod,
  desc.join(" "),
  specs.join(", "),
  peso,
  ean.join(", "),
  "", # intentionally empty column
  image_urls.join(","),
  "#{catmain},#{catsub}"
]
puts "#{fields.join(';')};"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment