/xtrafoo.rb Secret

## xtrafoo.rb
require 'nokogiri'
require 'open-uri'
require 'mechanize'

# Scrapes one Tristar product page and prints a single semicolon-separated
# record to stdout.
#
# To process URLs passed on the command line instead of the hard-coded one,
# wrap the whole script body in `ARGV.each do |url| ... end`.

url = "http://www.tristar.eu/pt/Electronica/Colunas_de_som/Speakers_Bluetooth/SK-1512/3/6193"
mechanize = Mechanize.new

# Kernel#open no longer fetches URLs on Ruby 3.0+ (it looks for a local file
# and raises Errno::ENOENT); URI.open is the open-uri entry point to use.
doc = Nokogiri::HTML(URI.open(url))

# Returns the stripped text of every node matched by the given XPath.
texts = ->(xpath) { doc.xpath(xpath).map { |node| node.text.strip } }

header_box = doc.at_css('#_c3890_uxHeaderBox')
abort "Product header '#_c3890_uxHeaderBox' not found at #{url}" unless header_box

# The first 7 characters of the header are taken as the product code,
# the remainder as the product name.
header_text = header_box.text.strip
codprod = header_text[0..6]
nomeprod = header_text[7..-1]

desc = texts.call(' //*[(@id = "Gegevens")]//p ')
specs = texts.call(' //*[(@id = "Gegevens")]//li')
# 5th row / 4th cell and 3rd row / 3rd cell of the page's tables.
peso = texts.call(' //tr[(((count(preceding-sibling::*) + 1) = 5) and parent::*)]//td[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]').first
ean = texts.call(' //tr[(((count(preceding-sibling::*) + 1) = 3) and parent::*)]//td[(((count(preceding-sibling::*) + 1) = 3) and parent::*)]')

# Categories.
# Array#to_s yields the inspect form (e.g. ["a", "b"]); slicing [2..-3]
# drops the leading `["` and the trailing `"]`.
catmain = texts.call(' //*[(@id = "AlgNavigatie")]//h2 ').to_s[2..-3]
# Every "active" element's text, with the main category blanked out and the
# inspect quotes removed.
catsub = texts.call(' //*[contains(concat( " ", @class, " " ), concat( " ", "active", " " ))] ')
              .to_s
              .gsub(catmain.to_s, ' ')
              .tr('"', '')

# Images: request each linked image and keep the URI Mechanize reports for it.
image_urls = doc.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "image", " " ))]//a ').map do |link|
  mechanize.get("http://www.tristar.eu#{link['href']}").uri
end

# TODO: PDF/download links (class "pdf", id "Downloads") are not exported yet.

# Short description: the inspect form of the spec list with each `",`
# separator replaced by a literal backslash-n followed by a bullet.
descshort = specs.to_s.gsub(/",/, '\\n•')

# NOTE(review): the [2..-3] / [2..-5] bounds are kept exactly as they were;
# they trim bracket/quote residue of the inspect strings — confirm against
# a live page before changing them.
# `desc` is an Array, so Array#join flattens its paragraphs with ';' too.
output = [1, 1000, 'Tristar', nomeprod, codprod, desc, descshort, peso, ean.to_s[2..-3], image_urls.join(', '), catmain, catsub.to_s[2..-5]]
puts output.join(';')
	require 'nokogiri'
	require 'open-uri'
	require 'mechanize'

	# Scrapes one Tristar product page and prints a single semicolon-separated
	# record to stdout.
	#
	# To process URLs passed on the command line instead of the hard-coded one,
	# wrap the whole script body in `ARGV.each do |url| ... end`.

	url = "http://www.tristar.eu/pt/Electronica/Colunas_de_som/Speakers_Bluetooth/SK-1512/3/6193"
	mechanize = Mechanize.new

	# Kernel#open no longer fetches URLs on Ruby 3.0+ (it looks for a local file
	# and raises Errno::ENOENT); URI.open is the open-uri entry point to use.
	doc = Nokogiri::HTML(URI.open(url))

	# Returns the stripped text of every node matched by the given XPath.
	texts = ->(xpath) { doc.xpath(xpath).map { |node| node.text.strip } }

	header_box = doc.at_css('#_c3890_uxHeaderBox')
	abort "Product header '#_c3890_uxHeaderBox' not found at #{url}" unless header_box

	# The first 7 characters of the header are taken as the product code,
	# the remainder as the product name.
	header_text = header_box.text.strip
	codprod = header_text[0..6]
	nomeprod = header_text[7..-1]

	desc = texts.call(' //*[(@id = "Gegevens")]//p ')
	specs = texts.call(' //*[(@id = "Gegevens")]//li')
	# 5th row / 4th cell and 3rd row / 3rd cell of the page's tables.
	# The `*` node tests after `preceding-sibling::` and `parent::` are required;
	# without them the XPath expressions are invalid.
	peso = texts.call(' //tr[(((count(preceding-sibling::*) + 1) = 5) and parent::*)]//td[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]').first
	ean = texts.call(' //tr[(((count(preceding-sibling::*) + 1) = 3) and parent::*)]//td[(((count(preceding-sibling::*) + 1) = 3) and parent::*)]')

	# Categories.
	# Array#to_s yields the inspect form (e.g. ["a", "b"]); slicing [2..-3]
	# drops the leading `["` and the trailing `"]`.
	catmain = texts.call(' //*[(@id = "AlgNavigatie")]//h2 ').to_s[2..-3]
	# Every "active" element's text, with the main category blanked out and the
	# inspect quotes removed.
	catsub = texts.call(' //*[contains(concat( " ", @class, " " ), concat( " ", "active", " " ))] ')
	              .to_s
	              .gsub(catmain.to_s, ' ')
	              .tr('"', '')

	# Images: request each linked image and keep the URI Mechanize reports for it.
	image_urls = doc.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "image", " " ))]//a ').map do |link|
	  mechanize.get("http://www.tristar.eu#{link['href']}").uri
	end

	# TODO: PDF/download links (class "pdf", id "Downloads") are not exported yet.

	# Short description: the inspect form of the spec list with each `",`
	# separator replaced by a literal backslash-n followed by a bullet.
	descshort = specs.to_s.gsub(/",/, '\\n•')

	# NOTE(review): the [2..-3] / [2..-5] bounds are kept exactly as they were;
	# they trim bracket/quote residue of the inspect strings — confirm against
	# a live page before changing them.
	# `desc` is an Array, so Array#join flattens its paragraphs with ';' too.
	output = [1, 1000, 'Tristar', nomeprod, codprod, desc, descshort, peso, ean.to_s[2..-3], image_urls.join(', '), catmain, catsub.to_s[2..-5]]
	puts output.join(';')