Skip to content

Instantly share code, notes, and snippets.

@nerdoncall
Created August 24, 2015 14:58
Show Gist options
  • Save nerdoncall/0aaffb6f0076b1d0c630 to your computer and use it in GitHub Desktop.
Save nerdoncall/0aaffb6f0076b1d0c630 to your computer and use it in GitHub Desktop.
Trying to scrape our website and create a csv file. Noobie here. Getting error on the copying of the data from the hash to the csv. I dont know what I have screwed up. Please help.
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'mechanize'
require 'csv'
BASEURL = "http://www.pbm-erson.com"
BASEURL1 = "http://www.pbm-erson.com/Catalog/PBM/Camshafts_PBM/Performance_CPBM?&sortby=2&pageSize=5&page="
HSH = Hash.new
RESULTS = []
BRY = Array.new #attribute name array from products
LAST_PAGE_NUMBER = 2 #last page to stop on
ARR1 = []
$bsv
#go through and get links for each product page
CSV.open("results-test.csv","w") do |csv|
#$bsv = csv
#def csvwrite
# csv << $bsv
#end
def csvheader (bry)
#csv << bry
puts bry
end
def find_link
puts " Finding Links"
item_info_css = ".item-info"
product_item_row = ".item-row"
product_num = "span.item-num"
for current_page_number in 1..LAST_PAGE_NUMBER
url = "#{BASEURL1}#{current_page_number}"
puts url
doc = Nokogiri::HTML(open(url))
products = doc.css(item_info_css)
products.each do |product|
product_nodes = product.css(product_num)
links = product.css('a')
linkref = links[0]["href"]
raw_results = BASEURL + linkref
RESULTS << raw_results
sleep 1
end
end
puts RESULTS#finding links
end #find_link
def build_att_name_array
BRY.push("ProductID:")
BRY.push("Name:")
BRY.push("Short Description:")
BRY.push("HTML Content:")#building attribute name list
puts "Building Table Headers"
RESULTS.each do |link|
url = link
doc = Nokogiri::HTML(open(url))
doc.css(".attribute-list-item").each do |attrib|
att = attrib.at_css("span.attrib-name").text
BRY << att
end
sleep 1
end
end #build_att_name_array
#need to verify the lining up of the code from here down
def find_att_value
puts "Matching attributes to attribute name"
hry = BRY.uniq
RESULTS.each do |link|
url = link
doc = Nokogiri::HTML(open(url))
erp_num = doc.at_css("span.erp-num").text.strip #our part number from Sxe
short_desc = doc.at_css(".pd-header").text.strip # short description
long_desc = doc.at_css(".product-cm").text.strip #html content
id = doc.at_css("input#CurrentProductId")['value'] #ID number in console
hry.each do |ab|
if ab == "ProductID:"
HSH[ab] = id
elsif ab == "Name:"
HSH[ab] = erp_num
elsif ab == "Short Description:"
HSH[ab] = short_desc
elsif ab == "HTML Content:"
HSH[ab] = long_desc
else
begin
els = doc.search"[text()*='#{ab}']"
el = els.first
eln = el.next_element.text.strip
HSH[ab] = eln
rescue
HSH[ab] = ""
end #begin
end #if
puts "-------------------------"
puts HSH
puts "-------------------------"
HSH.each_value do |val|
ARR1 << val
end #HSH
csv << ARR1 # ERROR - undefined local variable or method `csv'
ARR1.clear
HSH.clear
end #hry
end#RESULTS
end #find_att_value
find_link
build_att_name_array
bry = BRY.uniq
csv << bry
csvheader(bry)
find_att_value
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment