Skip to content

Instantly share code, notes, and snippets.

@topperge
Created May 10, 2012 05:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save topperge/2651266 to your computer and use it in GitHub Desktop.
Save topperge/2651266 to your computer and use it in GitHub Desktop.
Quick and Dirty Script to Grab all the Breweries and Beers for Savor and match them to Beer Advocate Profiles
require 'csv'
require 'nokogiri'
require 'open-uri'
# Scrapes the SAVOR brewery listing, follows every beer link found on it, and
# prints one CSV row per beer to stdout. Each row is enriched with the rating,
# ABV and IBU taken from the first BeerAdvocate search result for that beer.

SAVOR_BREWERIES_URL = 'http://www.savorcraftbeer.com/beer-food/savor-breweries/'
BEER_ADVOCATE_HOST = 'http://beeradvocate.com'
CSV_HEADER = ['Title', 'Brewery', 'Table', 'Style', 'Food',
              'Beer Advocate Url', 'Rating', 'ABV', 'IBU'].freeze
# Substrings removed from the brewery tag, in this order, before it is used
# as part of the BeerAdvocate search term.
BREWERY_NOISE_WORDS = ['LLC', 'Co.', 'Beer', 'Brewing', 'Company', 'Brewery'].freeze
# XPath of the cells on a BeerAdvocate profile page that are scanned for the
# ABV and IBU figures.
BEER_ADVOCATE_DETAILS_XPATH = '//table/tr/td/table/tr[2]/td'

# Downloads +url+ and parses it as HTML.
#
# Uses URI.open (Kernel#open no longer accepts URLs since Ruby 3.0) in block
# form so the underlying IO is closed as soon as parsing is done.
#
# @param url [String] absolute http(s) URL
# @return [Nokogiri::HTML::Document]
def fetch_html(url)
  URI.open(url) { |io| Nokogiri::HTML(io) }
end

# Removes every BREWERY_NOISE_WORDS substring from a brewery tag.
#
# @param tag_text [String] raw text of the brewery tag
# @return [String] the text with the noise substrings deleted (not stripped)
def brewery_name(tag_text)
  BREWERY_NOISE_WORDS.reduce(tag_text) { |name, word| name.gsub(word, '') }
end

# Maps the tag links of a SAVOR beer page to brewery, table and style.
#
# Three tags are read as brewery/table/style, two as brewery/style (the table
# is then reported as 'Supporters Circle'), one as brewery only. Any other
# count yields three empty strings.
#
# @param tags [Enumerable] nodes responding to #text
# @return [Array(String, String, String)] brewery, table, style
def savor_tag_fields(tags)
  texts = tags.map(&:text)
  case texts.length
  # The table is the third whitespace-separated word of the second tag.
  when 3 then [brewery_name(texts[0]), texts[1].split[2].to_s, texts[2]]
  when 2 then [brewery_name(texts[0]), 'Supporters Circle', texts[1]]
  when 1 then [brewery_name(texts[0]), '', '']
  else ['', '', '']
  end
end

# Builds the BeerAdvocate beer-search URL for a title/brewery pair.
#
# The search term is form-encoded as a single query component so that
# characters such as '&' or '=' in a beer name cannot break the query string
# (URI.escape, used previously, left them untouched and is gone in Ruby 3.0).
#
# @param title [String]
# @param brewery [String]
# @return [String]
def beer_advocate_search_url(title, brewery)
  term = "#{title} #{brewery}".split.join(' ')
  "#{BEER_ADVOCATE_HOST}/search?qt=beer&q=#{URI.encode_www_form_component(term)}"
end

# Pulls the ABV figure out of the profile details text.
#
# Finds the first 'ABV', then the first '%' at or after it, and returns the
# run from the last space before that '%' through the '%' itself.
#
# @param details [String]
# @return [String] e.g. "7.00%", or '' when any of the markers is missing
def extract_abv(details)
  abv_at = details.index('ABV')
  return '' unless abv_at

  percent_at = details.index('%', abv_at)
  return '' unless percent_at

  space_at = details.rindex(' ', percent_at)
  return '' unless space_at

  details[space_at..percent_at].strip
end

# Pulls the IBU figure out of the profile details text.
#
# Takes the two characters that follow the last "\r" at or before the first
# 'IBU'. When there is no such "\r", it falls back to the last space before
# 'IBU'. Returns '' when 'IBU' does not occur at all; previously that case
# raised NoMethodError and aborted the whole run.
#
# NOTE(review): only two characters are kept, as before; a three-digit IBU
# would presumably be truncated - confirm against the live page layout.
#
# @param details [String]
# @return [String]
def extract_ibu(details)
  ibu_at = details.index('IBU')
  return '' unless ibu_at

  before = details.rindex("\r", ibu_at)
  before ||= details.rindex(' ', ibu_at - 1) if ibu_at > 0
  return '' unless before

  details.slice(before + 1, 2).to_s.strip
end

# Fetches a BeerAdvocate profile page and extracts rating, ABV and IBU.
#
# @param profile_url [String]
# @return [Array(String, String, String)] rating, abv, ibu ('' when absent)
def beer_advocate_stats(profile_url)
  profile = fetch_html(profile_url)
  score = profile.xpath("//span[@class='BAscore_big']").first
  # Query the details cells once and reuse the text for both extractions.
  details = profile.xpath(BEER_ADVOCATE_DETAILS_XPATH).text
  [score ? score.text : '', extract_abv(details), extract_ibu(details)]
end

listing = fetch_html(SAVOR_BREWERIES_URL)
beer_urls = listing.xpath('//div[@class="entry"]/p/a[@href]').map { |link| link['href'] }

puts CSV_HEADER.to_csv
beer_urls.each do |beer_url|
  beer = fetch_html(beer_url)
  title = beer.xpath('//article/h1').text
  brewery, table, style = savor_tag_fields(beer.xpath("//span[@class='tags']/a"))
  food = beer.xpath("//div/p[@class='pairings']").text

  # Get Beer Advocate rating from the first search hit, if there is one.
  search = fetch_html(beer_advocate_search_url(title, brewery))
  hits = search.xpath("//td[@id='mainContent']/div/ul/li/a[@href]").map { |link| link['href'] }

  profile_url = ''
  rating = ''
  abv = ''
  ibu = ''
  unless hits.empty?
    profile_url = BEER_ADVOCATE_HOST + hits.first
    rating, abv, ibu = beer_advocate_stats(profile_url)
  end

  # Column order follows CSV_HEADER (title first, then brewery). to_csv quotes
  # any field containing commas, quotes or newlines.
  row = [title, brewery, table, style, food, profile_url, rating, abv, ibu]
  puts row.map(&:strip).to_csv
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment