-
-
Save phoozle/3458771 to your computer and use it in GitHub Desktop.
Find twitter and facebook accounts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'csv'
require 'fileutils'
require 'uri'

require 'mechanize'
# Flipped to false by the main loop the first time a Google lookup raises,
# so that the remaining rows are copied through without further searches.
google_allowing_us = true

# CSV file to read wineries from: first command-line argument, or
# ./wineries.csv when none is given.
SOURCE_CSV = ARGV[0] || "./wineries.csv"
# Searches google.com.au (restricted to site:facebook.com) for the given
# winery and picks the first result whose link text matches "| Facebook".
#
# @param winery [String] winery name used as the search term
# @return [Hash] with keys :facebook_title and :facebook_link. Both stay nil
#   when no matching result is found; :facebook_link is also nil when the
#   result's href carries no "url?q=" target.
def get_facebook(winery)
  facebook = {:facebook_title => nil, :facebook_link => nil}

  # Escape the name so spaces, "&", "#" and friends cannot corrupt the query.
  query = URI.encode_www_form_component(winery.to_s)

  agent = Mechanize.new
  agent.get("http://www.google.com.au/search?q=site:facebook.com+#{query}")

  link = agent.page.link_with(:text => /\| Facebook/)
  if link
    puts "Found #{winery} Facebook!"
    facebook[:facebook_title] = link.text
    # The result href is a redirect; keep only the target held in its q parameter.
    facebook[:facebook_link] = link.uri.to_s[/url\?q=([^\&]+)/, 1]
  end

  facebook
end
# Searches google.com.au (restricted to site:twitter.com) for the given
# winery and picks the first result whose link text ends in "on Twitter".
#
# @param winery [String] winery name used as the search term
# @return [Hash] with keys :twitter_title and :twitter_link. Both are the
#   string "Missing" when no matching result is found; :twitter_link is nil
#   when a result is found but its href carries no "url?q=" target.
def get_twitter(winery)
  twitter = {:twitter_title => "Missing", :twitter_link => "Missing"}

  # Escape the name so spaces, "&", "#" and friends cannot corrupt the query.
  query = URI.encode_www_form_component(winery.to_s)

  agent = Mechanize.new
  agent.get("http://www.google.com.au/search?q=site:twitter.com+#{query}")

  link = agent.page.link_with(:text => /on Twitter$/)
  if link
    puts "Found #{winery} Twitter!"
    twitter[:twitter_title] = link.text
    # The result href is a redirect; keep only the target held in its q parameter.
    twitter[:twitter_link] = link.uri.to_s[/url\?q=([^\&]+)/, 1]
  end

  twitter
end
# Main script: copies every data row of SOURCE_CSV into a temporary CSV,
# filling in empty Twitter/Facebook columns via get_twitter/get_facebook,
# then moves the temporary file over ./wineries.csv.
#
# NOTE(review): the result is always moved to ./wineries.csv, even when
# SOURCE_CSV points at a different file -- confirm that this is intended.
temp_path = "./wineries_temp.csv"
output_csv = CSV.open(temp_path, "w")
finished = false

begin
  output_csv << ["Winery Name", "Twitter Title", "Twitter Link", "Facebook Title", "Facebook Link", "Facebook Incorrect", "Twitter Incorrect"]

  header_skipped = false
  # The source is only read here, so open it read-only.
  CSV.foreach(SOURCE_CSV) do |row|
    # Ignore first row (headers).
    unless header_skipped
      header_skipped = true
      next
    end

    winery_name = row.first
    winery = {:twitter_title => row[1], :twitter_link => row[2], :facebook_title => row[3], :facebook_link => row[4]}

    begin
      # Only search for columns that are empty, and only while lookups still work.
      winery.merge!(get_facebook(winery_name)) if row[3].nil? && google_allowing_us
      winery.merge!(get_twitter(winery_name)) if row[1].nil? && google_allowing_us
    rescue StandardError => e
      # Any lookup failure is treated as being blocked: stop searching, but
      # keep copying the remaining rows through unchanged.
      google_allowing_us = false
      puts "Google has blocked us :( \nSkipping Google Searches are finishing..."
      # Surface the real cause so a non-Google failure is not misdiagnosed.
      warn "  (#{e.class}: #{e.message})"
    end

    output_csv << [winery_name, winery[:twitter_title], winery[:twitter_link], winery[:facebook_title], winery[:facebook_link], row[5], row[6]]
  end

  finished = true
rescue Interrupt
  puts "\n> Aborting, no changes have been made..."
  exit(0)
ensure
  # Runs on success, on Ctrl-C (exit raises SystemExit) and on any other
  # error: always release the handle, and never leave a partial temp file.
  output_csv.close
  FileUtils.rm_f(temp_path) unless finished
end

FileUtils.mv(temp_path, "./wineries.csv")
puts "> Output saved as wineries.csv"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment