Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
require 'rubygems'
require 'mechanize'
require 'uri'
require 'csv'
# This script reads vendors from a seed CSV file and writes a new seed CSV file
# that adds each vendor's domain, taken from the first Google search result returned.
# Extracts the lower-cased host name from a URL, dropping a leading "www.".
#
# @param url [String] an absolute URL or a bare host, with or without a scheme
#   or port (e.g. "http://www.example.com/a", "www.example.com", "example.com:8080")
# @return [String] the host, lower-cased and without a leading "www."
# @raise [URI::InvalidURIError] if the input cannot be parsed or contains no host
def get_host_without_www(url)
  candidate = url.to_s.strip
  uri = URI.parse(candidate)
  # Retry with an explicit scheme whenever no host was found. Checking the host
  # rather than the scheme matters for input like "example.com:8080", which URI
  # parses as scheme "example.com" with no host at all.
  uri = URI.parse("http://#{candidate}") if uri.host.nil?

  host = uri.host.to_s.downcase
  raise URI::InvalidURIError, "no host in #{url.inspect}" if host.empty?

  host.sub(/\Awww\./, '')
end
# Searches Google for a company name and returns the domain of the first
# result link found on the results page.
#
# @param name [String] company name to search for
# @return [String, nil] host of the first result link without a leading "www.",
#   or nil when the results page contains no result link
def get_domain_for_company(name)
  agent = Mechanize.new
  #agent.set_proxy '78.186.178.153', 8080
  page = agent.get('http://www.google.com/')
  google_form = page.form('f')
  google_form.q = name
  results = agent.submit(google_form, google_form.buttons.first)

  results.links.each do |link|
    target = google_result_target(link.href.to_s)
    return get_host_without_www(target) if target
  end

  # Explicit nil: without it the method would return the value of `each`
  # (the links collection) when no link matched.
  nil
end

# Pulls the destination URL out of a redirect-style href of the form
# "/url?q=<target>&..." and percent-decodes it; returns nil for any other href.
def google_result_target(href)
  encoded = href[/url\?(?:[^#]*?&)?q=([^&#]+)/, 1]
  encoded && URI.decode_www_form_component(encoded)
end
#puts get_domain_for_company('Reflect Media')
# puts "Seeding #{__method__.to_s}"
#
# Builds new_operators_seed.csv: copies id and name from db/operators_seed.csv
# and adds the domain looked up for each name.
#
# The output is opened in append mode, so the header row is written only when
# the file is missing or empty; otherwise every rerun would insert another
# header line between the data rows.
output_path = 'new_operators_seed.csv'
needs_header = File.size?(output_path).nil?

CSV.open(output_path, 'ab') do |csv|
  csv << %w(id name domain) if needs_header

  CSV.foreach('db/operators_seed.csv', headers: true) do |row|
    domain =
      begin
        get_domain_for_company(row['name'])
      rescue StandardError => e
        # Best effort: a failed lookup is reported and the row is still written
        # with a blank domain.
        puts "could not get #{row['name']} domain (#{e.class}: #{e.message})"
        ''
      end
    # Anything other than a String (nil or some other non-string value) means
    # no domain was found for this row.
    domain = '' unless domain.is_a?(String)

    csv << [row['id'], row['name'], domain]
  end
end
# Appends an array of hashes to "<filename>.csv", one CSV row per hash.
#
# The keys of the first hash define the columns. A header row built from those
# keys is written only when the target file is missing or empty, because the
# file is opened in append mode. Nothing is written for nil or empty input.
#
# @param hashes [Array<Hash>] rows to write; each hash is read using the keys
#   of the first hash
# @param filename [String] output path without the ".csv" extension
# @return [Object, nil] nil when there is nothing to write; otherwise not meaningful
def write_hashes_to_file_panel(hashes, filename)
  return if hashes.nil? || hashes.empty?

  path = "#{filename}.csv"
  columns = hashes.first.keys
  needs_header = File.size?(path).nil?

  CSV.open(path, 'ab') do |csv|
    csv << columns.map(&:to_s) if needs_header
    hashes.each { |hash| csv << hash.values_at(*columns) }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.