Skip to content

Instantly share code, notes, and snippets.

@joshcrews
Created February 21, 2015 13:09
Show Gist options
  • Save joshcrews/42b9dd7779dfee965cc2 to your computer and use it in GitHub Desktop.
Save joshcrews/42b9dd7779dfee965cc2 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'mechanize'
require 'csv'
# Crawl the ECFA member directory one initial letter at a time and collect one
# Hash per listed organization in +charities+ (the CSV export later in this
# script reads that array).
charities = []

# Root of the site; the listing URL and every detail URL are built from it.
base_url = 'http://www.ecfa.org/'

# Seconds to pause after each successfully scraped organization, so as to go
# easy on their servers. Optional: set to 0 to crawl at full speed.
politeness_delay = 2

# Detail-page selectors, keyed by the charity field they populate.
# Commas are removed from these values (presumably thousands separators).
amount_selectors = {
  cash_donations: '#BaseContent_Content_lblCashDonations',
  noncash_donations: '#BaseContent_Content_lblNonCashDonations',
  other_revenue: '#BaseContent_Content_lblOtherRevenue',
  net_assets: '#BaseContent_Content_lblNetAssets'
}
# These values are stored as the stripped text of the matched element.
text_selectors = {
  contact_phone: '#BaseContent_Content_lblContactInfoPhone',
  website: '#BaseContent_Content_lblContactInfoWebsite',
  executive_director: '#BaseContent_Content_lblContact'
}
# Each of these matches several cells; the non-blank ones are joined with ", ".
list_selectors = {
  ministry_types: '#BaseContent_Content_lstMinistryTypes td',
  ministry_sectors: '#BaseContent_Content_lstMinistrySectors td'
}

# A single agent serves the whole crawl; nothing about it varies per letter.
# (The block parameter is deliberately not named +agent+ to avoid shadowing.)
agent = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }

('A'..'Z').each do |letter|
  puts "\nstarting #{letter}"
  listing_page = agent.get("#{base_url}MemberSearch.aspx?FirstLetter=#{letter}")

  listing_page.search('#BaseContent_Content_GridViewData tr').each do |row|
    # Query the cells once per row instead of once per column.
    cells = row.css('td')
    # Rows without any <td> (presumably the grid's header row) hold no data.
    next if cells.empty?

    print '.'
    begin
      detail_path = cells[4].at_css('a')['href'].strip
      charity = {
        name: cells[0].text.strip,
        city: cells[2].text.strip,
        state: cells[3].text.strip,
        detail_path: detail_path,
        detail_url: "#{base_url}#{detail_path}"
      }

      # Pass the listing page as referer so the relative link is resolved
      # against the page it was found on, not against whichever detail page
      # the agent happened to fetch last.
      charity_page = agent.get(detail_path, [], listing_page)

      amount_selectors.each do |field, selector|
        charity[field] = charity_page.search(selector).text.strip.delete(',')
      end
      text_selectors.each do |field, selector|
        charity[field] = charity_page.search(selector).text.strip
      end
      list_selectors.each do |field, selector|
        entries = charity_page.search(selector).map { |cell| cell.text.strip }
        charity[field] = entries.reject(&:empty?).join(', ')
      end

      charities << charity

      sleep politeness_delay
    rescue StandardError => e
      # Best effort: a row that cannot be parsed or fetched is skipped (without
      # the pause), but the cause is reported instead of being discarded.
      puts "fail on #{row.text} (#{e.class}: #{e.message})"
    end
  end
end
# Columns of the exported file, in output order. Each symbol serves both as
# the header text (via to_s) and as the key read from every charity hash.
row_headers = [
  :name, :city, :state, :detail_url,
  :cash_donations, :noncash_donations, :other_revenue, :net_assets,
  :contact_phone, :website, :executive_director,
  :ministry_types, :ministry_sectors
]

# Write output.csv: one header line, then one line per collected charity.
# Keys missing from a charity hash come out as empty cells.
CSV.open("output.csv", "wb") do |out|
  out << row_headers.map { |column| column.to_s }
  charities.each { |record| out << record.values_at(*row_headers) }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment