Skip to content

Instantly share code, notes, and snippets.

@statianzo
Forked from m0tive/gist:1884821
Created February 22, 2012 18:07
Show Gist options
  • Save statianzo/1886365 to your computer and use it in GitHub Desktop.
Save statianzo/1886365 to your computer and use it in GitHub Desktop.
export google history
require 'mechanize'
require 'logger'
require 'date'
## Call:
## $ ruby export.rb <gmail> <password>
# Path of the CA certificate bundle, stored next to this script. It is handed
# to the Mechanize agent below so HTTPS certificates can be verified.
caFile = File.join(File.dirname(__FILE__), 'cacert.pem')

# Download the bundle once; later runs reuse the cached copy.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
unless File.exist?(caFile)
  require 'net/http'
  puts 'Getting cacert.pem'
  # NOTE(review): Net::HTTP.start without use_ssl talks plain HTTP, so the
  # bundle is not authenticated in transit -- consider fetching it over HTTPS.
  Net::HTTP.start('curl.haxx.se') do |http|
    resp = http.get('/ca/cacert.pem')
    # Refuse to cache an error or redirect page as the certificate bundle.
    unless resp.is_a?(Net::HTTPSuccess)
      raise "Could not download cacert.pem: HTTP #{resp.code} #{resp.message}"
    end
    File.write(caFile, resp.body)
  end
end
# Build the Mechanize agent used for every request below. The settings are
# applied inside the constructor block so they are in place before
# Mechanize finishes initializing.
agent = Mechanize.new do |mech|
  # Log to standard output, but only warnings and above.
  stdout_logger = Logger.new(STDOUT)
  stdout_logger.level = Logger::WARN

  mech.ca_file = caFile
  mech.log = stdout_logger
  mech.user_agent_alias = 'Linux Mozilla'
end
# Sign in with the credentials given on the command line:
#   ARGV[0] - account e-mail address
#   ARGV[1] - account password
# Fail early with a usage message instead of submitting nil credentials.
abort "Usage: ruby #{$PROGRAM_NAME} <gmail> <password>" if ARGV.length < 2

puts 'Logging in...'
login_page = agent.get('http://www.google.com/history')

# form_with returns nil when no form on the page has this action; report that
# explicitly rather than failing with a NoMethodError on nil.
login_form = login_page.form_with(:action => 'https://accounts.google.com/ServiceLoginAuth')
abort 'Login form not found on the page returned for http://www.google.com/history' if login_form.nil?

login_form.Email = ARGV[0]
login_form.Passwd = ARGV[1]
# Submitting the form yields the page that follows the login attempt.
success_page = login_form.click_button
# Page through the history RSS feed, saving each page to a file named after
# the date of the first <pubDate> on that page (YYYYMMDD.xml).
puts 'Getting history...'
feedURL = 'http://www.google.com/history/lookup?output=rss&num=9999'
prevDate = nil
d = nil
loop do
  rss = agent.get(feedURL)
  pubDates = Nokogiri::XML(rss.body).xpath('//pubDate')

  # A page without any <pubDate> has nothing to save and no date to page
  # from; stop here instead of calling inner_text on nil.
  break if pubDates.empty?

  d = DateTime.parse(pubDates.last.inner_text)
  # Stop once the last date no longer changes between pages: the request
  # returned nothing beyond what was already seen.
  break if prevDate == d

  prevDate = d
  firstDate = DateTime.parse(pubDates.first.inner_text)
  fileName = firstDate.strftime('%Y%m%d.xml')
  rss.save(fileName)
  puts "Saved #{fileName}"

  # Request the next page starting from the date of the last item seen.
  # NOTE(review): presumably the service returns items at or before this
  # date -- confirm against the feed's behavior.
  feedURL = "http://www.google.com/history/lookup?output=rss&num=9999&month=#{d.month}&day=#{d.day}&yr=#{d.year}"
end
puts "Last item: #{d}"
puts 'Done'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment