@vaiorabbit
Created May 30, 2011 14:28
Collects tweets posted by a specified user.
# -*- coding: utf-8-unix -*-
#
# Tested on :
# - Ruby 1.9.2-p180 (2011-02-18 revision 30909) [x86_64-darwin10.7.0]
#
# Usage : $ ruby TwitterBackup.rb [user name]
#
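# Example invocation (the user name here is only an illustration):
#   $ ruby TwitterBackup.rb vaiorabbit
#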
require 'date'
require 'net/http'
require 'rexml/document'
class TwitterStatuses
  NSTATUS_PER_REQ = 20    # statuses fetched per user_timeline page
  NSTATUS_MAXIMUM = 3200  # upper bound on the number of statuses retrievable
  SLEEP_SECOND    = 30    # seconds to wait between requests

  attr_reader :statuses_count

  def initialize( screen_name )
    @screen_name    = screen_name
    @statuses_count = nil
  end
  # Queries users/show and caches the user's total status count,
  # capped at NSTATUS_MAXIMUM.
  def update_statuses_count
    uri  = "http://api.twitter.com/1/users/show.xml?screen_name=#{@screen_name}"
    resp = nil
    begin
      resp = Net::HTTP.get_response( URI.parse(uri) )
    rescue
      raise RuntimeError, "Connection error."
    end
    users_show_xmldoc = REXML::Document.new( resp.body )
    @statuses_count   = users_show_xmldoc.root.elements["statuses_count"].text.to_i
    @statuses_count   = NSTATUS_MAXIMUM if @statuses_count > NSTATUS_MAXIMUM
  end
  # Fetches one page of the user's timeline (retweets and entities included)
  # and returns it as a REXML::Document.
  def get_user_timeline( page )
    uri  = "http://api.twitter.com/1/statuses/user_timeline.xml?screen_name=#{@screen_name}&include_rts=1&include_entities=1&page=#{page}"
    resp = nil
    begin
      resp = Net::HTTP.get_response( URI.parse(uri) )
    rescue
      raise RuntimeError, "Connection error."
    end
    return REXML::Document.new( resp.body )
  end
  # Downloads the timeline page by page, aggregates each page's <statuses>
  # element under a single root, and writes the result to an XML backup file.
  def fetch_statuses( pages_fetch = nil )
    backup_filename    = "TwitterBackup_#{@screen_name}-[#{Time.now.to_datetime}].xml"
    backup_xmldoc      = REXML::Document.new( "<?xml version='1.0' encoding='UTF-8'?>" )
    statuses_aggregate = backup_xmldoc.add_element( 'statuses', {'type' => 'array'} )
    partial_xmldoc     = nil

    if ( pages_fetch != nil )
      pages = pages_fetch
    else
      # Pages needed to cover all statuses; add one page for the remainder.
      pages  = @statuses_count / NSTATUS_PER_REQ
      pages += @statuses_count % NSTATUS_PER_REQ != 0 ? 1 : 0
    end

    begin
      pages.times do |page|
        partial_xmldoc = get_user_timeline( page + 1 )
        status_count   = partial_xmldoc.root.elements.size
        statuses_aggregate.add_element( partial_xmldoc.root )
        print( "Fetched page# #{page+1}/#{pages} (status count=#{status_count}). Waiting #{SLEEP_SECOND} seconds.\n" )
        sleep( SLEEP_SECOND ) if page + 1 != pages
      end
    rescue => e
      p e
    ensure
      # Write out whatever has been collected so far, even if a request failed.
      File.open( backup_filename, 'w' ) { |f| backup_xmldoc.write( f ) }
    end
  end
end
if __FILE__ == $0
  abort( "Usage : $ ruby TwitterBackup.rb [user name]" ) if ARGV[0].nil?
  ts = TwitterStatuses.new( ARGV[0] )
  ts.update_statuses_count
  ts.fetch_statuses
end
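The backup file nests one <statuses> element per fetched page under the aggregate root. Below is a minimal sketch for listing the saved tweet texts, assuming each <status> element carries a <text> child as in the Twitter REST API v1 user_timeline XML; the script name read_backup.rb is only an illustration.

# read_backup.rb : prints the text of every status stored in a backup file.
# Usage : $ ruby read_backup.rb [backup file]
require 'rexml/document'

doc = REXML::Document.new( File.read( ARGV[0] ) )
REXML::XPath.each( doc, "//status/text" ) do |text_element|
  puts text_element.text
end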