Created
May 30, 2011 14:28
-
-
Save vaiorabbit/998972 to your computer and use it in GitHub Desktop.
Collects tweets posted by a specified user.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8-unix -*-
#
# Tested on :
# - Ruby 1.9.2-p180 (2011-02-18 revision 30909) [x86_64-darwin10.7.0]
#
# Usage : $ ruby TwitterBackup.rb [user name]
#
require 'date'
require 'net/http'
require 'rexml/document'
require 'uri'
class TwitterStatuses | |
NSTATUS_PER_REQ = 20 | |
NSTATUS_MAXIMUM = 3200 | |
SLEEP_SECOND = 30 | |
attr_reader :statuses_count | |
def initialize( screen_name ) | |
@screen_name = screen_name | |
@statuses_count = nil | |
end | |
def update_statuses_count | |
uri = "http://api.twitter.com/1/users/show.xml?screen_name=#{@screen_name}" | |
resp = nil | |
begin | |
resp = Net::HTTP.get_response( URI.parse(uri) ) | |
rescue | |
raise RuntimeError, "Connection error." | |
end | |
users_show_xmldoc = REXML::Document.new( resp.body ) | |
@statuses_count = users_show_xmldoc.root.elements["statuses_count"].text.to_i | |
@statuses_count = NSTATUS_MAXIMUM if @statuses_count > NSTATUS_MAXIMUM | |
end | |
def get_user_timeline( page ) | |
uri = "http://api.twitter.com/1/statuses/user_timeline.xml?screen_name=#{@screen_name}&include_rts=1&include_entities=1&page=#{page}" | |
resp = nil | |
begin | |
resp = Net::HTTP.get_response( URI.parse(uri) ) | |
rescue | |
raise RuntimeError, "Connection error." | |
end | |
return REXML::Document.new( resp.body ) | |
end | |
def fetch_statuses( pages_fetch = nil ) | |
backup_filename = "TwitterBackup_#{@screen_name}-[#{Time.now.to_datetime}].xml" | |
backup_xmldoc = REXML::Document.new( "<?xml version='1.0' encoding='UTF-8'?>" ) | |
statuses_aggregate = backup_xmldoc.add_element( 'statuses', {'type' => 'array'} ) | |
partial_xmldoc = nil | |
if ( pages_fetch != nil ) | |
pages = pages_fetch | |
else | |
pages = @statuses_count / NSTATUS_PER_REQ | |
pages += @statuses_count & NSTATUS_PER_REQ != 0 ? 1 : 0 | |
end | |
begin | |
pages.times do |page| | |
partial_xmldoc = get_user_timeline( page + 1 ) | |
status_count = partial_xmldoc.root.elements.size | |
statuses_aggregate.add_element( partial_xmldoc.root ) | |
print( "Fetched page# #{page+1}/#{pages} (status count=#{status_count}). Waiting #{SLEEP_SECOND} seconds.\n" ) | |
sleep( SLEEP_SECOND ) if page + 1 != pages | |
end | |
rescue => e | |
p e | |
ensure | |
backup_xmldoc.write( File.new(backup_filename, 'w') ) | |
end | |
end | |
end | |
# Command-line entry point: backs up the timeline of the user named by the
# first argument. Runs only when this file is executed directly.
if __FILE__ == $0
  screen_name = ARGV[0]
  # Without a screen name the API requests would be issued with an empty
  # parameter; stop early with the usage line instead.
  if screen_name.nil? || screen_name.strip.empty?
    abort( "Usage : $ ruby #{File.basename( $0 )} [user name]" )
  end

  ts = TwitterStatuses.new( screen_name )
  ts.update_statuses_count
  ts.fetch_statuses
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment