Skip to content

Instantly share code, notes, and snippets.

@adrianshort
Created March 19, 2012 15:46
Show Gist options
  • Save adrianshort/2116789 to your computer and use it in GitHub Desktop.
Save adrianshort/2116789 to your computer and use it in GitHub Desktop.
Download Twitter searches to TSV
#!/usr/bin/ruby
require 'rubygems'
require 'json'
require 'httpclient'
require 'uri'
require 'time'
# URL that every search page request is sent to.
# NOTE(review): this is the legacy search.twitter.com JSON endpoint -- confirm
# it is still being served before relying on this script.
SEARCH_ENDPOINT = 'http://search.twitter.com/search.json'

# Seconds to pause between two consecutive requests.
REQUEST_DELAY = 10

# Number of failed requests in a row after which the download is abandoned
# (previously a non-403 failure was retried forever).
MAX_CONSECUTIVE_FAILURES = 5

# Percent-encodes every byte that may not appear literally in a URL query.
#
# Characters that are already legal in a query string -- including '%', '+',
# '&' and '=' -- are left untouched, so input that the caller escaped by hand
# is passed through byte-for-byte. Only input that could not be sent before
# (spaces, '#', quotes, control and non-ASCII bytes, ...) is rewritten.
#
# @param term [#to_s] raw or pre-escaped search term
# @return [String] ASCII-only text that can be embedded in a query string
def escape_query_term(term)
  # Work on raw bytes so multi-byte characters are escaped byte by byte and
  # invalid byte sequences cannot make gsub raise.
  raw = term.to_s.dup.force_encoding(Encoding::BINARY)
  escaped = raw.gsub(%r{[^A-Za-z0-9\-._~!$&'()*+,;=:@/?%]}) do |byte|
    format('%%%02X', byte.ord)
  end
  escaped.force_encoding(Encoding::US_ASCII)
end

# Parses a response body, returning an empty Hash when the body is not a
# JSON object (for example an HTML error page).
#
# @param body [#to_s] raw response body
# @return [Hash]
def parse_json_object(body)
  parsed = JSON.parse(body.to_s)
  parsed.is_a?(Hash) ? parsed : {}
rescue JSON::ParserError
  {}
end

# Writes each entry of the payload's 'errors' list to $stderr; does nothing
# when the list is missing.
#
# @param payload [Hash] parsed response body
# @return [void]
def report_errors(payload)
  errors = payload['errors']
  return unless errors.is_a?(Array)

  errors.each do |error|
    if error.is_a?(Hash)
      $stderr.puts "ERROR code #{error['code']}: #{error['message']}"
    else
      $stderr.puts "ERROR: #{error}"
    end
  end
end

# Flattens one tweet into a tab-separated line.
#
# Columns: created_at, id_str, from_user, text, source, from_user_id_str,
# to_user, to_user_id_str. Missing values become empty fields.
#
# @param tweet [Hash] one element of the response's 'results' array
# @return [String]
def tweet_row(tweet)
  [
    Time.parse(tweet['created_at']).to_s,
    tweet['id_str'],
    tweet['from_user'],
    # Tabs and line breaks inside the text would corrupt the TSV layout.
    tweet['text'].to_s.gsub(/[\r\n\t]/, ' '),
    tweet['source'],
    tweet['from_user_id_str'],
    tweet['to_user'],
    tweet['to_user_id_str']
  ].join("\t")
end

# Downloads every page of search results for +query+ and prints one
# tab-separated line per tweet to standard output. Progress and error
# messages go to $stderr.
#
# Paging follows the 'next_page' value returned with each page and stops when
# a page has no results or no 'next_page'. A failed request is retried after
# REQUEST_DELAY seconds; the download stops on HTTP 403 or after
# MAX_CONSECUTIVE_FAILURES failures in a row.
#
# @param query [String] search terms; may be raw or already percent-encoded
# @param since_id [String, nil] when given, sent as the since_id parameter of
#   the first request
# @return [Integer] number of tweets written
def backup(query, since_id = nil)
  client = HTTPClient.new
  total = 0
  page = 1
  failures = 0
  next_page = "?q=#{escape_query_term(query)}&rpp=100&page=#{page}&result_type=recent"
  next_page += "&since_id=#{since_id}" unless since_id.nil?

  loop do
    $stderr.puts "Trying page #{page}"
    url = "#{SEARCH_ENDPOINT}#{next_page}"
    $stderr.puts url
    response = client.get(url)
    payload = parse_json_object(response.body)

    if response.status_code == 200
      failures = 0
      $stderr.puts "Got page #{page} OK"
      results = Array(payload['results'])
      results.each { |tweet| puts tweet_row(tweet) }
      $stderr.puts "#{results.size} tweets processed"
      total += results.size
      page += 1
      next_page = payload['next_page']
      # Without a next_page there is nothing left to request; carrying on
      # would hit the endpoint with no query at all.
      break if results.empty? || next_page.nil?
    else
      # Some kind of error: report it, then retry the same URL unless the
      # server refused outright or the failures keep piling up.
      failures += 1
      $stderr.puts "HTTP #{response.status_code} status code"
      report_errors(payload)
      break if response.status_code == 403

      if failures >= MAX_CONSECUTIVE_FAILURES
        $stderr.puts "Giving up after #{failures} consecutive failed requests"
        break
      end
    end

    sleep(REQUEST_DELAY)
  end

  $stderr.puts "#{total} tweets collected"
  total
end
# Script entry point: the first two command-line arguments are removed from
# ARGV and handed to `backup` as the query and since_id (nil when absent).
query, since_id = ARGV.shift(2)
backup(query, since_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment