Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Basic script to scrape Twitter for tweets matching a custom filter
require "twitter"
require "logger"
require "csv"
require "geocoder"
# Goal:
# - collect all mr_c posts
# - if a post is a reply, find out who mr_c replied to
# - from that user, work out where they are located
# Planned CSV data:
# mr_c tweet_id, mr_c tweet text, reply_to tweet id,
# reply_to tweet text, reply_to_userid, reply_to_username, reply_lon, lat, place
# Load the page number to request from the state file. The same file is
# rewritten with the following page number at the end of the script.
file_path = '/root/scripts/mr-correcter/page.tmp'
# binread opens in binary mode like the former "rb" open, but also closes the
# file; the previous File.open without a block left the handle open.
page_str = File.binread(file_path)
# Integer() raises ArgumentError when the file does not hold a number.
PAGE = Integer(page_str)
# Parameters handed to Twitter.user_timeline further down. The page comes from
# the state file and is sent as a string.
options = {
  page: PAGE.to_s,
  include_entities: true,
  include_rts: true,
  exclude_replies: false,
  count: 25
}
puts "Init with page #{PAGE}"
# Roughly 246 tweets in total, so 3 pages of 100, 6 of 50, 12 of 25, etc.
# The list holds a single entry and its value is not used: the page that is
# actually requested is the one already stored in options[:page].
pages = [1]
tweets = []
pages.each do |_page|
  # Append this request's tweets to the running list.
  tweets.concat(Twitter.user_timeline("mr_correcter", options))
end
puts "Downladed #{tweets.size} tweets"
puts "Processing..."
# Write one CSV row per downloaded tweet to tw_<PAGE>.csv.
#
# Every row starts with the tweet's own id and text plus the reply-to ids and
# screen name. When the tweet is a reply, the replied-to tweet is fetched and
# its text and a location are appended: the location comes from the tweet's
# place when it has one, otherwise from the author's profile location.
#
# Options are given as keywords; Ruby 3 rejects a positional options hash here.
CSV.open("tw_#{PAGE}.csv", "wb", col_sep: ",") do |csv|
  # One header per field pushed below, in the same order. The user id column
  # used to be missing, which shifted every later header by one.
  csv << [
    "mr_c-tweet_id", "mr_c-tweet_text", "reply_to-tweet id", "reply_to-user_id",
    "reply_to-username", "reply_to-tweet_text", "reply_long", "reply_lat",
    "reply_geo_loc", "reply_user_loc"
  ]
  tweets.each do |t|
    #puts "id: #{t.id}"
    #puts "text: #{t.text}"
    #puts "reply_to id: #{t.in_reply_to_status_id}"
    #puts "to_user_id: #{t.in_reply_to_user_id}"
    row = [
      t.id.to_s,
      t.text.to_s,
      t.in_reply_to_status_id.to_s,
      t.in_reply_to_user_id.to_s,
      t.in_reply_to_screen_name.to_s
    ]
    if t.in_reply_to_status_id
      begin
        reply = Twitter.status(t.in_reply_to_status_id)
        row.push(reply.text.to_s)
        if reply.place
          # First point of the place's bounding box, read as [long, lat].
          corner = reply.place.bounding_box.coordinates[0][0]
          lon = corner[0]
          lat = corner[1]
          geo = Geocoder.search("#{lat},#{lon}").first
          row.push(lon, lat)
          # The lookup may return nothing; leave the place name blank instead
          # of raising on nil.
          row.push(geo ? "#{geo.city}, #{geo.country}" : nil)
        else
          usr_loc = Twitter.user(t.in_reply_to_user_id).location
          if usr_loc
            geo = Geocoder.search(usr_loc.to_s).first
            if geo
              row.push(geo.longitude, geo.latitude, "#{geo.city}, #{geo.country}")
            else
              # Pad the three geo columns so the raw profile location below
              # stays in its own column.
              row.push(nil, nil, nil)
            end
            row.push(usr_loc.to_s)
          end
        end
      rescue StandardError => e
        # Best effort: keep whatever was collected for this row and append the
        # error text. StandardError (not Exception) lets Ctrl-C and exit
        # requests stop the script.
        row.push("Error: #{e.message}")
        puts "Error: #{e.message}"
      end
    end
    csv << row
  end
end
#puts Twitter.status(227376948400230400).place.bounding_box.coordinates
puts "done."
# Store the following page number in the state file that was read at startup.
next_page = PAGE + 1
File.open(file_path, 'w+') do |state_file|
  state_file.write(next_page.to_s)
end
Owner

rob-murray commented Oct 7, 2013

Thats not pretty ;)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment