Skip to content

Instantly share code, notes, and snippets.

@rob-murray
Created September 17, 2012 18:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rob-murray/3739021 to your computer and use it in GitHub Desktop.
Save rob-murray/3739021 to your computer and use it in GitHub Desktop.
Basic script to scrape Twitter for tweets matching a custom filter
require "twitter"
require "logger"
require "csv"
require "geocoder"
# Goal:
# - fetch all of mr_c's tweets
# - for each tweet that is a reply, find who mr_c replied to
# - from that person, work out where they are located
# CSV columns written for each tweet, in row order:
# mr_c tweet_id, mr_c tweet text, reply_to tweet id, reply_to_userid,
# reply_to_username, reply_to tweet text, reply_lon, reply_lat, place
#get page
# Load the page number for this run from the state file; the same path is
# rewritten with the next page number once the run has finished.
file_path = '/root/scripts/mr-correcter/page.tmp'
# The block form of File.open closes the handle as soon as the read is done,
# instead of leaving it open for the rest of the script.
page_str = File.open(file_path, "rb") { |file| file.read }
# Integer() raises ArgumentError when the file does not hold a whole number.
PAGE = Integer(page_str)
# Request parameters handed to Twitter.user_timeline when downloading tweets.
# The page is sent as a string; retweets and replies are both kept.
options = {
  page: PAGE.to_s,
  include_entities: true,
  include_rts: true,
  exclude_replies: false,
  count: 25
}
puts "Init with page #{PAGE}"
# Roughly 246 tweets in total, so 3 pages of 100, 6 of 50, 12 of 25, etc.
pages = [1]
tweets = []
pages.each do |_page|
  # The per-iteration page override is disabled, so every pass requests the
  # page already stored in options:
  # options[:page] = _page
  tweets.concat(Twitter.user_timeline("mr_correcter", options))
end
puts "Downladed #{tweets.size} tweets"
puts "Processing..."
# Write one CSV row per downloaded tweet. When the tweet is a reply, the tweet
# it answers is fetched as well and a location is appended for it: taken from
# the replied-to tweet's place when it has one, otherwise from the replied-to
# user's profile location.
#
# Row layout (matches the header below):
#   tweet id, tweet text, replied-to tweet id, replied-to user id,
#   replied-to screen name, replied-to tweet text, longitude, latitude,
#   "city, country", raw profile location (profile fallback only)
#
# Rows for tweets that are not replies stop after the screen-name column, and
# a failed lookup appends a single "Error: ..." cell where the failure
# happened.
#
# CSV options are given as keyword arguments because Ruby 3 rejects a
# positional options hash here.
CSV.open("tw_#{PAGE}.csv", "wb", col_sep: ",") do |csv|
  # The header names every value a row can carry, including the replied-to
  # user id and the trailing raw profile location.
  csv << [
    "mr_c-tweet_id", "mr_c-tweet_text", "reply_to-tweet id", "reply_to-user_id",
    "reply_to-username", "reply_to-tweet_text", "reply_long", "reply_lat",
    "reply_geo_loc", "reply_user_location"
  ]

  tweets.each do |t|
    row = [
      t.id.to_s,
      t.text.to_s,
      t.in_reply_to_status_id.to_s,
      t.in_reply_to_user_id.to_s,
      t.in_reply_to_screen_name.to_s
    ]

    if t.in_reply_to_status_id
      begin
        tweet = Twitter.status(t.in_reply_to_status_id)
        row.push(tweet.text.to_s)

        if tweet.place
          # First entry of the bounding box coordinates: index 0 is used as
          # the longitude and index 1 as the latitude.
          corner = tweet.place.bounding_box.coordinates[0][0]
          lon = corner[0]
          lat = corner[1]
          geo_res = Geocoder.search("#{lat},#{lon}").first
          row.push(lon, lat)
          # The search may return no match; leave the cell empty in that case
          # rather than calling methods on nil.
          row.push(geo_res && "#{geo_res.city}, #{geo_res.country}")
        else
          usr_loc = Twitter.user(t.in_reply_to_user_id).location
          if usr_loc
            usr_loc_geo = Geocoder.search(usr_loc.to_s).first
            if usr_loc_geo
              row.push(
                usr_loc_geo.longitude,
                usr_loc_geo.latitude,
                "#{usr_loc_geo.city}, #{usr_loc_geo.country}"
              )
            else
              # Pad the three geocoded cells so the raw location below does
              # not slide into the longitude column.
              row.push(nil, nil, nil)
            end
            row.push(usr_loc.to_s)
          end
        end
      rescue StandardError => e
        # Best effort: note the failure in the row and carry on with the next
        # tweet. StandardError leaves Interrupt and SystemExit alone so the
        # script can still be stopped.
        row.push("Error: #{e.message}")
        puts "Error: #{e.message}"
      end
    end

    csv << row
  end
end
#puts Twitter.status(227376948400230400).place.bounding_box.coordinates
puts "done."
# Store the following page number in the state file that PAGE was read from.
File.open(file_path, 'w+') do |state_file|
  state_file.write((PAGE + 1).to_s)
end
@rob-murray
Copy link
Author

That's not pretty ;)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment