rob-murray/mr-c-twitter-data.rb

## mr-c-twitter-data.rb
require "twitter"
require "logger"
require "csv"
require "geocoder"


# Need to get:
# - all mr_c posts
# - for each post that is a reply, the tweet (and user) mr_c replied to
# - from that tweet or user, work out the location


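# CSV row values, in the order they are written:
# mr_c tweet_id, mr_c tweet text, reply_to tweet id, reply_to_userid,
# reply_to_username, reply_to tweet text, reply_lon, reply_lat, place
# (the replied-to user's profile location, when used, is appended last)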

# Read the page number to process from the page file. The block form of
# File.open closes the handle as soon as the contents have been read.
file_path = '/root/scripts/mr-correcter/page.tmp'
page_str = File.open(file_path, "rb") { |f| f.read }

# Integer() raises ArgumentError when the file does not hold a valid integer,
# so a corrupt page file stops the script here.
PAGE = Integer(page_str)

# Request options handed to Twitter.user_timeline.
options = {
  :page => PAGE.to_s,
  :include_entities => true,
  :include_rts => true,
  :exclude_replies => false,
  :count => 25
}

puts "Init with page #{PAGE.to_s}"

# Roughly 246 tweets in total: 3 pages of 100, 6 of 50, 12 of 25, etc.
# Only one request is made per run; the page to fetch comes from `options`.
pages = [1]
tweets = []

pages.each do |_page|
  # options[:page] = _page
  batch = Twitter.user_timeline("mr_correcter", options)
  tweets += batch # append this batch to what has been collected so far
end

puts "Downladed #{tweets.size} tweets"
puts "Processing..."


# Write one CSV row per downloaded tweet to tw_<PAGE>.csv. For tweets that are
# replies, the replied-to tweet is fetched and a location is resolved from that
# tweet's place or, when it has none, from the replied-to user's profile
# location.
#
# The CSV options are passed as keywords (no braces) so the call also works on
# Ruby 3+, where a positional options hash is no longer treated as keywords.
CSV.open("tw_#{PAGE}.csv", "wb", :col_sep => ",") do |csv|
  # One header per value pushed below, in push order.
  csv << [
    "mr_c-tweet_id", "mr_c-tweet_text", "reply_to-tweet id", "reply_to-user_id",
    "reply_to-username", "reply_to-tweet_text", "reply_long", "reply_lat",
    "reply_geo_loc", "reply_user_loc"
  ]

  tweets.each do |t|
    row = [
      t.id.to_s,
      t.text.to_s,
      t.in_reply_to_status_id.to_s,
      t.in_reply_to_user_id.to_s,
      t.in_reply_to_screen_name.to_s
    ]

    if t.in_reply_to_status_id
      begin
        reply = Twitter.status(t.in_reply_to_status_id)
        row << reply.text.to_s

        if reply.place
          # First point of the place's bounding box, read as [lon, lat].
          corner = reply.place.bounding_box.coordinates[0][0]
          lon = corner[0]
          lat = corner[1]

          geo = Geocoder.search("#{lat},#{lon}").first

          row << lon << lat
          # The search may yield no result; leave the place column empty then
          # instead of failing on nil after lon/lat were already written.
          row << (geo ? "#{geo.city}, #{geo.country}" : nil)
        else
          user_location = Twitter.user(t.in_reply_to_user_id).location

          if user_location
            geo = Geocoder.search(user_location.to_s).first

            if geo
              row << geo.longitude << geo.latitude << "#{geo.city}, #{geo.country}"
            else
              # Pad lon/lat/place so the raw location stays in its own column.
              row << nil << nil << nil
            end

            row << user_location.to_s
          end
        end
      rescue StandardError => e
        # Best effort: record the failure in the row and carry on with the
        # next tweet. StandardError (not Exception) lets Ctrl-C and exit
        # requests still stop the script.
        row << "Error: #{e.message}"
        puts "Error: #{e.message}"
      end
    end

    csv << row
  end
end
#puts Twitter.status(227376948400230400).place.bounding_box.coordinates

puts "done."

# Store the following page number in the page file, replacing its contents.
next_page = PAGE + 1
File.open(file_path, 'w+') do |page_file|
  page_file.write(next_page.to_s)
end
	require "twitter"
	require "logger"
	require "csv"
	require "geocoder"


	# Need to get:
	# - all mr_c posts
	# - for each post that is a reply, the tweet (and user) mr_c replied to
	# - from that tweet or user, work out the location


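	# CSV row values, in the order they are written:
	# mr_c tweet_id, mr_c tweet text, reply_to tweet id, reply_to_userid,
	# reply_to_username, reply_to tweet text, reply_lon, reply_lat, place
	# (the replied-to user's profile location, when used, is appended last)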

	# Read the page number to process from the page file. The block form of
	# File.open closes the handle as soon as the contents have been read.
	file_path = '/root/scripts/mr-correcter/page.tmp'
	page_str = File.open(file_path, "rb") { |f| f.read }

	# Integer() raises ArgumentError when the file does not hold a valid integer,
	# so a corrupt page file stops the script here.
	PAGE = Integer(page_str)

	# Request options handed to Twitter.user_timeline.
	options = {
	  :page => PAGE.to_s,
	  :include_entities => true,
	  :include_rts => true,
	  :exclude_replies => false,
	  :count => 25
	}

	puts "Init with page #{PAGE.to_s}"

	# Roughly 246 tweets in total: 3 pages of 100, 6 of 50, 12 of 25, etc.
	# Only one request is made per run; the page to fetch comes from `options`.
	pages = [1]
	tweets = []

	# Block parameters use plain pipes; the backslash-escaped form is not valid Ruby.
	pages.each do |_page|
	  # options[:page] = _page
	  batch = Twitter.user_timeline("mr_correcter", options)
	  tweets += batch # append this batch to what has been collected so far
	end

	puts "Downladed #{tweets.size} tweets"
	puts "Processing..."


	# Write one CSV row per downloaded tweet to tw_<PAGE>.csv. For tweets that are
	# replies, the replied-to tweet is fetched and a location is resolved from that
	# tweet's place or, when it has none, from the replied-to user's profile
	# location.
	#
	# The CSV options are passed as keywords (no braces) so the call also works on
	# Ruby 3+, where a positional options hash is no longer treated as keywords.
	CSV.open("tw_#{PAGE}.csv", "wb", :col_sep => ",") do |csv|
	  # One header per value pushed below, in push order.
	  csv << [
	    "mr_c-tweet_id", "mr_c-tweet_text", "reply_to-tweet id", "reply_to-user_id",
	    "reply_to-username", "reply_to-tweet_text", "reply_long", "reply_lat",
	    "reply_geo_loc", "reply_user_loc"
	  ]

	  tweets.each do |t|
	    row = [
	      t.id.to_s,
	      t.text.to_s,
	      t.in_reply_to_status_id.to_s,
	      t.in_reply_to_user_id.to_s,
	      t.in_reply_to_screen_name.to_s
	    ]

	    if t.in_reply_to_status_id
	      begin
	        reply = Twitter.status(t.in_reply_to_status_id)
	        row << reply.text.to_s

	        if reply.place
	          # First point of the place's bounding box, read as [lon, lat].
	          corner = reply.place.bounding_box.coordinates[0][0]
	          lon = corner[0]
	          lat = corner[1]

	          geo = Geocoder.search("#{lat},#{lon}").first

	          row << lon << lat
	          # The search may yield no result; leave the place column empty then
	          # instead of failing on nil after lon/lat were already written.
	          row << (geo ? "#{geo.city}, #{geo.country}" : nil)
	        else
	          user_location = Twitter.user(t.in_reply_to_user_id).location

	          if user_location
	            geo = Geocoder.search(user_location.to_s).first

	            if geo
	              row << geo.longitude << geo.latitude << "#{geo.city}, #{geo.country}"
	            else
	              # Pad lon/lat/place so the raw location stays in its own column.
	              row << nil << nil << nil
	            end

	            row << user_location.to_s
	          end
	        end
	      rescue StandardError => e
	        # Best effort: record the failure in the row and carry on with the
	        # next tweet. StandardError (not Exception) lets Ctrl-C and exit
	        # requests still stop the script.
	        row << "Error: #{e.message}"
	        puts "Error: #{e.message}"
	      end
	    end

	    csv << row
	  end
	end
	#puts Twitter.status(227376948400230400).place.bounding_box.coordinates

	puts "done."

	# Store the following page number in the page file, replacing its contents.
	# The block parameter uses plain pipes; the escaped form is not valid Ruby.
	next_page = PAGE + 1
	File.open(file_path, 'w+') do |page_file|
	  page_file.write(next_page.to_s)
	end