Skip to content

Instantly share code, notes, and snippets.

@jenningsanderson
Created February 27, 2014 18:26
Show Gist options
  • Save jenningsanderson/9255819 to your computer and use it in GitHub Desktop.
Save jenningsanderson/9255819 to your computer and use it in GitHub Desktop.
require 'json'
require 'csv'
require 'time'
# Input: a dump with one JSON-encoded tweet per line.
# Output: the per-user CSV summary.
# Both default to the original author's local paths; override them with
#   ruby <script> INPUT.json OUTPUT.csv
file_in = ARGV[0] || '/Users/jenningsanderson/Documents/Boulder_Floods/2013ColoradoFloods.json'
file_out = ARGV[1] || "/Users/jenningsanderson/Dropbox/SocialComputing/GroupMiniProject-GeoCoded/geo_tagged_counts.csv"
# First pass over the tweet dump: collects the screen name of every user who
# posted at least one tweet with a non-null 'coordinates' field.
#
# in_file - path to a file holding one JSON-encoded tweet per line.
#
# Returns an Array of unique screen names in order of first appearance.
# Blank lines and records that are not tweets (no 'user' hash) are skipped.
#
# NOTE(review): JSON.load is documented as meant for trusted input; consider
# JSON.parse if the dump can come from an untrusted source.
def get_geo_users(in_file)
  seen = {}           # screen_name => true, for O(1) duplicate checks
  screen_names = []   # preserves first-seen order for the caller
  ticker = 0

  # File.foreach closes the file when the block finishes (or raises).
  File.foreach(in_file) do |line|
    next if line.strip.empty?

    # Load the tweet object -- this is computationally expensive, do it once
    tweet = JSON.load(line)
    ticker += 1

    if tweet.is_a?(Hash) && tweet['coordinates']
      user = tweet['user']
      handle = user.is_a?(Hash) ? user['screen_name'] : nil
      if handle && !seen.key?(handle)
        seen[handle] = true
        screen_names << handle
      end
    end

    # Progress logging; ticker already includes the tweet just processed.
    puts "Processed #{ticker} tweets, found #{screen_names.size} users" if ticker % 10000 == 0
  end

  screen_names
end
# Second pass over the tweet dump: tallies per-user statistics into +geo+ and
# writes one CSV row per user.
#
# geo        - Hash of screen_name => counter Hash. Only tweets whose author is
#              already a key are tallied. Each counter Hash must return a
#              number for missing keys (the script builds them with
#              Hash.new(0.0)); the counters are mutated in place.
# in_file    - path to a file holding one JSON-encoded tweet per line.
# write_file - path of the CSV to write (opened in "w" mode).
#
# CSV columns: Handle, Total, Geo, Ratio (Geo/Total), ChangeTimes,
# MobileCount, Mobile/Geo, RT, InterestLevel (Geo/RT). A ratio whose
# denominator is zero is written as an empty cell.
#
# Blank lines and records without a 'user' hash are skipped, matching what the
# first pass tolerates.
#
# NOTE(review): JSON.load is documented as meant for trusted input; consider
# JSON.parse if the dump can come from an untrusted source.
def write_csv(geo, in_file, write_file)
  ticker = 0
  mobiles = ['phone', 'instagram', 'ipad', 'ios', 'android', 'blackberry', 'mobile', 'vine', 'foursquare']
  # Guarded division so a user with no retweets (or no tweets) gets a blank
  # cell instead of Infinity/NaN or a ZeroDivisionError.
  ratio = lambda { |num, den| den.zero? ? nil : num / den }

  CSV.open(write_file, "w") do |csv|
    csv << ['Handle', 'Total', 'Geo', 'Ratio', 'ChangeTimes', 'MobileCount', 'Mobile/Geo', 'RT', 'InterestLevel']

    # File.foreach closes the input when the block finishes (or raises).
    File.foreach(in_file) do |line|
      next if line.strip.empty?

      # Load the tweet object -- this is computationally expensive, do it once
      tweet = JSON.load(line)
      user = tweet.is_a?(Hash) ? tweet['user'] : nil
      handle = user.is_a?(Hash) ? user['screen_name'] : nil
      tally_tweet(geo[handle], tweet, mobiles) if geo.key?(handle)

      # Logging
      ticker += 1
      puts "Processed #{ticker} tweets" if ticker % 10000 == 0
    end

    # Writing
    geo.each do |handle, stats|
      csv << [
        handle,
        stats[:total],
        stats[:geo],
        ratio.call(stats[:geo], stats[:total]),
        stats[:changed],
        stats[:mobile],
        ratio.call(stats[:mobile], stats[:geo]),
        stats[:RT],
        ratio.call(stats[:geo], stats[:RT])
      ]
    end
  end
end

# Folds one parsed tweet into a single user's counter Hash (mutated in place).
#
# stats   - the user's counter Hash; missing keys must read as a number.
# tweet   - Hash parsed from one line of the dump.
# mobiles - lower-case substrings that mark a 'source' client as mobile.
def tally_tweet(stats, tweet, mobiles)
  tagged = tweet['coordinates'] ? 1 : 0

  stats[:total] += 1
  if stats[:total] < 2
    # First tweet seen for this user: seed the geo on/off state from it so it
    # is not counted as a change below.
    stats[:changed] = 0
    stats[:on] = tagged
  end

  # The /i flag makes the MT/RT prefix check case-insensitive.
  if tweet['text'].to_s =~ /^(MT|RT)/i
    stats[:RT] += 1 # total retweets; this is the counter the CSV row reads
    stats[tagged == 1 ? :TaggedRT : :nonTaggedRT] += 1
  end

  stats[:geo] += 1 if tagged == 1

  # A change is a tweet whose geo-tagged state differs from the previous one.
  stats[:changed] += 1 unless stats[:on] == tagged
  stats[:on] = tagged

  # Count the tweet at most once even if several keywords match its source.
  source = tweet['source'].to_s.downcase
  stats[:mobile] += 1 if mobiles.any? { |keyword| source.include?(keyword) }
end
# Pass 1: every screen name returned by get_geo_users for the input file.
users = get_geo_users(file_in)
puts "Total number of unique screen_names: #{users.size}, now calculating geo"

# Build the users hash: one independent counter hash per screen name, whose
# missing keys read as 0.0 so write_csv can increment them with +=.
geo = users.each_with_object({}) do |user_name, counters|
  counters[user_name] ||= Hash.new(0.0)
end

# Pass 2: tally per-user counts and write the CSV.
write_csv(geo, file_in, file_out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment