Skip to content

Instantly share code, notes, and snippets.

@kejadlen
Created July 25, 2012 01:13
Show Gist options
  • Save kejadlen/3173786 to your computer and use it in GitHub Desktop.
Save kejadlen/3173786 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'fastercsv'
require 'tweet'
# Incremental archiver: writes the tweets that are newer than the most recent
# CSV snapshot in this directory into a new CSV file.
base_path = File.dirname(__FILE__)

# Every snapshot file is named after a timestamp, so the greatest parsed
# file name marks where the previous run stopped.
snapshot_times = Dir["#{base_path}/*.csv"].map do |path|
  DateTime.parse(File.basename(path, '.csv'))
end
last_update = snapshot_times.max # nil when no snapshot exists yet

feed = Tweet.new(ARGV.shift)
newest_time = feed.current_tweet.last

# Nothing to do when the current tweet is not newer than the last snapshot.
up_to_date = last_update && newest_time <= last_update

unless up_to_date
  snapshot_path = File.join(base_path, "#{newest_time}.csv")
  FasterCSV.open(snapshot_path, 'w') do |csv|
    while (entry = feed.succ)
      text, posted_at = entry
      # Stop at the first tweet that an earlier snapshot already covers.
      break if last_update && posted_at <= last_update
      csv << [text, posted_at.to_s]
    end
  end
end
#!/usr/bin/env ruby
require 'fastercsv'
require 'gchart'
require 'tweet'
# Yearly statistics: reads every CSV snapshot in this directory, tallies the
# tweets of one year per month and per hour, and prints chart URLs.
base_path = File.dirname(__FILE__)
year = 2007

month_data = Array.new(12, 0)
hour_data = Array.new(24, 0)
reply_data = Hash.new(0) # replies per mentioned account (tallied, not charted)

# Matches a mention link whose text equals the linked account name.
mention_link = /@<a href="\/([^"]+)">\1<\/a>/

Dir["#{base_path}/*.csv"].each do |csv_path|
  FasterCSV.foreach(csv_path) do |row|
    text = row.first
    posted_at = DateTime.parse(row.last)
    next unless posted_at.year == year

    month_data[posted_at.month - 1] += 1
    # Hours are shifted back by eight before bucketing.
    hour_data[(posted_at.hour - 8) % 24] += 1
    reply_data[Regexp.last_match(1)] += 1 if text =~ mention_link
  end
end

hour_axis = "0:|#{(0..23).to_a.join('|')}|1:|#{hour_data.min}|#{hour_data.max}"
hour_chart = GChart.line(
  :title => 'Tweets per Hour',
  :data => hour_data,
  :width => 400,
  :height => 300,
  :extras => { 'chxt' => 'x,y', 'chxl' => hour_axis }
)
puts hour_chart.to_url

month_axis = "0:|#{Date::ABBR_MONTHNAMES.compact.join('|')}|1:|#{month_data.min}|#{month_data.max}"
month_chart = GChart.bar(
  :title => 'Tweets per Month',
  :data => month_data,
  :width => 400,
  :height => 300,
  :extras => { 'chxt' => 'x,y', 'chxl' => month_axis },
  :orientation => :vertical
)
puts month_chart.to_url
require 'hpricot'
require 'open-uri'
# Scrapes a user's Twitter profile page with Hpricot and hands the tweets out
# one at a time, in the order they appear on the page, fetching further pages
# on demand.
#
# Every tweet is a two-element array: [html_string, DateTime].
class Tweet
  # Fetches the first profile page of +user+ and queues its tweets.
  #
  # @param user [String] account name appended to http://twitter.com/
  def initialize(user)
    @user_url = "http://twitter.com/#{user}"
    @doc = load_page(@user_url)
    @page = 1
    # The highlighted tweet is queued first, followed by the listed ones.
    @tweets = [current_tweet]
    @tweets += page_to_tweets
  end

  # The tweet shown in the 'div.desc' section of the currently loaded page.
  #
  # @return [Array(String, DateTime)] inner HTML and timestamp
  def current_tweet
    body, stamp = @doc / 'div.desc' / 'p'
    [body.inner_html, timestamp_of(stamp)]
  end

  # All tweets listed in the 'tr.hentry' rows of the currently loaded page.
  #
  # @return [Array<Array(String, DateTime)>]
  def page_to_tweets
    (@doc / 'div.tab' / 'tr.hentry').map do |row|
      body, stamp = row / 'span'
      # Pattern kept exactly as before so archived text does not change; it
      # drops whitespace padding in front of the markup.
      html = body.inner_html.gsub(/^\s*(.*)\s*$/, '\1')
      [html, timestamp_of(stamp)]
    end
  end

  # Whether the loaded page links to an older page.
  #
  # A page without any pagination link yields a falsy result instead of
  # raising NoMethodError on nil, so #succ can finish cleanly on timelines
  # that fit on a single page.
  #
  # @return [Integer, nil] truthy match position, or nil
  def older?
    last_link = (@doc / 'div.tab' / 'div.pagination' / 'a').last
    last_link && last_link.inner_text =~ /Older/
  end

  # Removes and returns the next queued tweet, loading the next page when the
  # queue has run dry.
  #
  # @return [Array(String, DateTime), nil] nil once no tweets are left
  def succ
    if @tweets.empty?
      return nil unless older?
      @page += 1
      @doc = load_page("#{@user_url}?page=#{@page}")
      @tweets = page_to_tweets
    end
    @tweets.shift
  end

  private

  # Downloads +url+ and parses it into an Hpricot document.
  def load_page(url)
    Hpricot(open(url))
  end

  # Reads the timestamp from the 'title' attribute of the node's <abbr>.
  def timestamp_of(node)
    DateTime.parse(node.at('abbr')['title'])
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment