require 'csv'
require 'time' # Time.parse
require 'numo/gnuplot' # You must run `gem install numo-gnuplot`
# Counts, per calendar day of YEAR, the tweets whose text contains KEYWORD
# and plots those daily counts with gnuplot.
#
# Input:  a CSV file with a header row (path taken from ARGV[0], default
#         'tweets.csv'). The columns read are `retweeted_status_id`,
#         `timestamp` and `text`.
# Output: two totals on stdout and, unless the DEBUG environment variable is
#         set, the image tweet_counts.png. With DEBUG set the png
#         terminal/output are not configured and the script waits for a line
#         on standard input before finishing.

KEYWORD = 'にのめ'.freeze
# Calendar year that is both filtered on and used for the plot's x range.
YEAR = 2016

tweets_file_path = ARGV[0] || 'tweets.csv'

# Convert from CSV to array of Hash. CSV.foreach opens and closes the file
# itself, so no file handle is left open.
tweets = CSV.foreach(tweets_file_path, headers: true).map(&:to_hash)

# Exclude retweets. An empty unquoted CSV field is parsed as nil, so
# normalise with to_s before testing for emptiness.
tweets = tweets.select { |t| t['retweeted_status_id'].to_s.empty? }

# Convert timestamp to a Time object in the local zone, so that the year
# filter and the per-day grouping below agree with each other regardless of
# the UTC offset written in the CSV.
tweets.each { |t| t['timestamp'] = Time.parse(t['timestamp']).localtime }

# Select tweets tweeted in YEAR only. Later years are excluded as well,
# because the plot's x range covers nothing but YEAR.
this_year_tweets = tweets.select { |t| t['timestamp'].year == YEAR }

# Group tweets by same date.
this_year_tweets_by_date = this_year_tweets.group_by { |t| t['timestamp'].to_date }

# Count tweets per date: all of them, and those containing KEYWORD.
all_tweet_counts = this_year_tweets_by_date.map { |date, ts| [date, ts.size] }.to_h
ninome_tweet_counts = this_year_tweets_by_date.map do |date, ts|
  [date, ts.count { |t| t['text'].to_s.include?(KEYWORD) }]
end.to_h

# reduce(0, :+) yields 0 (not nil) when there are no dates at all.
puts "All tweets: ##{all_tweet_counts.values.reduce(0, :+)}"
puts "Tweets including \"#{KEYWORD}\": ##{ninome_tweet_counts.values.reduce(0, :+)}"

x = ninome_tweet_counts.keys.map(&:to_s)
y = ninome_tweet_counts.values

# Nothing to draw: stop here instead of failing on `y.max + 1` with nil.
if y.empty?
  warn "No tweets from #{YEAR} to plot."
  exit
end

y_upper = y.max + 1

Numo.gnuplot do
  set 'datafile separator " "'
  set 'xdata time'
  set 'timefmt "%Y-%m-%d"'
  set "xrange [\"#{YEAR}-01-01\":\"#{YEAR}-12-31\"]"
  set "yrange [0:#{y_upper}]"
  set 'format x "%m/%d"'
  unset 'key'
  unless ENV['DEBUG']
    set 'terminal png size 1280,320'
    set 'output "tweet_counts.png"'
  end
  plot x, y, using: '1:2', with: 'impulses', lw: 2
  # Read from $stdin explicitly: a bare `gets` reads from the files named in
  # ARGV when ARGV is not empty, i.e. from the tweets file itself.
  $stdin.gets if ENV['DEBUG']
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment