public
Created

Plot a time series of a hashtag where height is tweet count and color is unique user count.

  • Download Gist
plotHashtag2.R
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
 
# @author: Bommarito Consulting, LLC; http://michaelbommarito.com/
# @date: May 21, 2012
# @email: michael@bommaritollc.com
# @packages: ggplot2, plyr
 
# Import the packages named in the header (@packages: ggplot2, plyr).
# NOTE: the former rm(list=ls()) call was dropped on purpose. Wiping the
# caller's global environment is a destructive side effect, and every variable
# this script reads is assigned before it is used, so no clean workspace is
# required for the script to run correctly.
library(ggplot2)
library(plyr)
 
# Controlling parameters.
hashtag <- "#nonato" # Hashtag for label purposes

# First timestamp we will consider: midnight on 2012-01-11, US Eastern time.
# The zone must be a valid Olson name. The previous value "EDT" is not one; R
# treats unknown zone names as UTC (often without any warning), so the cutoff
# silently became midnight UTC. "America/New_York" yields the intended local
# midnight (EST in January, i.e. 05:00 UTC).
cutoff <- as.POSIXct("2012-01-11 00:00:00", tz = "America/New_York")

dt <- 30 # \Delta t, minutes
 
# Load and pre-process tweets.
# The dump is tab-separated with no header row; quoting and comment characters
# are disabled so that tweet text is read verbatim. At most 300000 rows are
# read, and exact duplicate rows are discarded.
tweets <- read.table(
  "data/tweets.csv",
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  nrows = 300000,
  stringsAsFactors = FALSE
)
tweets <- unique(tweets)
colnames(tweets) <- c("id", "date", "user", "text")

# Parse the timestamp strings (the format carries a numeric UTC offset via %z)
# into POSIXct.
tweets$date <- as.POSIXct(
  strptime(tweets$date, format = "%a, %d %b %Y %H:%M:%S %z", tz = "GMT")
)

# Keep only rows strictly after the cutoff; rows whose date failed to parse
# (NA) are dropped as well.
tweets <- tweets[!is.na(tweets$date) & tweets$date > cutoff, ]
 
# Build date breaks: bin edges every dt minutes, from the earliest tweet to
# one bin width past the latest tweet so that every tweet falls inside a bin.
# Fail early with a clear message instead of letting min()/seq() error out
# cryptically when the cutoff filter left no rows.
if (nrow(tweets) == 0) {
  stop("No tweets remain after applying the cutoff; nothing to bin.")
}
minDate <- min(tweets$date)
maxDate <- max(tweets$date) + 60 * dt
dateBreaks <- seq(minDate, maxDate, by = 60 * dt)

# Use hist to count the number of tweets per bin; don't plot.
tweetCount <- hist(tweets$date, breaks = dateBreaks, plot = FALSE)

# Strip out the right-most break, leaving the left endpoint of each bin.
# head(x, -1) states this directly; the previous x[1:length(x)-1] only worked
# because (1:n) - 1 produces a 0 index that R silently drops.
binBreaks <- head(tweetCount$breaks, -1)

# Assign every tweet to exactly one bin using the same convention as hist():
# right-closed intervals (a, b], with the lowest break included in bin 1.
# The earlier closed-on-both-sides window counted a tweet lying exactly on a
# bin edge in two adjacent bins, disagreeing with the tweet counts above.
binIndex <- cut(
  as.numeric(tweets$date),
  breaks = as.numeric(tweetCount$breaks),
  labels = FALSE,
  right = TRUE,
  include.lowest = TRUE
)

# Count number of unique tweeters per bin (0 for bins with no tweets).
# vapply guarantees an integer vector with one entry per bin.
userCount <- vapply(
  seq_along(binBreaks),
  function(i) length(unique(tweets$user[which(binIndex == i)])),
  integer(1)
)
 
# Plot data: one bar per time bin, bar height = number of tweets, bar colour =
# number of unique users in that bin.
plotData <- data.frame(
  dates = head(dateBreaks, -1),               # left endpoint of each bin
  tweets = as.numeric(tweetCount$counts),     # full name; no partial matching
  users = as.numeric(userCount)
)

# The outer parentheses keep the original top-level behaviour (the plot is
# auto-printed exactly when the bare expression would have been) while also
# capturing the plot object so it can be passed to ggsave() explicitly rather
# than relying on the implicit "last plot".
# ggtitle() replaces opts(title = ...), which is deprecated and no longer
# available in current ggplot2; the title is built from `hashtag` so the label
# follows the controlling parameter instead of a hard-coded copy.
(tweetPlot <- ggplot(plotData) +
  geom_bar(aes(x = dates, y = tweets, color = users), stat = "identity") +
  scale_x_datetime("Date") +
  scale_y_continuous("Number of tweets") +
  ggtitle(paste("Number of tweets and unique users :", hashtag)))

ggsave("fig/ts_tweet_user.jpg", plot = tweetPlot, width = 12, height = 8)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.