Created July 15, 2014 11:10
# Twitter activity analysis: best time of day / day of week to post.
#
# Setup:
# 1. Save tweet_activity_metrics.csv in the same directory as this script.
# 2. Change the local time zone ("America/Los_Angeles") below to your own.
# 3. For each top_n_for_hour / top_n_for_day call, test for statistical
#    significance by comparing the top-n result with the matching table of
#    t-test p-values (the *_t objects).

# Packages providing ggplot()/ggsave(), ddply(), melt() and
# percent_format()/comma_format(), all of which this script calls.
library(ggplot2)
library(plyr)
library(reshape2)
library(scales)

data <- read.csv("tweet_activity_metrics.csv")

# Parse the exported timestamps.
# NOTE(review): the export is read as Europe/London wall-clock time. If the
# file actually holds UTC timestamps this is one hour off during British
# Summer Time -- confirm against the export format.
tweet_time <- as.POSIXct(data$time, tz = "Europe/London")

# Hour of day in the local time zone, taken directly from the parsed instant
# so the result does not depend on the time zone of the R session.
data$hour <- as.POSIXlt(tweet_time, tz = "America/Los_Angeles")$hour

# `time` is kept as local wall-clock text; later steps derive the weekday
# from it.
data$time <- format(tweet_time, tz = "America/Los_Angeles")

# Only analyse tweets posted between 05:00 and 20:59 local time.
data <- subset(data, hour > 4 & hour < 21)
# Scatter plot of per-tweet engagement rate against the hour of posting.
# Points are jittered; the y axis is labelled as a percentage and always
# includes 0. Columns are referenced by bare name inside aes() so they are
# looked up in the data frame passed to ggplot().
ggplot(data) +
  geom_point(
    aes(hour, engagement.rate),
    size = 5, alpha = 0.7, colour = "red", position = "jitter"
  ) +
  xlab("") +
  ylab("Engagement Rate") +
  ggtitle("Engagement Rate by Time of Day") +
  expand_limits(y = 0) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_y_continuous(labels = percent_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "bottom"
  )

# ggsave() writes the most recently built plot.
ggsave("era_by_tod.png", dpi = 300, width = 9, height = 6)
# Return the `n` rows of `df` with the highest `average`.
#
# Args:
#   df: data frame with a numeric `average` column.
#   n:  number of rows to keep.
#
# Returns:
#   The rows of `df` sorted by decreasing `average`, truncated to at most
#   `n` rows. Asking for more rows than exist returns all of them (no NA
#   padding); n = 0 returns zero rows.
top_n_for_hour <- function(df, n) {
  ranked <- df[order(df$average, decreasing = TRUE), , drop = FALSE]
  utils::head(ranked, n)
}
# Pairwise two-sample t-tests between hours of the day.
#
# For every ordered pair of distinct hours in `hours`, the values of column
# `field` for rows posted in the first hour are compared with those posted in
# the second hour using t.test(), and the p-value is recorded.
#
# Args:
#   df:        data frame with an `hour` column and a column named by `field`.
#   field:     name (string) of the numeric column to compare.
#   top_hours: hours to keep in the result.
#   hours:     hours to test; each must lie in 1..25, the size of the p-value
#              table. Defaults to 5:20.
#
# Returns:
#   Data frame with columns `hour1` and `variable` (factors naming the two
#   hours) and `value` (the p-value), holding one row per ordered pair of
#   `top_hours` whose p-value is below 0.05. Pairs that were not tested, or
#   whose test failed, are absent.
time_of_day_t <- function(df, field, top_hours, hours = 5:20) {
  n_slots <- 25
  hours <- unique(hours)
  stopifnot(all(hours %in% seq_len(n_slots)))

  labels <- as.character(seq_len(n_slots))
  p_values <- matrix(
    NA_real_, nrow = n_slots, ncol = n_slots,
    dimnames = list(labels, labels)
  )

  # Pull each hour's sample once instead of re-filtering inside the loop.
  # which() drops rows whose hour is NA.
  samples <- lapply(hours, function(h) df[[field]][which(df$hour == h)])

  for (a in seq_along(hours)) {
    for (b in seq_along(hours)) {
      # An hour is never compared with itself.
      if (a == b) {
        next
      }
      first <- samples[[a]]
      second <- samples[[b]]
      if (length(first) > 1 && length(second) > 1) {
        # t.test() stops on degenerate input (e.g. constant data); record NA
        # for that pair rather than aborting the whole analysis.
        p_values[hours[a], hours[b]] <- tryCatch(
          t.test(first, second)$p.value,
          error = function(e) NA_real_
        )
      }
    }
  }

  # Long format, one row per cell of the table in column-major order:
  # `hour1` is the row label, `variable` the column label.
  pairs <- data.frame(
    hour1 = factor(rep(labels, times = n_slots)),
    variable = factor(rep(labels, each = n_slots), levels = labels),
    value = as.vector(p_values)
  )

  keep <- !is.na(pairs$value) &
    pairs$value < 0.05 &
    pairs$hour1 %in% top_hours &
    pairs$variable %in% top_hours
  pairs[keep, ]
}
# Flag the rows of a summary table that belong to the top group.
#
# Args:
#   hourly_data: summary data frame keyed by `hour` (hourly tables) or by
#                `weekday` (daily tables).
#   top_hours:   key values (hours or weekday numbers) to flag.
#   key:         name of the key column. When NULL (the default) `hour` is
#                used if present, otherwise `weekday`, so the same function
#                serves both the hourly and the daily tables.
#
# Returns:
#   `hourly_data` with an added numeric column `top`: 1 where the key is in
#   `top_hours`, 0 elsewhere.
designate_top <- function(hourly_data, top_hours, key = NULL) {
  if (is.null(key)) {
    key <- if ("hour" %in% names(hourly_data)) "hour" else "weekday"
  }

  if (!key %in% names(hourly_data)) {
    # No usable key column: flag nothing instead of failing.
    warning("designate_top: column '", key, "' not found; no rows flagged",
            call. = FALSE)
    hourly_data$top <- rep(0, nrow(hourly_data))
    return(hourly_data)
  }

  hourly_data$top <- as.numeric(hourly_data[[key]] %in% top_hours)
  hourly_data
}
# Retweets by hour: mean retweets per post and number of posts for each hour.
rt_by_hour <- ddply(
  data, .(hour), summarise,
  average = mean(retweets),
  count_posts = length(retweets)
)

# Three hours with the highest mean retweets, flagged in the summary table,
# plus the pairwise t-test p-values among them.
top_retweet_hours <- top_n_for_hour(rt_by_hour, 3)$hour
rt_by_hour <- designate_top(rt_by_hour, top_retweet_hours)
retweet_t <- time_of_day_t(data, "retweets", top_retweet_hours)
## 8 and 9 are the best hours (finding recorded by the original author)

# Bar chart of mean retweets per hour; `top` (0/1) drives the fill, so the
# flagged hours are red and the rest dodgerblue.
ggplot(rt_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Retweets per Post") +
  ggtitle("Best Time of Day to Maximize RT") +
  expand_limits(y = 0) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
ggsave("time_of_day_max_rt.png", dpi = 300, width = 12, height = 9)
# Impressions by hour: mean impressions per post and number of posts for each
# hour.
imp_by_hour <- ddply(
  data, .(hour), summarise,
  average = mean(impressions),
  count = length(impressions)
)

# Five hours with the highest mean impressions, flagged in the summary table,
# plus the pairwise t-test p-values among them.
top_imp_hours <- top_n_for_hour(imp_by_hour, 5)$hour
imp_by_hour <- designate_top(imp_by_hour, top_imp_hours)
imp_t <- time_of_day_t(data, "impressions", top_imp_hours)

# Bar chart of mean impressions per hour; flagged hours are red.
ggplot(imp_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Impressions") +
  ggtitle("Best Time of Day to Maximize Impressions") +
  expand_limits(y = 0) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
ggsave("time_of_day_max_imp.png", dpi = 300, width = 12, height = 9)
# Engagement rate by hour: mean engagement rate of the posts in each hour.
er_by_hour <- ddply(
  data, .(hour), summarise,
  average = mean(engagement.rate)
)

# Eleven hours with the highest mean engagement rate, flagged in the summary
# table, plus the pairwise t-test p-values among them.
top_er_hours <- top_n_for_hour(er_by_hour, 11)$hour
er_by_hour <- designate_top(er_by_hour, top_er_hours)
er_t <- time_of_day_t(data, "engagement.rate", top_er_hours)

# Bar chart of mean engagement rate per hour with a percentage y axis;
# `top` drives the fill colour.
ggplot(er_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Engagement Rate") +
  ggtitle("Best Time of Day to Maximize Engagement Rate") +
  expand_limits(y = 0) +
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  scale_y_continuous(labels = percent_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
ggsave("er_by_tod.png", dpi = 300, width = 12, height = 9)
# Per-tweet URL click-through rate: clicks divided by impressions.
data$url.ctr <- data$url.clicks / data$impressions

# Click rate by hour, computed from the hourly totals (total clicks over
# total impressions); stored under both `average` and `url.ctr`.
url_by_hour <- ddply(
  data, .(hour), summarise,
  average = sum(url.clicks) / sum(impressions),
  url.ctr = sum(url.clicks) / sum(impressions)
)

# Four hours with the highest click rate, flagged in the summary table, plus
# the pairwise t-test p-values on the per-tweet rates.
top_url_hours <- top_n_for_hour(url_by_hour, 4)$hour
url_by_hour <- designate_top(url_by_hour, top_url_hours)
url_t <- time_of_day_t(data, "url.ctr", top_url_hours)

# Bar chart of click rate per hour with a percentage y axis; `top` drives
# the fill colour.
ggplot(url_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Click Rate") +
  ggtitle("Best Time of Day to Maximize Click Rate") +
  expand_limits(y = 0) +
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  scale_y_continuous(labels = percent_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
ggsave("url_click_by_tod.png", dpi = 300, width = 12, height = 9)
# Follows by hour: mean follows per post for each hour.
follows_by_hour <- ddply(
  data, .(hour), summarise,
  average = mean(follows)
)

# Four hours with the highest mean follows, flagged in the summary table,
# plus the pairwise t-test p-values among them.
top_follows_hours <- top_n_for_hour(follows_by_hour, 4)$hour
follows_by_hour <- designate_top(follows_by_hour, top_follows_hours)
follows_t <- time_of_day_t(data, "follows", top_follows_hours)

# Bar chart of mean follows per hour; `top` drives the fill colour.
# NOTE(review): the y axis is formatted as a percentage although `average`
# is a mean of the `follows` column -- confirm this is intended.
ggplot(follows_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Follows") +
  ggtitle("Best Time of Day to New Follows") +
  expand_limits(y = 0) +
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  scale_y_continuous(labels = percent_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
ggsave("follow_rate_by_tod.png", dpi = 300, width = 12, height = 9)
# Day-of-week columns derived from `time`:
#   day     - weekday name as returned by weekdays()
#   weekday - the `wday` field of the broken-down time
data[["day"]] <- weekdays(as.Date(data[["time"]]))
data[["weekday"]] <- as.POSIXlt(data[["time"]])$wday
# Pairwise two-sample t-tests between days of the week.
#
# For every ordered pair of distinct weekday numbers 0..6, the values of
# column `field` for rows of the first day are compared with those of the
# second day using t.test(), and the p-value is recorded.
#
# Args:
#   df:       data frame with a numeric `weekday` column (0..6) and a column
#             named by `field`.
#   field:    name (string) of the numeric column to compare.
#   top_days: weekday numbers (0..6) to keep in the result.
#
# Returns:
#   Data frame with columns `day` and `variable` (factors holding the two
#   weekday numbers, labelled "0".."6" so they match `top_days`) and `value`
#   (the p-value), holding one row per ordered pair of `top_days` whose
#   p-value is below 0.05. Pairs that were not tested, or whose test failed,
#   are absent.
weekday_t <- function(df, field, top_days) {
  day_codes <- 0:6
  n_days <- length(day_codes)
  labels <- as.character(day_codes)
  p_values <- matrix(
    NA_real_, nrow = n_days, ncol = n_days,
    dimnames = list(labels, labels)
  )

  # Both sides of every comparison are selected by the numeric `weekday`
  # column. which() drops rows whose weekday is NA.
  samples <- lapply(
    day_codes,
    function(d) df[[field]][which(df$weekday == d)]
  )

  for (i in seq_len(n_days)) {
    for (j in seq_len(n_days)) {
      # A day is never compared with itself.
      if (i == j) {
        next
      }
      first <- samples[[i]]
      second <- samples[[j]]
      if (length(first) > 1 && length(second) > 1) {
        # t.test() stops on degenerate input (e.g. constant data); record NA
        # for that pair rather than aborting the whole analysis.
        p_values[i, j] <- tryCatch(
          t.test(first, second)$p.value,
          error = function(e) NA_real_
        )
      }
    }
  }

  # Long format, one row per cell of the table in column-major order:
  # `day` is the row label, `variable` the column label.
  pairs <- data.frame(
    day = factor(rep(labels, times = n_days)),
    variable = factor(rep(labels, each = n_days), levels = labels),
    value = as.vector(p_values)
  )

  keep <- !is.na(pairs$value) &
    pairs$value < 0.05 &
    pairs$day %in% top_days &
    pairs$variable %in% top_days
  pairs[keep, ]
}
# Return the `n` rows of `df` with the highest `average`.
#
# Args:
#   df: data frame with a numeric `average` column.
#   n:  number of rows to keep.
#
# Returns:
#   The rows of `df` sorted by decreasing `average`, truncated to at most
#   `n` rows. Asking for more rows than exist (e.g. 7 days when some weekday
#   has no data) returns all of them without NA padding; n = 0 returns zero
#   rows.
top_n_for_day <- function(df, n) {
  ranked <- df[order(df$average, decreasing = TRUE), , drop = FALSE]
  utils::head(ranked, n)
}
# Scatter plot of per-tweet engagement rate against the weekday number.
# Points are jittered; the y axis is labelled as a percentage and always
# includes 0. Columns are referenced by bare name inside aes() so they are
# looked up in the data frame passed to ggplot().
ggplot(data) +
  geom_point(
    aes(weekday, engagement.rate),
    size = 5, alpha = 0.7, colour = "dodgerblue", position = "jitter"
  ) +
  xlab("") +
  ylab("Engagement Rate") +
  ggtitle("Engagement Rate by Day of Week") +
  expand_limits(y = 0) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_y_continuous(labels = percent_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "bottom"
  )
# Box plot of per-tweet impressions, one box per weekday number. Outlier
# appearance is set through geom_boxplot()'s own arguments rather than inside
# aes(), which only takes aesthetic mappings. The y axis uses comma-separated
# labels and always includes 0.
ggplot(data) +
  geom_boxplot(
    aes(weekday, impressions, group = weekday),
    fill = "orange", colour = "gray",
    outlier.colour = "gray50", outlier.size = 3
  ) +
  xlab("") +
  ylab("Impressions") +
  ggtitle("Impressions by Day of Week") +
  expand_limits(y = 0) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_y_continuous(labels = comma_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "bottom"
  )
# Per-tweet URL click-through rate: clicks divided by impressions.
data$url.ctr <- data$url.clicks / data$impressions

# Click rate by weekday, computed from the daily totals (total clicks over
# total impressions), with the number of posts per weekday.
url_by_day <- ddply(
  data, .(weekday), summarise,
  average = sum(url.clicks) / sum(impressions),
  url.ctr = sum(url.clicks) / sum(impressions),
  count = length(url.clicks)
)

# Rank the weekdays by click rate (up to seven), flag them in the summary
# table and compute the pairwise t-test p-values on the per-tweet rates.
top_url_day <- top_n_for_day(url_by_day, 7)$weekday
url_by_day <- designate_top(url_by_day, top_url_day)
day_url_t <- weekday_t(data, "url.ctr", top_url_day)

# Bar chart of click rate per weekday with a percentage y axis; `top`
# drives the fill colour. The x axis shows weekday numbers.
ggplot(url_by_day) +
  geom_bar(aes(weekday, average, fill = top), stat = "identity") +
  xlab("Day of Week") +
  ylab("Click Rate") +
  ggtitle("Best Day of Week to Maximize Click Rate") +
  expand_limits(y = 0) +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  scale_y_continuous(labels = percent_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
# Engagement rate by weekday: mean engagement rate of the posts on each
# weekday.
er_by_day <- ddply(
  data, .(weekday), summarise,
  average = mean(engagement.rate)
)

# Rank the weekdays by mean engagement rate (up to seven), flag them in the
# summary table and compute the pairwise t-test p-values.
top_er_day <- top_n_for_day(er_by_day, 7)$weekday
er_by_day <- designate_top(er_by_day, top_er_day)
day_er_t <- weekday_t(data, "engagement.rate", top_er_day)

# Bar chart of mean engagement rate per weekday with a percentage y axis;
# `top` drives the fill colour. The x axis shows weekday numbers.
ggplot(er_by_day) +
  geom_bar(aes(weekday, average, fill = top), stat = "identity") +
  xlab("Day of Week") +
  ylab("Engagement Rate") +
  ggtitle("Best Day of Week to Maximize Engagement Rate") +
  expand_limits(y = 0) +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  scale_y_continuous(labels = percent_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
# Impressions by weekday: mean impressions per post on each weekday.
imp_by_day <- ddply(
  data, .(weekday), summarise,
  average = mean(impressions)
)

# Rank the weekdays by mean impressions (up to seven), flag them in the
# summary table and compute the pairwise t-test p-values.
top_imp_day <- top_n_for_day(imp_by_day, 7)$weekday
imp_by_day <- designate_top(imp_by_day, top_imp_day)
day_imp_t <- weekday_t(data, "impressions", top_imp_day)

# Bar chart of mean impressions per weekday with comma-separated y labels;
# `top` drives the fill colour. Axis labels and title describe impressions,
# which is what this chart plots.
ggplot(imp_by_day) +
  geom_bar(aes(weekday, average, fill = top), stat = "identity") +
  xlab("Day of Week") +
  ylab("Impressions") +
  ggtitle("Best Day of Week to Maximize Impressions") +
  expand_limits(y = 0) +
  # Empty text label anchored at the bottom-right corner (placeholder).
  annotate(
    "text", x = Inf, y = -Inf, label = "", hjust = 1.1, vjust = -1.1,
    col = "gray", cex = 6, fontface = "bold", alpha = 0.8
  ) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  scale_y_continuous(labels = comma_format()) +
  theme(
    panel.grid.major.y = element_line(colour = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = rel(2)),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 24),
    panel.background = element_rect(fill = "white"),
    axis.title.y = element_text(size = 24),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(fill = "white"),
    text = element_text(family = "News Gothic MT", face = "bold"),
    legend.position = "none"
  )
# Correlations between impressions and the engagement metrics.
cor(data$impressions, data$engagement.rate)
cor(data$retweets, data$impressions)
cor(data$replies, data$impressions)

# Length in characters of each tweet's text, computed per row (the whole
# column is passed to nchar(), not just its first element).
data$tweet.length <- nchar(as.character(data$Tweet.text))
Hey Tom,
thanks for this great script. Clemens and I took the time to make it a bit easier to understand, as we feel that it can be very useful for people who might not have much experience with scripting.

We've extracted a few variables to simplify configuration, made the charts show your chosen time zone, and improved the documentation to make it easier to get started with the script.

The updated gist can be found here:

Thanks again,

