Skip to content

Instantly share code, notes, and snippets.

@felixhaass
Last active August 29, 2015 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save felixhaass/5f3f8517edc9e3f2db3d to your computer and use it in GitHub Desktop.
Save felixhaass/5f3f8517edc9e3f2db3d to your computer and use it in GitHub Desktop.
# This code lays out the steps necessary to download and analyze Twitter data from #isa2014.
library("scales")
library("Cairo")
library("ROAuth")
library("twitteR")
library("ggplot2")
#####################################
# Setting up the Twitter Connection #
#####################################
# The following header bits are taken from this blogpost:
# http://davetang.org/muse/2013/04/06/using-the-r_twitter-package/

# Housekeeping: the former `rm(list = ls())` was dropped on purpose. It wiped
# the whole workspace, including any previously retrieved `isa2014df` that the
# analysis part further down reads.

# Necessary step for Windows: fetch the CA bundle that is handed to the
# Twitter calls below through `cainfo`. It is downloaded over HTTPS so the
# bundle itself cannot be tampered with in transit.
download.file(url = "https://curl.se/ca/cacert.pem", destfile = "cacert.pem")

# To get your consumerKey and consumerSecret see the twitteR documentation
# for instructions; both are intentionally left empty here.
cred <- OAuthFactory$new(
  consumerKey = "",
  consumerSecret = "",
  requestURL = "https://api.twitter.com/oauth/request_token",
  accessURL = "https://api.twitter.com/oauth/access_token",
  authURL = "https://api.twitter.com/oauth/authorize"
)

# Necessary step for Windows: run the OAuth handshake using the CA bundle
# downloaded above.
cred$handshake(cainfo = "cacert.pem")

# Save the credential object for later use (Windows).
save(cred, file = "twitter authentication.Rdata")

# Register the credentials with twitteR for the API calls that follow.
registerTwitterOAuth(cred)
#################
# Analysis part #
#################
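# NOT RUN (kept for reference): the commented-out code below retrieves all
# #isa2014 tweets and converts them to the data frame `isa2014df`. The
# sections further down read `isa2014df`, so it must already exist in the
# workspace when they are run.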
# isa2014 <- searchTwitter("#isa2014", n=3500, cainfo="cacert.pem", since="2014-03-24", until="2014-03-31")
# isa2014df <- twListToDF(isa2014)
#
# names <- unique(sort(isa2014df$screenName))
#
# fulldf <- isa2014df[-(1:nrow(isa2014df)), ]
#
# for(name in names[434:length(names)]) {
# print(paste("Retrieving tweets from", name, "..."))
# usertweets <- searchTwitter(paste(name, " #isa2014"), 500, cainfo="cacert.pem", since="2014-03-24", until="2014-03-31")
# usertweets <- twListToDF(usertweets)
#
# fulldf <- rbind(fulldf, usertweets)
# }
# plot twenty busiest tweeps
# NOTE: `isa2014df` is not created by any uncommented line of this script; it
# has to be present in the workspace already.

# Tweet count per screen name. data.frame(table(...)) yields the columns
# `Var1` (screen name) and `Freq` (count).
df <- data.frame(table(isa2014df$screenName))
# order() sorts ascending, so the last 20 rows are the busiest accounts.
df <- tail(df[order(df$Freq), ], 20)

# get real names
# One getUser() call per account. seq_len() keeps the loop from running over
# c(1, 0) when `df` has no rows, and as.character() hands getUser() a plain
# string rather than the factor level stored in `Var1`.
df$realname <- rep(NA_character_, nrow(df))
for (i in seq_len(nrow(df))) {
  screen_name <- as.character(df[i, "Var1"])
  df[i, "realname"] <- getUser(screen_name, cainfo = "cacert.pem")$name
}

# create display: real name on the first line, "(@handle)" on the second
df$disp_name <- paste0(df$realname, " \n(@", df$Var1, ")")

CairoPNG("busiest_isa2014_tweeps_NEW.png", height = 1600, width = 1000, pointsize = 30)
# Wider left margin so the two-line account labels fit next to the bars.
par(mar = c(5, 8, 4, 2))
barplot(
  df$Freq,
  names.arg = df$disp_name,
  horiz = TRUE,
  las = 1,
  main = "20 Busiest #ISA2014 Tweeps",
  xlab = "Tweet count",
  space = 0.6,
  col = "navy",
  border = FALSE,
  cex.names = .6
)
dev.off()
#######################
# most popular tweets #
#######################
# NOTE: `isa2014df` is not created by any uncommented line of this script; it
# has to be present in the workspace already.

# add popular count: favorites plus retweets per tweet
isa2014df$popular <- (isa2014df$favoriteCount + isa2014df$retweetCount)

# order & subset top 10
# (the object keeps its historical name `top20tweets`, but holds 10 rows)
ordered <- isa2014df[order(isa2014df$popular, decreasing = TRUE), ]
# which() drops rows whose `isRetweet` is NA; plain logical subsetting would
# turn each of them into an all-NA row and later into a bogus link.
top20tweets <- head(ordered[which(!ordered$isRetweet), ], 10)

# generate full twitter link
top20tweets$link <- paste0(
  "https://twitter.com/", top20tweets$screenName,
  "/status/", top20tweets$id
)

# write list of tweet links for c & p inclusion to wordpress
# (one unquoted link per line, no header and no row names)
write.table(
  top20tweets[, "link"],
  file = "isa_twitter.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)
####################################
# isatw14 "Twitter Panel" Analysis #
####################################
# Fetch up to 500 tweets tagged #isatw14 and convert them to a data frame.
isatw14 <- searchTwitter("#isatw14", n = 500, cainfo = "cacert.pem")
isatw14df <- twListToDF(isatw14)

# Tweet count per screen name. data.frame(table(...)) yields the columns
# `Var1` (screen name) and `Freq` (count).
df_tw14 <- data.frame(table(isatw14df$screenName))
# order() sorts ascending, so tail() keeps the (up to) 20 busiest accounts.
df_tw14 <- tail(df_tw14[order(df_tw14$Freq), ], 20)

# get real names
# One getUser() call per account. seq_len() keeps the loop from running over
# c(1, 0) when the search returned nothing, and as.character() hands
# getUser() a plain string rather than the factor level stored in `Var1`.
df_tw14$realname <- rep(NA_character_, nrow(df_tw14))
for (i in seq_len(nrow(df_tw14))) {
  screen_name <- as.character(df_tw14[i, "Var1"])
  df_tw14[i, "realname"] <- getUser(screen_name, cainfo = "cacert.pem")$name
}

# create display: real name on the first line, "(@handle)" on the second
df_tw14$disp_name <- paste0(df_tw14$realname, " \n(@", df_tw14$Var1, ")")

CairoPNG("busiest_twitter_panel_tweeps.png", height = 1600, width = 1100, pointsize = 30)
# Wider left margin so the two-line account labels fit next to the bars.
par(mar = c(5, 8, 4, 2))
barplot(
  df_tw14$Freq,
  names.arg = df_tw14$disp_name,
  horiz = TRUE,
  las = 1,
  main = "Busiest #isatw14 'The Twitter Panel' Tweeps",
  xlab = "Tweet count",
  space = 0.6,
  col = "navy",
  border = FALSE,
  cex.names = .6
)
dev.off()
# when do ISAlers tweet?
# some code taken from here:
# http://bommaritollc.com/2012/05/21/charting-twitter-time-series-data-with-tweet-and-unique-user-counts/
# NOTE: `isa2014df` is not created by any uncommented line of this script; it
# has to be present in the workspace already.

# Bin width in minutes (multiplied by 60 below to get seconds).
dt <- 30

isa2014df$date <- as.POSIXct(
  strptime(isa2014df$created, "%Y-%m-%d %H:%M:%S", tz = "GMT")
)
# fix dates: shift every timestamp back by four hours
# NOTE(review): presumably converts GMT to the conference's local time - confirm.
isa2014df$date <- isa2014df$date - (4 * 60 * 60)

# Bin edges from the first tweet to one bin width past the last tweet, so the
# final tweet always falls inside a bin.
minDate <- min(isa2014df$date)
maxDate <- max(isa2014df$date) + 60 * dt
dateBreaks <- seq(minDate, maxDate, by = 60 * dt)

tweetCount <- hist(isa2014df$date, breaks = dateBreaks, plot = FALSE)

# Drop the last edge so there is exactly one left edge per bin. head(x, -1)
# states this directly; the earlier `1:length(x)-1` parsed as
# `(1:length(x)) - 1` and only worked because index 0 is silently ignored.
# `binBreaks` is not referenced again in this script.
binBreaks <- head(tweetCount$breaks, -1)
# `counts` is spelled out in full instead of relying on `$` partial matching.
plotData <- data.frame(
  dates = head(dateBreaks, -1),
  tweets = as.numeric(tweetCount$counts)
)

# time <- data.frame(xmin = as.POSIXct("2014-03-26 08:15"), xmax = as.POSIXct("2014-03-26 10:00"), ymin=-Inf, ymax=Inf)
CairoPNG("ISA_Twitter_trend.png", width = 1600, height = 900, pointsize = 30)
# print() is required: a ggplot object is only drawn when printed, and
# auto-printing does not happen when this file is run through source().
print(
  ggplot(plotData) +
    geom_bar(aes(x = dates, y = tweets), stat = "identity") +
    scale_y_continuous("Number of tweets") +
    scale_x_datetime(breaks = "3 hour", labels = date_format("%d. March %Hh")) +
    theme_bw() +
    theme(axis.text.x = element_text(hjust = 1.1, angle = 45), legend.key = element_blank()) +
    labs(x = "", title = "#isa2014 Twitter usage over time \n") # +
  # geom_rect(data=time, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax), color="grey20", alpha=0.5, inherit.aes = FALSE)
)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment