-
-
Save felixhaass/5f3f8517edc9e3f2db3d to your computer and use it in GitHub Desktop.
This code lays out the steps necessary to download and analyze Twitter data from #isa2014.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("scales") | |
library("Cairo") | |
library("ROAuth") | |
library("twitteR") | |
library("ggplot2") | |
#####################################
# Setting up the Twitter Connection #
#####################################
# The following header bits are taken from this blog post:
# http://davetang.org/muse/2013/04/06/using-the-r_twitter-package/

# NOTE: an earlier version ran rm(list = ls()) here as "housekeeping".
# It was removed on purpose: the analysis further down reads `isa2014df`,
# and the code that creates it is commented out ("NOT RUN"), so wiping the
# workspace would delete a previously loaded copy and break the script.

# Necessary step for Windows: fetch a CA certificate bundle that is passed
# as `cainfo` to the Twitter calls below. Downloaded over HTTPS so the
# bundle itself cannot be tampered with in transit.
download.file(url = "https://curl.se/ca/cacert.pem", destfile = "cacert.pem")

# To get your consumerKey and consumerSecret see the twitteR documentation
# for instructions. Both are left empty here and must be filled in.
cred <- OAuthFactory$new(
  consumerKey = "",
  consumerSecret = "",
  requestURL = "https://api.twitter.com/oauth/request_token",
  accessURL = "https://api.twitter.com/oauth/access_token",
  authURL = "https://api.twitter.com/oauth/authorize"
)

# Necessary step for Windows: run the OAuth handshake using the CA bundle.
cred$handshake(cainfo = "cacert.pem")

# Save the credential object for later use (Windows), then register it with
# twitteR so subsequent API calls are authenticated.
save(cred, file = "twitter authentication.Rdata")
registerTwitterOAuth(cred)
################# | |
# Analysis part # | |
################# | |
# NOT RUN (uncomment to download the data; the analysis below needs `isa2014df`):
# get all #isa2014 tweets & convert to data frame
# isa2014 <- searchTwitter("#isa2014", n=3500, cainfo="cacert.pem", since="2014-03-24", until="2014-03-31") | |
# isa2014df <- twListToDF(isa2014) | |
# | |
# names <- unique(sort(isa2014df$screenName)) | |
# | |
# fulldf <- isa2014df[-(1:nrow(isa2014df)), ] | |
# | |
# for(name in names[434:length(names)]) { | |
# print(paste("Retrieving tweets from", name, "...")) | |
# usertweets <- searchTwitter(paste(name, " #isa2014"), 500, cainfo="cacert.pem", since="2014-03-24", until="2014-03-31") | |
# usertweets <- twListToDF(usertweets) | |
# | |
# fulldf <- rbind(fulldf, usertweets) | |
# } | |
# Plot the twenty busiest tweeps.
# Reads `isa2014df`, which must already exist in the workspace (its creation
# is in the commented-out "NOT RUN" code earlier in this file).

# Count tweets per screen name and keep the 20 largest counts
# (ascending order, so the busiest account ends up as the top bar).
df <- data.frame(table(isa2014df$screenName))
df <- tail(df[order(df$Freq), ], 20)

# Get real names: one getUser() lookup per screen name.
# `Var1` comes out of table() as a factor, so it is converted to character
# before being handed to getUser(). vapply() also copes with an empty `df`,
# where a `1:nrow(df)` loop would iterate over c(1, 0).
df$realname <- vapply(
  as.character(df$Var1),
  function(screen_name) getUser(screen_name, cainfo = "cacert.pem")$name,
  character(1),
  USE.NAMES = FALSE
)

# Create display label: real name on the first line, @handle on the second.
df$disp_name <- paste0(df$realname, " \n(@", df$Var1, ")")

# Render the horizontal bar chart to a PNG file.
CairoPNG("busiest_isa2014_tweeps_NEW.png", height = 1600, width = 1000, pointsize = 30)
par(mar = c(5, 8, 4, 2))  # wider left margin for the two-line labels
barplot(df$Freq,
        names.arg = df$disp_name,
        horiz = TRUE,
        las = 1,
        main = "20 Busiest #ISA2014 Tweeps",
        xlab = "Tweet count",
        space = 0.6,
        col = "navy",
        border = FALSE, cex.names = .6)
dev.off()
#######################
# most popular tweets #
#######################
# Reads `isa2014df`, which must already exist in the workspace.

# Add popularity count: favourites plus retweets per tweet.
isa2014df$popular <- (isa2014df$favoriteCount + isa2014df$retweetCount)

# Order by popularity (most popular first), drop retweets, keep the top 10.
# NOTE: despite its name, `top20tweets` holds at most 10 rows; the name is
# kept unchanged so any code relying on it keeps working.
ordered <- isa2014df[order(isa2014df$popular, decreasing = TRUE), ]
top20tweets <- head(ordered[ordered$isRetweet == FALSE, ], 10)

# Generate the full twitter link for each tweet.
top20tweets$link <- paste0("https://twitter.com/", top20tweets$screenName, "/status/", top20tweets$id)

# Write the list of tweet links (one per line, unquoted, no header) for
# copy & paste inclusion in WordPress.
write.table(top20tweets[, "link"], row.names = FALSE, file = "isa_twitter.txt",
            quote = FALSE, col.names = FALSE)
####################################
# isatw14 "Twitter Panel" Analysis #
####################################
# Search for up to 500 #isatw14 tweets and convert them to a data frame.
isatw14 <- searchTwitter("#isatw14", n = 500, cainfo = "cacert.pem")
isatw14df <- twListToDF(isatw14)

# Count tweets per screen name and keep the 20 largest counts
# (ascending order, so the busiest account ends up as the top bar).
df_tw14 <- data.frame(table(isatw14df$screenName))
df_tw14 <- tail(df_tw14[order(df_tw14$Freq), ], 20)

# Get real names: one getUser() lookup per screen name.
# `Var1` comes out of table() as a factor, so it is converted to character
# before being handed to getUser(). vapply() also copes with an empty
# `df_tw14`, where a `1:nrow(df_tw14)` loop would iterate over c(1, 0).
df_tw14$realname <- vapply(
  as.character(df_tw14$Var1),
  function(screen_name) getUser(screen_name, cainfo = "cacert.pem")$name,
  character(1),
  USE.NAMES = FALSE
)

# Create display label: real name on the first line, @handle on the second.
df_tw14$disp_name <- paste0(df_tw14$realname, " \n(@", df_tw14$Var1, ")")

# Render the horizontal bar chart to a PNG file.
CairoPNG("busiest_twitter_panel_tweeps.png", height = 1600, width = 1100, pointsize = 30)
par(mar = c(5, 8, 4, 2))  # wider left margin for the two-line labels
barplot(df_tw14$Freq,
        names.arg = df_tw14$disp_name,
        horiz = TRUE,
        las = 1,
        main = "Busiest #isatw14 'The Twitter Panel' Tweeps",
        xlab = "Tweet count",
        space = 0.6,
        col = "navy",
        border = FALSE, cex.names = .6)
dev.off()
# when do ISAlers tweet?
# some code taken from here:
# http://bommaritollc.com/2012/05/21/charting-twitter-time-series-data-with-tweet-and-unique-user-counts/
# Reads `isa2014df`, which must already exist in the workspace.

dt <- 30  # bin width in minutes

# Parse the tweet creation timestamps as GMT date-times.
isa2014df$date <- as.POSIXct(strptime(isa2014df$created, "%Y-%m-%d %H:%M:%S", tz = "GMT"))

# fix dates: shift every timestamp back by four hours
# NOTE(review): presumably moves GMT to the conference's local time — confirm
isa2014df$date <- isa2014df$date - (4 * 60 * 60)

# Build bin edges every `dt` minutes; the upper limit is padded by one bin
# so the latest tweet falls inside the last bin.
minDate <- min(isa2014df$date)
maxDate <- max(isa2014df$date) + 60 * dt
dateBreaks <- seq(minDate, maxDate, by = 60 * dt)

# Count tweets per bin without drawing the histogram.
tweetCount <- hist(isa2014df$date, breaks = dateBreaks, plot = FALSE)

# Left edge of every bin (all breaks except the last one). head(x, -1)
# replaces `x[1:length(x)-1]`, which parses as `(1:length(x)) - 1` and only
# worked because R silently drops the resulting zero index.
binBreaks <- head(tweetCount$breaks, -1)

# `counts` is spelled out in full; `$count` relied on partial matching.
plotData <- data.frame(dates = head(dateBreaks, -1), tweets = as.numeric(tweetCount$counts))

# time <- data.frame(xmin = as.POSIXct("2014-03-26 08:15"), xmax = as.POSIXct("2014-03-26 10:00"), ymin=-Inf, ymax=Inf)

CairoPNG("ISA_Twitter_trend.png", width = 1600, height = 900, pointsize = 30)
trendPlot <- ggplot(plotData) +
  geom_bar(aes(x = dates, y = tweets), stat = "identity") +
  scale_y_continuous("Number of tweets") +
  scale_x_datetime(breaks = "3 hour", labels = date_format("%d. March %Hh")) +
  theme_bw() +
  theme(axis.text.x = element_text(hjust = 1.1, angle = 45), legend.key = element_blank()) +
  labs(x = "", title = "#isa2014 Twitter usage over time \n")
# Optional highlight of a time window (needs the `time` data frame above):
# trendPlot <- trendPlot +
#   geom_rect(data=time, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax), color="grey20", alpha=0.5, inherit.aes = FALSE)

# ggplot objects are only drawn when printed. Printing explicitly makes the
# PNG non-empty even when this script is run through source().
print(trendPlot)
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment