Created
September 12, 2017 08:14
-
-
Save chrishanretty/b9fb8dfb6a6dc04900098c5d5e4c9b9a to your computer and use it in GitHub Desktop.
Analysis of Britain Elects re-tweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## See https://stackoverflow.com/questions/30508197/r-count-number-of-retweets
library(twitteR) | |
library(base64enc) | |
library(ggplot2) | |
library(hrbrthemes) | |
library(lubridate) | |
library(stringr) | |
library(arm) | |
### Load the Britain Elects timeline, preferring the local cache.
### The timeline is only downloaded when be_tweets.rds is absent; the
### downloaded result is then written to that file for later runs.
tweet_cache <- "be_tweets.rds"
if (file.exists(tweet_cache)) {
  britain_elects_tweets <- readRDS(tweet_cache)
} else {
  ## Placeholder credentials: replace with your own before fetching
  consumer_key <- "get"
  consumer_secret <- "your"
  access_token <- "own"
  access_secret <- "tokens"
  setup_twitter_oauth(consumer_key, consumer_secret,
                      access_token, access_secret)
  ## Request up to 1000 timeline entries, bounded by a fixed maxID and
  ## with replies excluded
  britain_elects_tweets <- userTimeline("britainelects",
                                        maxID = "872589161328496641",
                                        n = 1000,
                                        excludeReplies = TRUE)
  saveRDS(britain_elects_tweets, file = tweet_cache)
}
### Get Westminster VI tweets
## Pull the text of every tweet once, then flag the ones whose text starts
## with the voting-intention prefix.
all_text <- unlist(lapply(britain_elects_tweets, function(tw) tw$text))
vi <- grepl("^Westminster voting intention", all_text)
## Keep the matching tweet objects and their text side by side
vi_tweets <- britain_elects_tweets[vi]
tweet_text <- all_text[vi]
### Changes reported in each tweet
## A "change" is any parenthesised run that contains no ASCII letters,
## e.g. "(+2)", "(-1)" or "(-)".
change_regexp <- "(\\([^a-zA-Z]+\\))"
matches <- str_extract_all(tweet_text, change_regexp)
changes <- lapply(matches, function(tokens) {
  ## "(-)" is rewritten to 0 before the brackets are stripped
  tokens <- sub("(-)", "0", tokens, fixed = TRUE)
  bare <- sub(")", "", sub("(", "", tokens, fixed = TRUE), fixed = TRUE)
  as.numeric(bare)
})
## Mean absolute change per tweet
mads <- unlist(lapply(lapply(changes, abs), mean))
### Change in the Labour figure for each tweet
## First pass: keep what follows the "(" after "LAB: <n>% ". The capture is
## greedy, so it can run beyond the matching ")".
lab_chg <- sub(".*LAB: \\d*% \\((.*)\\).*", "\\1", tweet_text)
## Second pass: cut everything from the first ")" onwards
lab_chg <- sub("\\).*", "", lab_chg)
## A bare "-" is recoded as zero
lab_chg <- sub("^-$", "0", lab_chg)
lab_chg <- as.numeric(lab_chg)
### Hour of day of each tweet, binned into four periods
time_stamps <- lapply(vi_tweets, function(x) x$created)
## Call lubridate's accessor by its qualified name: the result is stored in
## a variable that is also called `hour`, which would otherwise shadow it.
## NOTE(review): hours are read in whatever time zone `created` carries --
## presumably UTC rather than UK local time; confirm.
hour <- unlist(lapply(time_stamps, lubridate::hour))
## Intervals are right-closed: hours 0-12, 13-17, 18-20 and 21-24.
## include.lowest = TRUE keeps hour 0 in the first bin; without it
## cut() returns NA for 0 and those tweets drop out of the model.
hour.cut <- cut(hour,
                breaks = c(0, 12, 17, 20, 24),
                include.lowest = TRUE)
### Assemble the analysis data frame and fit the retweet regression
rts <- unlist(lapply(vi_tweets, function(x) x$retweetCount))
## hour.cut is stored as a column so that every term of the model is taken
## from plot.df rather than from same-named objects in the workspace.
plot.df <- data.frame(RT = rts,
                      text = tweet_text,
                      lab_chg = lab_chg,
                      hour = hour,
                      mad = mads,
                      hour.cut = hour.cut,
                      stringsAsFactors = FALSE)
## Log retweet count regressed on the Labour change, the mean absolute
## change and the time-of-day bin.
## NOTE(review): log(RT) is -Inf for a tweet with zero retweets, which lm()
## rejects -- assumes every tweet here has at least one retweet; confirm.
mod <- lm(log(RT) ~ lab_chg + mad + hour.cut, data = plot.df)
summary(mod)
### Coefficient plot of `mod`, written to coefplot.png
## png()'s argument is `filename`; spell it out instead of relying on
## partial matching of `file`.
png(filename = "coefplot.png", width = 800, height = 480)
## Enlarged left margin (second entry of `mar`), presumably to leave room
## for the long coefficient labels
par(mar = c(5, 9, 2, 2))
## One label per coefficient, intercept included, in model order
coef_labels <- c("Intercept", "Labour change", "Mean absolute change",
                 "1pm - 6pm tweet", "6pm to 9pm", "Post 9pm tweet")
coefplot(mod, varnames = coef_labels)
dev.off()
### Scatter plots of retweets against the two change measures
## Both plots draw the retweet count on a square-root y axis
## (scale_y_sqrt), so the axis label names that scale. The handle in the
## title matches the account the timeline was fetched from.

## Retweets against the change in the Labour figure
p1 <- ggplot(plot.df, aes(x = lab_chg, y = RT)) +
  geom_point() +
  scale_y_sqrt() +
  geom_smooth(method = "lm") +
  theme_ipsum_rc() +
  labs(title = "Retweets of @britainelects polling tweets",
       subtitle = "The better the change for Labour, the more re-tweets",
       x = "Change in Labour figure relative to last poll",
       y = "Retweets (square-root scale)")

## Retweets against the mean absolute change across all reported figures
p2 <- ggplot(plot.df, aes(x = mad, y = RT)) +
  geom_point() +
  scale_y_sqrt() +
  geom_smooth(method = "lm") +
  theme_ipsum_rc() +
  labs(title = "Retweets of @britainelects polling tweets",
       subtitle = "The greater the change, the more the tweet is re-tweeted",
       x = "Mean absolute change in vote shares",
       y = "Retweets (square-root scale)")
### Write both scatter plots to PNG files
## png()'s argument is `filename`; spell it out instead of relying on
## partial matching of `file`. print() is what renders a ggplot object
## onto the open device.
png(filename = "p1.png", width = 800, height = 480)
print(p1)
dev.off()

png(filename = "p2.png", width = 800, height = 480)
print(p2)
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment