## chrishanretty/be_tweets.R

## be_tweets.R
## See https://stackoverflow.com/questions/30508197/r-count-number-of-retweets
library(twitteR)
library(base64enc)
library(ggplot2)
library(hrbrthemes)
library(lubridate)
library(stringr)
library(arm)

## Reuse the cached timeline when it is already on disk; only authenticate
## against the Twitter API and write the cache when no cached copy exists.
if (file.exists("be_tweets.rds")) {
    britain_elects_tweets <- readRDS("be_tweets.rds")
} else {
    ## Placeholder credentials: replace with your own API tokens.
    consumer_key <- "get"
    consumer_secret <- "your"
    access_token <- "own"
    access_secret <- "tokens"

    setup_twitter_oauth(consumer_key = consumer_key,
                        consumer_secret = consumer_secret,
                        access_token = access_token,
                        access_secret = access_secret)

    ## Request the timeline with replies excluded, bounded by maxID.
    britain_elects_tweets <- userTimeline("britainelects",
                                          n = 1000,
                                          maxID = "872589161328496641",
                                          excludeReplies = TRUE)

    saveRDS(britain_elects_tweets, file = "be_tweets.rds")
}

### Get Westminster VI tweets
## A tweet is kept when its text starts with "Westminster voting intention".
is_vi_tweet <- function(tweet) {
    grepl("^Westminster voting intention", tweet$text)
}
vi <- unlist(lapply(britain_elects_tweets, is_vi_tweet))

vi_tweets <- britain_elects_tweets[vi]

## Text of every voting-intention tweet.
tweet_text <- unlist(lapply(vi_tweets, function(tweet) tweet$text))

## Bracketed tokens containing no letters, e.g. "(+2)", "(-1)" or "(-)".
change_regexp <- "(\\([^a-zA-Z]+\\))"
matches <- str_extract_all(tweet_text, change_regexp)

## Convert one tweet's bracketed tokens to numbers: "(-)" is recoded as 0,
## otherwise the first "(" and the first ")" are stripped before conversion.
parse_changes <- function(tokens) {
    tokens <- sub("(-)", "0", tokens, fixed = TRUE)
    tokens <- sub("(", "", tokens, fixed = TRUE)
    tokens <- sub(")", "", tokens, fixed = TRUE)
    as.numeric(tokens)
}
changes <- lapply(matches, parse_changes)

## Mean absolute change over all figures found in each tweet.
mads <- unlist(lapply(changes, function(chg) mean(abs(chg))))

## Labour's change figure.  The first sub() keeps what follows
## "LAB: <digits>% (" up to the last closing bracket in the tweet; the
## second trims from the first remaining ")" onwards.  sub() is vectorised,
## so the whole character vector is processed in one call.  Text without a
## matching "LAB:" entry is left unchanged by the first sub().
lab_chg <- sub(".*LAB: \\d*% \\((.*)\\).*", "\\1", tweet_text)
lab_chg <- sub("\\).*", "", lab_chg)

## A bare "-" is recoded as 0 before the numeric conversion.
lab_chg <- sub("^-$", "0", lab_chg)
lab_chg <- as.numeric(lab_chg)

## Hour component of each tweet's creation time.  lubridate::hour is named
## explicitly because the result is stored in a variable also called `hour`.
time_stamps <- lapply(vi_tweets, function(x) x$created)
hour <- unlist(lapply(time_stamps, lubridate::hour))

## Time-of-day bands used as a factor in the model.  include.lowest = TRUE
## closes the first interval at 0, so tweets with hour 0 fall into [0,12]
## instead of becoming NA and being silently dropped from the regression.
hour.cut <- cut(hour,
                breaks = c(0, 12, 17, 20, 24),
                include.lowest = TRUE)

## Retweet count of each voting-intention tweet.
rts <- lapply(vi_tweets, function(x) x$retweetCount)
rts <- unlist(rts)


## Collect every variable used by the model and the plots in one data frame,
## so the regression below reads all of its terms from `plot.df` instead of
## picking up `rts` and `hour.cut` from the global environment.
plot.df <- data.frame(RT = rts, text = tweet_text, lab_chg = lab_chg,
                      hour = hour, hour.cut = hour.cut, mad = mads)

## Log retweet count regressed on Labour's change, the mean absolute change
## and the time-of-day band.
summary(mod <- lm(log(RT) ~ lab_chg + mad + hour.cut, data = plot.df))

## Coefficient plot written to disk; the left margin is widened for labels.
png(filename = "coefplot.png", width = 800, height = 480)
par(mar = c(5, 9, 2, 2))
coefplot(mod, varnames = c("Intercept", "Labour change", "Mean absolute change", "1pm - 6pm tweet",
         "6pm to 9pm", "Post 9pm tweet"))
dev.off()

## Retweets against Labour's change.  The y axis uses scale_y_sqrt(), so the
## axis label names the square-root scale (it previously claimed a log scale).
p1 <- ggplot(plot.df, aes(x = lab_chg, y = RT)) +
    geom_point() +
    scale_y_sqrt() +
    geom_smooth(method = "lm") +
    theme_ipsum_rc() +
    labs(title = "Retweets of @britain_elects polling tweets",
         subtitle = "The better the change for Labour, the more re-tweets",
         x = "Change in Labour figure relative to last poll",
         y = "Retweets (square-root scale)")

## Retweets against the mean absolute change, on the same y scale as p1.
p2 <- ggplot(plot.df, aes(x = mad, y = RT)) +
    geom_point() +
    scale_y_sqrt() +
    geom_smooth(method = "lm") +
    theme_ipsum_rc() +
    labs(title = "Retweets of @britain_elects polling tweets",
         subtitle = "The greater the change, the more the tweet is re-tweeted",
         x = "Mean absolute change in vote shares",
         y = "Retweets (square-root scale)")

## Write each scatter plot to its own PNG file.  The png() argument is
## spelled out as `filename`; `file =` only worked via partial matching.
png(filename = "p1.png", width = 800, height = 480)
print(p1)
dev.off()


png(filename = "p2.png", width = 800, height = 480)
print(p2)
dev.off()
	## See https://stackoverflow.com/questions/30508197/r-count-number-of-retweets
	library(twitteR)
	library(base64enc)
	library(ggplot2)
	library(hrbrthemes)
	library(lubridate)
	library(stringr)
	library(arm)

	## Reuse the cached timeline when it is already on disk; only authenticate
	## against the Twitter API and write the cache when no cached copy exists.
	if (file.exists("be_tweets.rds")) {
	    britain_elects_tweets <- readRDS("be_tweets.rds")
	} else {
	    ## Placeholder credentials: replace with your own API tokens.
	    consumer_key <- "get"
	    consumer_secret <- "your"
	    access_token <- "own"
	    access_secret <- "tokens"

	    setup_twitter_oauth(consumer_key = consumer_key,
	                        consumer_secret = consumer_secret,
	                        access_token = access_token,
	                        access_secret = access_secret)

	    ## Request the timeline with replies excluded, bounded by maxID.
	    britain_elects_tweets <- userTimeline("britainelects",
	                                          n = 1000,
	                                          maxID = "872589161328496641",
	                                          excludeReplies = TRUE)

	    saveRDS(britain_elects_tweets, file = "be_tweets.rds")
	}

	### Get Westminster VI tweets
	## A tweet is kept when its text starts with "Westminster voting intention".
	is_vi_tweet <- function(tweet) {
	    grepl("^Westminster voting intention", tweet$text)
	}
	vi <- unlist(lapply(britain_elects_tweets, is_vi_tweet))

	vi_tweets <- britain_elects_tweets[vi]

	## Text of every voting-intention tweet.
	tweet_text <- unlist(lapply(vi_tweets, function(tweet) tweet$text))

	## Bracketed tokens containing no letters, e.g. "(+2)", "(-1)" or "(-)".
	change_regexp <- "(\\([^a-zA-Z]+\\))"
	matches <- str_extract_all(tweet_text, change_regexp)

	## Convert one tweet's bracketed tokens to numbers: "(-)" is recoded as 0,
	## otherwise the first "(" and the first ")" are stripped before conversion.
	parse_changes <- function(tokens) {
	    tokens <- sub("(-)", "0", tokens, fixed = TRUE)
	    tokens <- sub("(", "", tokens, fixed = TRUE)
	    tokens <- sub(")", "", tokens, fixed = TRUE)
	    as.numeric(tokens)
	}
	changes <- lapply(matches, parse_changes)

	## Mean absolute change over all figures found in each tweet.
	mads <- unlist(lapply(changes, function(chg) mean(abs(chg))))

	## Labour's change figure.  The first sub() keeps what follows
	## "LAB: <digits>% (" up to the last closing bracket in the tweet; the
	## second trims from the first remaining ")" onwards.  The `*` quantifiers
	## are required: without them the pattern only matches a single-digit
	## share followed by a single-character change, and the trim removes just
	## one character after the bracket.  sub() is vectorised, so the whole
	## character vector is processed in one call.
	lab_chg <- sub(".*LAB: \\d*% \\((.*)\\).*", "\\1", tweet_text)
	lab_chg <- sub("\\).*", "", lab_chg)

	## A bare "-" is recoded as 0 before the numeric conversion.
	lab_chg <- sub("^-$", "0", lab_chg)
	lab_chg <- as.numeric(lab_chg)

	## Hour component of each tweet's creation time.  lubridate::hour is named
	## explicitly because the result is stored in a variable also called `hour`.
	time_stamps <- lapply(vi_tweets, function(x) x$created)
	hour <- unlist(lapply(time_stamps, lubridate::hour))

	## Time-of-day bands used as a factor in the model.  include.lowest = TRUE
	## closes the first interval at 0, so tweets with hour 0 fall into [0,12]
	## instead of becoming NA and being silently dropped from the regression.
	hour.cut <- cut(hour,
	                breaks = c(0, 12, 17, 20, 24),
	                include.lowest = TRUE)

	## Retweet count of each voting-intention tweet.
	rts <- lapply(vi_tweets, function(x) x$retweetCount)
	rts <- unlist(rts)


	## Collect every variable used by the model and the plots in one data frame,
	## so the regression below reads all of its terms from `plot.df` instead of
	## picking up `rts` and `hour.cut` from the global environment.
	plot.df <- data.frame(RT = rts, text = tweet_text, lab_chg = lab_chg,
	                      hour = hour, hour.cut = hour.cut, mad = mads)

	## Log retweet count regressed on Labour's change, the mean absolute change
	## and the time-of-day band.
	summary(mod <- lm(log(RT) ~ lab_chg + mad + hour.cut, data = plot.df))

	## Coefficient plot written to disk; the left margin is widened for labels.
	png(filename = "coefplot.png", width = 800, height = 480)
	par(mar = c(5, 9, 2, 2))
	coefplot(mod, varnames = c("Intercept", "Labour change", "Mean absolute change", "1pm - 6pm tweet",
	         "6pm to 9pm", "Post 9pm tweet"))
	dev.off()

	## Retweets against Labour's change.  The y axis uses scale_y_sqrt(), so the
	## axis label names the square-root scale (it previously claimed a log scale).
	p1 <- ggplot(plot.df, aes(x = lab_chg, y = RT)) +
	    geom_point() +
	    scale_y_sqrt() +
	    geom_smooth(method = "lm") +
	    theme_ipsum_rc() +
	    labs(title = "Retweets of @britain_elects polling tweets",
	         subtitle = "The better the change for Labour, the more re-tweets",
	         x = "Change in Labour figure relative to last poll",
	         y = "Retweets (square-root scale)")

	## Retweets against the mean absolute change, on the same y scale as p1.
	p2 <- ggplot(plot.df, aes(x = mad, y = RT)) +
	    geom_point() +
	    scale_y_sqrt() +
	    geom_smooth(method = "lm") +
	    theme_ipsum_rc() +
	    labs(title = "Retweets of @britain_elects polling tweets",
	         subtitle = "The greater the change, the more the tweet is re-tweeted",
	         x = "Mean absolute change in vote shares",
	         y = "Retweets (square-root scale)")

	## Write each scatter plot to its own PNG file.  The png() argument is
	## spelled out as `filename`; `file =` only worked via partial matching.
	png(filename = "p1.png", width = 800, height = 480)
	print(p1)
	dev.off()


	png(filename = "p2.png", width = 800, height = 480)
	print(p2)
	dev.off()