Created
November 2, 2018 20:53
-
-
Save aleszu/7c567c8bb2d97f614cc6db96b542d982 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(tidyverse) | |
library(tidytext) | |
library(plotly) | |
library(stringr) | |
# Read the Facebook ads export, keeping text columns as plain character vectors
df <- read.csv(file = "fbads.csv", header = TRUE, stringsAsFactors = FALSE)

# Derive a Date column from the ad creation timestamp
df <- mutate(df, date = as.Date(created_at))

# Structure overview followed by per-column summaries
glimpse(df)
summary(df)
# Ads whose message text mentions either candidate ----
# To look for a different term or politician, change the patterns passed to
# str_detect() below. Each subset is labelled with the candidate whose name
# matched.
betomentions <- df %>%
  filter(str_detect(message, "Beto")) %>%
  mutate(candidate = "O'Rourke")
betomentions %>% glimpse()

cruzmentions <- df %>%
  filter(str_detect(message, "Cruz")) %>%
  mutate(candidate = "Cruz")
cruzmentions %>% glimpse()

# NOTE(review): an ad whose message contains both "Beto" and "Cruz" is in both
# subsets, so it appears twice after bind_rows() (once per candidate label).
# `Beto` is TRUE when the advertiser is exactly "Beto O'Rourke". The comparison
# already yields a logical vector, so it is used directly instead of being
# wrapped in ifelse() with the reassignable shorthands T / F.
betocruz_mentions <- bind_rows(betomentions, cruzmentions) %>%
  mutate(Beto = advertiser == "Beto O'Rourke")
betocruz_mentions %>% glimpse()
# Ads mentioning either candidate, by advertiser over time ----
# The plot is built once and reused for the static and the interactive view.
# `text` is not a ggplot2 aesthetic; it is mapped so that ggplotly() can show
# the ad message in the hover tooltip.
allads <- ggplot(
  betocruz_mentions,
  aes(date, advertiser, text = message, color = Beto)
) +
  geom_point(aes(size = impressions)) +
  # Named values pin each colour to its level, so the colours cannot swap
  # when only one of FALSE / TRUE is present in the data.
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "dodgerblue")) +
  ggtitle("Beto O'Rourke and Ted Cruz Facebook ads by advertiser over time") +
  theme(legend.position = "none")

# Static version
allads

# Plotly
ggplotly(allads, tooltip = c("text", "impressions"))
# Top 20 advertisers ----
# Count ads per advertiser (carrying the `Beto` flag along for colouring) and
# keep the 20 largest counts. The weighting column is named explicitly rather
# than relying on top_n() picking the last column. top_n() keeps ties, so more
# than 20 rows can be returned.
top_advs <- betocruz_mentions %>%
  count(advertiser, Beto) %>%
  arrange(desc(n)) %>%
  top_n(20, n)

# Horizontal bar chart, advertisers ordered by number of ads
ggplot(top_advs, aes(x = reorder(advertiser, n), y = n, fill = Beto)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  # Named values keep the colours attached to the right level even if only
  # one of FALSE / TRUE survives the top-20 cut.
  scale_fill_manual(values = c("FALSE" = "black", "TRUE" = "dodgerblue")) +
  theme(legend.position = "none") +
  xlab("advertiser") +
  ggtitle("Top 20 advertisers whose Facebook ads mention Beto or Cruz")
# Ads whose advertiser is one of the two candidates ----
# Each subset gets a `candidate` label and is previewed with glimpse().
betoads <- mutate(
  filter(df, advertiser == "Beto O'Rourke"),
  candidate = "O'Rourke"
)
glimpse(betoads)

cruzads <- mutate(
  filter(df, advertiser == "Ted Cruz"),
  candidate = "Cruz"
)
glimpse(cruzads)

# Stack the two subsets into one table
betocruz <- bind_rows(betoads, cruzads)
glimpse(betocruz)

# Number of ads over time, one histogram panel per candidate
ggplot(data = betocruz, mapping = aes(x = date, fill = candidate)) +
  geom_histogram() +
  scale_fill_manual(values = c("firebrick", "dodgerblue")) +
  facet_wrap(~candidate)

# Individual ads over time, point size scaled by impressions
ggplot(
  data = betocruz,
  mapping = aes(x = date, y = advertiser, text = message, color = candidate)
) +
  geom_point(mapping = aes(size = impressions)) +
  scale_color_manual(values = c("firebrick", "dodgerblue")) +
  theme(legend.position = "none")
##### Sentiment analysis
# Keep a copy of the full ad text in `post`: unnest_tokens() replaces the
# `message` column with one `word` per row.
betocruz$post <- betocruz$message

# Split each message into words, drop stop words, then count how often each
# word occurs within each distinct combination of the ad-level columns.
# - `by = "word"` makes the join key explicit (no "Joining, by" message).
# - count() returns an ungrouped table, so no grouping leaks downstream.
tokenized_comments <- betocruz %>%
  select(
    paid_for_by, advertiser, post, candidate,
    date, impressions, title, message
  ) %>%
  unnest_tokens(word, message) %>%
  anti_join(stop_words, by = "word") %>%
  count(
    word, paid_for_by, advertiser, date,
    post, impressions, candidate, title
  ) %>%
  arrange(desc(n))
tokenized_comments %>% glimpse()
# Load the tab-separated labMT word list. `stringsAsFactors = FALSE` keeps
# `word` as character on every R version, so it joins cleanly against the
# character tokens (matches how fbads.csv is read).
sentiments <- read.csv(
  "labMT2english.csv",
  sep = "\t",
  stringsAsFactors = FALSE
)
labMT <- sentiments %>%
  select(word, happs)

# Average `happs` over the matched words of each ad, then centre it.
# NOTE(review): the mean is taken over distinct words per ad; the per-word
# count `n` is not used as a weight -- confirm this is intended.
# NOTE(review): 5.372 is presumably the lexicon-wide average `happs`, making
# `score` positive for above-average ads -- confirm the source of this value.
# ungroup() drops the grouping summarize() would otherwise leave behind.
all_sentiment <- tokenized_comments %>%
  inner_join(labMT, by = "word") %>%
  group_by(
    post, candidate, paid_for_by, advertiser,
    date, impressions, title
  ) %>%
  summarize(sentiment = mean(happs)) %>%
  ungroup() %>%
  arrange(desc(sentiment)) %>%
  mutate(score = sentiment - 5.372)
all_sentiment %>% glimpse()

# Mean centred score for each candidate's own ads
cruz_sent <- all_sentiment %>%
  filter(advertiser == "Ted Cruz") %>%
  glimpse()
mean(cruz_sent$score)

beto_sent <- all_sentiment %>%
  filter(advertiser == "Beto O'Rourke") %>%
  glimpse()
mean(beto_sent$score)
# Plot sentiment over time
#betocruzsent1 <- ggplot(all_sentiment, aes(date, score)) + geom_smooth() + geom_point()+ facet_grid(~advertiser)

# Plot sentiment over time, one panel per candidate.
# The plot is built once and reused for the static and interactive views.
# `text` and `score` are extra (non-ggplot2) mappings that exist only so
# ggplotly() can display them in the hover tooltip.
betocruzsent <- ggplot(
  all_sentiment,
  aes(date, score, color = score, text = post, score = score)
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~candidate) +
  scale_color_gradient(low = "red", high = "blue") +
  ggtitle("Sentiment of Beto O'Rourke and Ted Cruz Facebook ads over time")

# Static version
betocruzsent

# Interactive version. The tooltip previously asked for "word", which is
# neither a mapped aesthetic nor a column of all_sentiment; "score" is the
# mapping that was added for the tooltip.
ggplotly(betocruzsent, tooltip = c("text", "score"))
# Sentiment of overall words and by advertiser
# Keep a copy of the full ad text in `post`: unnest_tokens() replaces the
# `message` column with one `word` per row.
betocruz_mentions$post <- betocruz_mentions$message

# Split each message into words, drop stop words, then count how often each
# word occurs within each distinct combination of the ad-level columns.
# - `by = "word"` makes the join key explicit (no "Joining, by" message).
# - count() returns an ungrouped table, so no grouping leaks downstream.
tokenized_all_ads <- betocruz_mentions %>%
  select(
    paid_for_by, advertiser, post, candidate,
    date, impressions, title, message
  ) %>%
  unnest_tokens(word, message) %>%
  anti_join(stop_words, by = "word") %>%
  count(
    word, paid_for_by, advertiser, date,
    post, impressions, candidate, title
  ) %>%
  arrange(desc(n))
tokenized_all_ads %>% glimpse()
# Load the tab-separated labMT word list. `stringsAsFactors = FALSE` keeps
# `word` as character on every R version, so it joins cleanly against the
# character tokens. The file is read here so this section can be run on its
# own.
sentiments <- read.csv(
  "labMT2english.csv",
  sep = "\t",
  stringsAsFactors = FALSE
)
labMT <- sentiments %>%
  select(word, happs)

# Average `happs` over the matched words of each ad, then centre it.
# NOTE(review): 5.372 is presumably the lexicon-wide average `happs` --
# confirm the source of this value.
# ungroup() drops the grouping summarize() would otherwise leave behind.
all_ads_sentiment <- tokenized_all_ads %>%
  inner_join(labMT, by = "word") %>%
  group_by(
    post, candidate, paid_for_by, advertiser,
    date, impressions, title
  ) %>%
  summarize(sentiment = mean(happs)) %>%
  ungroup() %>%
  arrange(desc(sentiment)) %>%
  mutate(score = sentiment - 5.372)
all_ads_sentiment %>% glimpse()

# Mean centred score per advertiser, highest first
sent_advertiser <- all_ads_sentiment %>%
  group_by(advertiser) %>%
  summarize(avgsent = mean(score)) %>%
  arrange(desc(avgsent))
sent_advertiser %>% glimpse()
# Plot of mean sentiment by advertiser
# Bars are ordered by average score and coloured by its sign.
ggplot(
  sent_advertiser,
  aes(reorder(advertiser, avgsent), avgsent, fill = avgsent > 0)
) +
  geom_bar(stat = "identity") +
  # Named values tie red to FALSE and blue to TRUE, so the colours cannot
  # swap when every advertiser falls on the same side of zero.
  scale_fill_manual(values = c("FALSE" = "red", "TRUE" = "blue")) +
  theme(legend.position = "none") +
  ylab("Average sentiment") +
  xlab("") +
  coord_flip()

# Sentiment over time for every ad mentioning either candidate, one panel per
# candidate label. A single geom_smooth() layer is used; the smoother was
# previously added twice, which fitted and drew the same curve two times.
ggplot(
  all_ads_sentiment,
  aes(date, score, color = score, text = post, score = score)
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~candidate) +
  scale_color_gradient(low = "red", high = "blue") +
  ggtitle("Sentiment of Beto O'Rourke and Ted Cruz Facebook ads over time")

# Inspect the ads with a centred score below -0.5 in the data viewer
View(all_ads_sentiment %>% filter(score < -0.5) %>% glimpse())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment