Skip to content

Instantly share code, notes, and snippets.

@aleszu
Created November 2, 2018 20:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save aleszu/7c567c8bb2d97f614cc6db96b542d982 to your computer and use it in GitHub Desktop.
Save aleszu/7c567c8bb2d97f614cc6db96b542d982 to your computer and use it in GitHub Desktop.
library(dplyr)
library(tidyverse)
library(tidytext)
library(plotly)
library(stringr)
# Load the Facebook ad export, keeping text columns as character vectors, and
# derive a Date column from the created_at column.
df <- read.csv("fbads.csv", header = TRUE, stringsAsFactors = FALSE) %>%
  mutate(date = as.Date(created_at))

# Quick structural overview followed by per-column summaries.
glimpse(df)
summary(df)
# Prompt users to look for a specific term or politician
# Each filter keeps the ads whose message contains the (case-sensitive) search
# term and tags them with the matching candidate. Rows with a missing message
# are dropped by filter().
betomentions <- df %>%
  filter(str_detect(message, "Beto")) %>%
  mutate(candidate = "O'Rourke")
betomentions %>% glimpse()

cruzmentions <- df %>%
  filter(str_detect(message, "Cruz")) %>%
  mutate(candidate = "Cruz")
cruzmentions %>% glimpse()

# Stack both sets. An ad whose message contains both terms appears twice, once
# per candidate. `Beto` is TRUE for ads whose advertiser is "Beto O'Rourke";
# the comparison is already logical, so no ifelse()/T/F wrapper is needed.
betocruz_mentions <- bind_rows(betomentions, cruzmentions) %>%
  mutate(Beto = advertiser == "Beto O'Rourke")
betocruz_mentions %>% glimpse()
# Every ad mentioning either candidate, by advertiser over time. Point size
# follows impressions; ads whose advertiser is "Beto O'Rourke" (Beto == TRUE)
# are blue, all others black. `text` is not a ggplot2 aesthetic; it is mapped
# so that ggplotly() can show the ad message in the tooltip.
allads <- ggplot(
  betocruz_mentions,
  aes(date, advertiser, text = message, color = Beto)
) +
  geom_point(aes(size = impressions)) +
  # Named values pin each colour to its level, so the mapping stays correct
  # even when only one of TRUE/FALSE is present in the data.
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "dodgerblue")) +
  ggtitle("Beto O'Rourke and Ted Cruz Facebook ads by advertiser over time") +
  theme(legend.position = "none")

# Static version
allads

# Plotly
ggplotly(allads, tooltip = c("text", "impressions"))
# Top 20 advertisers
# Number of mention rows per advertiser. `Beto` is derived from advertiser, so
# counting on both keeps the flag without splitting an advertiser across rows.
# NOTE(review): betocruz_mentions holds an ad once per candidate it mentions,
# so ads naming both candidates contribute two rows to `n` -- confirm intended.
top_advs <- betocruz_mentions %>%
  count(advertiser, Beto) %>%
  arrange(desc(n)) %>%
  # Rank explicitly by `n` instead of letting top_n() guess the last column;
  # ties at the cutoff are all kept, so more than 20 rows are possible.
  top_n(20, n)

ggplot(top_advs, aes(reorder(advertiser, n), n, fill = Beto)) +
  geom_col() +
  coord_flip() +
  # Named values keep each colour attached to its level even if only one of
  # TRUE/FALSE survives the top-20 cut.
  scale_fill_manual(values = c("FALSE" = "black", "TRUE" = "dodgerblue")) +
  theme(legend.position = "none") +
  xlab("advertiser") +
  ggtitle("Top 20 advertisers whose Facebook ads mention Beto or Cruz")
# Look at official ads
# Ads whose advertiser is one of the two candidates, tagged with a short
# candidate label.
betoads <- df %>%
  filter(advertiser == "Beto O'Rourke") %>%
  mutate(candidate = "O'Rourke")
glimpse(betoads)

cruzads <- df %>%
  filter(advertiser == "Ted Cruz") %>%
  mutate(candidate = "Cruz")
glimpse(cruzads)

betocruz <- bind_rows(betoads, cruzads)
glimpse(betocruz)

# Colours are keyed by candidate label rather than by position, so they stay
# correct even if one candidate has no rows.
candidate_colors <- c("Cruz" = "firebrick", "O'Rourke" = "dodgerblue")

# Number of ads over time, one panel per candidate. bins = 30 is the value
# geom_histogram() falls back to; stating it silences the "pick a better
# value" message without changing the plot.
ggplot(betocruz, aes(date, fill = candidate)) +
  geom_histogram(bins = 30) +
  scale_fill_manual(values = candidate_colors) +
  facet_wrap(~candidate)

# Individual ads over time, sized by impressions.
ggplot(betocruz, aes(date, advertiser, text = message, color = candidate)) +
  geom_point(aes(size = impressions)) +
  scale_color_manual(values = candidate_colors) +
  theme(legend.position = "none")
##### Sentiment analysis
# Keep the full ad text in `post`; `message` itself is consumed by tokenising.
betocruz$post <- betocruz$message

# One row per (word, ad): split each message into words, drop stop words and
# count occurrences. The ad-level columns are carried along as count keys.
tokenized_comments <- betocruz %>%
  select(
    paid_for_by, advertiser, post, candidate, date, impressions, title, message
  ) %>%
  unnest_tokens(word, message) %>%
  # Explicit join key: no "Joining, by = ..." message, and no surprise if the
  # data ever shares another column name with stop_words.
  anti_join(stop_words, by = "word") %>%
  count(
    word, paid_for_by, advertiser, date, post, impressions, candidate, title
  ) %>%
  arrange(desc(n))
tokenized_comments %>% glimpse()

# Tab-separated lexicon; only the `word` and `happs` columns are used.
# stringsAsFactors = FALSE matches how fbads.csv is read and keeps `word` a
# character column for the join on R versions before 4.0.
sentiments <- read.csv(
  "labMT2english.csv",
  sep = "\t",
  stringsAsFactors = FALSE
)
labMT <- sentiments %>%
  select(word, happs)

# Per-ad sentiment: mean `happs` over the ad's distinct words found in the
# lexicon (the per-word count `n` is not used as a weight).
# NOTE(review): 5.372 looks like a lexicon-wide baseline used to centre the
# score around zero -- confirm where the value comes from.
all_sentiment <- tokenized_comments %>%
  inner_join(labMT, by = "word") %>%
  group_by(
    post, candidate, paid_for_by, advertiser, date, impressions, title
  ) %>%
  summarize(sentiment = mean(happs)) %>%
  ungroup() %>% # do not leak the leftover grouping into later steps
  arrange(desc(sentiment)) %>%
  mutate(score = sentiment - 5.372)
all_sentiment %>% glimpse()

# Average centred score per candidate.
cruz_sent <- all_sentiment %>%
  filter(advertiser == "Ted Cruz")
glimpse(cruz_sent)
mean(cruz_sent$score)

beto_sent <- all_sentiment %>%
  filter(advertiser == "Beto O'Rourke")
glimpse(beto_sent)
mean(beto_sent$score)
# Plot sentiment over time
#betocruzsent1 <- ggplot(all_sentiment, aes(date, score)) + geom_smooth() + geom_point()+ facet_grid(~advertiser)
# Plot sentiment over time, one panel per candidate. Built once and reused for
# both the static and the interactive version. `text` and `score` are mapped
# in aes() so that ggplotly() can pick them up for the tooltip.
betocruzsent <- ggplot(
  all_sentiment,
  aes(date, score, color = score, text = post, score = score)
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~candidate) +
  scale_color_gradient(low = "red", high = "blue") +
  ggtitle("Sentiment of Beto O'Rourke and Ted Cruz Facebook ads over time")

# Static version
betocruzsent

# Interactive version. The tooltip entries must name a mapped aesthetic or
# variable; all_sentiment has no `word` column, so show the score instead.
ggplotly(betocruzsent, tooltip = c("text", "score"))
# Sentiment of overall words and by advertiser
# Same scoring as for the candidates' own ads, applied to every ad that
# mentions either candidate. Keep the full ad text in `post`; `message` itself
# is consumed by tokenising.
betocruz_mentions$post <- betocruz_mentions$message

# One row per (word, ad): split each message into words, drop stop words and
# count occurrences. The ad-level columns are carried along as count keys.
tokenized_all_ads <- betocruz_mentions %>%
  select(
    paid_for_by, advertiser, post, candidate, date, impressions, title, message
  ) %>%
  unnest_tokens(word, message) %>%
  # Explicit join key: no "Joining, by = ..." message, and no surprise if the
  # data ever shares another column name with stop_words.
  anti_join(stop_words, by = "word") %>%
  count(
    word, paid_for_by, advertiser, date, post, impressions, candidate, title
  ) %>%
  arrange(desc(n))
tokenized_all_ads %>% glimpse()

# Tab-separated lexicon; only the `word` and `happs` columns are used. Loaded
# here so this section does not rely on objects created by an earlier one.
# stringsAsFactors = FALSE matches how fbads.csv is read and keeps `word` a
# character column for the join on R versions before 4.0.
sentiments <- read.csv(
  "labMT2english.csv",
  sep = "\t",
  stringsAsFactors = FALSE
)
labMT <- sentiments %>%
  select(word, happs)

# Per-ad sentiment: mean `happs` over the ad's distinct words found in the
# lexicon (the per-word count `n` is not used as a weight).
# NOTE(review): 5.372 looks like a lexicon-wide baseline used to centre the
# score around zero -- confirm where the value comes from.
all_ads_sentiment <- tokenized_all_ads %>%
  inner_join(labMT, by = "word") %>%
  group_by(
    post, candidate, paid_for_by, advertiser, date, impressions, title
  ) %>%
  summarize(sentiment = mean(happs)) %>%
  ungroup() %>% # do not leak the leftover grouping into later steps
  arrange(desc(sentiment)) %>%
  mutate(score = sentiment - 5.372)
all_ads_sentiment %>% glimpse()

# Mean centred score per advertiser, most positive first.
sent_advertiser <- all_ads_sentiment %>%
  group_by(advertiser) %>%
  summarize(avgsent = mean(score)) %>%
  arrange(desc(avgsent))
sent_advertiser %>% glimpse()
# Plot of mean sentiment by advertiser
# Bars are ordered by average score; blue when the average is above zero, red
# otherwise. Named values keep that mapping correct even when every advertiser
# falls on the same side of zero.
ggplot(
  sent_advertiser,
  aes(reorder(advertiser, avgsent), avgsent, fill = avgsent > 0)
) +
  geom_col() +
  scale_fill_manual(values = c("FALSE" = "red", "TRUE" = "blue")) +
  theme(legend.position = "none") +
  ylab("Average sentiment") +
  xlab("") +
  coord_flip()

# Sentiment over time for every ad mentioning either candidate, one panel per
# candidate. The smoother is added once; a second geom_smooth() layer would
# only redraw the same fit on top of itself.
ggplot(
  all_ads_sentiment,
  aes(date, score, color = score, text = post, score = score)
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~candidate) +
  scale_color_gradient(low = "red", high = "blue") +
  ggtitle("Sentiment of Beto O'Rourke and Ted Cruz Facebook ads over time")

# Inspect the most negative ads (centred score below -0.5). The data viewer is
# only opened in an interactive session; glimpse() still prints everywhere.
low_sentiment_ads <- all_ads_sentiment %>%
  filter(score < -0.5)
glimpse(low_sentiment_ads)
if (interactive()) {
  View(low_sentiment_ads)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment