# ryanburge/ct_scrapes_trump.R

## ct_scrapes_trump.R
library(rtweet)
library(socsci)

## Scraping and Joining ####

# URL of the Christianity Today editorial; every search below queries it.
ct_url <- "https://www.christianitytoday.com/ct/2019/december-web-only/trump-should-be-removed-from-office.html"

# Run one search for tweets (retweets included) that match `query`.
pull_ct_tweets <- function(query = ct_url) {
  search_tweets(query, n = 18000, include_rts = TRUE)
}

# Eight separate pulls of the same query. The calls are identical, so any
# tweet returned by more than one pull is collapsed by distinct() below.
# NOTE(review): presumably these were run at different times to accumulate
# tweets -- confirm.
rt <- pull_ct_tweets()
rt1 <- pull_ct_tweets()
rt2 <- pull_ct_tweets()
rt3 <- pull_ct_tweets()
rt4 <- pull_ct_tweets()
rt5 <- pull_ct_tweets()
rt6 <- pull_ct_tweets()
rt7 <- pull_ct_tweets()

# Stack every pull (rt7 was previously fetched but never bound in) and keep
# one row per status_id.
all <- bind_rows(rt, rt1, rt2, rt3, rt4, rt5, rt6, rt7) %>%
  distinct(status_id, .keep_all = TRUE)


## Tweet Volume ####

# Calendar date and minute-rounded timestamp for every tweet.
all$date <- date(all$created_at)
all$date2 <- round_date(all$created_at, "1 mins")

# Number of tweets in each one-minute bin.
graph <- all %>%
  count(date2)

# Build the plot as an object and hand it to ggsave() explicitly.
# `plot + ggsave(...)` is not a supported way to save: it only worked because
# ggsave() wrote last_plot() as a side effect, and it errors on ggplot2
# releases where ggsave() returns the file name.
volume_plot <- ggplot(graph, aes(date2, y = n, fill = n)) +
  scale_fill_gradient(low = "#AAB0B1", high = "#E11A23") +
  geom_col() +
  theme_gg("Abel") +
  labs(
    x = "Greenwich Mean Time",
    y = "Tweets per Minute",
    title = "Volume of Tweets about the CT Editorial",
    caption = "@ryanburge\nData: Twitter REST API"
  )

ggsave("E://vel_ct.png", plot = volume_plot, type = "cairo-png")

# Display the chart when the script is run interactively.
volume_plot


## Sentiment Analysis ####

# NOTE(review): reg_words is not referenced anywhere else in the visible
# script; kept so the global still exists -- confirm before removing.
reg_words <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# One row per word: drop tweets whose text starts with "RT", strip links,
# HTML entities and "RT"/"https" fragments, tokenise, then remove stop words
# and tokens that contain no lowercase letter.
tidy_tweets <- all %>%
  filter(!str_detect(text, "^RT")) %>%
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https", "")) %>%
  unnest_tokens(word, text) %>%
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )

afinn <- get_sentiments("afinn")

# Keep only words present in the AFINN lexicon; the join key is spelled out
# instead of relying on an implicit natural join.
tidy_tweets <- tidy_tweets %>%
  inner_join(afinn, by = "word")

# Per-minute sum of the positive word scores.
fin1 <- tidy_tweets %>%
  filter(value > 0) %>%
  group_by(date2) %>%
  summarise(sum = sum(value)) %>%
  mutate(type = "Positive")

# Per-minute sum of the negative word scores.
fin2 <- tidy_tweets %>%
  filter(value < 0) %>%
  group_by(date2) %>%
  summarise(sum = sum(value)) %>%
  mutate(type = "Negative")

# Per-minute sum of all word scores.
fin3 <- tidy_tweets %>%
  group_by(date2) %>%
  summarise(sum = sum(value)) %>%
  mutate(type = "Overall")

fin <- bind_rows(fin1, fin2, fin3)

# Build the plot as an object and hand it to ggsave() explicitly.
# `plot + ggsave(...)` is not a supported way to save and errors on ggplot2
# releases where ggsave() returns the file name.
sentiment_plot <- ggplot(
  fin,
  aes(x = date2, y = sum, color = type, group = type)
) +
  # Three stacked point layers: white backing, open ring, small filled dot.
  geom_point(size = 3, color = "white") +
  geom_point(size = 2, shape = 1) +
  geom_point(size = 1, shape = 19) +
  scale_color_manual(values = c("#D51B1E", "#AAB0B1", "navyblue")) +
  geom_smooth(se = FALSE, linetype = "twodash") +
  theme_gg("Abel") +
  theme(legend.position = "bottom") +
  labs(
    x = "Greenwich Mean Time",
    y = "Overall Sentiment",
    title = "The Sentiment of Tweets About the CT Editorial",
    caption = "@ryanburge\nData: Twitter REST API"
  )

ggsave("E://sentiment_CT.png", plot = sentiment_plot, type = "cairo-png")

# Display the chart when the script is run interactively.
sentiment_plot


# The 25 most frequent negatively scored words. top_n() is given `n`
# explicitly; without a weight it ranks by whichever column comes last.
graph <- tidy_tweets %>%
  filter(value < 0) %>%
  ct(word) %>%
  arrange(-n) %>%
  top_n(25, n)

# Build the chart as an object and hand it to ggsave() explicitly.
# `plot + ggsave(...)` is not a supported way to save and errors on ggplot2
# releases where ggsave() returns the file name.
neg_words_plot <- graph %>%
  filter(word != "shit") %>%
  ggplot(., aes(x = reorder(word, n), y = n, fill = n)) +
  geom_col(color = "black") +
  coord_flip() +
  theme_gg("Abel") +
  scale_fill_gradient(low = "#AAB0B1", high = "#D51B1E") +
  labs(
    x = "",
    y = "",
    title = "Most Used Negative Words",
    caption = "@ryanburge\nData: Twitter REST API"
  )

ggsave("E://neg_words.png", plot = neg_words_plot, type = "cairo-png")

# Display the chart when the script is run interactively.
neg_words_plot


# The 25 most frequent positively scored words. top_n() is given `n`
# explicitly; without a weight it ranks by whichever column comes last.
graph <- tidy_tweets %>%
  filter(value > 0) %>%
  ct(word) %>%
  arrange(-n) %>%
  top_n(25, n)

# Build the chart as an object and hand it to ggsave() explicitly.
# `plot + ggsave(...)` is not a supported way to save and errors on ggplot2
# releases where ggsave() returns the file name.
pos_words_plot <- graph %>%
  # NOTE(review): same word filter as the negative-words chart; likely
  # redundant for positively scored words -- confirm.
  filter(word != "shit") %>%
  ggplot(., aes(x = reorder(word, n), y = n, fill = n)) +
  geom_col(color = "black") +
  coord_flip() +
  theme_gg("Abel") +
  scale_fill_gradient(low = "#AAB0B1", high = "navyblue") +
  labs(
    x = "",
    y = "",
    title = "Most Used Positive Words",
    caption = "@ryanburge\nData: Twitter REST API"
  )

ggsave("E://pos_words.png", plot = pos_words_plot, type = "cairo-png")

# Display the chart when the script is run interactively.
pos_words_plot
	library(rtweet)
	library(socsci)

	## Scraping and Joining ####

	# NOTE(review): this tab-indented section repeats the scrape that already
	# appears at the top of the file; running both doubles the API requests --
	# confirm whether the duplicate is intended.

	# URL of the Christianity Today editorial; every search below queries it.
	ct_url <- "https://www.christianitytoday.com/ct/2019/december-web-only/trump-should-be-removed-from-office.html"

	# Run one search for tweets (retweets included) that match `query`.
	pull_ct_tweets <- function(query = ct_url) {
	  search_tweets(query, n = 18000, include_rts = TRUE)
	}

	# Eight separate pulls of the same query; tweets returned by more than one
	# pull are collapsed by distinct() below.
	rt <- pull_ct_tweets()
	rt1 <- pull_ct_tweets()
	rt2 <- pull_ct_tweets()
	rt3 <- pull_ct_tweets()
	rt4 <- pull_ct_tweets()
	rt5 <- pull_ct_tweets()
	rt6 <- pull_ct_tweets()
	rt7 <- pull_ct_tweets()

	# Stack every pull (rt7 was previously fetched but never bound in) and keep
	# one row per status_id.
	all <- bind_rows(rt, rt1, rt2, rt3, rt4, rt5, rt6, rt7) %>%
	  distinct(status_id, .keep_all = TRUE)


	## Tweet Volume ####

	# Calendar date and minute-rounded timestamp for every tweet.
	all$date <- date(all$created_at)
	all$date2 <- round_date(all$created_at, "1 mins")

	# Number of tweets in each one-minute bin.
	graph <- all %>%
	  count(date2)

	# Build the plot as an object and hand it to ggsave() explicitly.
	# `plot + ggsave(...)` is not a supported way to save and errors on ggplot2
	# releases where ggsave() returns the file name.
	volume_plot <- ggplot(graph, aes(date2, y = n, fill = n)) +
	  scale_fill_gradient(low = "#AAB0B1", high = "#E11A23") +
	  geom_col() +
	  theme_gg("Abel") +
	  labs(
	    x = "Greenwich Mean Time",
	    y = "Tweets per Minute",
	    title = "Volume of Tweets about the CT Editorial",
	    caption = "@ryanburge\nData: Twitter REST API"
	  )

	ggsave("E://vel_ct.png", plot = volume_plot, type = "cairo-png")

	# Display the chart when the script is run interactively.
	volume_plot


	## Sentiment Analysis ####

	# Regex alternation is written with a plain `|`: "\|" is not a valid escape
	# in an R string literal and stops the file from parsing.
	# NOTE(review): reg_words is not referenced anywhere else in the visible
	# script; kept so the global still exists -- confirm before removing.
	reg_words <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

	# One row per word: drop tweets whose text starts with "RT", strip links,
	# HTML entities and "RT"/"https" fragments, tokenise, then remove stop words
	# and tokens that contain no lowercase letter. The entity alternatives are
	# spelled as &amp; / &lt; / &gt; to match the un-indented copy of this
	# pipeline earlier in the file.
	tidy_tweets <- all %>%
	  filter(!str_detect(text, "^RT")) %>%
	  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https", "")) %>%
	  unnest_tokens(word, text) %>%
	  filter(
	    !word %in% stop_words$word,
	    str_detect(word, "[a-z]")
	  )

	afinn <- get_sentiments("afinn")

	# Keep only words present in the AFINN lexicon; the join key is spelled out
	# instead of relying on an implicit natural join.
	tidy_tweets <- tidy_tweets %>%
	  inner_join(afinn, by = "word")

	# Per-minute sum of the positive word scores.
	fin1 <- tidy_tweets %>%
	  filter(value > 0) %>%
	  group_by(date2) %>%
	  summarise(sum = sum(value)) %>%
	  mutate(type = "Positive")

	# Per-minute sum of the negative word scores.
	fin2 <- tidy_tweets %>%
	  filter(value < 0) %>%
	  group_by(date2) %>%
	  summarise(sum = sum(value)) %>%
	  mutate(type = "Negative")

	# Per-minute sum of all word scores.
	fin3 <- tidy_tweets %>%
	  group_by(date2) %>%
	  summarise(sum = sum(value)) %>%
	  mutate(type = "Overall")

	fin <- bind_rows(fin1, fin2, fin3)

	# Build the plot as an object and hand it to ggsave() explicitly.
	# `plot + ggsave(...)` is not a supported way to save and errors on ggplot2
	# releases where ggsave() returns the file name.
	sentiment_plot <- ggplot(
	  fin,
	  aes(x = date2, y = sum, color = type, group = type)
	) +
	  # Three stacked point layers: white backing, open ring, small filled dot.
	  geom_point(size = 3, color = "white") +
	  geom_point(size = 2, shape = 1) +
	  geom_point(size = 1, shape = 19) +
	  scale_color_manual(values = c("#D51B1E", "#AAB0B1", "navyblue")) +
	  geom_smooth(se = FALSE, linetype = "twodash") +
	  theme_gg("Abel") +
	  theme(legend.position = "bottom") +
	  labs(
	    x = "Greenwich Mean Time",
	    y = "Overall Sentiment",
	    title = "The Sentiment of Tweets About the CT Editorial",
	    caption = "@ryanburge\nData: Twitter REST API"
	  )

	ggsave("E://sentiment_CT.png", plot = sentiment_plot, type = "cairo-png")

	# Display the chart when the script is run interactively.
	sentiment_plot



	# The 25 most frequent negatively scored words. top_n() is given `n`
	# explicitly; without a weight it ranks by whichever column comes last.
	graph <- tidy_tweets %>%
	  filter(value < 0) %>%
	  ct(word) %>%
	  arrange(-n) %>%
	  top_n(25, n)

	# Build the chart as an object and hand it to ggsave() explicitly.
	# `plot + ggsave(...)` is not a supported way to save and errors on ggplot2
	# releases where ggsave() returns the file name.
	neg_words_plot <- graph %>%
	  filter(word != "shit") %>%
	  ggplot(., aes(x = reorder(word, n), y = n, fill = n)) +
	  geom_col(color = "black") +
	  coord_flip() +
	  theme_gg("Abel") +
	  scale_fill_gradient(low = "#AAB0B1", high = "#D51B1E") +
	  labs(
	    x = "",
	    y = "",
	    title = "Most Used Negative Words",
	    caption = "@ryanburge\nData: Twitter REST API"
	  )

	ggsave("E://neg_words.png", plot = neg_words_plot, type = "cairo-png")

	# Display the chart when the script is run interactively.
	neg_words_plot


	# The 25 most frequent positively scored words. top_n() is given `n`
	# explicitly; without a weight it ranks by whichever column comes last.
	graph <- tidy_tweets %>%
	  filter(value > 0) %>%
	  ct(word) %>%
	  arrange(-n) %>%
	  top_n(25, n)

	# Build the chart as an object and hand it to ggsave() explicitly.
	# `plot + ggsave(...)` is not a supported way to save and errors on ggplot2
	# releases where ggsave() returns the file name.
	pos_words_plot <- graph %>%
	  # NOTE(review): same word filter as the negative-words chart; likely
	  # redundant for positively scored words -- confirm.
	  filter(word != "shit") %>%
	  ggplot(., aes(x = reorder(word, n), y = n, fill = n)) +
	  geom_col(color = "black") +
	  coord_flip() +
	  theme_gg("Abel") +
	  scale_fill_gradient(low = "#AAB0B1", high = "navyblue") +
	  labs(
	    x = "",
	    y = "",
	    title = "Most Used Positive Words",
	    caption = "@ryanburge\nData: Twitter REST API"
	  )

	ggsave("E://pos_words.png", plot = pos_words_plot, type = "cairo-png")

	# Display the chart when the script is run interactively.
	pos_words_plot