Skip to content

Instantly share code, notes, and snippets.

@ikashnitsky
Last active July 19, 2022 16:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikashnitsky/86569f654ccc4ddd42336cac38db5789 to your computer and use it in GitHub Desktop.
Save ikashnitsky/86569f654ccc4ddd42336cac38db5789 to your computer and use it in GitHub Desktop.
#===============================================================================
# 2020-11-21 -- twitter
# B census in replies to the joke tweet
# https://twitter.com/rafaelotinoco/status/1329622292507267073
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com
#===============================================================================
# analyse the first surname letter of those who replied to the meme
# the prevalence of similar jokes from B people was so obvious
library(tidyverse)
library(magrittr)
library(stringi)
library(rtweet)
library(ggdark)
library(hrbrthemes)
# for replicability the export is done at this time
##------ Sat Nov 21 19:10:05 2020 ------##
# Re-use the saved snapshot when it is already on disk: querying again would
# return a different set of tweets and overwrite the export made at the
# timestamp above. Delete "tweets.rda" to force a fresh download.
if (file.exists("tweets.rda")) {
  # restores the object `df` stored by the save() call below
  load("tweets.rda")
} else {
  df <- search_tweets("url:1329622292507267073", n = 2000)
  save(df, file = "tweets.rda")
}
# keep only the quoting tweets
# (`TRUE` is spelled out: `T` is an ordinary variable that can be reassigned)
df_quo <- df %>% filter(is_quote == TRUE)
# get the user-level info (incl. display names) of the quoting authors
quo_auth <- df_quo %>% users_data()
# tally first letter of the surnames
# Result: one row per first character, with columns `first` and `n` (count).
df_name <- quo_auth %>%
  select(name) %>%
  mutate(
    name = name %>%
      str_to_lower() %>%
      # transliterate accented Latin letters to their base letter first,
      # otherwise the ASCII coercion below replaces them with a placeholder
      stri_trans_general("Latin-ASCII") %>%
      stri_enc_toascii() %>%
      # drop the substitute characters left in place of the remaining
      # non-ASCII symbols (emoji etc.) so they are not taken for a surname
      str_remove_all("\\x1a") %>%
      # remove every text in parentheses
      str_remove_all("\\s*\\([^\\)]+\\)") %>%
      # remove every hashtag
      str_remove_all("(?<=^|\\s)#[^\\s]+") %>%
      # remove "jr" / "jr." only as a separate word, not inside a longer word
      str_remove(" jr\\b\\.?")
  ) %>%
  # remove the common ", phd": keep only the part before the first comma;
  # `extra`/`fill` silence the warnings for names with several or no commas
  separate(
    name, sep = ",", into = c("one", "two"),
    extra = "drop", fill = "right"
  ) %>%
  select("one") %>%
  # trim/collapse whitespace left over by the removals above; a trailing
  # space would make the last-word pattern below fail to match
  mutate(one = one %>% str_squish()) %>%
  # get the last word in the name string ~surname
  mutate(name = one %>% str_extract("[^ ]+$")) %>%
  # if NA copy from the full cleaned name
  transmute(surname = coalesce(name, one)) %>%
  # get the first letter of the surname
  mutate(first = surname %>% str_sub(1, 1)) %>%
  # calculate letters freq
  count(first, name = "n")
# get the dataset for plotting
# One row per letter A-Z: `LETTERS` (upper case, used on the x axis), `first`
# (lower-case join key), `n` (surname count) and `prop` (share of the total).
df_plot <- tibble(LETTERS) %>%
  mutate(first = LETTERS %>% str_to_lower()) %>%
  left_join(df_name, by = "first") %>%
  mutate(
    # letters with no surnames come out of the join as NA; count them as 0,
    # otherwise the NA propagates through the sum and every share becomes NA
    n = replace_na(n, 0L),
    prop = n %>% prop.table()
  )
# visualize
# Bar chart of the share of quote tweets by first surname letter, written to
# "out.png".

# colours of the 26 x-axis labels: the second one is orange, the rest white
axis_label_colours <- c("white", "orange", rep("white", 24))

# large orange headline drawn inside the panel
headline_layer <- annotate(
  "text", x = 4, y = .3, hjust = 0, vjust = 1,
  color = "orange", size = 7, family = "Roboto Slab",
  label = 'The Twitter census of (B)est jokes'
)

# smaller white tagline drawn below the headline
tagline_layer <- annotate(
  "text", x = 4, y = .23, hjust = 0, vjust = 1,
  color = "white", size = 5, family = "Roboto Slab",
  label = "A good joke is always unexpected, isn't it?"
)

ggplot(data = df_plot, mapping = aes(x = LETTERS, y = prop)) +
  geom_col(color = NA, fill = "orange", width = .75) +
  scale_y_percent() +
  labs(
    title = "% of quote tweets",
    x = "First letter of the quote tweet author last name",
    y = NULL
  ) +
  dark_theme_minimal(base_family = "Roboto Slab") +
  theme(
    axis.text.x = element_text(
      size = 14, face = 2, colour = axis_label_colours
    )
  ) +
  headline_layer +
  tagline_layer

# save the plot built above (the most recently created/modified ggplot)
ggsave(filename = "out.png", plot = last_plot(), width = 6, height = 3.375)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment