Last active
July 19, 2022 16:10
-
-
Save ikashnitsky/86569f654ccc4ddd42336cac38db5789 to your computer and use it in GitHub Desktop.
First letter of the quote tweet author last name who replied to https://twitter.com/rafaelotinoco/status/1329622292507267073 – https://twitter.com/ikashnitsky/status/1330233541674885123
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#=============================================================================== | |
# 2020-11-21 -- twitter | |
# B census in replies to the joke tweet | |
# https://twitter.com/rafaelotinoco/status/1329622292507267073 | |
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com | |
#=============================================================================== | |
# analyse the first surname letter of those who replied to the meme
# the prevalence of similar jokes from B people was so obvious
library(tidyverse) | |
library(magrittr) | |
library(stringi) | |
library(rtweet) | |
library(ggdark) | |
library(hrbrthemes) | |
# for replicability the export is done at this time
##------ Sat Nov 21 19:10:05 2020 ------##
# Reuse the saved snapshot when it exists: re-running the search later would
# overwrite "tweets.rda" with whatever the search API still returns at that
# point (possibly nothing), which defeats the purpose of the dated export.
# Delete "tweets.rda" to force a fresh download.
if (file.exists("tweets.rda")) {
  # restores the object `df` stored by the save() call below
  load("tweets.rda")
} else {
  df <- search_tweets("url:1329622292507267073", n = 2000)
  save(df, file = "tweets.rda")
}

# keep only the quote tweets (rows where is_quote is TRUE; NA rows are dropped)
df_quo <- df %>% filter(is_quote)

# get the info on the quoting authors (one row of user data per quote tweet)
quo_auth <- df_quo %>% users_data()
# tally first letter of the surnames
# Result: one row per distinct first character, columns `first` and `n`.
# The "surname" is approximated as the last word of the cleaned display name.
df_name <- quo_auth %>%
  transmute(
    surname = name %>%
      # transliterate accented Latin letters (e.g. "ö" -> "o") rather than
      # turning them into control characters, so the initial is preserved
      stri_trans_general("Latin-ASCII") %>%
      str_to_lower() %>%
      # anything still outside printable ASCII (emoji, other scripts)
      # becomes a word separator
      str_replace_all("[^ -~]+", " ") %>%
      # remove text in parentheses (every occurrence)
      str_remove_all("\\s*\\([^\\)]+\\)") %>%
      # remove hashtags (every occurrence)
      str_remove_all("(?<=^|\\s)#[^\\s]+") %>%
      # remove the common ", phd": keep only what precedes the first comma
      str_remove(",.*") %>%
      # collapse/trim whitespace left behind by the removals above; otherwise
      # a trailing space makes the last-word pattern below fail
      str_squish() %>%
      # remove a trailing "jr" / "jr." suffix (only as a separate last word)
      str_remove("\\s+jr\\.?$") %>%
      # get the last word in the name string ~surname (NA if nothing is left)
      str_extract("[^ ]+$")
  ) %>%
  # GET THE FIRST LETTER OF THE SURNAME
  mutate(first = str_sub(surname, 1, 1)) %>%
  # calculate letters freq
  count(first)
# get the dataset for plotting: one row per letter A-Z, in alphabetical order
# Columns: LETTERS (upper case), first (lower case join key), n, prop.
df_plot <- tibble(LETTERS) %>%
  mutate(first = str_to_lower(LETTERS)) %>%
  # explicit key; initials in df_name that are not a-z are dropped here
  left_join(df_name, by = "first") %>%
  mutate(
    # letters that no surname starts with come out of the join as NA; a single
    # NA would make sum(n), and therefore every proportion, NA
    n = replace_na(n, 0L),
    # share among the names whose initial is one of the 26 letters
    prop = n / sum(n)
  )
# visualize: share of quote tweets per surname initial, bars in orange
ggplot(data = df_plot, mapping = aes(x = LETTERS, y = prop)) +
  geom_col(fill = "orange", colour = NA, width = 0.75) +
  scale_y_percent() +
  labs(
    title = "% of quote tweets",
    x = "First letter of the quote tweet author last name",
    y = NULL
  ) +
  dark_theme_minimal(base_family = "Roboto Slab") +
  theme(
    axis.text.x = element_text(
      size = 14,
      face = "bold",
      # 26 tick labels: all white except the second one ("B") in orange
      colour = replace(rep("white", 26), 2, "orange")
    )
  ) +
  # headline inside the panel, left-aligned at the 4th letter
  annotate(
    geom = "text", label = "The Twitter census of (B)est jokes",
    x = 4, y = 0.3, hjust = 0, vjust = 1,
    colour = "orange", size = 7, family = "Roboto Slab"
  ) +
  # sub-headline just below it
  annotate(
    geom = "text", label = "A good joke is always unexpected, isn't it?",
    x = 4, y = 0.23, hjust = 0, vjust = 1,
    colour = "white", size = 5, family = "Roboto Slab"
  )

# write the plot built above (ggplot2's last plot) to disk, 16:9 aspect ratio
ggsave(filename = "out.png", plot = last_plot(), width = 6, height = 3.375)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment