# Created
# March 29, 2022 22:05
# -
# -
# Save rodolfoalmeida01/b9b18b0fa817bc72e832136847c1b883 to your computer and use it in GitHub Desktop.
# Código para classificar contas de TikTok e gerar gráficos
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
library(tidyverse)
library(plotly)
library(ggrepel)

# READING DATA ----------------------------------------------------------------
# Available at https://www.kaggle.com/moraesvic/tiktok-data-from-most-followed-accounts-2022
# Expects the downloaded CSV to be saved as tt_data.csv in the home directory.
tt_data <- read_csv("~/tt_data.csv")
# CLASSIFYING -------------------------------------------------------
# Assign a content category to each row based on its account handle.
# Handles are matched as literal substrings (fixed = TRUE) so that the "."
# in handles such as "@khaby.lame" is not treated as a regex wildcard.
# case_when() keeps the first condition that matches; rows whose
# account_name matches none of the handles get NA.
tt_data <- tt_data %>%
  mutate(
    categoria = case_when(
      grepl("@happydogs2021", account_name, fixed = TRUE) ~ "Animais",
      grepl("@avani", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@tiktok", account_name, fixed = TRUE) ~ "Institucional",
      grepl("@anokhinalz", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@babyariel", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@bellapoarch", account_name, fixed = TRUE) ~ "Artista",
      grepl("@brentrivera", account_name, fixed = TRUE) ~ "Youtuber",
      grepl("@charlidamelio", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@briandadeyanara", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@bts_official_bighit", account_name, fixed = TRUE) ~ "Artista",
      grepl("@carlosferiag", account_name, fixed = TRUE) ~ "Youtuber",
      grepl("@cznburak", account_name, fixed = TRUE) ~ "Comida",
      grepl("@darianrojasc", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@dixiedamelio", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@dobretwins", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@domelipa", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@elrodcontreras", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@gilmhercroes", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@homm9k", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@gordonramsayofficial", account_name, fixed = TRUE) ~ "Comida",
      grepl("@itsjojosiwa", account_name, fixed = TRUE) ~ "Artista",
      grepl("@jamescharles", account_name, fixed = TRUE) ~ "Maquiagem",
      grepl("@junya1gou", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@jasonderulo", account_name, fixed = TRUE) ~ "Artista",
      grepl("@joealbanese", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@khaby.lame", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@addisonre", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@justmaiko", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@kimberly.loaiza", account_name, fixed = TRUE) ~ "Artista",
      grepl("@kallmekris", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@kylethomas", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@kyliejenner", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@juliamenugarcia", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@daviddobrik", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@adrilatinatv", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@kikakiim", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@kingbach", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@karolg", account_name, fixed = TRUE) ~ "Artista",
      grepl("@era_ays", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@spencerx", account_name, fixed = TRUE) ~ "Artista",
      grepl("@thekiryalife", account_name, fixed = TRUE) ~ "Atleta",
      grepl("@donaldducc", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@mr_faisu_07", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@benjikrol", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@camilo", account_name, fixed = TRUE) ~ "Artista",
      grepl("@sameeksha_sud", account_name, fixed = TRUE) ~ "Artista",
      grepl("@topperguild", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@psg", account_name, fixed = TRUE) ~ "Institucional",
      grepl("@xoteam", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@scottsreality", account_name, fixed = TRUE) ~ "Comida",
      grepl("@nishaguragain", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@awezdarbar", account_name, fixed = TRUE) ~ "Artista",
      grepl("@kodyantle", account_name, fixed = TRUE) ~ "Animais",
      grepl("@riyaz.14", account_name, fixed = TRUE) ~ "Influencer",
      grepl("@tirullipa", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@wigofellas", account_name, fixed = TRUE) ~ "Comédia",
      grepl("@arianagrande", account_name, fixed = TRUE) ~ "Artista"
    )
  )
# Assign a nationality label to each row based on its account handle.
# Handles are matched as literal substrings (fixed = TRUE) so that the "."
# in handles such as "@riyaz.14" is not treated as a regex wildcard.
# case_when() keeps the first condition that matches; rows whose
# account_name matches none of the handles get a real missing value (NA).
# Note: @happydogs2021 is labelled with the literal string "NA", which is a
# regular text value and will be treated as its own group, not as missing.
tt_data <- tt_data %>%
  mutate(
    nacionalidade = case_when(
      grepl("@happydogs2021", account_name, fixed = TRUE) ~ "NA",
      grepl("@avani", account_name, fixed = TRUE) ~ "EUA",
      grepl("@tiktok", account_name, fixed = TRUE) ~ "China",
      grepl("@anokhinalz", account_name, fixed = TRUE) ~ "Rússia",
      grepl("@babyariel", account_name, fixed = TRUE) ~ "EUA",
      grepl("@bellapoarch", account_name, fixed = TRUE) ~ "Filipinas",
      grepl("@brentrivera", account_name, fixed = TRUE) ~ "EUA",
      grepl("@charlidamelio", account_name, fixed = TRUE) ~ "EUA",
      grepl("@briandadeyanara", account_name, fixed = TRUE) ~ "México",
      grepl("@bts_official_bighit", account_name, fixed = TRUE) ~ "Coreia do Sul",
      grepl("@carlosferiag", account_name, fixed = TRUE) ~ "Colômbia",
      grepl("@cznburak", account_name, fixed = TRUE) ~ "Turquia",
      grepl("@darianrojasc", account_name, fixed = TRUE) ~ "México",
      grepl("@dixiedamelio", account_name, fixed = TRUE) ~ "EUA",
      grepl("@dobretwins", account_name, fixed = TRUE) ~ "EUA",
      grepl("@domelipa", account_name, fixed = TRUE) ~ "México",
      grepl("@elrodcontreras", account_name, fixed = TRUE) ~ "México",
      grepl("@gilmhercroes", account_name, fixed = TRUE) ~ "EUA",
      grepl("@homm9k", account_name, fixed = TRUE) ~ "Cazaquistão",
      grepl("@gordonramsayofficial", account_name, fixed = TRUE) ~ "Escócia",
      grepl("@itsjojosiwa", account_name, fixed = TRUE) ~ "EUA",
      grepl("@jamescharles", account_name, fixed = TRUE) ~ "EUA",
      grepl("@junya1gou", account_name, fixed = TRUE) ~ "Japão",
      grepl("@jasonderulo", account_name, fixed = TRUE) ~ "EUA",
      grepl("@joealbanese", account_name, fixed = TRUE) ~ "EUA",
      grepl("@khaby.lame", account_name, fixed = TRUE) ~ "Senegal",
      grepl("@addisonre", account_name, fixed = TRUE) ~ "EUA",
      grepl("@justmaiko", account_name, fixed = TRUE) ~ "EUA",
      grepl("@kimberly.loaiza", account_name, fixed = TRUE) ~ "México",
      grepl("@kallmekris", account_name, fixed = TRUE) ~ "Canadá",
      grepl("@kylethomas", account_name, fixed = TRUE) ~ "Inglaterra",
      grepl("@kyliejenner", account_name, fixed = TRUE) ~ "EUA",
      grepl("@juliamenugarcia", account_name, fixed = TRUE) ~ "Espanha",
      grepl("@daviddobrik", account_name, fixed = TRUE) ~ "Eslováquia",
      grepl("@adrilatinatv", account_name, fixed = TRUE) ~ "Colômbia",
      grepl("@kikakiim", account_name, fixed = TRUE) ~ "Cazaquistão",
      grepl("@kingbach", account_name, fixed = TRUE) ~ "Canadá",
      grepl("@karolg", account_name, fixed = TRUE) ~ "Colômbia",
      grepl("@era_ays", account_name, fixed = TRUE) ~ "Rússia",
      grepl("@spencerx", account_name, fixed = TRUE) ~ "EUA",
      grepl("@thekiryalife", account_name, fixed = TRUE) ~ "Rússia",
      grepl("@donaldducc", account_name, fixed = TRUE) ~ "EUA",
      grepl("@mr_faisu_07", account_name, fixed = TRUE) ~ "Índia",
      grepl("@benjikrol", account_name, fixed = TRUE) ~ "Brasil",
      grepl("@camilo", account_name, fixed = TRUE) ~ "Colômbia",
      grepl("@sameeksha_sud", account_name, fixed = TRUE) ~ "Índia",
      grepl("@topperguild", account_name, fixed = TRUE) ~ "EUA",
      grepl("@psg", account_name, fixed = TRUE) ~ "França",
      grepl("@xoteam", account_name, fixed = TRUE) ~ "Rússia",
      grepl("@scottsreality", account_name, fixed = TRUE) ~ "Holanda",
      grepl("@nishaguragain", account_name, fixed = TRUE) ~ "Índia",
      grepl("@awezdarbar", account_name, fixed = TRUE) ~ "Índia",
      grepl("@kodyantle", account_name, fixed = TRUE) ~ "EUA",
      grepl("@riyaz.14", account_name, fixed = TRUE) ~ "Butão",
      grepl("@tirullipa", account_name, fixed = TRUE) ~ "Brasil",
      grepl("@wigofellas", account_name, fixed = TRUE) ~ "EUA",
      grepl("@arianagrande", account_name, fixed = TRUE) ~ "EUA"
    )
  )
# PLOTTING --------------------------------------------------------------------
# Videos: comments x likes
# One point per row of tt_data, coloured by category. The label and text
# aesthetics are not drawn by geom_point(); they are mapped so that they are
# available to the interactive version built by ggplotly().
p <- tt_data %>%
  ggplot(
    aes(
      x = comment_count,
      y = digg_count,
      label = account_name,
      color = categoria,
      text = description
    )
  ) +
  geom_point()
ggplotly(p)
# Accounts: comments x likes
# Totals digg_count and comment_count per account (category is carried along
# as a grouping column) and plots one labelled point per account.
p <- tt_data %>%
  group_by(account_name, categoria) %>%
  summarise(
    likes = sum(digg_count),
    comments = sum(comment_count),
    .groups = "drop" # explicit: avoids the regrouping message
  ) %>%
  ggplot(
    aes(x = comments, y = likes, label = account_name, color = categoria)
  ) +
  geom_text_repel(max.overlaps = 15) +
  geom_point()
# NOTE(review): ggplotly() may not translate geom_text_repel layers, in which
# case the labels only show up in the hover tooltip - confirm with the
# installed plotly version.
ggplotly(p)
# Categories and accounts: likes
# One point per account, placed on its category row. Categories are ordered
# with reorder()'s default summary of the per-account like totals; after
# coord_flip() the categories run along the vertical axis.
p <- tt_data %>%
  group_by(account_name, categoria) %>%
  summarise(
    likes = sum(digg_count),
    .groups = "drop" # explicit: avoids the regrouping message
  ) %>%
  ggplot(
    aes(
      x = reorder(categoria, likes),
      y = likes,
      color = categoria,
      label = account_name
    )
  ) +
  geom_point() +
  geom_text_repel(max.overlaps = 10) +
  coord_flip() +
  theme_minimal()
# NOTE(review): ggplotly() may not translate geom_text_repel layers, in which
# case the labels only show up in the hover tooltip - confirm with the
# installed plotly version.
ggplotly(p)
# Categories: likes
# Stacked bars of total likes per category, with one segment per account.
# Both the categories and the account segments are ordered by summed likes.
tt_data %>%
  group_by(account_name, categoria) %>%
  summarise(
    likes = sum(digg_count),
    .groups = "drop" # explicit: avoids the regrouping message
  ) %>%
  ggplot(
    aes(
      x = reorder(categoria, likes, FUN = sum),
      y = likes,
      fill = reorder(account_name, likes, FUN = sum)
    )
  ) +
  geom_col() +
  coord_flip() +
  theme_minimal()
# Categories and nationalities
# Counts how many distinct accounts fall in each (category, nationality)
# pair: rows are first collapsed to one per account, then counted per pair.
# Bars are nationalities ordered by their total number of accounts and
# filled by category.
tt_data %>%
  distinct(account_name, categoria, nacionalidade) %>%
  count(categoria, nacionalidade, name = "count") %>%
  ggplot(
    aes(
      x = reorder(nacionalidade, count, FUN = sum),
      y = count,
      fill = categoria
    )
  ) +
  geom_col() +
  coord_flip()
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment