Skip to content

Instantly share code, notes, and snippets.

@sergiospagnuolo
Created August 12, 2021 00:09
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sergiospagnuolo/336d52b260fbc084b21c65c3ff5b6381 to your computer and use it in GitHub Desktop.
Save sergiospagnuolo/336d52b260fbc084b21c65c3ff5b6381 to your computer and use it in GitHub Desktop.
Gist com análise do TikTok
library(tidyverse)
library(stringr)
# BUILD THE LIST OF CSV FILES
# full.names = FALSE keeps bare file names (no directory prefix), because the
# `hash` column below is extracted from the file name itself.
# `pattern` is a regular expression, not a shell glob, so it is anchored to
# match only names that really end in ".csv".
data_dir <- "20210811"
csvs <- list.files(data_dir, pattern = "\\.csv$", full.names = FALSE)

# BUILD ONE BIG TABLE, TAGGING EVERY ROW WITH ITS SOURCE FILE
# Files are read through file.path() instead of a setwd() round trip, so the
# working directory is never changed (and cannot be left changed if a read
# fails half-way).
tabelao <- tibble(File = csvs) %>%
  # With no regex supplied, extract() uses its default pattern to fill `hash`
  # from the file name.
  extract(File, "hash", remove = FALSE) %>%
  # One parsed data frame per file, stored in a list column.
  mutate(Data = lapply(file.path(data_dir, File), read_csv)) %>%
  # Expand the list column into regular rows/columns.
  unnest(Data) %>%
  # The file name is no longer needed once `hash` exists.
  select(-File)
# Turn the Unix timestamps in `createTime` into human-readable calendar dates,
# stored in a new `data` column of `tabelao`.
tabelao <- tabelao %>%
  mutate(data = as.Date(as.POSIXct(createTime, origin = "1970-01-01")))

# Keep only the columns used by the analysis.
# Note: diggCount holds the number of likes.
df <- select(
  tabelao,
  hash,
  id,
  data,
  text,
  authorMeta.name,
  authorMeta.verified,
  authorMeta.following,
  authorMeta.fans,
  webVideoUrl,
  musicMeta.musicAuthor,
  musicMeta.musicOriginal,
  videoMeta.duration,
  diggCount,
  shareCount,
  playCount,
  mentions,
  hashtags
)
# (unused alternative kept for reference) distinct(id, ..., .keep_all = FALSE)

# Number of rows (videos) collected per hash, largest first.
videos <- df %>%
  group_by(hash) %>%
  count() %>%
  arrange(desc(n))

# Number of rows per video id, largest first; any n > 1 means the same video
# id appears more than once in `df`.
videos_uniques <- df %>%
  group_by(id) %>%
  count() %>%
  arrange(desc(n))
# Median play count per hash, highest median first.
plays <- df %>%
  group_by(hash) %>%
  summarise(median_plays = median(playCount)) %>%
  arrange(desc(median_plays))

# Median like count (diggCount) per hash, highest median first.
likes <- df %>%
  group_by(hash) %>%
  summarise(median_likes = median(diggCount)) %>%
  arrange(desc(median_likes))
# One summary row per hash: median likes, median plays and video count,
# joined on `hash` starting from the `likes` table.
concat <- likes %>%
  left_join(plays, by = "hash") %>%
  left_join(videos, by = "hash")

# Copy the summary table to the system clipboard.
library(clipr)
write_clip(concat)
# How often each distinct `mentions` value occurs within each hash,
# most frequent first.
mentions <- df %>%
  group_by(hash) %>%
  count(mentions) %>%
  arrange(desc(n))
# Single-row overview of the whole data set: date span plus the extremes of
# play and like counts.
stats_base <- summarise(
  df,
  data_min  = min(data),
  data_max  = max(data),
  plays_min = min(playCount),
  plays_max = max(playCount),
  likes_min = min(diggCount),
  likes_max = max(diggCount)
)
# Join the video data with the manually assigned categories, which live in a
# separate data frame `d`. `d` is not created anywhere in this script, so it
# must already exist in the session; it supplies the `categoria` column used
# below (presumably one row per hash -- confirm against the source of `d`).
if (!exists("d") || !is.data.frame(d)) {
  stop(
    "Data frame `d` (manual categories keyed by `hash`) must be loaded ",
    "before running this section.",
    call. = FALSE
  )
}
concat_categorias <- left_join(df, d, by = "hash")

# `k` is the short alias used by the rest of the analysis. It has to be
# assigned before its first use, so the assignment comes ahead of
# `militarismo` (previously `k` was referenced before being defined).
k <- concat_categorias

# Text, URL and category of the videos categorised as "policial/militar".
militarismo <- k %>%
  select(text, webVideoUrl, categoria) %>%
  filter(categoria == "policial/militar")

# POPULARITY
#d$data_publ <- lubridate::as_date(d$`Post Created Date`, format="%Y-%m-%d")

# Daily number of videos per category from 2020 onwards, excluding the
# "memes" and "esporte" categories. Rows without a category are dropped
# explicitly (the previous `!=` comparisons dropped them implicitly).
contagem <- k %>%
  filter(
    !is.na(categoria),
    !categoria %in% c("memes", "esporte"),
    data >= as.Date("2020-01-01")
  ) %>%
  mutate(ano_mes = format(data, "%Y-%m")) %>%
  group_by(categoria, ano_mes) %>%
  count(data)

# Tag the rows with the network they came from.
contagem$rede <- "TikTok"

# Monthly bar chart. `contagem` has one row per day, so the daily counts are
# summed per category/month first; plotting the daily rows directly would
# draw several bars on top of each other at the same x position and show
# only the tallest one instead of the monthly total.
contagem %>%
  drop_na() %>%
  group_by(categoria, ano_mes) %>%
  summarise(n = sum(n), .groups = "drop") %>%
  ggplot(aes(ano_mes, n, fill = categoria, colour = categoria)) +
  geom_col(position = "dodge")
# Median play count per category and month from 2020 onwards, excluding the
# "memes" and "esporte" categories.
mediana_plays <- k %>%
  filter(
    categoria != "memes",
    categoria != "esporte",
    data >= "2020-01-01"
  ) %>%
  mutate(ano_mes = format(data, "%Y-%m")) %>%
  group_by(categoria, ano_mes) %>%
  summarise(mediana = median(playCount))

# Dodged bar chart of the monthly medians, one bar per category; rows with
# missing values are removed before plotting.
ggplot(
  drop_na(mediana_plays),
  aes(ano_mes, mediana, fill = categoria, colour = categoria)
) +
  geom_bar(stat = "identity", position = "dodge")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment