Skip to content

Instantly share code, notes, and snippets.

@fernandobarbalho
Created November 17, 2022 18:14
Show Gist options
  • Save fernandobarbalho/5acd4575620bff81036250d44b82ef80 to your computer and use it in GitHub Desktop.
Save fernandobarbalho/5acd4575620bff81036250d44b82ef80 to your computer and use it in GitHub Desktop.
library(stringr)
library(purrr)
library(readr)
# Path of the exported chat text file. The value below is a placeholder.
file_txt <- "<Nome_do_arquivo_baixado.txt>"

# Read the export into a character vector, one element per line. The encoding
# is declared on the connection so the input is converted while it is read.
con_conversa <- file(file_txt, encoding = "UTF-8")
# readLines() opens an unopened connection for the duration of the call and
# closes it again, but does not destroy it. Calling close() here (even when
# the read fails) avoids leaving the connection for the garbage collector.
conversa_txt <- tryCatch(
  readLines(con_conversa),
  finally = close(con_conversa)
)
# Positions of the "dirty" lines: those whose first 10 characters do not
# contain a dd/mm/yyyy-shaped date (two digits, slash, two digits, slash,
# four digits). Blank lines fall into this set as well.
id_linhas_sujas <- conversa_txt %>%
  str_sub(1, 10) %>%
  str_detect(
    "[:digit:][:digit:]/[:digit:][:digit:]/[:digit:][:digit:][:digit:][:digit:]",
    negate = TRUE
  ) %>%
  which()
# Append the text of every non-blank dirty line to the line that precedes it.
# The lines are visited from last to first, so a run of consecutive dirty
# lines is folded backwards, one step at a time, into the earliest line of
# the run.
#
# rev() is used instead of `x[length(x):1]`: with no dirty lines the latter
# indexes with c(0, 1) and yields a single NA iteration, which makes the
# `if` conditions below fail.
for (linha in rev(id_linhas_sujas)) {
  print(linha)

  # Blank lines carry no text to merge.
  if (str_length(conversa_txt[linha]) == 0) {
    next
  }

  # Merge into the previous line, or into the one before it when the previous
  # line is blank.
  destino <- linha - 1
  if (destino >= 1 && str_length(conversa_txt[destino]) == 0) {
    destino <- linha - 2
  }

  # A dirty line at the very top of the file has nothing before it to merge
  # into; without this guard, indexing position 0 gives a zero-length
  # condition (an error) or a silent no-op assignment.
  if (destino < 1) {
    next
  }

  conversa_txt[destino] <- str_c(conversa_txt[destino], conversa_txt[linha])
  print(conversa_txt[destino])
}
# Keep only the lines that are not flagged in id_linhas_sujas.
# A negative index is deliberately avoided: `x[-integer(0)]` selects nothing,
# so a file without any dirty line would lose every message. setdiff() over
# the positions returns all of them when nothing is flagged.
conversa_tratada <-
  conversa_txt[setdiff(seq_along(conversa_txt), id_linhas_sujas)]
# Build one row per message with the columns datahora, usuario and texto.
#
# Processing starts at the 4th line, i.e. the first three lines are skipped
# (presumably export header notices -- TODO confirm against a real export).
# tail(seq_along(x), -3) gives an empty index set when there are three lines
# or fewer, whereas 4:length(x) would count downwards and index past the end.
df_mensagens <-
  purrr::map_dfr(
    utils::tail(seq_along(conversa_tratada), -3),
    function(id_row) {
      print(id_row)
      linha_atual <- conversa_tratada[id_row]

      # Returning NULL makes map_dfr() contribute no row for this line.
      # Skipped: lines containing the "joined via invite link" notice.
      if (stringr::str_detect(
        linha_atual,
        "entrou usando o link de convite deste grupo"
      )) {
        return(NULL)
      }
      # Skipped: empty lines.
      if (stringr::str_length(linha_atual) == 0) {
        return(NULL)
      }

      # Split at the FIRST " - " only (n = 2): piece 1 is the date/time,
      # piece 2 is everything after it. Splitting without a limit (as the
      # earlier replace-with-";" approach did) cut the message short whenever
      # its text contained a semicolon.
      datahora_separada <- str_split(linha_atual, pattern = " - ", n = 2)

      # Split at the first colon that is not preceded by "https" (n = 2):
      # piece 1 is the sender, piece 2 the full message text. Without the
      # limit, any later colon in the text (e.g. "10:30") truncated it.
      # When there is no such colon, piece 2 is NA.
      usuario_separado <- str_split(
        datahora_separada[[1]][2],
        pattern = "(?<!https):",
        n = 2
      )

      tibble::tibble(
        datahora = datahora_separada[[1]][1],
        usuario = usuario_separado[[1]][1],
        texto = str_squish(usuario_separado[[1]][2])
      )
    }
  )
# Write the parsed messages to a CSV file at the given relative path.
readr::write_csv(df_mensagens, "mensagens_whats_app.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment