This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Regular expression used to count mentions of each technology in
# ``text_analise``. All patterns are raw strings (so ``\+`` and ``\S`` reach
# the regex engine untouched) and are matched case-insensitively.
padroes_tecnologias = {
    'Python': r'python',
    # Count the full name as well as the stand-alone "js" abbreviation; a bare
    # 'js' pattern never matches the word "JavaScript" and also fires inside
    # unrelated words such as "json".
    'JavaScript': r'javascript|\bjs\b',
    'SQL': r'SQL',
    # Skip "Java" when it is the prefix of "JavaScript", otherwise every
    # JavaScript mention is also counted as Java.
    'Java': r'java(?!script)',
    'Shell': r'Shell',
    'C#': r'C#',
    'C++': r'C\+\+',
    # Lookarounds instead of \s...\s: a lone "C" at the very start or end of
    # the text is counted, and the delimiters are not consumed, so two
    # occurrences separated by a single space are both found.
    'C': r'(?<!\S)C(?!\S)',
    'React': r'React',
    'Angular': r'Angular',
}

# Technology name -> number of matches found in the analysed text.
dicionario_analise = {
    nome: len(re.findall(padrao, text_analise, re.IGNORECASE))
    for nome, padrao in padroes_tecnologias.items()
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extend the library's STOPWORDS with extra tokens that should be left out of
# the cloud.
palavras_ignoradas = [
    "ra", "ga", "na", "ani", "em", "ki", "ah", "ha", "la",
    "eh", "ne", "le", "ni", "lo", "Ma", "Haa", "ni",
]
stopwords = set(STOPWORDS).union(palavras_ignoradas)

# Generate a word cloud image
gerador_nuvem = WordCloud(
    stopwords=stopwords,
    background_color="black",
    width=1280,
    height=720,
)
wordcloud = gerador_nuvem.generate(text)

# Render the cloud with matplotlib, hiding the axes.
plt.figure(figsize=(12.8, 7.2))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.express as px

# Pie chart built from emoji_df: slices are named by the 'emoji' column and
# sized by the 'count' column.
fig = px.pie(data_frame=emoji_df, names='emoji', values='count')
# Put the percentage and the label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flatten the per-message emoji collections into a single list.
total_emojis = []
for emojis_da_mensagem in mensagens_df.emoji:
    total_emojis.extend(emojis_da_mensagem)

# (emoji, count) pairs ordered from the most to the least frequent.
emoji_dict = Counter(total_emojis).most_common()
emoji_df = pd.DataFrame(emoji_dict, columns=['emoji', 'count'])
emoji_df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk over the distinct authors and print a short summary for each one
autores = mensagens_df.autor.unique()
for autor in autores:
    # Keep only the messages sent by this author
    req_df = mensagens_df[mensagens_df["autor"] == autor]
    print(f'Estatísticas de {autor}')
    print('Mensagens enviadas:', req_df.shape[0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set aside the rows whose message is the "hidden media file" placeholder and
# remove them from the working frame.
eh_midia = df['mensagem'] == '<Arquivo de mídia oculto>'
media_mensagens_df = df[eh_midia]
mensagens_df = df.drop(index=media_mensagens_df.index)

# Do the same for rows whose message is the "this message was deleted" notice.
foi_apagada = mensagens_df['mensagem'] == 'Essa mensagem foi apagada'
apagadas_mensagens_df = mensagens_df[foi_apagada]
mensagens_df = mensagens_df.drop(index=apagadas_mensagens_df.index)

# Per-message counters.
mensagens_df['cont_letras'] = mensagens_df['mensagem'].apply(len)
# Splitting on a single space yields one piece more than there are spaces.
mensagens_df['cont_palavras'] = mensagens_df['mensagem'].apply(
    lambda texto: texto.count(' ') + 1
)
mensagens_df["cont_mensagem"] = 1
# Lengths are computed on the full frame; the assignment aligns them by index.
mensagens_df["cont_emoji"] = df['emoji'].str.len()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Overall totals computed over the full frame.
total_mensagens = len(df.index)
mascara_midia = df['mensagem'] == '<Arquivo de mídia oculto>'
media_mensagens = len(df[mascara_midia].index)
tamanhos_emoji = df['emoji'].str.len()
emojis = sum(tamanhos_emoji)
links = np.sum(df['urlcount'])

# Report: a header line followed by one "label value" line per total.
print("Contagem geral dos dados do grupo: ")
for rotulo, valor in (
    ("Mensagem:", total_mensagens),
    ("Mídia:", media_mensagens),
    ("Emojis:", emojis),
    ("Links:", links),
):
    print(rotulo, valor)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_count(text):
    r"""Return the emoji found in ``text``, in order of appearance.

    The text is split into extended grapheme clusters (regex ``\X``) and every
    cluster that contains at least one known emoji code point is kept whole.

    Args:
        text: Message text to scan.

    Returns:
        A list with one entry per emoji cluster; empty when none is found.
    """
    # The emoji table moved between releases of the ``emoji`` package:
    # 2.x exposes ``EMOJI_DATA``; 1.x keeps ``UNICODE_EMOJI`` as a dict keyed
    # by language code (so a direct membership test never matches an emoji);
    # older releases map the emoji themselves directly in ``UNICODE_EMOJI``.
    known_emoji = getattr(emoji, "EMOJI_DATA", None)
    if known_emoji is None:
        legacy_table = emoji.UNICODE_EMOJI
        known_emoji = legacy_table.get("en", legacy_table)

    return [
        cluster
        for cluster in regex.findall(r'\X', text)
        if any(char in known_emoji for char in cluster)
    ]


# Store, for every message, the list of emoji it contains.
df["emoji"] = df["mensagem"].apply(split_count)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the message table from the parsed rows.
colunas = ['data', 'hora', 'autor', 'mensagem']
df = pd.DataFrame(dadoAnalisado, columns=colunas)
# Convert the date column and discard rows with any missing value.
# NOTE(review): pd.to_datetime is called without dayfirst/format; confirm
# the date strings are not day-first, or ambiguous dates may be misparsed.
df = df.assign(data=pd.to_datetime(df["data"])).dropna()
df.head(5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dadoAnalisado = [] | |
conversationPath = '/content/dev_chat_analise.txt' | |
with open(conversationPath, encoding="utf-8") as fp: | |
fp.readline() | |
mensagemBuffer = [] | |
data, hora, autor, mensagem = None, None, None, "" | |
while True: | |
linha = fp.readline() | |
if not linha: | |
break |