Skip to content

Instantly share code, notes, and snippets.

View davisvictorns's full-sized avatar

Davis Nascimento davisvictorns

  • Infoprime Sistemas
View GitHub Profile
# Group rows by (season, team), take the mean of every column within each
# group, then unstack the innermost index level ("team") into columns.
# NOTE(review): `premier_league_all` is defined outside this excerpt.
premier_league_all.groupby(by=["season", "team"]).mean().unstack()
# function to get points in percentage
def percentage(val, games=38, points_per_win=3):
    """Return ``val`` as a percentage of the maximum attainable points.

    Args:
        val: Points obtained. Any object supporting ``/`` and ``*`` with
            numbers works (plain numbers, NumPy arrays, pandas Series).
        games: Number of matches played; defaults to 38.
        points_per_win: Points awarded for a win; defaults to 3.

    Returns:
        ``val`` divided by ``games * points_per_win``, multiplied by 100.

    Raises:
        ZeroDivisionError: If ``games * points_per_win`` is zero and ``val``
            is a plain Python number.
    """
    max_points = games * points_per_win
    return (val / max_points) * 100
# One row of four side-by-side axes on a 26 x 5 inch figure.
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(26, 5))
#array with needed columns
# NOTE(review): four names and four axes — presumably one column per subplot;
# the plotting code is not visible in this excerpt.
itens = ["position", "points", "goals_for", "goals_against"]
# Per-(season, team) column means, with the "team" index level moved to columns.
premier_league_all_grouped = premier_league_all.groupby(by=["season", "team"]).mean().unstack()
# Notebook shell escape: install the `emoji` package, which is imported below.
!pip install emoji
import re
import regex
import pandas as pd
import numpy as np
import emoji
import plotly.express as px
from collections import Counter
import matplotlib.pyplot as plt
from os import path
from PIL import Image
# Notebook shell escape: clone the word_cloud repository into the working directory.
! git clone https://github.com/amueller/word_cloud.git
# NOTE(review): written with a space after `%`; presumably meant as the `%cd`
# magic to enter the cloned repository — confirm it runs in your IPython version.
% cd word_cloud
# Install the package from the current directory.
! pip install .
def iniciaDataHora(s):
    """Tell whether ``s`` starts with a ``NN/NN/NNNN NN:NN -`` stamp.

    The stamp must sit at the very beginning of the string: two digits,
    ``/``, two digits, ``/``, four digits, one space, ``NN:NN``, and then
    `` -`` optionally preceded by one extra space.

    Args:
        s: The text line to test.

    Returns:
        True when the line begins with such a stamp, otherwise False.
    """
    # Raw string: the pattern text is unchanged, but `\d` and `\/` are no
    # longer invalid escape sequences inside a normal string literal.
    padrao = r'^(\d{2})(\/)(\d{2})(\/)(\d{4}) (\d{2}):(\d{2})[ ]? -'
    return re.match(padrao, s) is not None
# Authors seen so far, in order of first appearance; position + 1 is the alias.
autores_anoni = []


def anonimizarAutor(autor):
    """Replace an author name with a stable 1-based number.

    The first distinct author passed in gets 1, the next new one gets 2, and
    so on. Passing the same author again returns the number it was given the
    first time. New authors are appended to the module-level
    ``autores_anoni`` list.

    Args:
        autor: The author value to anonymize, or None.

    Returns:
        None when ``autor`` is None, otherwise the author's 1-based position
        in ``autores_anoni``.
    """
    if autor is None:
        return None
    # Register unseen authors; known ones keep their existing position.
    if autor not in autores_anoni:
        autores_anoni.append(autor)
    return autores_anoni.index(autor) + 1
# Pulls the date and time substrings out of one chat line.
# NOTE(review): the definition is cut off in this excerpt — no author/message
# extraction and no return statement are visible.
def dadosSeparados(linha):
# Example input line: 01/11/2020 15:25 - Exemplo: Oi, eu sou o Exemplo
# First "NN/NN/NNNN" run found anywhere in the line.
data_search = re.search("(\d{2})(\/)(\d{2})(\/)(\d{4})", linha)
# NOTE(review): re.search returns None when nothing matches, in which case
# .group() raises AttributeError.
data = data_search.group()
# First "NN:NN" run found anywhere in the line; same None caveat as above.
hora_search = re.search("(\d{2}):(\d{2})", linha)
hora = hora_search.group()
# For the author, we take the string that sits between "- " and ": ".
# List that later becomes the rows of the DataFrame.
# NOTE(review): the code that appends to it is not visible in this excerpt.
dadoAnalisado = []
# Path of the chat export to read.
conversationPath = '/content/dev_chat_analise.txt'
with open(conversationPath, encoding="utf-8") as fp:
# Read and discard the first line of the file (presumably a header — confirm).
fp.readline()
# State for the message currently being assembled.
# NOTE(review): presumably used to join multi-line messages; that logic is not shown here.
mensagemBuffer = []
data, hora, autor, mensagem = None, None, None, ""
while True:
linha = fp.readline()
# readline() returns an empty string at end of file.
if not linha:
break
# Build the message table from the parsed rows.
df = pd.DataFrame(dadoAnalisado, columns=['data', 'hora', 'autor', 'mensagem'])
# Parse with an explicit day-first format. The date strings have the shape
# NN/NN/NNNN and are day-first (dd/mm/yyyy, as in pt-BR chat exports — confirm
# for your file). Without a format pandas reads ambiguous values such as
# "01/11/2020" month-first, silently swapping day and month.
df["data"] = pd.to_datetime(df["data"], format="%d/%m/%Y")
# Drop rows with any missing field (e.g. lines with no author).
df = df.dropna()
# Notebook display: preview the first five rows.
df.head(5)