Skip to content

Instantly share code, notes, and snippets.

@vfrico
Created August 17, 2018 14:38
Show Gist options
  • Save vfrico/7db4d6e7ec24ccfb1dc40cef00915458 to your computer and use it in GitHub Desktop.
Save vfrico/7db4d6e7ec24ccfb1dc40cef00915458 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
# Parse the exported chat into one dict per line (list_messages) and a
# parallel list with the author of each line (list_users).
list_messages = []
list_users = []
# NOTE(review): assumes the export is UTF-8 encoded; adjust the encoding if
# the file was saved differently.
with open("whatsapp.txt", "r", encoding="utf-8") as f:
    for line in f:
        # Expected shape: "<date> - <user>:<text>". partition() cuts only at
        # the first separator, so any later " - " or ":" stays in the text.
        # A line without " - " yields an empty user and text; a line without
        # ":" keeps everything after the date as the user.
        date, _, right = line.partition(" - ")
        user, _, mess_text = right.partition(":")
        message = {
            "date": date,
            "user": user,
            "mess_text": mess_text,
        }
        list_messages.append(message)
        list_users.append(user)
def get_users_list(all_users, exclude_list=("",), threshold=10):
    """Return the users that appear more than ``threshold`` times.

    Args:
        all_users: iterable with one user name per message (repeats expected).
        exclude_list: names to leave out regardless of how often they appear.
            The default leaves out the empty name.
        threshold: a user is kept only when its count is strictly greater
            than this value.

    Returns:
        A list of user names, ordered by first appearance in ``all_users`` so
        that the result is identical on every run.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    # Counter keeps insertion order, unlike iterating over a set of names.
    counters = Counter(all_users)
    return [
        user
        for user, total in counters.items()
        if total > threshold and user not in exclude_list
    ]
def count_messages_by_user(all_messages, all_users, plugins):
    """Accumulate every plugin's metric per user.

    Args:
        all_messages: iterable of dicts with "user" and "mess_text" keys.
        all_users: users to report on; messages written by anyone else are
            skipped.
        plugins: sequence of (plugin_name, plugin_function, title) triples.
            ``plugin_function`` receives the message text and returns the
            number to add to that user's total. The title is not used here.

    Returns:
        A dict ``{plugin_name: {user: accumulated value}}`` in which every
        user of ``all_users`` is present, starting from 0.

    Raises:
        KeyError: if a message lacks the "user" or "mess_text" key.
        Exception: whatever a plugin function raises is propagated instead
            of being silently discarded.
    """
    results = {}
    for plugin_name, _function, _title in plugins:
        results[plugin_name] = {user: 0 for user in all_users}

    tracked_users = frozenset(all_users)
    for message in all_messages:
        user = message["user"]
        # Explicit skip for untracked users (previously a swallowed KeyError).
        if user not in tracked_users:
            continue
        text = message["mess_text"]
        for plugin_name, plugin_function, _title in plugins:
            results[plugin_name][user] += plugin_function(text)
    return results
# Report how many lines were parsed from the chat file.
print(len(list_messages))
# Users kept for the statistics, using get_users_list's default filters.
lista = get_users_list(list_users)
def is_multimedia(message):
    """Return 1 if ``message`` contains the "<Multimedia omitido>" marker, else 0."""
    return int("<Multimedia omitido>" in message)
def count_jaja(message):
    """Count the non-overlapping occurrences of "ja" in ``message``.

    The comparison is case-insensitive. Returns 0 when "ja" does not appear.
    """
    # str.count gives the number of occurrences directly; the length of
    # split("ja") would be one more than that.
    return message.lower().count("ja")
def count_jeje(message):
    """Count the non-overlapping occurrences of "je" in ``message``.

    The comparison is case-insensitive. Returns 0 when "je" does not appear.
    """
    # str.count gives the number of occurrences directly; the length of
    # split("je") would be one more than that.
    return message.lower().count("je")
def get_substitution(valor):
    """Return the short display name for the full contact name ``valor``.

    Names without a configured short form are returned unchanged, so that
    distinct unknown users never collapse into a single ``None`` label.
    """
    short_names = {
        "Clara Olmeda Medrano": "Clara",
        "Mari Carmen Sánchez Cordón": "Mari",
        "Adri Fernández": "Adri",
        "Víctor Fernández": "Víctor",
    }
    return short_names.get(valor, valor)
def apply_thesaurus(diccionario, substitution_fn):
    """Return a copy of ``diccionario`` whose keys went through ``substitution_fn``.

    Values are kept as they are. When two keys map to the same new key, the
    value of the one iterated last is the one that remains.
    """
    return {
        substitution_fn(old_key): value
        for old_key, value in diccionario.items()
    }
# Each plugin is (result key, function applied to a message text, chart title).
plugins_list = [
    ("num_messages", lambda x: 1, "Número total de mensajes"),
    ("len_messages", len, "Número de caracteres escritos"),
    ("multimedia", is_multimedia, "Número de mensajes multimedia"),
    ("ja", count_jaja, "Número de veces con \"jaja\""),
    ("je", count_jeje, "Número de veces con \"jeje\""),
]

# Per-user totals for every plugin, keyed by the names found in the chat.
res = count_messages_by_user(list_messages, lista, plugins_list)
print(res)

# Same totals, re-keyed through get_substitution.
res_subs = {
    key: apply_thesaurus(per_user, get_substitution)
    for key, per_user in res.items()
}
print(res_subs)
def plot_object(lista, dict_values, title, ylabel="valor"):
    """Draw a labelled bar chart on a new figure and return ``plt``.

    Args:
        lista: names for the x axis; each one is passed through
            ``get_substitution`` to obtain its tick label.
        dict_values: mapping whose values are the bar heights. They are taken
            in the mapping's iteration order, so its entries must follow the
            same order as ``lista``.
        title: chart title.
        ylabel: label for the y axis.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can save or show the
        figure that was just drawn.
    """
    plt.figure()
    labels = [get_substitution(name) for name in lista]
    positions = np.arange(len(labels))
    heights = list(dict_values.values())

    bars = plt.bar(positions, heights, align='center', alpha=0.5)
    plt.xticks(positions, labels)
    plt.ylabel(ylabel)
    plt.title(title)

    # Write each bar's value (truncated to an integer) just above its top.
    for rect in bars:
        height = rect.get_height()
        plt.text(
            rect.get_x() + rect.get_width() / 2.0,
            height,
            '%d' % int(height),
            ha='center',
            va='bottom',
        )
    return plt
# Draw one chart per plugin and save it as "<plugin name>.png".
for name, _function, chart_title in plugins_list:
    chart = plot_object(lista, res_subs[name], chart_title)
    chart.savefig("%s.png" % name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment