Created
August 17, 2018 14:38
-
-
Save vfrico/7db4d6e7ec24ccfb1dc40cef00915458 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np

plt.rcdefaults()
# Parse the exported chat into one record per line of the file.
#
# Each line is expected to look like "<date> - <user>:<text>".  A line that
# lacks the " - " separator is stored whole in "date" with an empty user and
# empty text; a line that has " - " but no ":" is stored with the whole
# remainder as the user and empty text.
list_messages = []
list_users = []
# The context manager closes the file even if parsing fails part-way.
# NOTE(review): assumes the export is UTF-8 encoded -- confirm against the
# actual file; the accented names matched elsewhere need correct decoding.
with open("whatsapp.txt", "r", encoding="utf-8") as f:
    for line in f:
        # Split only on the FIRST separator of each kind so that any later
        # " - " or ":" inside the message body (URLs, times, emoticons) is
        # kept in the text instead of being silently removed.
        date, _, right = line.partition(" - ")
        user, _, mess_text = right.partition(":")
        message = {
            "date": date,
            "user": user,
            "mess_text": mess_text
        }
        list_messages.append(message)
        list_users.append(user)
def get_users_list(all_users, exclude_list=None, threshold=10):
    """Return the users that appear more than ``threshold`` times.

    Args:
        all_users: Iterable with one entry per message (the author name),
            so a name appears once for every message it wrote.
        exclude_list: Container of names that must never be returned.
            Defaults to excluding only the empty name ``""``.
        threshold: A user is kept only when its number of appearances is
            strictly greater than this value.

    Returns:
        A list of user names, ordered by first appearance in ``all_users``.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    excluded = ("",) if exclude_list is None else exclude_list
    # Counter keeps first-insertion order, which makes the result
    # deterministic between runs.
    counters = Counter(all_users)
    return [
        user
        for user, total in counters.items()
        if user not in excluded and total > threshold
    ]
def count_messages_by_user(all_messages, all_users, plugins):
    """Accumulate every plugin's score per user over all messages.

    Args:
        all_messages: Iterable of dicts with at least the keys ``"user"``
            and ``"mess_text"``.
        all_users: Iterable of the user names to report on.  Messages
            written by any other user are ignored.
        plugins: Sequence of ``(plugin_name, plugin_function, pl_title)``
            triples.  ``plugin_function`` is called with the message text
            and its return value is added to that user's running total.

    Returns:
        A dict mapping each plugin name to a ``{user: total}`` dict that
        has an entry (starting at 0) for every user in ``all_users``.
    """
    results = {}
    for plugin_name, _plugin_function, _pl_title in plugins:
        results[plugin_name] = {user: 0 for user in all_users}

    tracked_users = set(all_users)
    for message in all_messages:
        user = message["user"]
        # Skip authors we are not reporting on explicitly, instead of
        # relying on a swallowed KeyError that would also hide plugin bugs.
        if user not in tracked_users:
            continue
        text = message["mess_text"]
        for plugin_name, plugin_function, _pl_title in plugins:
            results[plugin_name][user] += plugin_function(text)
    return results
# Keep only the users selected by get_users_list (called with its default
# arguments); every later statistic and chart is restricted to them.
lista = get_users_list(list_users)
# Report how many lines of the export were parsed into message records.
print(len(list_messages))
def is_multimedia(message):
    """Return 1 when the text contains the "<Multimedia omitido>" marker, else 0."""
    return 1 if "<Multimedia omitido>" in message else 0
def count_jaja(message):
    """Return how many times "ja" occurs in ``message``, ignoring case.

    Occurrences are counted without overlap, so "jajaja" yields 3.
    Returns 0 when the text contains no "ja" at all.
    """
    # str.count gives the number of occurrences directly; the length of
    # message.split("ja") would be one more than that.
    return message.lower().count("ja")
def count_jeje(message):
    """Return how many times "je" occurs in ``message``, ignoring case.

    Occurrences are counted without overlap, so "jejeje" yields 3.
    Returns 0 when the text contains no "je" at all.
    """
    # str.count gives the number of occurrences directly; the length of
    # message.split("je") would be one more than that.
    return message.lower().count("je")
def get_substitution(valor):
    """Return the short display name for a full contact name.

    Args:
        valor: Full user name as it appears in the chat export.

    Returns:
        The short name when ``valor`` is one of the known contacts;
        otherwise ``valor`` itself, so that unknown users keep a distinct,
        readable label instead of all becoming ``None``.
    """
    short_names = {
        "Clara Olmeda Medrano": "Clara",
        "Mari Carmen Sánchez Cordón": "Mari",
        "Adri Fernández": "Adri",
        "Víctor Fernández": "Víctor",
    }
    return short_names.get(valor, valor)
def apply_thesaurus(diccionario, substitution_fn):
    """Return a copy of ``diccionario`` with each key passed through ``substitution_fn``.

    Values are carried over unchanged.  When two original keys translate to
    the same new key, the value of the one iterated last is kept.
    """
    return {
        substitution_fn(original_key): value
        for original_key, value in diccionario.items()
    }
# Each plugin is a (result key, per-message scoring function, chart title)
# triple; the scoring function receives the message text.
plugins_list = [
    ("num_messages", lambda x: 1, "Número total de mensajes"),
    ("len_messages", lambda x: len(x), "Número de caracteres escritos"),
    ("multimedia", is_multimedia, "Número de mensajes multimedia"),
    ("ja", count_jaja, 'Número de veces con "jaja"'),
    ("je", count_jeje, 'Número de veces con "jeje"'),
]

# Totals per plugin and per selected user, keyed by the full user name.
res = count_messages_by_user(list_messages, lista, plugins_list)
print(res)

# The same totals, with every user key passed through get_substitution.
res_subs = {}
for k in res:
    res_subs[k] = apply_thesaurus(res[k], get_substitution)
print(res_subs)
def plot_object(lista, dict_values, title, ylabel="valor"):
    """Draw a labelled bar chart on a new figure and return ``plt``.

    Args:
        lista: User names; each one is passed through get_substitution to
            produce the tick label of a bar.
        dict_values: Mapping whose values are the bar heights.
        title: Title of the chart.
        ylabel: Label of the vertical axis.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure current.
    """
    plt.figure()
    labels = [get_substitution(name) for name in lista]
    positions = np.arange(len(labels))
    # NOTE: heights are taken in dict_values' own iteration order, so they
    # line up with the labels only when dict_values has one entry per user
    # of lista, in the same order.
    heights = list(dict_values.values())
    drawn_bars = plt.bar(positions, heights, align='center', alpha=0.5)
    plt.xticks(positions, labels)
    plt.ylabel(ylabel)
    plt.title(title)
    # Write the integer value of each bar just above its top edge.
    for bar in drawn_bars:
        top = bar.get_height()
        center_x = bar.get_x() + bar.get_width() / 2.0
        plt.text(center_x, top, f"{int(top):d}", ha='center', va='bottom')
    return plt
# NOTE(review): this file creates figures only inside plot_object, which has
# not been called yet at this point -- confirm that showing here is intended.
plt.show()
# Wait for a line on standard input before the image files are generated.
input()
# Render one chart per plugin and save it as "<plugin name>.png" in the
# current working directory.
for plugin_name, plugin_function, pl_title in plugins_list:
    plot = plot_object(lista, res_subs[plugin_name], pl_title)
    plot.savefig(f"{plugin_name}.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment