|
import json |
|
import datetime |
|
import numpy as np |
|
import matplotlib |
|
from itertools import groupby |
|
from operator import itemgetter |
|
from matplotlib import pyplot as plt |
|
from time import perf_counter |
|
|
|
# Default font size (18) for every figure created by this script.
matplotlib.rcParams.update({'font.size': 18})
|
|
|
class Trace(object):
    """Record labelled checkpoints and report the time spent between them."""

    def __init__(self):
        # Chronological list of (label, perf_counter() reading) pairs.
        self.trace = []

    def step(self, label):
        """Append a checkpoint named ``label`` stamped with perf_counter()."""
        checkpoint = (label, perf_counter())
        self.trace.append(checkpoint)

    def print_stats(self):
        """Print the seconds elapsed between consecutive checkpoints.

        A short notice is printed instead when fewer than two checkpoints
        have been recorded.
        """
        if len(self.trace) < 2:
            print("Not enough tracing steps")
            return

        print("***********************")
        print("Tracing statistics")
        # Pair every checkpoint with its successor.
        for (start_label, start_time), (end_label, end_time) in zip(
                self.trace, self.trace[1:]):
            elapsed = end_time - start_time
            print("{} -> {}: {:8.2f} s".format(start_label, end_label, elapsed))
        print("***********************")
|
|
|
def chat_stats(messages):
    """Summarise the messages per author.

    Args:
        messages: Iterable of dicts with an ``'author'`` key and a ``'date'``
            key formatted as ``"%b %d %Y %H:%M:%S"``.

    Returns:
        A list of ``(author, earliest message datetime, message count)``
        tuples, ordered by author.
    """
    author_of = itemgetter('author')
    # groupby only merges adjacent items, so sort by the grouping key first.
    ordered = sorted(messages, key=author_of)

    summary = []
    for author, group in groupby(ordered, key=author_of):
        parsed = [
            datetime.datetime.strptime(msg['date'], "%b %d %Y %H:%M:%S")
            for msg in group
        ]
        summary.append((author, min(parsed), len(parsed)))

    return summary
|
|
|
def monthly_active_users(messages, threshold=datetime.timedelta(days=30)):
    """Track how many authors are active while the chat history is replayed.

    The messages are walked in reverse list order. After each message, every
    author whose latest message is more than ``threshold`` older than the
    current message is dropped from the active set.

    Args:
        messages: Sequence of dicts with an ``'author'`` key and a ``'date'``
            key formatted as ``"%b %d %Y %H:%M:%S"``.
            NOTE(review): the reverse walk suggests the list is stored
            newest-first -- confirm against whatever produces it.
        threshold: ``datetime.timedelta`` giving the longest gap since an
            author's latest message for which the author still counts as
            active. Defaults to 30 days.

    Returns:
        A tuple ``(active_users, engagement, times)`` of equally long lists
        with one entry per message: the number of active authors, that number
        divided by the number of distinct authors seen so far, and the parsed
        timestamp of the message.
    """
    active_users = []
    engagement = []
    times = []
    last_visit = {}        # author -> timestamp of their latest message so far
    seen_authors = set()   # every author encountered so far

    for message in reversed(messages):
        author = message['author']
        timestamp = datetime.datetime.strptime(
            message['date'], "%b %d %Y %H:%M:%S")
        last_visit[author] = timestamp
        seen_authors.add(author)

        # Keep only the authors still inside the activity window.
        last_visit = {
            user: seen
            for user, seen in last_visit.items()
            if timestamp - seen <= threshold
        }

        active_users.append(len(last_visit))
        engagement.append(len(last_visit) / len(seen_authors))
        times.append(timestamp)

    return active_users, engagement, times
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: load messages.json, print per-user statistics and
    # timing information, then show three figures (total users, monthly
    # active users, engagement).
    trace = Trace()
    trace.step('start')

    # JSON is UTF-8 encoded; do not rely on the platform default encoding,
    # which can fail on non-ASCII author names or message text.
    with open('messages.json', encoding='utf-8') as f:
        messages = json.load(f)

    trace.step('IO')

    stats = chat_stats(messages)
    if not stats:
        # Nothing to fit or plot; avoid an IndexError on dates[0] below.
        raise SystemExit('messages.json contains no messages')

    # Each stats entry is (author, first message time, message count), so the
    # sorted first-message times are the moments at which a new user appeared.
    dates = sorted(first_seen for _, first_seen, _ in stats)
    timedeltas_seconds = [(date - dates[0]).total_seconds() for date in dates]

    # Largest contributors
    print('Users that have contributed more than 20 messages')
    for author, _, count in sorted(stats, key=itemgetter(2), reverse=True):
        if count > 20:
            print('{}: {}'.format(author, count))

    # Cumulative user count: the i-th first-message date brings the total to i.
    x = np.arange(1, len(dates) + 1)
    # Quadratic fit of elapsed seconds as a function of the user count.
    p = np.polyfit(x, timedeltas_seconds, 2)

    seconds_elapsed = np.polyval(p, x)
    newdates = [dates[0] + datetime.timedelta(seconds=s)
                for s in seconds_elapsed]
    trace.step('newusers')

    active_users, engagement, timestamps = monthly_active_users(messages)

    trace.step('monthly users')
    trace.print_stats()

    # plt.plot handles datetime x-values directly; plot_date is deprecated.
    plt.figure(1)
    plt.xticks(rotation=70)
    plt.plot(dates, x, '-', linewidth=4)
    plt.plot(newdates, x, 'k--', linewidth=2)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of total users.')

    plt.figure(2)
    plt.xticks(rotation=70)
    plt.plot(timestamps, active_users, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of monthly active users.')

    plt.figure(3)
    plt.xticks(rotation=70)
    plt.plot(timestamps, engagement, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylim([0, 1.1])
    plt.ylabel('Engagement.')

    plt.show()