Instantly share code, notes, and snippets.
Last active
January 1, 2016 12:59
-
Save guillemborrell/dcda3da1e619053462b0 to your computer and use it in GitHub Desktop.
Script that computes the trend for new authors in my chat.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import datetime | |
import numpy as np | |
import matplotlib | |
from itertools import groupby | |
from operator import itemgetter | |
from matplotlib import pyplot as plt | |
from time import perf_counter | |
# Use a larger default font so that axis labels stay readable in the figures.
matplotlib.rcParams.update({'font.size': 18})
class Trace(object):
    """Record labelled checkpoints and report the time spent between them."""

    def __init__(self):
        # Ordered list of (label, perf_counter() reading) pairs.
        self.trace = []

    def step(self, label):
        """Append a checkpoint named ``label`` stamped with perf_counter()."""
        checkpoint = (label, perf_counter())
        self.trace.append(checkpoint)

    def print_stats(self):
        """Print the seconds elapsed between each pair of consecutive steps.

        At least two recorded steps are needed to form an interval;
        otherwise a short notice is printed instead of the report.
        """
        if len(self.trace) < 2:
            print("Not enough tracing steps")
            return

        banner = "***********************"
        print(banner)
        print("Tracing statistics")
        # Pair every checkpoint with its successor.
        pairs = zip(self.trace, self.trace[1:])
        for (start_label, start_time), (end_label, end_time) in pairs:
            elapsed = end_time - start_time
            print("{} -> {}: {:8.2f} s".format(start_label, end_label, elapsed))
        print(banner)
def chat_stats(messages):
    """Summarise the messages per author.

    Each message is a mapping with an ``'author'`` entry and a ``'date'``
    string in the ``"%b %d %Y %H:%M:%S"`` format.

    Returns a list of ``(author, first_timestamp, message_count)`` tuples,
    ordered by author, where ``first_timestamp`` is the earliest parsed
    ``datetime`` among that author's messages.
    """
    by_author = itemgetter('author')
    summary = []
    # groupby only merges adjacent items, so sort by the same key first.
    for author, group in groupby(sorted(messages, key=by_author),
                                 key=by_author):
        parsed = [
            datetime.datetime.strptime(msg['date'], "%b %d %Y %H:%M:%S")
            for msg in group
        ]
        parsed.sort()
        first_seen = parsed[0]
        summary.append((author, first_seen, len(parsed)))
    return summary
def monthly_active_users(messages):
    """Track active users and engagement after every message.

    The messages are walked in reverse list order (presumably the file
    stores the newest message first, so this is chronological order --
    confirm against the data source). Each message is a mapping with an
    ``'author'`` entry and a ``'date'`` string in the
    ``"%b %d %Y %H:%M:%S"`` format.

    After each message, an author counts as active when their most recent
    message is at most 30 days older than the current message.

    Returns three lists with one entry per message, in processing order:
    ``active_users`` (number of active authors), ``engagement`` (active
    authors divided by all authors seen so far) and ``times`` (the parsed
    ``datetime`` of the message).
    """
    date_format = "%b %d %Y %H:%M:%S"
    threshold = datetime.timedelta(days=30)

    active_users = []
    engagement = []
    times = []
    last_visit = {}       # author -> timestamp of their latest message
    seen_authors = set()  # every author encountered so far

    for m in reversed(messages):
        # Plain key: the original trailing comma wrapped it in a 1-tuple.
        author = m['author']
        timestamp = datetime.datetime.strptime(m['date'], date_format)
        last_visit[author] = timestamp
        seen_authors.add(author)

        # Collect stale authors first; a dict cannot be resized while it is
        # being iterated.
        stale = [
            user for user, visited in last_visit.items()
            if (timestamp - visited) > threshold
        ]
        for user in stale:
            del last_visit[user]

        active_users.append(len(last_visit))
        engagement.append(len(last_visit) / len(seen_authors))
        times.append(timestamp)

    return active_users, engagement, times
if __name__ == '__main__':
    # Script entry point: load the chat log, print the largest contributors,
    # fit a quadratic trend to the arrival of new authors and plot the
    # results, timing every stage with Trace.
    trace = Trace()
    trace.step('start')

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open('messages.json', encoding='utf-8') as f:
        messages = json.load(f)
    trace.step('IO')

    stats = chat_stats(messages)
    # Date of the first message of every author, oldest first.
    dates = sorted([fa[1] for fa in stats])
    timedeltas_seconds = [(date-dates[0]).total_seconds() for date in dates]

    # Largest contributors
    print('Users that have contributed more than 20 messages')
    for s in sorted(stats, key=itemgetter(2), reverse=True):
        if s[2] > 20:
            print('{}: {}'.format(s[0], s[2]))

    # x is the cumulative author count; fit the elapsed seconds as a
    # second-degree polynomial of it to obtain a smooth trend line.
    x = np.linspace(0, len(dates), len(dates))
    p = np.polyfit(x, timedeltas_seconds, 2)
    seconds_elapsed = np.polyval(p, x)
    newdates = [dates[0] + datetime.timedelta(seconds=s)
                for s in seconds_elapsed]
    trace.step('newusers')

    active_users, engagement, timestamps = monthly_active_users(messages)
    trace.step('monthly users')
    trace.print_stats()

    # plt.plot accepts datetime values directly; plt.plot_date is deprecated.
    plt.figure(1)
    plt.xticks(rotation=70)
    plt.plot(dates, x, '-', linewidth=4)
    plt.plot(newdates, x, 'k--', linewidth=2)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of total users.')

    plt.figure(2)
    plt.xticks(rotation=70)
    plt.plot(timestamps, active_users, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of monthly active users.')

    plt.figure(3)
    plt.xticks(rotation=70)
    plt.plot(timestamps, engagement, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylim([0, 1.1])
    plt.ylabel('Engagement.')

    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment