Instantly share code, notes, and snippets.
Last active Jan 1, 2016
Script that computes the trend for new authors in my chat.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import datetime | |
import numpy as np | |
import matplotlib | |
from itertools import groupby | |
from operator import itemgetter | |
from matplotlib import pyplot as plt | |
from time import perf_counter | |
# Enlarge the default font for every figure created by this script.
matplotlib.rcParams.update({'font.size': 18})
class Trace(object):
    """Record labelled checkpoints and report the time between them."""

    def __init__(self):
        # Chronological list of (label, perf_counter() reading) pairs.
        self.trace = []

    def step(self, label):
        """Store a checkpoint named *label* with the current perf_counter()."""
        checkpoint = (label, perf_counter())
        self.trace.append(checkpoint)

    def print_stats(self):
        """Print the seconds elapsed between each pair of consecutive steps.

        When fewer than two steps have been recorded there is no interval
        to report, so a short notice is printed instead.
        """
        if len(self.trace) < 2:
            print("Not enough tracing steps")
            return

        banner = "***********************"
        print(banner)
        print("Tracing statistics")
        # Pair every checkpoint with its successor.
        for earlier, later in zip(self.trace, self.trace[1:]):
            elapsed = later[1] - earlier[1]
            line = "{} -> {}: {:8.2f} s".format(earlier[0], later[0], elapsed)
            print(line)
        print(banner)
def chat_stats(messages):
    """Summarise a chat log per author.

    Args:
        messages: Iterable of message dicts. Each one is read through its
            'author' key and its 'date' key; the date must match the
            format "%b %d %Y %H:%M:%S" (e.g. "Jan 01 2016 13:45:00").

    Returns:
        A list with one (author, first_message_datetime, message_count)
        tuple per distinct author, ordered by author.

    Raises:
        KeyError: If a message lacks 'author' or 'date'.
        ValueError: If a date string does not match the expected format.
    """
    date_format = "%b %d %Y %H:%M:%S"
    by_author = itemgetter('author')
    stats = []
    # groupby only merges adjacent items, so sort on the same key first.
    for author, group in groupby(sorted(messages, key=by_author),
                                 key=by_author):
        timestamps = [
            datetime.datetime.strptime(message['date'], date_format)
            for message in group
        ]
        # Only the earliest timestamp is needed, so min() replaces a sort.
        stats.append((author, min(timestamps), len(timestamps)))
    return stats
def monthly_active_users(messages):
    """Track how many authors were active in the trailing 30 days.

    The messages are walked in reverse order of the input sequence.
    NOTE(review): this presumably means the input is stored newest-first,
    so the walk is chronological; confirm against the data source.

    Args:
        messages: Reversible sequence of message dicts. Each one is read
            through its 'author' key and its 'date' key; the date must
            match the format "%b %d %Y %H:%M:%S".

    Returns:
        A tuple (active_users, engagement, times) of three lists with one
        entry per message, in the order the messages were walked:
        active_users is the number of authors whose latest message is at
        most 30 days older than the current message, engagement is that
        number divided by the number of distinct authors seen so far, and
        times is the parsed timestamp of the current message.
    """
    date_format = "%b %d %Y %H:%M:%S"
    threshold = datetime.timedelta(days=30)

    active_users = []
    engagement = []
    times = []
    last_visit = {}       # author -> timestamp of their latest message
    seen_authors = set()  # every author encountered so far

    for message in reversed(messages):
        # Plain key: the original trailing comma wrapped it in a 1-tuple.
        author = message['author']
        timestamp = datetime.datetime.strptime(message['date'], date_format)
        last_visit[author] = timestamp
        seen_authors.add(author)

        # Collect stale users first; a dict cannot shrink while iterated.
        stale = [user for user, seen in last_visit.items()
                 if timestamp - seen > threshold]
        for user in stale:
            del last_visit[user]

        active_users.append(len(last_visit))
        # seen_authors always holds the current author, so it is non-empty.
        engagement.append(len(last_visit) / len(seen_authors))
        times.append(timestamp)

    return active_users, engagement, times
def main():
    """Load messages.json, print top contributors and plot user trends.

    Produces three figures: the cumulative number of distinct authors over
    time with a polynomial trend line, the number of monthly active users,
    and the engagement ratio.
    """
    trace = Trace()
    trace.step('start')

    # Read as UTF-8 explicitly rather than relying on the platform default.
    with open('messages.json', encoding='utf-8') as f:
        messages = json.load(f)
    trace.step('IO')

    stats = chat_stats(messages)
    if not stats:
        # With no authors there is no first date to measure from.
        print('No messages found, nothing to plot')
        return

    # First-message date of every author, oldest first.
    dates = sorted(first_seen for _, first_seen, _ in stats)
    timedeltas_seconds = [(date - dates[0]).total_seconds() for date in dates]

    # Largest contributors
    print('Users that have contributed more than 20 messages')
    for s in sorted(stats, key=itemgetter(2), reverse=True):
        if s[2] > 20:
            print('{}: {}'.format(s[0], s[2]))

    # The i-th new author brings the running total to i, so the counts are
    # the integers 1..n (linspace(0, n, n) gave fractional values from 0).
    user_counts = np.arange(1, len(dates) + 1)
    # A quadratic needs at least three points; use a lower degree otherwise.
    degree = min(2, len(dates) - 1)
    p = np.polyfit(user_counts, timedeltas_seconds, degree)
    seconds_elapsed = np.polyval(p, user_counts)
    newdates = [dates[0] + datetime.timedelta(seconds=float(s))
                for s in seconds_elapsed]
    trace.step('newusers')

    active_users, engagement, timestamps = monthly_active_users(messages)
    trace.step('monthly users')
    trace.print_stats()

    # plt.plot accepts datetime values directly; plot_date is deprecated.
    plt.figure(1)
    plt.xticks(rotation=70)
    plt.plot(dates, user_counts, '-', linewidth=4)
    plt.plot(newdates, user_counts, 'k--', linewidth=2)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of total users.')

    plt.figure(2)
    plt.xticks(rotation=70)
    plt.plot(timestamps, active_users, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of monthly active users.')

    plt.figure(3)
    plt.xticks(rotation=70)
    plt.plot(timestamps, engagement, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylim([0, 1.1])
    plt.ylabel('Engagement.')

    plt.show()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment