Skip to content

Instantly share code, notes, and snippets.

@guillemborrell
Last active January 1, 2016 12:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guillemborrell/dcda3da1e619053462b0 to your computer and use it in GitHub Desktop.
Save guillemborrell/dcda3da1e619053462b0 to your computer and use it in GitHub Desktop.
Script that computes the trend for new authors in my chat.
import json
import datetime
import numpy as np
import matplotlib
from itertools import groupby
from operator import itemgetter
from matplotlib import pyplot as plt
from time import perf_counter
# Set matplotlib's global default font size to 18 so all figure text uses it.
matplotlib.rcParams['font.size'] = 18
class Trace:
    """Collect labelled perf_counter() checkpoints and report the gaps between them."""

    def __init__(self):
        # Each entry is a (label, perf_counter reading) pair, in call order.
        self.trace = []

    def step(self, label):
        """Record a checkpoint named *label* at the current perf_counter() reading."""
        now = perf_counter()
        self.trace.append((label, now))

    def print_stats(self):
        """Print the seconds elapsed between every pair of consecutive checkpoints.

        When fewer than two checkpoints were recorded there is no interval to
        report, so a short notice is printed instead.
        """
        if len(self.trace) < 2:
            print("Not enough tracing steps")
            return

        banner = "***********************"
        print(banner)
        print("Tracing statistics")
        # Pair each checkpoint with its successor.
        for (start_label, start_time), (end_label, end_time) in zip(
                self.trace, self.trace[1:]):
            elapsed = end_time - start_time
            print("{} -> {}: {:8.2f} s".format(start_label, end_label, elapsed))
        print(banner)
def chat_stats(messages):
    """Summarise the messages per author.

    Each message is a mapping with an 'author' entry and a 'date' string in
    the "%b %d %Y %H:%M:%S" format.

    Returns a list of (author, earliest timestamp, message count) tuples,
    ordered by author.
    """
    by_author = itemgetter('author')
    # groupby only merges adjacent items, hence the sort on the same key.
    ordered = sorted(messages, key=by_author)

    summary = []
    for author, posts in groupby(ordered, key=by_author):
        parsed = [
            datetime.datetime.strptime(post['date'], "%b %d %Y %H:%M:%S")
            for post in posts
        ]
        summary.append((author, min(parsed), len(parsed)))
    return summary
def monthly_active_users(messages):
    """Track the 30-day active user count after every message.

    Each message is a mapping with an 'author' entry and a 'date' string in
    the "%b %d %Y %H:%M:%S" format. The messages are walked in reverse order
    of the input sequence.
    NOTE(review): presumably the input is stored newest-first, so that the
    reversed walk is chronological -- confirm against the data source.

    Returns three parallel lists with one entry per message processed:
      * active_users -- number of authors whose latest message is no more
        than 30 days older than the current message;
      * engagement   -- that number divided by the count of distinct authors
        seen so far;
      * times        -- the parsed timestamp of the current message.
    """
    active_users = []
    engagement = []
    times = []
    last_visit = {}        # author -> timestamp of their latest message
    seen_authors = set()   # every author encountered so far
    threshold = datetime.timedelta(days=30)

    for m in reversed(messages):
        # Plain author value as the key (the original wrapped it in a
        # 1-tuple through a stray trailing comma).
        author = m['author']
        timestamp = datetime.datetime.strptime(m['date'], "%b %d %Y %H:%M:%S")
        last_visit[author] = timestamp
        seen_authors.add(author)

        # Collect first, delete afterwards: a dict must not change size
        # while it is being iterated.
        expired = [user for user, seen in last_visit.items()
                   if timestamp - seen > threshold]
        for user in expired:
            del last_visit[user]

        active_users.append(len(last_visit))
        # seen_authors always holds at least the current author, so the
        # denominator is never zero.
        engagement.append(len(last_visit) / len(seen_authors))
        times.append(timestamp)

    return active_users, engagement, times
if __name__ == '__main__':
    # Record a checkpoint after each stage so the timing report printed
    # before plotting shows how long every stage took.
    trace = Trace()
    trace.step('start')

    # JSON is UTF-8 by specification; be explicit so non-ASCII chat text does
    # not depend on the platform's default encoding.
    with open('messages.json', encoding='utf-8') as f:
        messages = json.load(f)
    trace.step('IO')

    stats = chat_stats(messages)
    if not stats:
        # Without at least one author there is no first date to anchor the
        # trend on; exit cleanly instead of failing with an IndexError.
        raise SystemExit('No messages found in messages.json')

    # Date of each author's first message, oldest first.
    dates = sorted(first_seen for _, first_seen, _ in stats)
    origin = dates[0]
    timedeltas_seconds = [(date - origin).total_seconds() for date in dates]

    # Largest contributors
    print('Users that have contributed more than 20 messages')
    for s in sorted(stats, key=itemgetter(2), reverse=True):
        if s[2] > 20:
            print('{}: {}'.format(s[0], s[2]))

    # NOTE(review): linspace(0, N, N) yields non-integer "user counts";
    # np.arange(1, N + 1) may be what was intended -- confirm before changing,
    # since it would shift the fitted curve.
    x = np.linspace(0, len(dates), len(dates))
    # Quadratic fit of elapsed seconds as a function of the user count,
    # mapped back to dates to draw the trend line.
    p = np.polyfit(x, timedeltas_seconds, 2)
    seconds_elapsed = np.polyval(p, x)
    newdates = [origin + datetime.timedelta(seconds=s) for s in seconds_elapsed]
    trace.step('newusers')

    active_users, engagement, timestamps = monthly_active_users(messages)
    trace.step('monthly users')
    trace.print_stats()

    # plt.plot handles datetime x-values directly; plt.plot_date is deprecated
    # in recent Matplotlib releases.
    plt.figure(1)
    plt.xticks(rotation=70)
    plt.plot(dates, x, '-', linewidth=4)
    plt.plot(newdates, x, 'k--', linewidth=2)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of total users.')

    plt.figure(2)
    plt.xticks(rotation=70)
    plt.plot(timestamps, active_users, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylabel('Number of monthly active users.')

    plt.figure(3)
    plt.xticks(rotation=70)
    plt.plot(timestamps, engagement, '-', linewidth=4)
    plt.subplots_adjust(bottom=0.21, left=0.15)
    plt.ylim([0, 1.1])
    plt.ylabel('Engagement.')

    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment