Skip to content

Instantly share code, notes, and snippets.

@jmduke
Created November 12, 2013 07:37
Show Gist options
  • Save jmduke/7427030 to your computer and use it in GitHub Desktop.
Save jmduke/7427030 to your computer and use it in GitHub Desktop.
Plotting retweets over time.
import bisect
import collections
import os
from datetime import datetime, timedelta
from math import log

import matplotlib.pyplot as plt
import numpy as np
import tweepy
# strftime/strptime-style pattern of the form "YYYY-MM-DD HH:MM:SS".
# NOTE(review): not referenced by any code visible in this file.
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Width of a single aggregation bucket on the time axis.
INTERVAL = timedelta(hours=1)

# Account names whose timelines are fetched and pooled together.
USERNAMES = [
    "adage",
    "nytimes",
    "digg",
    "wsj",
    "amazon",
    "google",
]
def normalize(l):
    """Return the mean of ``l``, log-compressed when the mean exceeds 2.

    Args:
        l: A sized collection of numbers (e.g. the retweet counts that fell
            into one time bucket).

    Returns:
        ``0.0`` for an empty collection, the arithmetic mean when it is at
        most 2, and the natural logarithm of the mean otherwise.
    """
    # An empty bucket used to raise ZeroDivisionError; treat it as "no
    # activity" instead so callers can feed sparse data safely.
    if not l:
        return 0.0

    avg = sum(l) / len(l)
    # NOTE(review): the transform is not monotonic around the threshold
    # (a mean of exactly 2 maps to 2, a mean just above 2 maps to ~0.69).
    # Kept as-is to preserve the existing output.
    if avg > 2:
        return log(avg)
    return avg
# Number of INTERVAL-wide buckets that make up one plotted "day", and how many
# such days (counted from the earliest tweet, not calendar days) are plotted.
BINS_PER_DAY = 24
DAYS = 10
TOTAL_BINS = BINS_PER_DAY * DAYS

# --- Fetch tweets ------------------------------------------------------------
# Credentials come from the environment so that secrets stay out of the source.
# A missing variable fails immediately with a KeyError naming the variable.
consumer_key = os.environ["TWITTER_CONSUMER_KEY"]
consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth)

dates = []
values = []
for username in USERNAMES:
    user = api.get_user(screen_name=username)
    # Append tweet by tweet: unpacking zip(*[...]) fails on an empty timeline.
    for tweet in user.timeline(count=1000):
        dates.append(tweet.created_at)
        values.append(tweet.retweet_count)

if not dates:
    raise SystemExit("No tweets were returned for any of USERNAMES; nothing to plot.")

# --- Aggregate values with similar datetimes ---------------------------------
# Bucket index 0 starts at the earliest tweet; each bucket is INTERVAL wide.
beginning_timestamp = min(dates)
bins = collections.defaultdict(list)
for v, d in zip(values, dates):
    index = (d - beginning_timestamp) // INTERVAL
    # Tweets past the plotted window are ignored rather than lumped into the
    # last bucket.
    if index < TOTAL_BINS:
        bins[index].append(v)

# One normalized value per populated bucket, keyed by bucket index.
y_by_bin = {index: normalize(counts) for index, counts in bins.items()}

# --- Create a curve of best fit for each day ---------------------------------
# The evaluation grid is identical for every day, so build it once.
x_points = np.linspace(0.0, 24.0, 24)
for day in range(DAYS):
    first_bin = day * BINS_PER_DAY
    # Half-open range [first_bin, first_bin + BINS_PER_DAY): every bucket
    # belongs to exactly one day, including the ones on day boundaries.
    day_bins = [
        b for b in range(first_bin, first_bin + BINS_PER_DAY) if b in y_by_bin
    ]
    # A quadratic has three coefficients; fewer points cannot determine it.
    if len(day_bins) < 3:
        continue

    local_x = [b - first_bin for b in day_bins]
    local_y = [y_by_bin[b] for b in day_bins]
    try:
        coefficients = np.polyfit(local_x, local_y, 2)
    except (np.linalg.LinAlgError, ValueError):
        # Best effort: skip a day whose fit cannot be computed, but let
        # unrelated errors (and Ctrl-C) propagate.
        continue

    polynomial = np.poly1d(coefficients)
    plt.plot(local_x, local_y, 'x', x_points, polynomial(x_points), '-')

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment