Skip to content

Instantly share code, notes, and snippets.

@tipabu
Last active May 25, 2016 19:54
Show Gist options
  • Save tipabu/5a0ee436644b48228127668796023e71 to your computer and use it in GitHub Desktop.
Save tipabu/5a0ee436644b48228127668796023e71 to your computer and use it in GitHub Desktop.
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import json
import logging
import os
import time
import requests
# Show INFO-level messages (fetch progress, module filtering) on the console.
logging.basicConfig(level=logging.INFO)
# When True, refetch everything from the API even if data.json already exists.
GET_FRESH = False
# Endpoint queried by get_contribs() for per-module statistics.
URL = 'http://stackalytics.com/api/1.0/stats/engineers_extended'
# At most this many leading entries of each module's stats list are considered.
TOP = 5000
# Modules whose stats list for the most recent window has fewer entries than
# this are dropped from the report.
MIN_CONTRIBS = 100
def get_groups():
    """Return the names of all module groups known to Stackalytics.

    Queries the modules endpoint and keeps only the entries whose ``id``
    ends in ``-group``; each id is returned with that suffix removed.

    Raises:
        requests.HTTPError: if the API answers with an error status, so a
            failed request is reported as such instead of surfacing later
            as an unrelated JSON-decoding or ``KeyError`` failure.
    """
    suffix = '-group'
    resp = requests.get('http://stackalytics.com/api/1.0/modules')
    resp.raise_for_status()
    return [x['id'][:-len(suffix)] for x in resp.json()['data']
            if x['id'].endswith(suffix)]
def get_contribs(module, end_date=None, window=90):
    """Fetch the contributor statistics for one module group.

    Args:
        module: group name without the ``-group`` suffix (it is appended
            before querying).
        end_date: end of the reporting window as a Unix timestamp in
            seconds; defaults to the current time.
        window: length of the reporting window in days.

    Returns:
        The ``stats`` value of the JSON response (presumably a list with
        one record per contributor -- verify against the API).

    Raises:
        requests.HTTPError: if the API answers with an error status, so a
            failed request is not mistaken for a malformed payload.
    """
    if end_date is None:
        end_date = int(time.time())
    seconds_per_day = 24 * 60 * 60
    params = {
        'project_type': 'all',
        'metric': 'marks',
        'release': 'all',
        'start_date': end_date - window * seconds_per_day,
        'end_date': end_date,
        'module': module + '-group',
    }
    logging.info('Getting contribs for %s (%d day window, ending %d)',
                 module, window, end_date)
    resp = requests.get(URL, params=params)
    resp.raise_for_status()
    return resp.json()['stats']
def print_table(tab):
    """Print a square table of numbers, one column per key of *tab*.

    *tab* maps each name to an inner mapping keyed by the same names.
    The header row lists the names; every following row ends with its own
    name as a label. The cell in the row labelled ``proj`` under the
    column headed ``k`` is ``tab[k][proj]``, right-aligned to the width of
    the column heading. Integers are printed as integers, anything else
    with two decimals. A blank line is printed after the table.
    """
    print(' '.join(tab))
    for proj, line in tab.items():
        cells = []
        for k in line:
            value = tab[k][proj]
            # Choose the format from the value that is actually printed;
            # deciding from tab[proj][k] instead would silently truncate
            # a float whenever the two cells differ in type.
            fmt = '%*d' if isinstance(value, int) else '%*.2f'
            cells.append(fmt % (len(k), value))
        cells.append(proj)
        print(' '.join(cells))
    print()
# Load the raw statistics, either from the API (and cache them in data.json)
# or from a previously written data.json.
if GET_FRESH or not os.path.exists('data.json'):
    data = {}
    data['modules'] = get_groups()
    data['contribs_by_module'] = {}
    # Halt time for a given run
    NOW = int(time.time())
    for days_back in (0, 180, 360):
        # Key by str: JSON object keys are always strings, and the rest of
        # the script looks windows up as '0' / str(days_back). Using int
        # keys here made the first (uncached) run fail with KeyError.
        data['contribs_by_module'][str(days_back)] = {
            m: get_contribs(m, NOW - days_back * 24 * 60 * 60)
            for m in data['modules']}
    # Text mode: json.dump() writes str, which a binary-mode file rejects
    # on Python 3.
    with open('data.json', 'w') as fd:
        json.dump(data, fd)
else:
    with open('data.json', 'r') as fd:
        data = json.load(fd)
# Modules whose stats list for the most recent window ('0' days back) is
# shorter than MIN_CONTRIBS.
too_few_contribs = set(
    mod for mod in data['modules']
    if MIN_CONTRIBS > len(data['contribs_by_module']['0'][mod]))
if len(too_few_contribs) > 0:
    logging.info('Removing %d modules for having too few contributors',
                 len(too_few_contribs))
# Keep the remaining modules, in their original order, minus a fixed list
# of groups that are always excluded from the report.
data['modules'] = [
    mod for mod in data['modules']
    if not (mod in too_few_contribs
            or mod in ('openstackclient', 'puppetopenstack',
                       'release cycle management'))]
# Uncomment to restrict the report to a hand-picked set of modules instead:
#data['modules'] = ['swift', 'keystone', 'nova', 'neutron', 'cinder', 'glance',
#                   'horizon', 'fuel', 'ironic', 'tripleo',
#                   'oslo', 'infrastructure']
def _percent(part, whole):
    """Return part / whole as a truncated integer percentage (0 if whole is 0)."""
    if not whole:
        return 0
    return int(100 * part / whole)


# For each reporting window, print how the contributor sets of the selected
# modules overlap.
for days_back in (0, 180, 360):
    print()
    print('+----------------+')
    print('| Days Back: %3d |' % days_back)
    print('+----------------+')
    print()
    # Set of contributor ids per module, limited to the first TOP entries.
    contribs_by_module = {
        k: {c['id'] for c in v[:TOP]}
        for k, v in data['contribs_by_module'][str(days_back)].items()}
    # joint_contribs[m][m2]: ids that appear in both m and m2.
    joint_contribs = OrderedDict(
        (m, OrderedDict(
            (m2, contribs_by_module[m] & contribs_by_module[m2])
            for m2 in data['modules']))
        for m in data['modules'])
    # disjoint_contribs[m]: ids that appear in m and in no other listed module.
    disjoint_contribs = OrderedDict(
        (m, contribs_by_module[m].difference(*[
            contribs_by_module[m2]
            for m2 in data['modules'] if m2 != m]))
        for m in data['modules'])
    # counts[m][m2]: number of shared ids; counts[m][m] is m's own total.
    counts = OrderedDict(
        (m, OrderedDict(
            (m2, len(joint_contribs[m][m2]))
            for m2 in data['modules']))
        for m in data['modules'])
    print_table(counts)
    # percentages[m][m2]: share of m's contributors that also appear in m2.
    # A module can have no contributors at all in an older window, so the
    # division is guarded rather than allowed to raise ZeroDivisionError.
    percentages = OrderedDict(
        (m, OrderedDict(
            (m2, _percent(counts[m][m2], counts[m][m]))
            for m2 in data['modules']))
        for m in data['modules'])
    print_table(percentages)
    # affiliations[m][m2]: same numerator, but relative only to those of m's
    # contributors that appear in at least one other listed module. That
    # denominator is zero when every contributor of m is exclusive to m.
    affiliations = OrderedDict(
        (m, OrderedDict(
            (m2, _percent(counts[m][m2],
                          counts[m][m] - len(disjoint_contribs[m])))
            for m2 in data['modules']))
        for m in data['modules'])
    print_table(affiliations)
    # Per module: (name, fraction of exclusive contributors, total), sorted
    # by that fraction. A module with no contributors shares none, so it is
    # treated as fully exclusive and prints as "0% 0".
    summary = []
    for m, v in disjoint_contribs.items():
        total = counts[m][m]
        exclusive = len(v) / total if total else 1.0
        summary.append((m, exclusive, total))
    for m, p, c in sorted(summary, key=lambda x: x[1]):
        # Printed percentage is the share of contributors who also appear
        # in at least one other listed module.
        print('%25s: %3d%% %d' % (m, 100 * (1 - p), c))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment