Skip to content

Instantly share code, notes, and snippets.

@charlesreid1
Last active October 6, 2018 21:38
Show Gist options
  • Save charlesreid1/15ddd6601eba61320226a84d7f35c34c to your computer and use it in GitHub Desktop.
Save charlesreid1/15ddd6601eba61320226a84d7f35c34c to your computer and use it in GitHub Desktop.
Contribution Counter - Count contributions (opening issues or pull requests) to the DCPPC.

Top Contributors to the DCPPC

This script uses the Github API to iterate through all repos in the DCPPC organization and count contributions (opening an issue or a pull request) by user.

This requires an API key for an account that can access both public and private repositories in the DCPPC. (We used a token from one of our own accounts rather than Florence Python's, because Florence Python runs Centillion and our GitHub authorization protection layers, and we don't want to bork those.)

Create an API access token in the Settings page on Github, and pass that into the script with the environment variable GITHUB_TOKEN:

$ GITHUB_TOKEN="AAAAAAAAAAAA" python top_contributors.py

This will create a new timestamped directory named output_YYYYMMDD_HHMMSS that will contain the following files:

Logs:

  • log_top25.log - Output log file
  • repos.txt - list of repos that were indexed/counted

Contributions:

  • dcppc_all_contributors.csv - sorted CSV list of all contributors to all DCPPC repos
  • dcppc_top25_contributors.csv - sorted CSV list of top 25 contributors to all DCPPC repos

Breakdown:

  • dcppc_issues_contributors.csv - sorted CSV list of top issues contributors to all DCPPC repos
  • dcppc_pulls_contributors.csv - sorted CSV list of top pull request contributors to all DCPPC repos
login count
charlesreid1 722
dcppcbot 678
raynamharris 329
ACharbonneau 310
ctb 282
fp9695253 121
davissn30 51
meredithlgamble 48
jggautier 37
vmbrasseur 34
vindie13 32
hharty 28
VrosieB 24
rpwagner 21
KCB13 20
owhite 19
sherry-jenkins 19
cricketsloan 16
victor73 15
briandoconnor 15
cmungall 15
zflamig 15
RLC-DCPPC 14
wshands 14
AviMaayan 13
jonathancrabtree 13
alisonleaf 13
jessieclyons 12
BenjaminHCCarr 12
aegururaj 10
bilafer 9
david4096 8
NickolausDS 8
SusannaSansone 8
jmcmurry 7
bheavner 7
mfenner 6
luizirber 6
robes 6
proccaserra 6
jyttri 6
keanderka 6
sarpera 6
theferrit32 6
sulakhe 6
eabell 5
karenword 4
standage 3
nsuvarnaiari 3
agbeltran 3
swzCuroverse 3
mercecrosas 3
rayi113 3
amahurkar 2
carlkesselman 2
webermn 2
gversmee 2
ianfoster 2
lliming 2
rspahnn 2
kevinwilson 2
yammasnake 2
stevencox 2
hannahblau 2
jonathonl 1
mellybelly 1
sethi7ik 1
brooksph 1
djc183 1
karlcz 1
gneglur 1
danielskatz 1
alexjones1725 1
rossrepo 1
simont 1
jaklenk 1
sarala 1
micheldumontier 1
anitawaard 1
mmtrun 1
meganwojciechowicz 1
login count
dcppcbot 678
charlesreid1 425
ctb 204
raynamharris 204
ACharbonneau 184
fp9695253 62
davissn30 46
meredithlgamble 31
vmbrasseur 29
jggautier 27
hharty 25
vindie13 16
cmungall 15
KCB13 15
owhite 14
rpwagner 13
RLC-DCPPC 12
VrosieB 12
victor73 11
sherry-jenkins 11
alisonleaf 11
briandoconnor 10
jonathancrabtree 10
AviMaayan 9
cricketsloan 9
zflamig 9
jessieclyons 9
jmcmurry 7
aegururaj 7
wshands 7
bilafer 6
bheavner 6
robes 6
david4096 6
keanderka 6
BenjaminHCCarr 6
eabell 5
luizirber 4
proccaserra 4
jyttri 4
NickolausDS 4
SusannaSansone 4
mfenner 3
nsuvarnaiari 3
mercecrosas 3
sarpera 3
theferrit32 3
sulakhe 3
amahurkar 2
standage 2
karenword 2
carlkesselman 2
webermn 2
gversmee 2
agbeltran 2
lliming 2
swzCuroverse 2
rayi113 2
kevinwilson 2
jonathonl 1
mellybelly 1
sethi7ik 1
brooksph 1
djc183 1
ianfoster 1
karlcz 1
gneglur 1
danielskatz 1
alexjones1725 1
rossrepo 1
simont 1
rspahnn 1
jaklenk 1
sarala 1
micheldumontier 1
anitawaard 1
yammasnake 1
mmtrun 1
meganwojciechowicz 1
stevencox 1
hannahblau 1
login count
charlesreid1 297
ACharbonneau 126
raynamharris 125
ctb 78
fp9695253 59
meredithlgamble 17
vindie13 16
VrosieB 12
jggautier 10
rpwagner 8
sherry-jenkins 8
cricketsloan 7
wshands 7
BenjaminHCCarr 6
zflamig 6
owhite 5
briandoconnor 5
vmbrasseur 5
KCB13 5
davissn30 5
AviMaayan 4
victor73 4
NickolausDS 4
SusannaSansone 4
bilafer 3
mfenner 3
aegururaj 3
jonathancrabtree 3
jessieclyons 3
hharty 3
sarpera 3
theferrit32 3
sulakhe 3
karenword 2
luizirber 2
alisonleaf 2
RLC-DCPPC 2
david4096 2
proccaserra 2
jyttri 2
standage 1
bheavner 1
ianfoster 1
agbeltran 1
rspahnn 1
swzCuroverse 1
yammasnake 1
rayi113 1
stevencox 1
hannahblau 1
login count
charlesreid1 722
dcppcbot 678
raynamharris 329
ACharbonneau 310
ctb 282
fp9695253 121
davissn30 51
meredithlgamble 48
jggautier 37
vmbrasseur 34
vindie13 32
hharty 28
VrosieB 24
rpwagner 21
KCB13 20
owhite 19
sherry-jenkins 19
cricketsloan 16
victor73 15
briandoconnor 15
cmungall 15
zflamig 15
RLC-DCPPC 14
wshands 14
AviMaayan 13
import time
import requests
from github import Github
import os, re
import base64
import logging
import re
from datetime import datetime, timedelta
from collections import Counter
"""
Top Organization Contributors
Walk every issue and pull request of every
repo of an organization, and compile a count
of contributions (issues and pull requests
opened) per user.
"""
# Every run writes into its own directory, named after the moment the
# script started: output_YYYYMMDD_HHMMSS.
OUTPUT_DIR = 'output_%s' % (datetime.now().strftime("%Y%m%d_%H%M%S"))

# The directory name only has one-second resolution, so a second run
# started within the same second would hit FileExistsError with a plain
# os.mkdir(). exist_ok=True reuses the directory instead of crashing.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Logs
LOG_FILE = os.path.join(OUTPUT_DIR, 'log_top25.log')
GHR_FILE = os.path.join(OUTPUT_DIR, 'repos.txt')

# Contribution counts (issues + pull requests combined)
ALL_FILE = os.path.join(OUTPUT_DIR, 'dcppc_all_contributors.csv')
TOP_FILE = os.path.join(OUTPUT_DIR, 'dcppc_top25_contributors.csv')

# Breakdown by contribution type
ISS_FILE = os.path.join(OUTPUT_DIR, 'dcppc_issues_contributors.csv')
PRS_FILE = os.path.join(OUTPUT_DIR, 'dcppc_pulls_contributors.csv')

# Limit the number of repos (for testing)
# If -1, do all repos
LIMIT = -1

# Set up logging: INFO and above goes to LOG_FILE (truncated on each
# run by filemode='w') ...
logging.basicConfig(level=logging.INFO,
                    filename=LOG_FILE,
                    filemode='w')

# ... and the same messages are echoed to the console through an extra
# handler attached to the root logger.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logging.getLogger('').addHandler(console)
def _count_authors(items, label, skip_pull_requests=False):
    """Tally the login of the user who opened each item in *items*.

    items: iterable of issue or pull request objects exposing ``.user``.
    label: text used in the progress message logged every 50 items
        (e.g. "open issue", "closed PR").
    skip_pull_requests: when True, items whose ``pull_request`` attribute
        is set are ignored. Only meaningful for issue objects.

    Returns a Counter mapping login -> number of items opened.
    """
    authors = Counter()
    for position, item in enumerate(items, start=1):
        if position % 50 == 0:
            logging.info(" On %s %d...", label, position)
        # GitHub's issues endpoint also returns pull requests. Skip those
        # here so a PR is not counted once as an issue and again as a PR.
        if skip_pull_requests and item.pull_request is not None:
            continue
        # The API allows a null author; there is no login to credit.
        if item.user is None:
            continue
        authors[item.user.login] += 1
    return authors


def _write_counts(path, ranked):
    """Write (login, count) pairs to *path* as a CSV with a header row."""
    with open(path, 'w') as f:
        f.write("login,count\n")
        f.writelines("%s,%s\n" % pair for pair in ranked)


def main():
    """Count who opened issues and pull requests across the dcppc org.

    Reads the API token from the GITHUB_TOKEN environment variable
    (KeyError if it is not set), walks every repository of the
    organization, and tallies the opener of each open and closed issue
    and pull request. The name of each finished repository is appended
    to GHR_FILE, and the sorted tallies are written to ALL_FILE,
    TOP_FILE (25 most common), ISS_FILE and PRS_FILE.

    If LIMIT is positive, stops after that many repositories.
    """
    logging.info("Setting up github api")

    # set up API with access token
    access_token = os.environ['GITHUB_TOKEN']

    # Github -> get organization -> get repositories
    g = Github(access_token)
    organization = g.get_organization('dcppc')
    repos = organization.get_repos(type='all')

    master_issue_contributors = Counter()
    master_pr_contributors = Counter()

    logging.info("Iterating through repositories")
    for count, repo in enumerate(repos, start=1):
        logging.info(" On repository %s", repo.name)

        logging.info(" Iterating through issues")
        for state in ("open", "closed"):
            master_issue_contributors.update(
                _count_authors(repo.get_issues(state=state),
                               state + " issue",
                               skip_pull_requests=True))
        logging.info(" Finished counting issue contributors")

        # ---

        for state in ("open", "closed"):
            master_pr_contributors.update(
                _count_authors(repo.get_pulls(state=state),
                               state + " PR"))
        logging.info(" Finished counting PR contributors")

        # ---

        logging.info(" Done with repo %s", repo.name)
        with open(GHR_FILE, 'a') as f:
            f.write(repo.name)
            f.write("\n")

        if LIMIT > 0 and count >= LIMIT:
            break

        # Brief pause between repositories.
        time.sleep(1)

    master_contributors = master_issue_contributors + master_pr_contributors

    _write_counts(TOP_FILE, master_contributors.most_common(25))
    _write_counts(ALL_FILE, master_contributors.most_common())
    _write_counts(ISS_FILE, master_issue_contributors.most_common())
    _write_counts(PRS_FILE, master_pr_contributors.most_common())

    logging.info("Finished writing top 25 contributors file: %s", TOP_FILE)
    logging.info("Finished writing all contributors file: %s", ALL_FILE)
    logging.info("Finished writing issue contributors file: %s", ISS_FILE)
    logging.info("Finished writing pull request contributors file: %s", PRS_FILE)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment