Skip to content

Instantly share code, notes, and snippets.

@ngsankha
Last active December 11, 2015 01:18
Show Gist options
  • Save ngsankha/4522157 to your computer and use it in GitHub Desktop.
Save ngsankha/4522157 to your computer and use it in GitHub Desktop.
Collect data from GitHub
import csv, httplib, json
from string import ascii_lowercase
# Collect repository data from the GitHub legacy search API and write one
# CSV file per language. Each lowercase ASCII letter is used as a search
# term, and every repository in the response becomes one CSV row.
con = httplib.HTTPSConnection('api.github.com')
languages = ['java', 'c', 'ruby', 'python', 'javascript']
try:
    for lang in languages:
        # 'wb' is the mode the Python 2 csv module expects (this script
        # relies on httplib, which only exists on Python 2).
        with open(lang + '.csv', 'wb') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"',
                                   quoting=csv.QUOTE_MINIMAL)
            for ch in ascii_lowercase:
                print("Processing repos with " + ch + " for language " + lang)
                con.request('GET', '/legacy/repos/search/' + ch + '?language=' + lang)
                # The response must be read in full before the connection
                # can be reused for the next request; json.load does that.
                data = json.load(con.getresponse())
                if 'message' in data:
                    # Error payloads carry a 'message' and no 'repositories'
                    # key; report it instead of failing with KeyError.
                    print(data['message'])
                    continue
                for repo in data.get('repositories', []):
                    csvwriter.writerow([repo['name'], repo['username'],
                                        repo['followers'], repo['forks'],
                                        repo['fork']])
finally:
    # Release the socket even if a request or a file write fails.
    con.close()
@hughdbrown
Copy link

import csv
import json
from string import ascii_lowercase

import requests # pip install requests

# Languages to search for; main() writes one '<language>.csv' file per entry.
LANGUAGES = (
    'java',
    'c',
    'ruby',
    'python',
    'javascript',
)

# Keys read from each repository record, in the order they appear in a CSV row.
ARG_NAMES = (
    'name',
    'username',
    'followers',
    'forks',
    'fork',
)


def github_url(ch, lang):
    """Return the legacy repository-search URL for a term and a language.

    ``ch`` becomes the final path segment (the search term) and ``lang`` the
    value of the ``language`` query parameter. Neither value is URL-encoded.
    """
    endpoint = 'https://api.github.com/legacy/repos/search/'
    query = '{0}?language={1}'.format(ch, lang)
    return endpoint + query

def print_project_data(text, csvwriter):
    """Decode a JSON response body and write one CSV row per repository.

    If the decoded payload contains a 'message' key, that message is printed
    and nothing is written. Otherwise every entry under 'repositories' (none
    when the key is absent) produces a row holding the ARG_NAMES fields, in
    that order, via ``csvwriter.writerow``.
    """
    payload = json.loads(text)

    # A 'message' key means there is nothing to write; just report it.
    if 'message' in payload:
        print(payload['message'])
        return

    repositories = payload.get('repositories', [])
    for entry in repositories:
        row = [entry[field] for field in ARG_NAMES]
        csvwriter.writerow(row)

def main():
    """Write one ``<language>.csv`` file per entry in LANGUAGES.

    For each language, every lowercase ASCII letter is used as a search term:
    the URL from github_url() is fetched with ``requests.get`` and the
    response text is passed to print_project_data(), which writes the rows.
    """
    import sys  # local import: only needed to choose the file mode below

    # The csv module needs a binary file on Python 2 but a text file opened
    # with newline='' on Python 3. Opening with 'wb' on Python 3 makes
    # writerow() raise TypeError because it writes str, not bytes.
    is_py3 = sys.version_info[0] >= 3

    for lang in LANGUAGES:
        path = lang + '.csv'
        if is_py3:
            csvfile = open(path, 'w', newline='', encoding='utf-8')
        else:
            csvfile = open(path, 'wb')
        with csvfile:
            csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for ch in ascii_lowercase:
                print("Processing repos with {ch} for language {lang}".format(ch=ch, lang=lang))
                r = requests.get(github_url(ch, lang))
                print_project_data(r.text, csvwriter)


# Run the collection only when this file is executed as a script, so that
# importing it as a module does not call main().
if __name__ == '__main__':
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment