Skip to content

Instantly share code, notes, and snippets.

@briehanlombaard
Last active May 20, 2018 09:56
Show Gist options
  • Save briehanlombaard/a2bd4f8a0e57e733027e997c2a48b106 to your computer and use it in GitHub Desktop.
Save briehanlombaard/a2bd4f8a0e57e733027e997c2a48b106 to your computer and use it in GitHub Desktop.
GitHub Gist Language Popularity
#!/usr/bin/env python3
import argparse
import sys
import json
import time
import logging
import threading
import requests
from datetime import datetime
from getpass import getpass
from urllib.parse import urlparse
from queue import Queue
def worker(username, password, q):
    """Fetch gist listing pages for URLs taken from *q*, forever.

    Each successful response body is saved as ``gists-<page>.json`` in the
    current directory, where ``<page>`` is the ``page`` query parameter of
    the URL (``1`` when absent).  If the response carries a ``next``
    pagination link, that URL is put on the queue so the crawl continues.

    Args:
        username: User name for HTTP basic auth, or a falsy value.
        password: Password for HTTP basic auth, or a falsy value.  Auth is
            sent only when both values are truthy.
        q: Queue of URLs to fetch; ``task_done()`` is called exactly once
            for every item taken from it, even when processing fails.

    The function never returns, so it is meant to run in a daemon thread.
    """
    # Looked up by name here so the function does not rely on a module
    # global that only exists when the file is executed as a script.
    log = logging.getLogger(__name__)
    auth = (username, password) if username and password else None

    while True:
        url = q.get()
        try:
            log.info('Fetching {}'.format(url))
            r = requests.get(url, auth=auth)

            # The rate-limit headers may be missing (e.g. a custom --url).
            remaining = r.headers.get('X-RateLimit-Remaining')
            reset = r.headers.get('X-RateLimit-Reset')
            if remaining is not None and reset is not None and int(remaining) == 0:
                reset_at = datetime.fromtimestamp(float(reset))
                delta = (reset_at - datetime.now()).total_seconds()
                # timedelta.seconds is wrong for negative deltas (it wraps
                # to almost a full day), so clamp total_seconds() instead.
                # Wait at least one second to step past the reset time.
                seconds = max(1, int(delta) + 1)
                log.info('Rate limit reached. Sleeping for {}s'.format(seconds))
                time.sleep(seconds)
                if not r.ok:
                    # This very request was rejected; try it again now that
                    # the window has reset.  Re-queue before task_done() so
                    # the queue never looks finished in between.
                    q.put(url)
                    continue

            # Never store an error payload as if it were a page of gists.
            r.raise_for_status()
            payload = r.json()

            # partition() tolerates values containing '=' and bare keys.
            query = {}
            for pair in urlparse(url).query.split('&'):
                if pair:
                    key, _, value = pair.partition('=')
                    query[key] = value

            with open('gists-{}.json'.format(query.get('page', 1)), 'w') as f:
                json.dump(payload, f)

            if 'next' in r.links:
                q.put(r.links['next']['url'])
        except Exception:
            # Keep the worker alive; a dead thread would leave the item
            # unfinished and block Queue.join() forever.
            log.exception('Failed to process {}'.format(url))
        finally:
            q.task_done()
class Password(argparse.Action):
    """Argparse action that stores a password, prompting if none is given.

    Intended for an option declared with ``nargs='?'``: when the option is
    present without a value, the password is read interactively with
    ``getpass()`` so it does not have to be typed on the command line.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Store *values* on *namespace*, prompting when it is ``None``.

        Args:
            parser: The ``ArgumentParser`` invoking the action (unused).
            namespace: Object on which the result is set as ``self.dest``.
            values: The value supplied on the command line, or ``None``.
            option_string: The option string that triggered the action;
                defaults to ``None`` as in ``argparse.Action.__call__``.
        """
        if values is None:
            values = getpass()
        setattr(namespace, self.dest, values)
if __name__ == '__main__':
    # Command-line options: logging destination/level, the first URL to
    # fetch, and optional basic-auth credentials.
    cli = argparse.ArgumentParser()
    cli.add_argument('--log-file')
    cli.add_argument(
        '--log-level',
        default='INFO',
        type=str.upper,
        choices=['DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL'],
    )
    cli.add_argument('--url', dest='url', default='https://api.github.com/gists')
    cli.add_argument('-u', dest='username')
    cli.add_argument('-p', dest='password', action=Password, nargs='?')
    options = cli.parse_args()

    # Log to the named file (with a timestamped format) when one is given,
    # otherwise to standard output with the default format.
    if options.log_file:
        log_config = {
            'filename': options.log_file,
            'format': '%(asctime)s [%(levelname)5s]: %(message)s',
            'level': options.log_level,
        }
    else:
        log_config = {'stream': sys.stdout, 'level': options.log_level}
    logging.basicConfig(**log_config)
    logger = logging.getLogger(__name__)

    # Start four daemon worker threads sharing one queue, seed the queue
    # with the first URL, then block until every queued item is done.
    pending = Queue()
    worker_args = (options.username, options.password, pending)
    for _ in range(4):
        fetcher = threading.Thread(target=worker, daemon=True, args=worker_args)
        fetcher.start()
    pending.put(options.url)
    pending.join()
#!/usr/bin/env python3
import argparse
import glob
import json
import numpy as np
from collections import Counter
from matplotlib import pyplot as plt
def plot(languages=None):
    """Show a bar chart of language popularity across downloaded gists.

    Every ``*.json`` file in the current directory is loaded and iterated
    as a sequence of gists; each gist's ``files`` mapping is scanned and
    one count is added per file whose ``language`` is set and selected.
    The chart shows each language's share of the counted files in percent.

    Args:
        languages: Container of language names to include.  ``None`` (the
            default) includes every language found.
    """
    counts = Counter()
    for path in glob.glob('*.json'):
        # Only the load needs the file open; count after it is closed.
        with open(path, 'r') as handle:
            gists = json.load(handle)
        for gist in gists:
            for entry in gist['files'].values():
                language = entry['language']
                if not language:
                    continue
                if languages is None or language in languages:
                    counts[language] += 1

    total = sum(counts.values())
    # A list (not a dict view) so the labels can be indexed by the plotter.
    labels = list(counts)
    values = [n / total * 100 for n in counts.values()]
    indexes = np.arange(len(labels))
    width = 1

    plt.title('Language Popularity (from gist.github.com)')
    plt.ylabel('Popularity [%]')
    # 64 random RGB rows supply the bar colours.
    plt.bar(indexes, values, width, color=np.random.rand(64, 3))
    plt.xticks(indexes, labels, rotation=90)
    plt.tight_layout()
    plt.show()
if __name__ == '__main__':
    # At least one language name must be given as a positional argument;
    # the resulting list is handed straight to plot().
    cli = argparse.ArgumentParser()
    cli.add_argument('languages', nargs='+')
    plot(cli.parse_args().languages)
matplotlib==2.2.2
requests==2.18.4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment