Skip to content

Instantly share code, notes, and snippets.

@platan
Last active October 14, 2018 18:49
Show Gist options
  • Save platan/fc203d4b067a1ba7475cabdbacf6f887 to your computer and use it in GitHub Desktop.
Save platan/fc203d4b067a1ba7475cabdbacf6f887 to your computer and use it in GitHub Desktop.
Shows usages of Shields.io in most starred projects hosted on GitHub
# Copyright (c) 2018 Marcin Mielnicki
# Licensed under the MIT License
#
# Shows usages of Shields.io in most starred projects hosted on GitHub
#
# Requirements:
# - requests (http://python-requests.org/)
#
import argparse
import random
import itertools
import time
import urllib
import requests
def prepare_parser():
    """Build the command-line argument parser for this script.

    Options:
        --access_token: optional GitHub personal access token. Several
            tokens may be passed as one comma-separated value (main()
            splits the value on ",").
        --count: number of projects to check (int, default 10).

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description='Shows usages of Shields.io in most starred projects hosted on GitHub')
    parser.add_argument(
        '--access_token',
        type=str,
        help='personal access token, or several tokens separated by commas '
             '(https://developer.github.com/v3/oauth/)')
    parser.add_argument('--count', default=10, type=int, help='number of projects to check')
    return parser
def check_limits(response):
    """Pause when the rate-limit headers on *response* ask for it.

    Two headers are honoured, in this order:

    * ``X-RateLimit-Remaining`` equal to 0: sleep until one second past the
      epoch time given in ``X-RateLimit-Reset``.
    * ``Retry-After``: sleep for that many seconds plus one.

    Args:
        response: object exposing a ``headers`` mapping with a ``get``
            method (e.g. a ``requests.Response``).

    Returns:
        None. The only effect is blocking in ``time.sleep``.
    """
    headers = response.headers
    if int(headers.get("X-RateLimit-Remaining", -1)) == 0:
        reset_at = headers.get("X-RateLimit-Reset")
        # Without a reset time there is nothing to wait for; int(None)
        # would raise TypeError.
        if reset_at is not None:
            delay = int(reset_at) - int(time.time()) + 1
            # A reset time that already passed yields a negative delay,
            # which time.sleep() rejects with ValueError.
            if delay > 0:
                time.sleep(delay)
    retry_after = headers.get("Retry-After")
    if retry_after:
        time.sleep(int(retry_after) + 1)
def repos_and_shields(access_token):
    """Yield the most starred GitHub repositories with a Shields.io usage flag.

    Pages through the repository search (more than 1000 stars, sorted by
    stars, 100 results per page). For every page, a single code search looks
    for "img.shields.io" in root-level readme files, restricted to the
    repositories of that page.

    Args:
        access_token: sequence of personal access tokens; one is picked at
            random for every request. A falsy value (None, empty list)
            sends the requests without an Authorization header.

    Yields:
        tuple: ``(html_url, is_using_shields)`` for each repository, in the
        order returned by the repository search.

    Raises:
        Exception: if a search request answers with a status code other
            than 200.
    """
    # Imported locally so the function works on Python 3 (urllib.parse.quote)
    # as well as on Python 2 (urllib.quote).
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    headers = {}
    next_page_url = 'https://api.github.com/search/repositories?q=stars%3A>1000&per_page=100&sort=stars'
    while next_page_url:
        if access_token:
            headers['Authorization'] = 'token %s' % random.choice(access_token)
        repos_response = requests.get(next_page_url, headers=headers)
        check_limits(repos_response)
        # Compare by value: "is not" tests object identity, not equality.
        if repos_response.status_code != 200:
            raise Exception("Status code %s. %s" % (repos_response.status_code, repos_response.text))

        repos_json = repos_response.json()
        repos = [repo["full_name"] for repo in repos_json["items"]]
        links = repos_response.links
        next_page_url = links["next"]["url"] if links and "next" in links else None

        # The space after the quoted search term keeps the first "repo:"
        # qualifier from being glued to it.
        query = 'filename:readme path:/ "img.shields.io" %s' % " ".join(
            "repo:%s" % full_name for full_name in repos)
        if access_token:
            headers['Authorization'] = 'token %s' % random.choice(access_token)
        code_response = requests.get(
            "https://api.github.com/search/code?q=%s&per_page=100" % quote(query),
            headers=headers)
        check_limits(code_response)
        if code_response.status_code != 200:
            raise Exception("Status code %s. %s" % (code_response.status_code, code_response.text))

        # Parse the code-search result once and index it by repository name
        # instead of re-parsing and scanning it for every repository.
        repos_with_shields = {
            code["repository"]["full_name"] for code in code_response.json()["items"]
        }
        for repo in repos_json["items"]:
            yield (repo["html_url"], repo["full_name"] in repos_with_shields)

        # Do not send requests too often, to avoid "You have triggered an
        # abuse detection mechanism. Please wait a few minutes before you
        # try again."
        time.sleep(15)
def main():
    """Parse the command line and print the Shields.io usage report.

    Prints the header line ``repo_url, is_using_shields`` followed by one
    line per repository, for the first ``--count`` repositories produced by
    repos_and_shields().
    """
    args = prepare_parser().parse_args()

    # --access_token is optional: argparse leaves it as None when omitted,
    # and None has no split(). Blank entries (e.g. a trailing comma) are
    # dropped so they are never sent as a token.
    raw_tokens = args.access_token
    tokens = None
    if raw_tokens:
        tokens = [token.strip() for token in raw_tokens.split(",") if token.strip()]

    repos = repos_and_shields(tokens)
    print("repo_url, is_using_shields")
    for repo_url, is_using_shields in itertools.islice(repos, args.count):
        print("%s, %s" % (repo_url, is_using_shields))
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment