Skip to content

Instantly share code, notes, and snippets.

@hashlash
Last active March 20, 2021 06:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hashlash/410399c096cfb7b07cdb47736c7886ad to your computer and use it in GitHub Desktop.
Save hashlash/410399c096cfb7b07cdb47736c7886ad to your computer and use it in GitHub Desktop.
pkg.go.dev: repositories listed under a package's "Imported by" tab, sorted by GitHub stats
import json
import re
import requests
import uuid
# GitHub personal access token, sent as the Bearer credential with every
# GraphQL request made by get_dependents_stats().
# No OAuth scope is required: just create a personal access token at
# https://github.com/settings/tokens/new and put it in place of the
# placeholder below.
API_KEY = 'api key example'
# GraphQL sub-query for a single repository, filled in with str.format():
#   {key}   - alias under which this repository appears in the response
#   {name}  - repository name
#   {owner} - repository owner
# Doubled braces ({{ and }}) are str.format() escapes that produce the literal
# braces GraphQL needs.
QUERY_FORMAT = '''
{key}: repository(name: "{name}", owner: "{owner}") {{
url
forkCount
watchers {{
totalCount
}}
stargazers {{
totalCount
}}
}}
'''
# Request in windows of 500 repositories (or fewer); 1000 gave me an error (possibly a timeout).
def get_dependents_stats(package, chunk_size=500):
    """Collect GitHub statistics for the repositories that import a Go package.

    Downloads the "imported by" tab of ``package`` from pkg.go.dev, extracts
    every ``github.com/<owner>/<name>`` reference found in the page, and asks
    the GitHub GraphQL API for the fields listed in ``QUERY_FORMAT`` (URL,
    fork count, watcher count, stargazer count). Repositories are queried in
    batches of at most ``chunk_size`` so a single request stays small.

    Args:
        package: Import path of the Go package, e.g.
            ``'github.com/gorilla/mux'``.
        chunk_size: Maximum number of repositories per GraphQL request.

    Returns:
        A dict mapping ``'github.com/<owner>/<name>'`` to the data GitHub
        returned for that repository. Repositories for which the API returned
        null (unresolved repositories) are left out.

    Raises:
        requests.HTTPError: If the GitHub API answers with an error status.
            The message carries the original error plus the response headers.
        KeyError: If the response has no usable ``data`` entry (missing or
            null). The message carries the decoded response body.
    """
    importedby_url = 'https://pkg.go.dev/{}?tab=importedby'.format(package)
    godoc = requests.get(importedby_url).content.decode()
    # Raw string so the regex escapes are not read as (invalid) string escapes;
    # the dot is escaped so only the literal host "github.com" matches.
    # Sorting the de-duplicated matches makes the batches deterministic.
    github_repos = sorted(set(re.findall(r'github\.com/[\w\-]+/[\w\-]+', godoc)))

    result = {}
    for i in range(0, len(github_repos), chunk_size):
        batch = github_repos[i:i + chunk_size]
        # Report the real end of the batch; the last one is usually shorter.
        print('Processing data from index {} to {}'.format(i, i + len(batch)))
        # graphql requires aliases to match /[_A-Za-z][_0-9A-Za-z]*/
        # (http://facebook.github.io/graphql/#sec-Names), hence the letter
        # prefix and the dash-free hex form of the UUID.
        chunk = {'id_' + uuid.uuid4().hex: repo for repo in batch}

        fragments = []
        for key, repo in chunk.items():
            # The regex above guarantees exactly "github.com/<owner>/<name>".
            _, owner, name = repo.split('/')
            fragments.append(QUERY_FORMAT.format(key=key, owner=owner, name=name))
        query = '{\n' + ''.join(fragments) + '\n}'

        response = requests.post(
            'https://api.github.com/graphql',
            json={'query': query},
            headers={'Authorization': 'Bearer {}'.format(API_KEY)}
        )
        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            # Format instead of concatenating: the headers object is not a str,
            # so `str + headers` would raise TypeError and hide the real error.
            raise requests.HTTPError(
                '{}\n{}'.format(e, response.headers), response=response
            ) from e

        payload = response.json()
        data = payload.get('data')
        if data is None:
            # A GraphQL response may omit `data` or set it to null when the
            # request as a whole failed; surface the body for diagnosis.
            raise KeyError("'data'\n{}".format(payload))
        result.update({chunk[k]: v for k, v in data.items()})

    # github's graphql api will return null data for unresolved repo
    return {k: v for k, v in result.items() if v}
import urllib.request
url = 'https://gist.githubusercontent.com/hashlash/410399c096cfb7b07cdb47736c7886ad/raw/66165429996ed02f4b9c03ad49ec5f6e3fa3e3e7/graphql.py'
# SECURITY NOTE(review): the next lines download Python source over the network
# and execute it in this module's namespace. Whoever controls the content
# served at `url` controls what runs here. Review the downloaded file (or
# import a local copy instead) before running this against anything sensitive.
# Presumably the downloaded graphql.py is what provides get_dependents_stats
# for the code that follows -- confirm against the file at `url`.
#
# The context manager closes the HTTP response once the source has been read
# and executed, instead of leaving the connection open.
with urllib.request.urlopen(url) as a:
    exec(a.read())
# Fetch the statistics of the repositories that import gorilla/mux.
stats = get_dependents_stats('github.com/gorilla/mux')

# Keep a copy of the raw result on disk.
with open('output.json', 'w') as f:
    json.dump(stats, f)

# Re-key the statistics by the `url` field returned for each repository.
repos = dict((entry['url'], entry) for entry in stats.values())

# Each ranking below is a list of (url, count) pairs, highest count first.
by_forks = [(link, entry['forkCount']) for link, entry in repos.items()]
by_forks.sort(key=lambda pair: pair[1], reverse=True)

by_stars = [
    (link, entry['stargazers']['totalCount']) for link, entry in repos.items()
]
by_stars.sort(key=lambda pair: pair[1], reverse=True)

by_watch = [
    (link, entry['watchers']['totalCount']) for link, entry in repos.items()
]
by_watch.sort(key=lambda pair: pair[1], reverse=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment