Skip to content

Instantly share code, notes, and snippets.

@ElliotFriend
Last active January 17, 2024 15:11
Show Gist options
  • Save ElliotFriend/ca5c4bf097f7d465eba2e7fa925fc97e to your computer and use it in GitHub Desktop.
Save ElliotFriend/ca5c4bf097f7d465eba2e7fa925fc97e to your computer and use it in GitHub Desktop.
Count EC repos and contributors for Stellar ecosystem
#!/usr/bin/env python
from count_repos import *
from count_contributors import *
from count_repos_date import *
from github import Auth
from github import Github
from datetime import datetime, timedelta
# Build the GitHub API client from a personal access token and set its
# per_page attribute to 100.
auth = Auth.Token('some_github_auth_token')
g = Github(auth=auth)
g.per_page = 100

## Step 1: gather the repositories attributed to the ecosystem and save them,
## one per line, to stellar_repos.txt
stellar_repos_set = count_repos('stellar', g)
with open('stellar_repos.txt', 'w') as repos_file:
    repos_file.writelines(f"{repo}\n" for repo in stellar_repos_set)
print(f'There are currently {len(stellar_repos_set)} repos in the ecosystem')

## Step 2: gather the commit authors seen in those repositories during the
## query window and save them, one per line, to stellar_contributors.txt
# The window is expressed in "days before now": from 30 days ago until today
since_days_ago = 30
until_days_ago = 0
stellar_contributors_set = set()
for c in count_contributors(stellar_repos_set, since_days_ago, until_days_ago, g):
    # Names ending in "[bot]" are excluded from the count
    if c.endswith('[bot]'):
        continue
    stellar_contributors_set.add(c)
with open('stellar_contributors.txt', 'w') as contributors_file:
    contributors_file.writelines(f"{contributor}\n" for contributor in stellar_contributors_set)
print(f'There have been {len(stellar_contributors_set)} unique contributors between {datetime.now() - timedelta(days=since_days_ago)} and {datetime.now() - timedelta(days=until_days_ago)}')
#!/usr/bin/env python
from github import Github
from github import GithubException
from github.GithubException import UnknownObjectException
from count_repos import *
from datetime import datetime, timedelta
def count_contributors(ecosystem_repos_set: set[str], since_days_ago: int, until_days_ago: int, g: Github) -> set[str]:
    """Collect the commit authors seen in a set of repositories within a date window.

    Args:
        ecosystem_repos_set: Repository references such as
            ``https://github.com/owner/name``; only the last two
            ``/``-separated components (owner and name) are used.
        since_days_ago: Start of the window, in days before now.
        until_days_ago: End of the window, in days before now.
        g: PyGithub client used for every API call.

    Returns:
        The set of author identifiers: the GitHub login when the commit is
        linked to a GitHub user, otherwise the author name recorded on the
        commit. Commits with no usable author are reported on stdout and
        skipped.
    """
    # Compute the window once so every repository is queried over the same
    # range, instead of a slightly different "now" per repository.
    now = datetime.now()
    since = now - timedelta(days=since_days_ago)
    until = now - timedelta(days=until_days_ago)

    ecosystem_contributors = set()
    for repo in ecosystem_repos_set:
        # Tolerate a trailing slash and skip references that do not contain
        # both an owner and a repository name.
        parts = repo.rstrip('/').split('/')
        if len(parts) < 2 or not parts[-1] or not parts[-2]:
            print(f'skipping malformed repository reference: {repo!r}')
            continue
        repo_owner, repo_repo = parts[-2], parts[-1]

        try:
            repo_commits = g.get_repo(f"{repo_owner}/{repo_repo}", lazy=True).get_commits(
                since=since,
                until=until,
            )
            for commit in repo_commits:
                if commit.author:
                    committer = commit.author.login or commit.author.name
                elif commit.commit.author:
                    committer = commit.commit.author.name
                else:
                    committer = None
                if not committer:
                    # No GitHub user and no usable author name on the commit
                    print(f'weird nonetype thing: {commit.html_url}')
                    continue
                ecosystem_contributors.add(committer)
        except UnknownObjectException:
            # 404: the repository no longer exists (or is not visible); skip it
            continue
        except GithubException as err:
            # 409 was observed for empty git repositories and is expected.
            # Anything else is still skipped (best effort) but reported, so a
            # rate limit or server error does not silently shrink the count.
            if err.status != 409:
                print(f'skipping {repo_owner}/{repo_repo}: GitHub API error {err.status}')
    return ecosystem_contributors
#!/usr/bin/env python
import tomllib
from github import Github
from github.GithubException import UnknownObjectException
# Prefix used to turn an "owner/name" repository full name into a URL
BASE_URL = "https://github.com/"


def count_repos(ecosystem_name: str, g: Github, _visited: set[str] | None = None) -> set[str]:
    """Return the URLs of every repository attributed to an ecosystem.

    The ecosystem definition is read from
    ``data/ecosystems/<first letter>/<ecosystem_name>.toml`` relative to the
    current working directory. Three sources are merged:

    * every repository of each entry in ``github_organizations``,
    * the repositories of each entry in ``sub_ecosystems`` (recursively),
    * the ``url`` of each ``[[repo]]`` table in the file itself.

    Args:
        ecosystem_name: Slug of the ecosystem, matching its TOML file name.
        g: PyGithub client used to list organization repositories.
        _visited: Internal. Ecosystem slugs already processed during this
            traversal; callers should leave it unset.

    Returns:
        The set of repository URLs. An ecosystem that was already visited in
        the same traversal contributes an empty set, which prevents infinite
        recursion on cyclic definitions and repeated API calls for shared
        sub-ecosystems.

    Raises:
        FileNotFoundError: If the TOML file for an ecosystem does not exist.
    """
    if _visited is None:
        _visited = set()
    if ecosystem_name in _visited:
        return set()
    _visited.add(ecosystem_name)

    # Parse the TOML file
    with open(f'data/ecosystems/{ecosystem_name[0]}/{ecosystem_name}.toml', 'rb') as f:
        main_data = tomllib.load(f)

    repos_set = set()

    # Get repos from Github organizations (the key may be absent from the file)
    for org in main_data.get('github_organizations', []):
        # The organization name is the last path component of its URL;
        # strip a trailing slash first so it is not taken as an empty name.
        org_name = org.rstrip('/').split('/')[-1]
        try:
            org_repos = g.get_organization(org_name).get_repos()
            for repo in org_repos:
                repos_set.add(f"{BASE_URL}{repo.full_name}")
        except UnknownObjectException:
            # 404: no such organization; skip it
            continue

    # Add any repositories from sub ecosystems
    for sub_eco in main_data.get('sub_ecosystems', []):
        # Derive the file slug from the display name: whitespace becomes
        # hyphens, lowercased, parentheses dropped.
        eco_name = '-'.join(sub_eco.split()).lower().replace('(', '').replace(')', '')
        repos_set.update(count_repos(eco_name, g, _visited))

    # Add the repos actually defined in the TOML file
    for repo in main_data.get('repo', []):
        repos_set.add(repo['url'])

    return repos_set
ElliotFriend
rice2000
tyvdh
https://github.com/stellar/some-repository
https://github.com/stellar/some-other-repository
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment