Skip to content

Instantly share code, notes, and snippets.

@grootjans
Last active May 7, 2021 15:11
Show Gist options
  • Save grootjans/0e5cafa88406e779bd22169e9b5a1e7a to your computer and use it in GitHub Desktop.
Save grootjans/0e5cafa88406e779bd22169e9b5a1e7a to your computer and use it in GitHub Desktop.
#!/usr/bin/python
#
# Small program to retrieve teams associated to projects
# within a github organization
#
import sys
import logging
import requests
import collections
import csv
# Configure the root logger at DEBUG level and use a named logger for this script.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("github")

# Lightweight records for the data read from the GitHub API.
Repo = collections.namedtuple('Repo', ['name', 'url'])
Team = collections.namedtuple('Team', ['name', 'url', 'repositories', 'parent'])

# Value sent as per_page on the paginated listing requests.
page_size = 100

# Exactly two arguments are expected: the PAT token and the organization.
if len(sys.argv) != 3:
    logger.error("program expects a PAT token, and the organization as parameter")
    # sys.exit() instead of exit(): the latter is injected by the site
    # module and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(1)

token = sys.argv[1]
organization = sys.argv[2]
# The token is a credential, so it must never be written to the log.
logger.info("PAT token supplied, querying organization {}".format(organization))
headers = {'Authorization': "token {}".format(token)}
orgUrl = "https://api.github.com/orgs/{}".format(organization)
def retrievePaginatedList(requestUrl):
    """Convenience function for paginated calls.

    Requests ``requestUrl`` with the module-level ``headers`` and keeps
    following the ``next`` link of each response until a response has none.

    Returns a single list holding the JSON items of every page, in the
    order the pages were received.

    Raises ``requests.HTTPError`` when a page comes back with a 4xx/5xx
    status.
    """
    data = []
    url = requestUrl
    while True:
        response = requests.get(url, headers=headers)
        # An error response carries a JSON object instead of a list;
        # extending with it would silently add its keys as if they were
        # items, so fail loudly here instead.
        response.raise_for_status()
        data.extend(response.json())
        # No "next" entry among the response links: this was the last page.
        if 'next' not in response.links:
            break
        url = response.links["next"]["url"]
    return data
def getRepos(orgUrl):
    """Return a generator of Repo tuples (name, url) for an organization.

    The repository listing below ``orgUrl`` is fetched when this function
    is called; the Repo tuples themselves are built lazily while the
    returned generator is consumed.
    """
    listing = retrievePaginatedList(
        "{}/repos?per_page={}".format(orgUrl, page_size)
    )
    return (Repo(entry['name'], entry['url']) for entry in listing)
def getRepositoriesForTeam(repositoryUrl):
    """Return a list of repository urls for a team.

    ``repositoryUrl`` is the paginated listing to read; the ``url`` value
    of every item in it is collected.

    A real list is returned rather than a generator: the result is stored
    in a Team record, and a generator could only be iterated once before
    silently appearing empty.
    """
    return [repository['url'] for repository in retrievePaginatedList(repositoryUrl)]
def createTeam(teamData):
    """Create a Team record from team JSON, taking parent teams into account.

    ``teamData`` is a dict with at least ``url`` and ``repositories_url``;
    ``name`` and ``parent`` are optional.  When ``parent`` is set, the
    parent team is fetched from its ``url`` and converted recursively, so
    the resulting Team carries its whole chain of ancestors.

    Returns a Team whose ``parent`` is another Team, or None for a team
    without a parent.

    Raises ``requests.HTTPError`` when fetching a parent team fails.
    """
    parent = teamData.get('parent')
    parentTeam = None
    if parent:
        parentResponse = requests.get(parent['url'], headers=headers)
        # Without this check an error payload would be treated as team
        # data and fail later with a misleading KeyError.
        parentResponse.raise_for_status()
        parentTeam = createTeam(parentResponse.json())
    return Team(
        teamData.get("name"),
        teamData['url'],
        getRepositoriesForTeam(teamData['repositories_url']),
        parentTeam,
    )
def getTeamsFromOrganisation(orgUrl):
    """Get the teams of an organization together with their repos.

    The team listing below ``orgUrl`` is fetched and its size logged when
    this function is called.  The returned generator then builds one team
    per listed entry via ``createTeam`` as it is consumed.
    """
    teamListing = retrievePaginatedList(
        "{}/teams?per_page={}".format(orgUrl, page_size)
    )
    teamCount = len(teamListing)
    logger.info("processing {} teams".format(teamCount))
    return (createTeam(entry) for entry in teamListing)
def getRepoWithTeams(teams):
    """Return a mapping from repository url to the teams that list it.

    ``teams`` is an iterable of Team records.  Each key of the result is a
    repository url and each value is the list of Team records (not just
    their names) whose ``repositories`` contain that url, in the order the
    teams were given.  Teams without repositories add nothing.
    """
    repoToTeam = {}
    for team in teams:
        # Read the field by name instead of by position, so this keeps
        # working if the Team field order ever changes.
        for repoUrl in team.repositories:
            repoToTeam.setdefault(repoUrl, []).append(team)
    return repoToTeam
def writeToCsv(repos, repoToTeams):
    """Write one CSV row per repository to ``github_teams.csv``.

    ``repos`` is an iterable of Repo records and ``repoToTeams`` maps a
    repository url to the list of Team records mapped to it.  The file is
    created in the current working directory, replacing any existing one.

    Columns: repository name, repository url, the teams mapped to the
    repository ("Team"), the parents of those teams ("Parent1") and the
    parents of those parents ("Parent2").  Several names in one column are
    joined with ", "; a repository without teams gets empty columns.
    """
    def getTeamNames(teams):
        # One cell holding all team names of the given teams.
        return ", ".join(team.name for team in teams)

    # newline='' hands control of line endings to the csv module; without
    # it, platforms that translate "\n" end up with blank lines between rows.
    with open('github_teams.csv', mode='w', newline='') as csvFile:
        writer = csv.writer(csvFile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(["Repo Name", "Repo URL", "Team", "Parent1", "Parent2"])
        for repo in repos:
            mappedTeams = list(repoToTeams.get(repo.url, []))
            derivedParents = [team.parent for team in mappedTeams if team.parent]
            derivedSubParents = [group.parent for group in derivedParents if group.parent]
            writer.writerow([
                repo.name,
                repo.url,
                getTeamNames(mappedTeams),
                getTeamNames(derivedParents),
                getTeamNames(derivedSubParents),
            ])
# Main program: collect the repositories and the teams of the organization,
# map every repository url to its teams and write the overview to CSV.
repos = [repo for repo in getRepos(orgUrl)]
logger.info("got a total of {} repos".format(len(repos)))

teams = [team for team in getTeamsFromOrganisation(orgUrl)]
logger.info("got a total of {} teams".format(len(teams)))

repoToTeams = getRepoWithTeams(teams)
writeToCsv(repos, repoToTeams)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment