Skip to content

Instantly share code, notes, and snippets.

@jspeed-meyers
Created July 6, 2022 11:55
Show Gist options
  • Save jspeed-meyers/dfece3238a72533beb7cc8a0dcb87940 to your computer and use it in GitHub Desktop.
Save jspeed-meyers/dfece3238a72533beb7cc8a0dcb87940 to your computer and use it in GitHub Desktop.
Collect all non-archived repo names associated with a GitHub organization
# Collect all non-archived repositories belonging to one GitHub organization
# and append them, one per line (as github.com/<org>/<repo>), to the text file
# results/<ORG>.-repos.txt.
#
# USAGE:
#
# export GITHUB_AUTH_TOKEN=lkdjflkdjglkdjlkjg
#
# python get_org_repos.py
#
# NOTE:
#
# Change the ORG variable within the script to set the organization to analyze.
#
import json
import os
import re
import requests
# Script body: page through the GitHub REST API listing of an organization's
# repositories, keep the non-archived ones, and append their cleaned clone
# URLs (scheme and ".git" suffix removed) to results/<ORG>.-repos.txt.
#
# Any HTTP error (bad credentials, rate limiting, unknown organization, ...)
# aborts the run with requests.HTTPError instead of being silently skipped, so
# an incomplete result is never reported as a finished collection.
print("INITIATING DATA COLLECTION")

GITHUB_USERNAME = "jspeed-meyers"
# Token is read from the GITHUB_AUTH_TOKEN environment variable; None if unset.
GITHUB_TOKEN = os.environ.get("GITHUB_AUTH_TOKEN")

# GitHub organization to analyze
ORG = "eclipse"

# Strips the leading "https://" and the trailing ".git" from a clone URL.
# The dot is escaped and the pattern is anchored so only a real ".git" suffix
# is removed.
_CLONE_URL_RE = re.compile(r"^https://(.*)\.git$")

# Only send credentials when a token is actually configured. Passing
# str(None) would send the literal password "None", which GitHub rejects.
# Without a token the request is made unauthenticated.
_auth = (str(GITHUB_USERNAME), str(GITHUB_TOKEN)) if GITHUB_TOKEN else None

# Seconds to wait for GitHub before giving up on a request; without a timeout
# a stalled connection would hang the script forever.
_REQUEST_TIMEOUT = 30

# Collect every name first and write once at the end, so a failure part-way
# through pagination does not leave a partial listing in the output file.
_repo_names = []

# Use pagination to get all repos associated with the organization, not just
# the first 100. Start at page 1 and then follow the "next" link GitHub
# advertises until there is none.
_url = "https://api.github.com/orgs/" + ORG + "/repos?page=1&per_page=100"
while _url:
    response = requests.get(_url, auth=_auth, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx rather than skipping the page.
    response.raise_for_status()

    for repo in response.json():
        # do not collect repo name if repo is archived
        if repo["archived"]:
            continue
        clone_url = repo["clone_url"]
        match = _CLONE_URL_RE.match(clone_url)
        # Fall back to the raw URL if it does not have the expected shape,
        # instead of crashing on a failed match.
        _repo_names.append(match.group(1) if match else clone_url)

    # Pagination state lives in the HTTP Link header, which requests parses
    # into response.links; a missing "next" relation means this was the last
    # page.
    _url = response.links.get("next", {}).get("url")

# The output directory may not exist on a fresh checkout.
os.makedirs("results", exist_ok=True)
# Append mode is kept from the original script: re-running adds to the file
# rather than replacing it.
with open("results/" + ORG + ".-repos.txt", "a") as f:
    f.writelines(name + "\n" for name in _repo_names)

print("FINISHED DATA COLLECTION")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment