# jspeed-meyers/get_github_org_repos.py

## get_github_org_repos.py
# collect all non-archived repo names associated with one GitHub organization and
# save in text file.
#
# USAGE:
#
# export GITHUB_AUTH_TOKEN=lkdjflkdjglkdjlkjg
#
# python get_github_org_repos.py
#
# NOTE:
#
# change ORG variable within script to set organization to analyze
#

import json
import os
import re

import requests

print("INITIATING DATA COLLECTION")

GITHUB_USERNAME = "jspeed-meyers"
GITHUB_TOKEN = os.environ.get("GITHUB_AUTH_TOKEN")

# GitHub organization to analyze
ORG = "eclipse"

# Fail fast when the token is missing. Otherwise the literal string "None"
# would be sent as the password, the API would reject it, and the script
# would finish without collecting anything or explaining why.
if GITHUB_TOKEN is None:
    raise SystemExit(
        "GITHUB_AUTH_TOKEN is not set; export it before running this script."
    )

# Repo names are appended to this file, one per line.
OUTPUT_DIR = "results"
OUTPUT_PATH = OUTPUT_DIR + "/" + ORG + ".-repos.txt"

# Create the output directory up front so open() does not fail when the
# script is run from a location that has no results/ directory yet.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Captures everything between the "https://" scheme and the trailing ".git"
# suffix of a clone URL. The dot is escaped and the suffix is anchored so
# that only a real ".git" ending is stripped.
CLONE_URL_PATTERN = re.compile(r"https://(.*)\.git$")

# use pagination to get all repos associated with an organization,
# not just top 100
for page in range(1, 100):
    response = requests.get(
        "https://api.github.com/orgs/" + ORG + "/repos",
        # 100 results per page
        params={"page": page, "per_page": 100},
        auth=(GITHUB_USERNAME, GITHUB_TOKEN),
        # without a timeout a stalled connection would hang the script forever
        timeout=30,
    )

    # Report API failures instead of silently skipping the page.
    if not response.ok:
        print(
            "ERROR: GitHub API returned HTTP "
            + str(response.status_code)
            + " for page "
            + str(page)
            + "; stopping."
        )
        break

    repos = response.json()
    with open(OUTPUT_PATH, "a") as f:
        for repo in repos:
            # do not collect repo name if repo is archived
            if repo["archived"]:
                continue
            # remove https:// and .git from repo name
            match = CLONE_URL_PATTERN.search(repo["clone_url"])
            if match is None:
                print(
                    "WARNING: skipping unexpected clone URL: "
                    + repo["clone_url"]
                )
                continue
            f.write(match.group(1) + "\n")

    # determine if pagination has ended or not. requests parses the Link
    # response header into response.links; a "next" entry is present only
    # while there are more pages to fetch.
    if "next" not in response.links:
        break
else:
    # The loop ran through every page without hitting the last one, so the
    # output may be incomplete.
    print("WARNING: page limit reached; some repos may not have been collected.")

print("FINISHED DATA COLLECTION")
	# collect all non-archived repo names associated with one GitHub organization and
	# save in text file.
	#
	# USAGE:
	#
	# export GITHUB_AUTH_TOKEN=lkdjflkdjglkdjlkjg
	#
	# python get_github_org_repos.py
	#
	# NOTE:
	#
	# change ORG variable within script to set organization to analyze
	#

	import json
	import os
	import re

	import requests

	print("INITIATING DATA COLLECTION")

	GITHUB_USERNAME = "jspeed-meyers"
	GITHUB_TOKEN = os.environ.get("GITHUB_AUTH_TOKEN")

	# GitHub organization to analyze
	ORG = "eclipse"

	# Fail fast when the token is missing. Otherwise the literal string "None"
	# would be sent as the password, the API would reject it, and the script
	# would finish without collecting anything or explaining why.
	if GITHUB_TOKEN is None:
		raise SystemExit(
			"GITHUB_AUTH_TOKEN is not set; export it before running this script."
		)

	# Repo names are appended to this file, one per line.
	OUTPUT_DIR = "results"
	OUTPUT_PATH = OUTPUT_DIR + "/" + ORG + ".-repos.txt"

	# Create the output directory up front so open() does not fail when the
	# script is run from a location that has no results/ directory yet.
	os.makedirs(OUTPUT_DIR, exist_ok=True)

	# Captures everything between the "https://" scheme and the trailing ".git"
	# suffix of a clone URL. The dot is escaped and the suffix is anchored so
	# that only a real ".git" ending is stripped.
	CLONE_URL_PATTERN = re.compile(r"https://(.*)\.git$")

	# use pagination to get all repos associated with an organization,
	# not just top 100
	for page in range(1, 100):
		response = requests.get(
			"https://api.github.com/orgs/" + ORG + "/repos",
			# 100 results per page
			params={"page": page, "per_page": 100},
			auth=(GITHUB_USERNAME, GITHUB_TOKEN),
			# without a timeout a stalled connection would hang the script forever
			timeout=30,
		)

		# Report API failures instead of silently skipping the page.
		if not response.ok:
			print(
				"ERROR: GitHub API returned HTTP "
				+ str(response.status_code)
				+ " for page "
				+ str(page)
				+ "; stopping."
			)
			break

		repos = response.json()
		with open(OUTPUT_PATH, "a") as f:
			for repo in repos:
				# do not collect repo name if repo is archived
				if repo["archived"]:
					continue
				# remove https:// and .git from repo name
				match = CLONE_URL_PATTERN.search(repo["clone_url"])
				if match is None:
					print(
						"WARNING: skipping unexpected clone URL: "
						+ repo["clone_url"]
					)
					continue
				f.write(match.group(1) + "\n")

		# determine if pagination has ended or not. requests parses the Link
		# response header into response.links; a "next" entry is present only
		# while there are more pages to fetch.
		if "next" not in response.links:
			break
	else:
		# The loop ran through every page without hitting the last one, so the
		# output may be incomplete.
		print("WARNING: page limit reached; some repos may not have been collected.")

	print("FINISHED DATA COLLECTION")