Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
This code snippet takes a GitHub organization name as input, crawls all of its public repositories, and returns a list of the Git clone URLs for those repositories.
import itertools
import re
import requests as rq
# Base URL of the GitHub web site; the organization path is appended to it.
GITHUB_URL = "https://github.com"

# Seconds to wait for each HTTP response before giving up.
REQUEST_TIMEOUT = 30

# Your Github organization (e.g. /Github)
organization = "/<company_name>"


def parse_total_pages(html):
    """Return the number of listing pages advertised in *html*.

    The count is read from the ``data-total-pages="N">`` pagination marker.
    When the marker is absent (no pagination on the page) 1 is returned.
    """
    match = re.search(r'data-total-pages="(\d+)">', html)
    return int(match.group(1)) if match else 1


def extract_repo_names(html, path):
    """Return the repository names linked from one listing page.

    Args:
        html: Text of an organization listing page.
        path: Organization path with its leading slash, e.g. ``"/Github"``.

    Returns:
        A list of repository names in the order they appear in *html*;
        empty when no repository link is found.
    """
    # NOTE: this scrapes markup, so it depends on the page keeping the
    # ``itemprop="name codeRepository" ... href="<path>/<repo>"`` layout.
    # The pieces are non-greedy / quote-bounded so that several links on the
    # same line yield one name each, and *path* is escaped so it is matched
    # literally. Matching ignores case so "/Github" also finds "/github".
    pattern = (
        r'itemprop="name codeRepository"[^>]*?href="'
        + re.escape(path)
        + r'/([^"/]+)"'
    )
    return re.findall(pattern, html, flags=re.IGNORECASE)


def build_clone_urls(names, path, base_url=GITHUB_URL):
    """Return the ``.git`` clone URL for every repository name in *names*."""
    return ["{0}{1}/{2}.git".format(base_url, path, name) for name in names]


def fetch_html(url):
    """Download *url* and return the response body as text.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = rq.request("GET", url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    return response.text


def list_clone_urls(path, base_url=GITHUB_URL):
    """Crawl every listing page of an organization and return its clone URLs.

    Args:
        path: Organization path with its leading slash, e.g. ``"/Github"``.
        base_url: Site the organization lives on.

    Returns:
        A list of ``<base_url><path>/<repo>.git`` strings.
    """
    org_url = base_url + path
    total_pages = parse_total_pages(fetch_html(org_url))
    names_per_page = (
        extract_repo_names(
            fetch_html("{}?page={}".format(org_url, page)), path
        )
        for page in range(1, total_pages + 1)
    )
    names = list(itertools.chain.from_iterable(names_per_page))
    return build_clone_urls(names, path, base_url)


if __name__ == "__main__":
    repositoryUrls = list_clone_urls(organization)
    # Emit one clone URL per line so the result can be piped to other tools.
    print("\n".join(repositoryUrls))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment