Skip to content

Instantly share code, notes, and snippets.

@pdashford
Created January 18, 2017 04:01
Show Gist options
  • Save pdashford/2e4bcd4fc2343e2fd03efe4da17f577d to your computer and use it in GitHub Desktop.
Save pdashford/2e4bcd4fc2343e2fd03efe4da17f577d to your computer and use it in GitHub Desktop.
Python script to download folders from Github repo
"""
Downloads folders from github repo
Requires PyGithub
pip install PyGithub
"""
import os
import sys
import base64
import shutil
import getopt
from github import Github
from github import GithubException
def get_sha_for_tag(repository, tag):
    """
    Return the commit SHA that a branch or tag name points to.

    Branches are searched before tags, so when a branch and a tag share the
    same name the branch wins.

    :param repository: Repository object; only ``get_branches()`` and
        ``get_tags()`` are called on it.
    :param tag: Name of the branch or tag to resolve.
    :returns: ``commit.sha`` of the first matching branch, otherwise of the
        first matching tag.
    :raises ValueError: If neither a branch nor a tag has that name.
    """
    # Tags are only listed when no branch matched, and each search returns
    # on the first hit instead of collecting every candidate first.
    for list_refs in (repository.get_branches, repository.get_tags):
        for ref in list_refs():
            if ref.name == tag:
                return ref.commit.sha
    raise ValueError('No Tag or Branch exists with that name')
def download_directory(repository, sha, server_path):
    """
    Recursively download the directory ``server_path`` at commit ``sha``.

    Any existing local directory at ``server_path`` is deleted and recreated
    before its contents are fetched. Each entry is written to the local path
    given by its ``path`` attribute. A failure on a single file is reported
    and does not abort the rest of the download.

    :param repository: Repository object; ``get_dir_contents()`` and
        ``get_contents()`` are called on it.
    :param sha: Commit reference passed as ``ref`` to every API call.
    :param server_path: Directory to download; also used as the local
        directory name.
    """
    # Start from an empty directory so files from an earlier run don't linger.
    if os.path.exists(server_path):
        shutil.rmtree(server_path)
    os.makedirs(server_path)

    for content in repository.get_dir_contents(server_path, ref=sha):
        print("Processing %s" % content.path)
        if content.type == 'dir':
            # The recursive call creates the sub-directory itself, so it is
            # not created here first.
            download_directory(repository, sha, content.path)
            continue
        try:
            # NOTE(review): every file is fetched again individually;
            # presumably the directory listing carries no file bodies - confirm.
            file_content = repository.get_contents(content.path, ref=sha)
            file_data = base64.b64decode(file_content.content)
            # Binary mode: b64decode yields raw bytes, which must be written
            # unmodified (and cannot be written to a text handle on Python 3).
            with open(content.path, "wb") as file_out:
                file_out.write(file_data)
        except (GithubException, IOError) as exc:
            print('Error processing %s: %s' % (content.path, exc))
def usage():
    """
    Print the command-line usage string to standard output.
    """
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("usage: gh-download --token=token --org=org --repo=repo --branch=branch --folder=folder")
def main(argv):
    """
    Parse the command-line options and download the requested folder.

    All five options (token, org, repo, branch, folder) are required. On an
    unknown option or a missing required option, an error message and the
    usage line are printed and the process exits with status 2.

    :param argv: Command-line arguments without the program name.
    """
    try:
        opts, _ = getopt.getopt(
            argv, "t:o:r:b:f:", ["token=", "org=", "repo=", "branch=", "folder="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    required = ("token", "org", "repo", "branch", "folder")
    # Both spellings of each option map onto one settings key.
    flag_to_key = {
        "-t": "token", "--token": "token",
        "-o": "org", "--org": "org",
        "-r": "repo", "--repo": "repo",
        "-b": "branch", "--branch": "branch",
        "-f": "folder", "--folder": "folder",
    }
    # Pre-fill with None so an omitted option is detected below instead of
    # surfacing later as an unbound-variable error.
    settings = dict.fromkeys(required)
    for opt, arg in opts:
        settings[flag_to_key[opt]] = arg

    missing = ["--" + key for key in required if not settings[key]]
    if missing:
        print("Missing required option(s): %s" % ", ".join(missing))
        usage()
        sys.exit(2)

    github = Github(settings["token"])
    organization = github.get_organization(settings["org"])
    repository = organization.get_repo(settings["repo"])
    sha = get_sha_for_tag(repository, settings["branch"])
    download_directory(repository, sha, settings["folder"])
if __name__ == "__main__":
    # Entry point: hand main() the command-line arguments without the
    # program name.
    main(sys.argv[1:])
@aayevtushenko
Copy link

Nice!
Pretty basic question, but how do I use this in another python file via import?

@RuihanGao
Copy link

Nice!
Pretty basic question, but how do I use this in another python file via import?

For me, running the following command in terminal does the work

python gh-download.py --token=token --org=org --repo=repo --branch=branch --folder=folder

@itamar121
Copy link

How do I run this command in a Windows terminal, where the prompt begins with C:\Users\Admin>?
When I start Python, the >>> prompt does not accept this line.
Do I need a file.py that contains this code, and then run that file?
How do I specify the repo or the particular code to be processed by Python?
Thank you

@RuihanGao
Copy link

How do I run this command in a Windows terminal, where the prompt begins with C:\Users\Admin>?
When I start Python, the >>> prompt does not accept this line.
Do I need a file.py that contains this code, and then run that file?
How do I specify the repo or the particular code to be processed by Python?
Thank you

Hi, I run it in a Linux terminal. You may want to find a way to run the Python code on Windows.

@pjcpjc
Copy link

pjcpjc commented Apr 14, 2021

This helped me a lot, thanks!
My version of download_directory is a bit shorter. Mine doesn't make a directory, then delete the directory, then make the directory again when executing the `if content.type == 'dir':` clause. But overall, great code!

@RenanBancke
Copy link

Sorry for my ignorance, but which fields do I need to fill in with the repository information? The values inside the parentheses (token, org, repo, ...)?

github = Github(token)
organization = github.get_organization(org)
repository = organization.get_repo(repo)
sha = get_sha_for_tag(repository, branch)
download_directory(repository, sha, folder)

@pjcpjc
Copy link

pjcpjc commented Mar 30, 2022

Hey @RenanBancke - here is a short snippet of my production code, with the values explained line by line. It's not the same as your example but perhaps it's close enough to help you (or someone else).

    g = github.Github(token)

Here, token is a string of length 40 starting with "ghp_". Look here for more information.

    user = g.get_user(user_str)
    repo = user.get_repo(repo_str)

Here user_str is "pjcpjc" and repo_str is "tts_netflow". This is referring to the public repo here. I believe it all works similarly if you want to refer to repos owned by an organization as opposed to a repo owned by a user.

    sha = _get_sha_for_tag(repo, release_tag)

Here, release_tag is "0.0.2". So I'm going to get the files from here. I believe you can use all sorts of identifying strings for this second argument, to include the long randomized SHA strings.

From here my code gets more complicated, but there is a line that's like download_directory(repository, sha, folder), with folder being "tts_netflow". Note that in this case, I'm referring to the directory named "tts_netflow" and not the repo named "tts_netflow". Specifically, I'm downloading this directory. If I were to pass "test_tts_netflow" as the folder then I would be downloading this directory.

Hopefully that helps. At any rate, it helped me review my production code.

@leynier
Copy link

leynier commented Aug 21, 2022

# This code was copied from
# https://gist.github.com/pdashford/2e4bcd4fc2343e2fd03efe4da17f577d
# and modified to work with Python 3, type hints, correct format and
# simplified the code to our needs.

"""
Downloads folders from github repo
Requires PyGithub
pip install PyGithub
"""

import base64
import getopt
import os
import shutil
import sys
from typing import Optional

from github import Github, GithubException
from github.ContentFile import ContentFile
from github.Repository import Repository


def get_sha_for_tag(repository: Repository, tag: str) -> str:
    """
    Resolve a branch or tag name to the SHA of the commit it points at.

    Branches are consulted first; tags are only listed when no branch carries
    the requested name. Raises ValueError when neither lookup finds a match.
    """
    for fetch_refs in (repository.get_branches, repository.get_tags):
        hits = [ref for ref in fetch_refs() if ref.name == tag]
        if hits:
            return hits[0].commit.sha

    raise ValueError("No Tag or Branch exists with that name")


def download_directory(repository: Repository, sha: str, server_path: str) -> None:
    """
    Recursively download the directory ``server_path`` at commit ``sha``.

    Any existing local directory at ``server_path`` is deleted and recreated
    before its contents are fetched. Each entry is written to the local path
    given by its ``path`` attribute, byte-for-byte as decoded from the API
    response. A failure on a single file is reported and does not abort the
    rest of the download.

    :param repository: Repository to read from.
    :param sha: Commit reference passed as ``ref`` to every API call.
    :param server_path: Directory to download; also used as the local
        directory name.
    """
    # Start from an empty directory so files from an earlier run don't linger.
    if os.path.exists(server_path):
        shutil.rmtree(server_path)
    os.makedirs(server_path)

    for content in repository.get_dir_contents(server_path, ref=sha):
        print("Processing %s" % content.path)
        if content.type == "dir":
            # The recursive call creates the sub-directory itself, so it is
            # not created here first.
            download_directory(repository, sha, content.path)
            continue
        try:
            file_content = repository.get_contents(content.path, ref=sha)
            if not isinstance(file_content, ContentFile):
                raise ValueError("Expected ContentFile")
            # An entry without content still produces an (empty) local file.
            encoded = file_content.content
            file_data = base64.b64decode(encoded) if encoded else b""
            # Binary mode keeps non-text files intact; decoding to UTF-8 text
            # would fail on them and leave a truncated file behind.
            with open(content.path, "wb") as file_out:
                file_out.write(file_data)
        except (GithubException, IOError, ValueError) as exc:
            print("Error processing %s: %s" % (content.path, exc))


def usage():
    """
    Show how the script is meant to be invoked.
    """
    usage_line = "usage: gh-download --repo=repo --branch=branch --folder=folder"
    print(usage_line)


def main(argv):
    """
    Command-line driver: read --repo/--branch/--folder and run the download.

    Prints a message plus the usage line and exits with status 2 when the
    options cannot be parsed or when any of the three values is missing or
    empty.
    """
    try:
        parsed, _ = getopt.getopt(argv, "r:b:f:", ["repo=", "branch=", "folder="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # Short and long spellings of an option share one slot in ``values``.
    flag_to_key = {
        "-r": "repo",
        "--repo": "repo",
        "-b": "branch",
        "--branch": "branch",
        "-f": "folder",
        "--folder": "folder",
    }
    values = {"repo": None, "branch": None, "folder": None}
    for flag, value in parsed:
        key = flag_to_key.get(flag)
        if key is not None:
            values[key] = value

    # Checked in this order so the first missing option is the one reported.
    checks = (
        ("repo", "Repo is required"),
        ("branch", "Branch is required"),
        ("folder", "Folder is required"),
    )
    for key, complaint in checks:
        if not values[key]:
            print(complaint)
            usage()
            sys.exit(2)

    repository = Github(None).get_repo(values["repo"])
    commit_sha = get_sha_for_tag(repository, values["branch"])
    download_directory(repository, commit_sha, values["folder"])


if __name__ == "__main__":
    # Script entry point: forward the command-line arguments, minus the
    # program name, to main().
    main(sys.argv[1:])

@Gkquic
Copy link

Gkquic commented Apr 3, 2023

Hey, here is a short version of this code:
import os
import sys
import base64
import shutil
import argparse
from github import Github

def get_sha_for_tag(repository, tag):
    """
    Return the commit SHA that a branch or tag name points to.

    Branches are searched before tags, so when a branch and a tag share the
    same name the branch wins.

    :param repository: Repository object; only ``get_branches()`` and
        ``get_tags()`` are called on it.
    :param tag: Name of the branch or tag to resolve.
    :returns: ``commit.sha`` of the first matching branch, otherwise of the
        first matching tag.
    :raises ValueError: If neither a branch nor a tag has that name.
    """
    # next() over a generator stops at the first match instead of collecting
    # every candidate first.
    branch = next((b for b in repository.get_branches() if b.name == tag), None)
    if branch is not None:
        return branch.commit.sha

    # Tags are only listed when no branch matched.
    matched_tag = next((t for t in repository.get_tags() if t.name == tag), None)
    if matched_tag is None:
        raise ValueError('No Tag or Branch exists with that name')
    return matched_tag.commit.sha

def download_directory(repository, sha, server_path):
    """
    Recursively download the directory ``server_path`` at commit ``sha``.

    Any existing local directory at ``server_path`` is deleted and recreated
    before its contents are fetched. Each entry is written to the local path
    given by its ``path`` attribute. A failure on a single file is reported
    and does not abort the rest of the download.

    :param repository: Repository object; ``get_dir_contents()`` and
        ``get_contents()`` are called on it.
    :param sha: Commit reference passed as ``ref`` to every API call.
    :param server_path: Directory to download; also used as the local
        directory name.
    """
    # Start from an empty directory so files from an earlier run don't linger.
    if os.path.exists(server_path):
        shutil.rmtree(server_path)
    os.makedirs(server_path)

    for content in repository.get_dir_contents(server_path, ref=sha):
        print(f"Processing {content.path}")
        if content.type == 'dir':
            # The recursive call creates the sub-directory itself, so it is
            # not created here first.
            download_directory(repository, sha, content.path)
            continue
        try:
            file_content = repository.get_contents(content.path, ref=sha)
            file_data = base64.b64decode(file_content.content)
            # b64decode returns bytes; a text-mode handle rejects them with
            # TypeError, so the file must be opened in binary mode.
            with open(content.path, "wb") as file_out:
                file_out.write(file_data)
        except Exception as exc:
            # Deliberately broad: one failing file is reported and skipped so
            # the remaining entries are still downloaded.
            print(f"Error processing {content.path}: {exc}")

def parse_args(argv=None):
    """
    Parse the command-line arguments.

    All five options (--token, --org, --repo, --branch, --folder) are
    required; argparse reports an error and exits when one is missing.

    :param argv: Optional list of argument strings to parse. When ``None``
        (the default) argparse reads the process's command line.
    :returns: Namespace with ``token``, ``org``, ``repo``, ``branch`` and
        ``folder`` attributes.
    """
    parser = argparse.ArgumentParser(description='Download a directory from a Github repo.')
    parser.add_argument('--token', required=True, help='Github API token')
    parser.add_argument('--org', required=True, help='Github organization')
    parser.add_argument('--repo', required=True, help='Github repository')
    parser.add_argument('--branch', required=True, help='Github branch or tag')
    parser.add_argument('--folder', required=True, help='Folder to download')
    return parser.parse_args(argv)

def main():
    """
    Parse the command line and download the requested folder.

    Looks up the repository through its organization, resolves the given
    branch or tag name to a commit SHA, and downloads the folder at that
    commit.
    """
    args = parse_args()
    github = Github(args.token)
    organization = github.get_organization(args.org)
    repository = organization.get_repo(args.repo)
    sha = get_sha_for_tag(repository, args.branch)
    download_directory(repository, sha, args.folder)

if __name__ == "__main__":
    # Entry point. The guard needs the dunder names: a bare `name` is an
    # undefined variable and would raise NameError when the script runs.
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment