@keithstellyes
Created October 8, 2019 19:47
A script for downloading just a directory off of GitHub
#!/usr/bin/env python3
import getpass, json, os, shutil, sys
import requests

def ensure_dir(path):
    try:
        os.mkdir(path)
    except FileExistsError:
        # It's OK if the directory already exists; having an "ensure"
        # function just keeps the calling code cleaner.
        pass
def gh_ls(url, auth=None):
    '''
    Returns a dict of the files and directories at an api.github.com/repos/
    contents URL, keyed by entry type. On failure it returns a
    (status, message) tuple instead.
    '''
    if not url.startswith('https://api.github.com/'):
        return (-1, 'Invalid repo API URL; it should look like, for example: '
                'https://api.github.com/repos/qt/qtbase/contents/examples '
                'for the examples/ dir on the repo qt/qtbase, whose normal '
                'URL is https://github.com/qt/qtbase/tree/dev/examples')
    # Ideally we would have some retry logic here, but it would need to be
    # smart enough not to keep pounding GitHub once the API rate limit is
    # exceeded.
    r = requests.get(url, auth=auth)
    if r.status_code != 200:
        return r.status_code, r.text
    resp = {}
    files_dirs = json.loads(r.text)
    for fd in files_dirs:
        # Delete keys we don't care about to reduce noise.
        del fd['url']
        del fd['git_url']
        del fd['sha']
        del fd['_links']
        resp.setdefault(fd['type'], []).append(fd)
    return resp
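# For reference, a successful gh_ls() call returns something shaped roughly
# like the (abridged, illustrative) dict below; the field names come from the
# GitHub contents API, and only the keys this script actually uses are shown:
#
#   {'file': [{'name': 'README.md', 'type': 'file',
#              'download_url': 'https://raw.githubusercontent.com/...'}],
#    'dir':  [{'name': 'widgets', 'type': 'dir', 'download_url': None}]}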
if __name__ == '__main__':
    url = sys.argv[1]
    if len(sys.argv) > 2:
        auth = (sys.argv[2], sys.argv[3])
    else:
        auth = (input('GitHub username: '), getpass.getpass())
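    # Note: this is an assumption about why the script always asks for
    # credentials -- GitHub grants authenticated API requests a much higher
    # rate limit than anonymous ones, which matters when walking a large
    # directory tree.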
    q = [(url, os.curdir)]
    while q:
        url, curdir = q.pop()
        fd = gh_ls(url, auth)
        if not isinstance(fd, dict):
            print('Error downloading! Assuming I should exit... Details:', str(fd))
            sys.exit(1)
        entries = []
        for f in fd.get('file', []):
            entries.append(curdir + '/' + str(f['name']))
        for d in fd.get('dir', []):
            entries.append(curdir + '/' + d['name'] + '/')
        print('\n'.join(sorted(entries)))
        for d in fd.get('dir', []):
            ensure_dir(curdir + '/' + d['name'])
            q.append((url + '/' + d['name'], curdir + '/' + d['name']))
        for f in fd.get('file', []):
            fpath = curdir + '/' + f['name']
            print('Trying to download:', fpath)
            if os.path.exists(fpath):
                print(fpath, 'already exists! Skipping...')
                continue
            if f['download_url'] is None:
                print('{}/{} did not have a download url as expected!'.format(
                    curdir, f['name']))
                continue
            dl_url = f['download_url']
            # Requests is a little ugly for downloading non-text data, TBH:
            # stream=True is necessary for downloading binary data.
            r = requests.get(dl_url, stream=True)
            if r.status_code != 200:
                # Exit here, since if the download fails things are presumed
                # broken beyond our control.
                print('Failed to download', dl_url)
                sys.exit(1)
            # GitHub likes sending responses gzip-compressed, it seems, so we
            # need requests to decompress them.
            r.raw.decode_content = True
            with open(fpath, 'wb') as out:
                shutil.copyfileobj(r.raw, out)
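Usage sketch (the filename gh_dir_download.py is just an assumed name for this script, not anything the gist specifies; the URL is the contents-API form from the error message above):

python3 gh_dir_download.py https://api.github.com/repos/qt/qtbase/contents/examples

With only the URL argument, the script prompts for a GitHub username and password; they can also be passed as the second and third command-line arguments.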