Skip to content

Instantly share code, notes, and snippets.

@tianchaijz
Last active August 8, 2022 04:17
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save tianchaijz/cb30e71d6f1fb85e954bca8454942c87 to your computer and use it in GitHub Desktop.
Save tianchaijz/cb30e71d6f1fb85e954bca8454942c87 to your computer and use it in GitHub Desktop.
A Python script to clone a specified GitHub user's repos or gists.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import time
import json
import logging
import requests
import multiprocessing.dummy
from subprocess import PIPE, Popen
# Host of the GitHub REST API; used for request URLs and the Host header.
API = 'api.github.com'
# Account whose items are cloned; filled in by parse_options() from -u.
USER = None
# API token; taken from the environment here, may be replaced by -t.
TOKEN = os.environ.get('GITHUB_TOKEN')
# Kind of item to clone ('gists' or 'repos'); filled in by parse_options().
KIND = None
# Logging callables; main() replaces these before anything else runs.
DEBUG = None
ERR = None
# Size limit ("M" per the original note) above which Clone.repos skips a
# repository; may be replaced by --limit.
LIMIT = 16 # M
# Shared worker pool used by clone(); multiprocessing.dummy is thread-backed.
POOL = multiprocessing.dummy.Pool(5)
# Record layout used by Formatter.
DEFAULT_FMT = '%(levelname)-8s [%(asctime)s] %(name)s: %(message)s'
class Formatter(logging.Formatter):
    """Log formatter using ``DEFAULT_FMT`` with ``time.gmtime`` timestamps.

    Every line of a formatted exception is prefixed with ``'! '``.
    """

    def __init__(self, datefmt=None):
        """Initialise with ``DEFAULT_FMT`` and an optional *datefmt*."""
        logging.Formatter.__init__(self, DEFAULT_FMT, datefmt)
        # Render %(asctime)s from gmtime instead of the default converter.
        self.converter = time.gmtime

    def formatException(self, exc_info):
        """Return the standard traceback text with each line marked by '! '."""
        rendered = logging.Formatter.formatException(self, exc_info)
        marked = ['! %s' % row for row in rendered.splitlines()]
        return '\n'.join(marked)
def get_console_logger(name):
    """Return the logger called *name* with a console handler attached.

    Both the logger and the new stream handler are set to DEBUG level, and
    the handler renders records with ``Formatter``. Each call attaches one
    more handler to the named logger.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    console = logging.StreamHandler()
    console.setFormatter(Formatter())
    console.setLevel(logging.DEBUG)

    logger.addHandler(console)
    return logger
def shell(command, stdin=None):
    """Run *command* through the system shell and log what it prints.

    Standard output is forwarded to ``DEBUG`` and standard error to ``ERR``,
    both decoded to text first. A non-zero exit status is also reported
    through ``ERR`` instead of being dropped.

    Args:
        command: Shell command line. It is interpreted by the shell, so the
            caller is responsible for quoting any untrusted parts.
        stdin: Optional file object or descriptor used as the child's stdin.

    Returns:
        The exit status of the command.
    """
    process = Popen(
        args=command,
        stdout=PIPE,
        stderr=PIPE,
        stdin=stdin,
        shell=True,
        close_fds=True,
    )
    # communicate() drains both pipes and waits for the child to exit, so no
    # separate wait() call is needed afterwards.
    out, err = process.communicate()
    if out:
        DEBUG(_to_text(out))
    if err:
        ERR(_to_text(err))
    status = process.returncode
    if status != 0:
        ERR('command exited with status %d: %s' % (status, command))
    return status


def _to_text(data):
    """Decode subprocess output to text so it is not logged as a bytes repr."""
    if isinstance(data, bytes):
        return data.decode('utf-8', 'replace')
    return data
def parse_options():
    """Parse the command line into the module-level settings.

    Sets the globals ``USER``, ``TOKEN``, ``KIND`` and ``LIMIT``. The token
    defaults to the current value of ``TOKEN`` and the limit to the current
    value of ``LIMIT``.

    A missing token, user or kind, or a kind other than ``gists``/``repos``,
    ends the program with a usage error (exit status 2). ``parser.error`` is
    used rather than ``assert`` so the checks survive ``python -O``.
    """
    global TOKEN, USER, KIND, LIMIT
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option('-t', dest='token', action='store', type='string',
                      default=TOKEN,
                      help='specify the github api token')
    parser.add_option('-u', dest='user', action='store', type='string',
                      help='specify the github user')
    # Restrict -k to the handlers that exist on Clone, so a typo is reported
    # here instead of failing later in getattr(Clone, KIND).
    parser.add_option('-k', dest='kind', action='store', type='choice',
                      choices=('gists', 'repos'),
                      help='specify the kind(gists, repos)')
    parser.add_option('--limit', dest='limit', action='store', type='int',
                      help='specify the limit size of clone', default=LIMIT)
    options, _ = parser.parse_args()

    required = (
        (options.token, 'token required'),
        (options.user, 'user required'),
        (options.kind, 'kind required'),
    )
    for value, message in required:
        if not value:
            parser.error(message)

    USER, TOKEN, KIND = options.user, options.token, options.kind
    LIMIT = options.limit
def get_headers():
    """Build the HTTP headers sent with each API request.

    Reads the module-level ``API`` host and ``TOKEN``; the token is sent in
    the ``Authorization`` header.
    """
    authorization = 'token ' + TOKEN
    return {
        'Host': API,
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': authorization,
        'User-Agent': 'tianchaijz/clone-it',
    }
def do_request(page, timeout=30):
    """Request one page of the ``KIND`` listing for ``USER``.

    Args:
        page: 1-based page number (an integer).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``requests`` response object.
    """
    url = 'https://%s/users/%s/%s?page=%d' % (API, USER, KIND, page)
    return requests.get(url, headers=get_headers(), timeout=timeout)
def shell_clone(url):
    """Clone *url* into the current directory, or update an existing checkout.

    The target directory name is the last path component of *url* with a
    trailing ``.git`` removed. If that directory already exists,
    ``git pull --all`` is run inside it; otherwise ``git clone`` is run.

    *url* comes from a remote API response and ends up in a shell command
    line, so both it and the derived directory name are shell-quoted. Strings
    made only of shell-safe characters are left unchanged by the quoting.
    """
    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    DEBUG('cloning: %s' % url)
    path = re.sub(r'\.git$', '', url).split('/')[-1]
    if os.path.isdir(path):
        ERR('directory exists: %s, pull now' % (url))
        shell('cd %s && git pull --all' % quote(path))
        return
    shell('git clone %s' % quote(url))
class Clone(object):
    """Per-item clone handlers; clone() picks one by the ``KIND`` name."""

    @staticmethod
    def gists(gist):
        """Clone or update the gist described by the API object *gist*."""
        shell_clone(gist['git_pull_url'])

    @staticmethod
    def repos(repo):
        """Clone or update *repo* unless a filter excludes it.

        Filters:
            * ``GITHUB_LANG`` (environment): comma-separated language names,
              compared case-insensitively; whitespace around names is
              ignored. Repositories in any other language are skipped.
            * ``LIMIT``: repositories whose ``size`` exceeds
              ``LIMIT * 1024`` are skipped.
        """
        # Prefer the HTTPS URL: 'git_url' is a git:// address, a protocol
        # GitHub no longer serves. Fall back to it only if it is all we have.
        url = repo.get('clone_url') or repo['git_url']

        wanted = [
            name.strip()
            for name in os.environ.get('GITHUB_LANG', '').lower().split(',')
            if name.strip()
        ]
        if wanted:
            # 'language' may be None in the response; str() keeps that
            # comparable (as 'none') instead of raising.
            repo_lang = str(repo.get('language'))
            if repo_lang.lower() not in wanted:
                DEBUG('skip: %s, language: %s' % (url, repo_lang))
                return

        # Compare without dividing so the result does not depend on whether
        # '/' is integer (Python 2) or true (Python 3) division.
        if repo['size'] > LIMIT * 1024:
            DEBUG('skip: %s, size: %d' % (url, repo['size']))
            return
        shell_clone(url)
def clone(tasks):
    """Apply the ``Clone`` handler named by ``KIND`` to each item of *tasks*.

    The work is spread over the shared ``POOL``.
    """
    handler = getattr(Clone, KIND)
    POOL.map(handler, tasks)
def fetch(page=0):
    """Clone every item on the listing pages that follow *page*.

    Pages are requested one after another, starting at ``page + 1``, and each
    non-empty page is handed to ``clone()``. The loop ends at the first empty
    page. It also ends, with a message sent to ``ERR``, when a response does
    not have status 200 or does not decode to a JSON list; such responses
    are not item listings and must not be passed to the clone handlers.

    Args:
        page: Number of the last page already processed (0 to start from
            the first page).
    """
    # Iterate instead of recursing so the call depth does not grow per page.
    while True:
        page += 1
        r = do_request(page)
        if r is None:
            return
        DEBUG('get page: %d, status: %d, content length: %d' % (
            page, r.status_code, len(r.content)))
        if r.status_code != 200:
            ERR('page %d request failed, status: %d' % (page, r.status_code))
            return
        tasks = json.loads(r.content)
        if not isinstance(tasks, list):
            ERR('page %d: unexpected response, expected a list' % page)
            return
        if not tasks:
            return
        clone(tasks)
def main():
    """Script entry point.

    Installs the ``DEBUG`` and ``ERR`` logging callables on the module,
    parses the command line, then fetches and clones everything.
    """
    global DEBUG, ERR
    logger = get_console_logger('git/clone-it')

    def log_debug(s):
        logger.log(logging.DEBUG, s)

    def log_error(s):
        logger.log(logging.ERROR, s)

    DEBUG, ERR = log_debug, log_error

    parse_options()
    fetch()


if __name__ == '__main__':
    main()
@AllenLiuNZ
Copy link

What does kind mean? What are the possible values of it? I just want to download all gists.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment