Skip to content

Instantly share code, notes, and snippets.

@m2kar
Created September 3, 2018 09:09
Show Gist options
  • Save m2kar/60f383847e5022061cb0394f4bbb692b to your computer and use it in GitHub Desktop.
Save m2kar/60f383847e5022061cb0394f4bbb692b to your computer and use it in GitHub Desktop.
获取某个github组织的所有库
# ! python3
"""
获取某个 GitHub 组织的所有库
安装依赖库:
pip install requests beautifulsoup4
运行:
python3 github_spider.py nasa
会打印出每个项目的git地址
比如下载该组织的所有库
python3 github_spider.py nasa | xargs -n1 git clone
TODO: 扩展到用户
Author: imakar@qq.com
"""
import sys
import requests
from bs4 import BeautifulSoup
def get_org_pages_count(org, timeout=30):
    """Return how many repository-listing pages a GitHub organization has.

    The organization's home page is fetched and the ``data-total-pages``
    attribute of the current-page pagination marker is read.

    Args:
        org: Organization name as it appears in ``https://github.com/<org>``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The total page count as an ``int``. When the page contains no
        pagination marker (assumed to mean the listing fits on one page),
        ``1`` is returned.

    Raises:
        requests.HTTPError: If GitHub answers with an error status
            (for example 404 for an unknown organization).
        requests.RequestException: On network failures or timeout.
    """
    homepage_url = "https://github.com/{name}".format(name=org)
    homepage = requests.get(homepage_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    homepage.raise_for_status()
    soup0 = BeautifulSoup(homepage.content, "html.parser")
    current_marker = soup0.select_one(
        "#org-repositories > div > div > div > div > em.current"
    )
    # select_one() returns None when nothing matches; subscripting it would
    # raise TypeError, so treat a missing marker as a single page.
    if current_marker is None:
        return 1
    return int(current_marker.get("data-total-pages", 1))
def get_org_repos_in_page(org, page, timeout=30):
    """Return the repository link texts found on one listing page.

    Args:
        org: Organization name as it appears in ``https://github.com/<org>``.
        page: Page number passed as the ``page`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list with the whitespace-stripped text of every link matched by
        the repository-list selector (presumably the repository names).
        The list is empty when nothing matches.

    Raises:
        requests.HTTPError: If GitHub answers with an error status, so that
            an error page is not silently reported as "no repositories".
        requests.RequestException: On network failures or timeout.
    """
    url = "https://github.com/{name}?page={page}".format(name=org, page=page)
    response = requests.get(url, timeout=timeout)
    # Without this check a 404/429 body would be parsed and yield [].
    response.raise_for_status()
    soup0 = BeautifulSoup(response.content, "html.parser")
    alist = soup0.select(
        "#org-repositories > div > div.org-repos.repo-list > li > div.d-inline-block.mb-1 > h3 > a"
    )
    return [each.text.strip() for each in alist]
def get_user_type(user):
    """Return the account type for ``user``.

    This is a placeholder: the argument is ignored and every account is
    reported as an organization.

    Args:
        user: Account name (currently unused).

    Returns:
        The string ``"org"``.
    """
    # TODO: distinguish personal accounts from organizations.
    account_type = "org"
    return account_type
def print_repos(user):
    """Print the git clone URL of every repository listed for ``user``.

    One ``https://github.com/<user>/<repo>.git`` line is written to stdout
    per repository, so the output can be piped to ``xargs -n1 git clone``.
    Only accounts that ``get_user_type`` reports as ``"org"`` are handled;
    for any other type nothing is printed.

    Args:
        user: Account name whose repositories should be listed.
    """
    # Use the function argument rather than sys.argv so the function also
    # works when called from other code, not only from the command line.
    if get_user_type(user) != "org":
        return
    page_count = get_org_pages_count(user)
    # Pages are numbered 1..page_count inclusive; range() excludes its upper
    # bound, hence the +1 (otherwise the last page is skipped).
    for page in range(1, page_count + 1):
        for repo in get_org_repos_in_page(user, page):
            print("https://github.com/{user}/{repo}.git".format(user=user, repo=repo))
if __name__ == "__main__":
    # Script entry point: expects exactly one command-line argument (the
    # account name) in addition to the script name itself.
    if len(sys.argv) == 2:
        user = sys.argv[1]
        print_repos(user)
    else:
        raise ValueError("arguments count must be 2")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment