Skip to content

Instantly share code, notes, and snippets.

@spirkaa
Last active May 6, 2023 05:44
Show Gist options
  • Save spirkaa/78d11eadc5e59c17821eed98cde3be71 to your computer and use it in GitHub Desktop.
Save spirkaa/78d11eadc5e59c17821eed98cde3be71 to your computer and use it in GitHub Desktop.
Clone/pull multiple git repos simultaneously with ThreadPool
#!/usr/bin/env python3
"""Parse json and clone repos in specific directory."""
import json
import logging
import subprocess
from multiprocessing.pool import ThreadPool
from pathlib import Path
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# JSON file listing the repositories; main() passes it to get_data().
DATA_FILE = "data.json"
# Base directory under which worker() builds each clone's target path.
REPOS_DIR = "repos"
def get_data(file_path: str) -> dict:
    """Load the repository list from a JSON file and normalise it.

    The ``user_repositories`` and ``chart_repositories`` sections are each
    sorted case-insensitively by their ``repo`` value, and every entry whose
    ``url`` is missing or empty receives the default
    ``https://github.com/<repo>`` URL.

    Args:
        file_path: Path to the JSON data file.

    Returns:
        The parsed JSON object with both sections normalised. A section that
        is absent (or ``null``) in the file is returned as an empty list.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If an entry has no ``repo`` key.
    """
    # JSON is UTF-8 by specification, so do not depend on the locale's
    # default encoding when reading the file.
    json_data = json.loads(Path(file_path).read_text(encoding="utf-8"))
    for section in ("user_repositories", "chart_repositories"):
        # Tolerate a data file that lists only one kind of repository.
        entries = sorted(
            json_data.get(section) or [], key=lambda d: d["repo"].lower()
        )
        for entry in entries:
            if not entry.get("url"):
                entry["url"] = f"https://github.com/{entry['repo']}"
        json_data[section] = entries
    return json_data
def pull_repo(dir_name: str) -> None:
    """Run ``git pull`` inside the directory ``dir_name``.

    Logs ``dir_name`` at INFO level, then prints git's captured standard
    output on success, or its captured standard error when git exits with a
    non-zero status.
    """
    logger.info("%s", dir_name)
    command = ["git", "-C", dir_name, "pull"]
    try:
        completed = subprocess.run(
            command,
            check=True,
            capture_output=True,
            encoding="utf-8",
        )
    except subprocess.CalledProcessError as error:
        print(error.stderr)
    else:
        print(completed.stdout)
def clone_repo(repo_url: str, target_dir: str) -> None:
    """Clone ``repo_url`` into ``target_dir``, or pull if it is already cloned.

    Logs ``repo_url`` at INFO level. If ``target_dir`` already contains a
    ``.git`` entry the checkout is updated with ``pull_repo`` instead of
    being cloned again. Otherwise ``git clone`` is run and its captured
    standard output is printed; if git exits with a non-zero status its
    captured standard error is printed instead.

    Args:
        repo_url: URL of the repository to clone.
        target_dir: Directory the repository is cloned into.
    """
    logger.info("%s", repo_url)
    # Detect an existing checkout from the filesystem rather than by matching
    # git's "already exists" error text: that message is translated in
    # non-English locales, and it is also emitted for non-empty directories
    # that are not repositories at all.
    if Path(target_dir, ".git").exists():
        pull_repo(target_dir)
        return
    try:
        git = subprocess.run(
            ["git", "clone", repo_url, target_dir],
            capture_output=True,
            check=True,
            encoding="utf-8",
        )
    except subprocess.CalledProcessError as e:
        print(e.stderr)
    else:
        print(git.stdout)
def worker(repo: dict) -> None:
    """Clone or update one repository entry (runs in a pool thread).

    The target directory is ``REPOS_DIR/<name>``, or
    ``REPOS_DIR/<gitops_tool>/<name>`` when the entry has a ``gitops_tool``
    key, where ``<name>`` is the ``repo`` value with ``/`` replaced by ``-``.
    Entries whose ``url`` does not contain ``github.com`` are skipped.
    """
    flat_name = "-".join(repo["repo"].split("/"))
    path_parts = [REPOS_DIR]
    if "gitops_tool" in repo:
        path_parts.append(repo["gitops_tool"])
    path_parts.append(flat_name)
    target_dir = str(Path(*path_parts))
    if "github.com" not in repo["url"]:
        return
    clone_repo(repo["url"], target_dir)
def main(treads_num: int) -> None:
    """Clone or update every repository listed in ``DATA_FILE``.

    Args:
        treads_num: Number of worker threads in the pool.
    """
    repos = get_data(DATA_FILE)
    # One pool serves every section instead of being rebuilt per section.
    # ``map`` blocks until a section is finished, so sections are still
    # processed one after another.
    with ThreadPool(treads_num) as pool:
        for data in repos.values():
            pool.map(worker, data)
if __name__ == "__main__":
    # Script entry point: set the version, enable DEBUG logging, run with 8 threads.
    __version__ = "0.0.1"
    logging.basicConfig(
        level=logging.DEBUG,
        format=(
            "%(asctime)s [%(levelname)8s] "
            "[%(name)s:%(lineno)s:%(funcName)20s()] "
            "--- %(message)s"
        ),
    )
    main(8)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment