Skip to content

Instantly share code, notes, and snippets.

@ashgansh
Last active June 10, 2024 10:42
Show Gist options
  • Save ashgansh/5660ba3f89ce0f1c43a38b66da1adbd7 to your computer and use it in GitHub Desktop.
Save ashgansh/5660ba3f89ce0f1c43a38b66da1adbd7 to your computer and use it in GitHub Desktop.
Printing an entire codebase
import os
import subprocess
import argparse
import fnmatch
from urllib.parse import urlparse
# Directory names that are pruned from the walk and never descended into.
EXCLUDE_DIRS = [
    'node_modules',
    'dist',
    'build',
    '.git',
    'venv',
]

# File-name suffixes that mark a file as source code worth printing.
SOURCE_CODE_EXTENSIONS = [
    '.py',
    '.md',
    '.mdx',
    '.js',
    '.java',
    '.cpp',
    '.c',
    '.h',
    '.cs',
    '.rb',
    '.go',
    '.rs',
    '.ts',
    '.php',
    '.html',
    '.css',
]
def clone_repo(url, clone_dir, branch=None):
    """
    Clone a git repository into a specified directory.

    Args:
        url: Repository URL handed to ``git clone``.
        clone_dir: Directory the repository is cloned into.
        branch: Optional branch or tag name to check out. When omitted,
            git checks out the remote's default branch.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a
            non-zero status.
    """
    command = ['git', 'clone']
    if branch:
        command += ['--branch', branch]
    # "--" ends option parsing, so a URL or directory that starts with "-"
    # cannot be interpreted as a git option.
    command += ['--', url, clone_dir]
    subprocess.run(command, check=True)
def filter_and_print_files(root_dir, filename_filter=None, subfolder_filter=None):
    """
    Print the content of the source code files found under ``root_dir``.

    Directories listed in ``EXCLUDE_DIRS`` are not descended into, and only
    files whose name ends with one of ``SOURCE_CODE_EXTENSIONS`` are printed.
    Each file is written to stdout as a ``File: <path>`` header, the file
    content, and a separator line.

    Args:
        root_dir: Directory to walk.
        filename_filter: Optional ``fnmatch`` pattern matched against the bare
            file name (not the full path).
        subfolder_filter: Optional path relative to ``root_dir``; when given,
            only files inside that directory (or below it) are printed.
    """
    subfolder_root = None
    if subfolder_filter:
        # normpath converts URL-style "/" separators to the OS separator and
        # drops trailing slashes, so the comparison below is component-wise.
        subfolder_root = os.path.normpath(os.path.join(root_dir, subfolder_filter))

    # str.endswith accepts a tuple, which replaces a per-extension loop.
    extensions = tuple(SOURCE_CODE_EXTENSIONS)

    for root, dirs, files in os.walk(root_dir):
        # In-place assignment so os.walk skips the excluded directories.
        dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS]

        if subfolder_root is not None:
            current = os.path.normpath(root)
            # A plain string-prefix test would also accept siblings such as
            # "examples/linearlite" for the filter "examples/lin"; commonpath
            # compares whole path components instead.
            if os.path.commonpath([current, subfolder_root]) != subfolder_root:
                continue

        for file in files:
            if not file.endswith(extensions):
                continue
            if filename_filter and not fnmatch.fnmatch(file, filename_filter):
                continue
            file_path = os.path.join(root, file)
            print(f"File: {file_path}")
            # errors='replace' keeps one non-UTF-8 file from aborting the
            # whole run; undecodable bytes are shown as U+FFFD.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                print(f.read())
            print("\n" + "="*80 + "\n")
def parse_github_url(url):
    """
    Parse a GitHub URL into a clone URL, branch, and subfolder.

    Accepts ``https://github.com/owner/repo`` as well as
    ``https://github.com/owner/repo/tree/<branch>/<sub/folder>``.

    Args:
        url: GitHub repository URL, optionally pointing at a tree.

    Returns:
        A ``(repo_url, branch, subfolder)`` tuple. ``branch`` defaults to
        ``'main'`` and ``subfolder`` to ``''`` when the URL names no tree.
        Only the first path segment after ``tree`` is taken as the branch, so
        branch names containing ``/`` are not recognised as a whole.

    Raises:
        ValueError: If the URL path lacks an owner or a repository name.
    """
    parsed_url = urlparse(url)
    # Dropping empty segments tolerates leading, trailing, and doubled slashes.
    path_parts = [part for part in parsed_url.path.split('/') if part]
    if len(path_parts) < 2:
        raise ValueError("Invalid GitHub URL. Expected format: https://github.com/owner/repo")

    owner, repo = path_parts[:2]
    # Clone URLs already end in ".git"; strip it so it is not appended twice.
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        raise ValueError("Invalid GitHub URL. Expected format: https://github.com/owner/repo")

    branch = 'main'
    subfolder = ''
    # The "tree" marker is only meaningful directly after owner/repo; checking
    # its position avoids misparsing an owner or repository named "tree".
    if len(path_parts) > 3 and path_parts[2] == 'tree':
        branch = path_parts[3]
        subfolder = '/'.join(path_parts[4:])

    repo_url = f"https://github.com/{owner}/{repo}.git"
    return repo_url, branch, subfolder
def main():
    """
    Command-line entry point: clone a GitHub repository and print its sources.

    Parses the positional ``url`` plus the optional ``--filter`` and ``--dest``
    arguments, clones the repository into ``--dest``, and prints every matching
    source file, restricted to the subfolder named in the URL if there is one.

    An invalid URL or a failed ``git clone`` ends the program with an error
    message and a non-zero exit status instead of a traceback.
    """
    parser = argparse.ArgumentParser(description='Clone a GitHub repository and print the content of source code files.')
    parser.add_argument('url', type=str, help='GitHub repository URL')
    parser.add_argument('--filter', type=str, help='Filename filter (optional)', default=None)
    parser.add_argument('--dest', type=str, help='Destination directory (optional)', default='cloned_repo')
    args = parser.parse_args()

    try:
        # The parsed branch is not used: the parser reports 'main' even when
        # the URL names no branch, so forwarding it to git could break
        # repositories whose default branch has a different name.
        repo_url, _branch, subfolder = parse_github_url(args.url)
    except ValueError as exc:
        # parser.error prints the usage line plus the message and exits with 2.
        parser.error(str(exc))

    try:
        clone_repo(repo_url, args.dest)
    except subprocess.CalledProcessError as exc:
        # git has already written its own diagnostics to stderr.
        parser.exit(exc.returncode, f"error: git clone failed with exit status {exc.returncode}\n")

    filter_and_print_files(args.dest, args.filter, subfolder)


if __name__ == "__main__":
    main()

Example Usage

python print_source_code_from_repo.py https://github.com/owner/repo

I usually pipe the output into pbcopy, e.g.:

python print_source_code_from_repo.py https://github.com/electric-sql/electric/tree/main | pbcopy

It also works with subdirectories (practical for monorepos):

python print_source_code_from_repo.py https://github.com/electric-sql/electric/tree/main/examples/linearlite
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment