Skip to content

Instantly share code, notes, and snippets.

@iguatemigarin
Last active July 12, 2024 17:12
Show Gist options
  • Save iguatemigarin/93e61d639953836c208339bdb6085a40 to your computer and use it in GitHub Desktop.
Save iguatemigarin/93e61d639953836c208339bdb6085a40 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os
import argparse
import sys
import subprocess
from pathlib import Path
def get_git_root(path):
    """Return the top-level directory of the Git work tree containing *path*.

    Runs ``git rev-parse --show-toplevel`` with *path* as the working
    directory.

    Args:
        path: Directory from which the repository is looked up.

    Returns:
        The repository root as a string, or ``None`` when Git reports an
        error (for example *path* is not inside a work tree), when *path*
        cannot be used as a working directory, or when the ``git``
        executable cannot be started.
    """
    try:
        return subprocess.check_output(
            ['git', 'rev-parse', '--show-toplevel'],
            cwd=path,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).strip()
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran and exited non-zero (not a repository).
        # OSError: git is not installed, or *path* is missing / not a directory.
        # Both mean "no usable repository", so callers get the same fallback.
        return None
def is_ignored(path):
    """Report whether Git's ignore rules exclude *path*.

    Runs ``git check-ignore -q`` from the directory that contains *path*, so
    the repository is resolved from the path's own location rather than from
    the process's current working directory.

    Args:
        path: File or directory path, absolute or relative to the current
            working directory.

    Returns:
        ``True`` when ``git check-ignore`` exits with status 0 (the path is
        ignored). ``False`` for any other exit status, when the containing
        directory does not exist, or when ``git`` cannot be started.
    """
    target = os.path.abspath(path)
    try:
        result = subprocess.run(
            # "--" keeps a path that begins with "-" from being read as an option.
            ['git', 'check-ignore', '-q', '--', target],
            cwd=os.path.dirname(target),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # git is not installed, or the parent directory cannot be entered:
        # there are no ignore rules to apply.
        return False
    return result.returncode == 0
def combine_files(folder_path, output_file, max_size, filetypes):
    """Concatenate the text files under *folder_path* into *output_file*.

    Each included file is written as a ``<file path="...">`` element whose
    path is relative to *folder_path*. Directories and files are visited in
    sorted order so the output is reproducible. Files that cannot be decoded
    as UTF-8 or cannot be read are reported on stderr and skipped.

    Args:
        folder_path: Root directory to walk.
        output_file: Path of the combined file; it is truncated on open.
        max_size: Maximum total size, in bytes of UTF-8 encoded content,
            that may be written. Processing stops at the first file that
            would exceed it.
        filetypes: Iterable of extensions without the leading dot, or
            ``None``. ``None`` or a collection containing ``'*'`` selects
            every file. Matching is case-insensitive.

    Returns:
        None. Progress goes to stdout, warnings and errors to stderr. A
        failure to open or write *output_file* is reported, not raised.
    """
    git_root = get_git_root(folder_path)
    if not git_root:
        print("Warning: Not a git repository. Ignoring .gitignore rules.", file=sys.stderr)

    def skipped_by_git(path):
        # Outside a repository there are no ignore rules, so do not spawn
        # one git process per path just to be told "no".
        return bool(git_root) and is_ignored(path)

    match_all = filetypes is None or '*' in filetypes
    if match_all:
        suffixes = ()
    else:
        suffixes = tuple(
            '.' + ft.strip().lstrip('.').lower() for ft in filetypes if ft.strip()
        )

    # Resolved once so the output file can be recognised during the walk.
    output_real = os.path.realpath(output_file)
    current_size = 0

    try:
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for root, dirs, files in os.walk(folder_path, topdown=True):
                # In-place assignment prunes the walk. ".git" holds repository
                # metadata and is not reported by check-ignore, so drop it here.
                dirs[:] = sorted(
                    d for d in dirs
                    if d != '.git' and not skipped_by_git(os.path.join(root, d))
                )
                for file in sorted(files):
                    filepath = os.path.join(root, file)
                    if os.path.realpath(filepath) == output_real:
                        # Never feed the file being written back into itself.
                        continue
                    if skipped_by_git(filepath):
                        print(f"Ignoring: {filepath}")
                        continue
                    if not (match_all or file.lower().endswith(suffixes)):
                        continue

                    rel_path = os.path.relpath(filepath, folder_path)
                    # Only the read is guarded here, so a failure writing the
                    # output is not misreported as a read error.
                    try:
                        with open(filepath, 'r', encoding='utf-8') as infile:
                            content = infile.read()
                    except OSError as e:
                        print(f"Error reading file {rel_path}: {e}", file=sys.stderr)
                        continue
                    except UnicodeDecodeError:
                        print(f"Skipping binary file: {rel_path}", file=sys.stderr)
                        continue

                    file_size = len(content.encode('utf-8'))
                    if current_size + file_size > max_size:
                        print(f"Reached size limit at {rel_path}")
                        return
                    outfile.write(f"<file path=\"{rel_path}\">\n\n")
                    outfile.write(content)
                    outfile.write('\n\n</file>\n\n')
                    current_size += file_size
                    print(f"Added: {rel_path}")
        print(f"Combined files written to {output_file}")
    except OSError as e:
        print(f"Error writing to output file {output_file}: {e}", file=sys.stderr)
def parse_filetypes(filetypes_str):
    """Parse a comma-separated list of file extensions.

    Each entry is stripped of surrounding whitespace and leading dots and
    lower-cased. Empty entries (from doubled or trailing commas) are dropped,
    because an empty extension would later match any file name ending in ".".

    Args:
        filetypes_str: Text such as ``"ts,js"`` or ``".md, TXT"``, or a
            falsy value when no filter was given.

    Returns:
        A list of normalised extensions, or ``None`` when the input is empty
        or contains no usable entry.
    """
    if not filetypes_str:
        return None
    normalised = (ft.strip().lstrip('.').lower() for ft in filetypes_str.split(','))
    filetypes = [ft for ft in normalised if ft]
    return filetypes or None
def main():
    """Command-line entry point: parse arguments and combine the files.

    Exits with status 1 when the folder argument is not a directory, and
    through ``parser.error`` (a usage error) when the size limit is not a
    positive number of gigabytes.
    """
    parser = argparse.ArgumentParser(
        description="Combine files from a folder into a single file with a size limit, respecting Git rules.",
        epilog="""
Examples:
  concat ./myproject                       # Combine all files in ./myproject
  concat -t ts,js ./src                    # Combine .ts and .js files in ./src
  concat -o combined.txt -s 2 ./docs       # Combine files in ./docs, max 2GB, output to combined.txt
  concat -t md,txt -o readme.md ./content  # Combine markdown and text files in ./content to readme.md
  concat -t '*' ./src                      # Combine all files in ./src, regardless of extension
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("folder_path", help="Path to the folder containing files")
    parser.add_argument("-o", "--output", default="combined_output.txt", help="Output file name (default: combined_output.txt)")
    parser.add_argument("-s", "--size", type=int, default=5, help="Maximum size limit in GB (default: 5)")
    parser.add_argument("-t", "--filetypes", type=parse_filetypes, help="Comma-separated file types to include (e.g., ts,js,html), use '*' for all files")
    args = parser.parse_args()

    if args.size <= 0:
        # A zero or negative limit would stop at the first non-empty file
        # without saying why; reject it up front.
        parser.error("size limit must be a positive number of GB")

    if not os.path.isdir(args.folder_path):
        print(f"Error: '{args.folder_path}' is not a valid directory.", file=sys.stderr)
        sys.exit(1)

    max_size = 1024 * 1024 * 1024 * args.size  # Convert GB to bytes
    combine_files(args.folder_path, args.output, max_size, args.filetypes)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment