Last active
July 12, 2024 17:12
-
-
Save iguatemigarin/93e61d639953836c208339bdb6085a40 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import argparse
import os
import subprocess
import sys
from pathlib import Path
def get_git_root(path):
    """Return the top-level directory of the Git work tree containing *path*.

    Args:
        path: Directory used as the working directory for the ``git`` call.

    Returns:
        The output of ``git rev-parse --show-toplevel`` with surrounding
        whitespace stripped, or ``None`` when the command fails, prints
        nothing, or cannot be started at all.
    """
    try:
        output = subprocess.check_output(
            ['git', 'rev-parse', '--show-toplevel'],
            cwd=path,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran and exited non-zero (not a work tree).
        # OSError: git is not installed or *path* is not a usable directory.
        return None
    # Callers only test truthiness, so an empty answer is reported as None.
    return output.strip() or None
def is_ignored(path):
    """Report whether Git's ignore rules exclude *path*.

    The check runs ``git check-ignore -q`` from the directory that contains
    *path*, so the repository owning the file is consulted regardless of the
    process's current working directory.

    Args:
        path: File or directory path, absolute or relative to the current
            working directory.

    Returns:
        ``True`` when ``git check-ignore`` exits successfully (the path is
        ignored); ``False`` when it exits non-zero (not ignored, or not
        inside a repository) or when ``git`` cannot be started.
    """
    parent, name = os.path.split(os.path.abspath(path))
    if not name:
        # The filesystem root has no parent directory to run git from.
        return False
    try:
        subprocess.check_output(
            # "--" keeps a name that starts with "-" from being read as an option.
            ['git', 'check-ignore', '-q', '--', name],
            cwd=parent,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git executable or a missing parent directory.
        return False
    return True
def combine_files(folder_path, output_file, max_size, filetypes):
    """Concatenate the text files under *folder_path* into *output_file*.

    Each accepted file is written as::

        <file path="relative/path">

        ...content...

        </file>

    Directories and files are visited in sorted order so repeated runs
    produce the same output. ``.git`` entries and the output file itself are
    never included. Git ignore rules are applied only when *folder_path* is
    inside a Git work tree; otherwise a warning is printed to stderr.

    Args:
        folder_path: Directory to walk.
        output_file: Path of the combined file to create (overwritten).
        max_size: Maximum total size, in UTF-8 bytes, of the file contents
            written. Processing stops at the first file that would exceed it.
        filetypes: Iterable of extensions without the leading dot, or
            ``None``. ``None`` or a collection containing ``'*'`` accepts
            every file. Matching is case-insensitive.

    Returns:
        None. Progress is printed to stdout, problems to stderr.
    """
    git_root = get_git_root(folder_path)
    if not git_root:
        print("Warning: Not a git repository. Ignoring .gitignore rules.", file=sys.stderr)
    # Without a repository there are no rules to apply, so skip the
    # per-path git subprocess entirely.
    use_git = bool(git_root)

    match_all = filetypes is None or '*' in filetypes
    # Blank entries are dropped: a bare "." suffix would match unrelated files.
    suffixes = tuple(
        '.' + ft.strip().lower()
        for ft in (filetypes or ())
        if ft.strip()
    )
    output_real = os.path.realpath(output_file)
    current_size = 0

    try:
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for root, dirs, files in os.walk(folder_path, topdown=True):
                # Pruning in place stops os.walk from descending into
                # repository metadata and ignored directories.
                dirs[:] = sorted(
                    d for d in dirs
                    if d != '.git'
                    and not (use_git and is_ignored(os.path.join(root, d)))
                )
                for name in sorted(files):
                    if name == '.git':
                        # Worktrees and submodules use a ".git" file.
                        continue
                    filepath = os.path.join(root, name)
                    if os.path.realpath(filepath) == output_real:
                        # Never feed the file being written back into itself.
                        continue
                    if use_git and is_ignored(filepath):
                        print(f"Ignoring: {filepath}")
                        continue
                    if not (match_all or name.lower().endswith(suffixes)):
                        continue

                    rel_path = os.path.relpath(filepath, folder_path)
                    try:
                        with open(filepath, 'r', encoding='utf-8') as infile:
                            content = infile.read()
                    except UnicodeDecodeError:
                        print(f"Skipping binary file: {rel_path}", file=sys.stderr)
                        continue
                    except IOError as e:
                        print(f"Error reading file {rel_path}: {e}", file=sys.stderr)
                        continue

                    file_size = len(content.encode('utf-8'))
                    if current_size + file_size > max_size:
                        print(f"Reached size limit at {rel_path}")
                        return

                    # Write failures propagate to the outer handler so they
                    # are reported against the output file, not the input.
                    outfile.write(f"<file path=\"{rel_path}\">\n\n")
                    outfile.write(content)
                    outfile.write('\n\n</file>\n\n')
                    current_size += file_size
                    print(f"Added: {rel_path}")
        print(f"Combined files written to {output_file}")
    except IOError as e:
        print(f"Error writing to output file {output_file}: {e}", file=sys.stderr)
def parse_filetypes(filetypes_str):
    """Parse a comma-separated list of file extensions.

    Each entry is stripped of surrounding whitespace and leading dots, then
    lowercased. Blank entries (for example from ``"ts,,js"`` or a trailing
    comma) are discarded.

    Args:
        filetypes_str: Text such as ``"ts,js"`` or ``"*"``; may be empty or
            ``None``.

    Returns:
        A list of normalized extensions, or ``None`` when the input is empty
        or contains no usable entry.
    """
    if not filetypes_str:
        return None
    normalized = (ft.strip().lstrip('.').lower() for ft in filetypes_str.split(','))
    return [ft for ft in normalized if ft] or None
def main():
    """Parse the command line and combine the requested files.

    Exits with status 1 when the folder argument is not a directory, and
    through ``parser.error`` when the size limit is not a positive, finite
    number.
    """
    parser = argparse.ArgumentParser(
        description="Combine files from a folder into a single file with a size limit, respecting Git rules.",
        # The "*" example is quoted: unquoted, the shell expands it to file
        # names before the program ever sees it.
        epilog="""
Examples:
  concat ./myproject                        # Combine all files in ./myproject
  concat -t ts,js ./src                     # Combine .ts and .js files in ./src
  concat -o combined.txt -s 2 ./docs        # Combine files in ./docs, max 2GB, output to combined.txt
  concat -t md,txt -o readme.md ./content   # Combine markdown and text files in ./content to readme.md
  concat -t '*' ./src                       # Combine all files in ./src, regardless of extension
""",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("folder_path", help="Path to the folder containing files")
    parser.add_argument("-o", "--output", default="combined_output.txt", help="Output file name (default: combined_output.txt)")
    # float accepts every value int did and also fractional limits like 0.5.
    parser.add_argument("-s", "--size", type=float, default=5, help="Maximum size limit in GB (default: 5)")
    parser.add_argument("-t", "--filetypes", type=parse_filetypes, help="Comma-separated file types to include (e.g., ts,js,html), use '*' for all files")
    args = parser.parse_args()

    if not os.path.isdir(args.folder_path):
        print(f"Error: '{args.folder_path}' is not a valid directory.", file=sys.stderr)
        sys.exit(1)

    # The chained comparison is False for zero, negatives, NaN and infinity.
    if not (0 < args.size < float('inf')):
        parser.error("--size must be a positive, finite number of gigabytes")

    max_size = int(1024 * 1024 * 1024 * args.size)  # Convert GB to bytes
    combine_files(args.folder_path, args.output, max_size, args.filetypes)
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment