Skip to content

Instantly share code, notes, and snippets.

@stuaxo
Last active May 11, 2024 10:52
Show Gist options
  • Save stuaxo/f1beb981dc4845921c31fcb4e16f4821 to your computer and use it in GitHub Desktop.
Save stuaxo/f1beb981dc4845921c31fcb4e16f4821 to your computer and use it in GitHub Desktop.
Output files in subdirectories for ingestion to an LLM such as Claude, ChatGPT etc.
#!/usr/bin/env python3
# Usage: python dirtollm.py [--dir /path/to/directory] [--glob "*.py"] [--prompt [Custom prompt]] [--exclude "*.pyc"] [--count] [--copy] [--list]
import argparse
import pathlib
import fnmatch
import shlex
try:
import pyperclip
except ImportError:
pyperclip = None
def append_file_content(output, path):
    """Append a ``#:<path>:`` header and the text of *path* to *output*.

    Args:
        output: Text accumulated so far.
        path: Path-like object exposing ``read_text`` (e.g. ``pathlib.Path``).

    Returns:
        *output* followed by the header line, the file's text and two
        trailing newlines. A file that is not valid UTF-8 contributes the
        header with an empty body; any other read failure is reported inline
        as a ``Skipped (...)`` note rather than raised.
    """
    try:
        # Decode explicitly as UTF-8: the platform default encoding differs
        # between machines, which makes both the decoded text and the
        # binary-file detection below unreliable.
        content = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Treated as a binary file: keep the header, emit no body.
        content = ""
    except Exception as ex:
        # Deliberate best effort: one unreadable file must not abort the run.
        content = f"Skipped (error reading file: {ex})\n\n"
    return output + f"#:{path}:\n{content}\n\n"
def fn_match_multiple(file, *patterns):
    """Return True if *file* matches at least one of the fnmatch *patterns*.

    With no patterns supplied the result is False.
    """
    for candidate in patterns:
        if fnmatch.fnmatch(file, candidate):
            return True
    return False
def dirtollm(output, directory, globs, excludes, listing=False):
    """Collect matching files under *directory* and append them to *output*.

    Subdirectories are visited first (depth-first), then the files of the
    current directory that match any of *globs*. Entries are visited in
    sorted order so the result is reproducible between runs.

    Args:
        output: Text accumulated so far (e.g. an optional prompt).
        directory: Root directory to scan.
        globs: Glob pattern, or iterable of patterns, selecting files.
        excludes: fnmatch pattern, or iterable of patterns. A directory is
            skipped when its name matches; a file is skipped when either its
            name or its full path matches.
        listing: When true, append only each file's path instead of its
            content.

    Returns:
        A ``(output, file_count)`` tuple: the extended text and the number
        of distinct files appended.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(globs, str):
        globs = [globs]
    if isinstance(excludes, str):
        excludes = [excludes]

    # Shared across the whole walk so a file matched by several patterns (or
    # by a recursive "**" pattern and again while recursing) is emitted once.
    seen = set()
    # Collect pieces and join once instead of repeatedly growing a string.
    parts = [output]

    def walk(root):
        # Process one directory level; return how many files were appended.
        count = 0
        for child in sorted(root.iterdir()):
            if child.is_dir() and not fn_match_multiple(child.name, *excludes):
                count += walk(child)
        for glob_pattern in globs:
            for child in sorted(root.glob(glob_pattern)):
                if child in seen or not child.is_file():
                    continue
                # Test the bare name as well as the full path so a pattern
                # such as "secret.txt" also excludes files in subdirectories.
                if (fn_match_multiple(child.name, *excludes)
                        or fn_match_multiple(child, *excludes)):
                    continue
                seen.add(child)
                if listing:
                    parts.append(f"{child}\n")
                else:
                    parts.append(append_file_content("", child))
                count += 1
        return count

    file_count = walk(pathlib.Path(directory))
    return "".join(parts), file_count
# Command-line entry point: parse the options, gather the files, then either
# report counts, copy the text to the clipboard, or print it to stdout.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Process some files.')
    parser.add_argument('--dir', type=str, help='Directory to process', default=".")
    parser.add_argument('--exclude', nargs='+', type=str, help='Glob pattern to exclude', default=[])
    # The default is a list so it has the same type as user-supplied values
    # (nargs='+' always yields a list).
    parser.add_argument('--glob', nargs='+', type=str, help='Glob pattern to match', default=["*"])
    parser.add_argument('--prompt', nargs='?', type=str, const="Filenames followed by file content-:", default=None, help='Specify prompt text to output before the files.')
    parser.add_argument('--count', action='store_true', help='Display the count of files, bytes, and tokens processed')
    parser.add_argument('--copy', action='store_true', help='Copy output to the clipboard instead of stdout')
    parser.add_argument('--list', action='store_true', help='List all files that match the patterns')
    args = parser.parse_args()

    output = ""
    if args.prompt is not None:
        output += args.prompt + "\n"
    output, file_count = dirtollm(output, args.dir, args.glob, args.exclude, listing=args.list)
    # Drop trailing blank lines (the result was previously assigned to a
    # misspelled, unused variable, so nothing was stripped).
    output = output.rstrip("\n")

    # Whitespace-separated words serve as a rough token estimate.
    token_count = len(output.split())
    # Report real bytes, not characters; "replace" keeps this from raising on
    # text that cannot be encoded.
    byte_count = len(output.encode("utf-8", errors="replace"))

    if args.count:
        print(f"Processed {file_count} files, {byte_count} bytes, and approximately {token_count} tokens.")
    elif args.copy:
        if pyperclip:
            pyperclip.copy(output)
            print(f"Copied {file_count} files, {byte_count} bytes, and approximately {token_count} tokens to the clipboard.")
        else:
            # Report the failure on stderr with a non-zero exit status so
            # calling scripts can detect it.
            parser.exit(1, "The --copy option requires the 'pyperclip' module. Please install it to use this functionality.\n")
    else:
        print(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment