Created
July 27, 2024 21:53
-
-
Save witt3rd/7429ed51de67094cdf8a98f782e7ba13 to your computer and use it in GitHub Desktop.
Combine file glob into a single markdown file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Recursive File Combiner to Markdown | |
This script creates a markdown file from files in a specified folder matching | |
a given glob pattern. It recursively searches for files and formats them into | |
a single markdown document with appropriate headings and code fences. | |
Usage: | |
python script_name.py <folder_path> <glob_pattern> | |
Arguments: | |
folder_path: The path to the folder containing files (e.g., ~/documents) | |
glob_pattern: The glob pattern to match files (e.g., *.txt or **/*.py) | |
Output: | |
A markdown file named '<folder_name>_files.md' in the current directory. | |
Example: | |
python script_name.py ~/documents "**/*.txt" | |
This will create a file named 'documents_files.md' containing all text files | |
from the ~/documents directory and its subdirectories, properly formatted in markdown. | |
""" | |
import argparse | |
import os | |
import glob | |
def _code_fence(text):
    """Return a backtick fence strictly longer than any backtick run in *text*.

    A fenced code block is closed by a fence at least as long as the one that
    opened it, so a file that itself contains ``` would end a plain
    three-backtick block early.
    """
    longest = current = 0
    for char in text:
        current = current + 1 if char == "`" else 0
        if current > longest:
            longest = current
    return "`" * max(3, longest + 1)


def create_markdown(folder, pattern) -> None:
    """Combine the files under *folder* matching *pattern* into one markdown file.

    The output is written to '<folder_name>_files.md' in the current working
    directory. It starts with a level-one heading, followed by one section per
    matching file: a heading holding the path relative to *folder* and the
    file's contents inside a code fence tagged with the file extension.

    Args:
        folder: Folder to search; a leading "~" is expanded.
        pattern: Glob pattern relative to *folder*; "**" matches recursively.
    """
    # Resolve to an absolute path so a trailing separator ("docs/") or a
    # relative name (".") still yields a real folder name instead of "" or ".".
    folder = os.path.abspath(os.path.expanduser(folder))
    # A filesystem root has no basename; fall back to a fixed placeholder.
    folder_name = os.path.basename(folder) or "root"
    output_filename = f"{folder_name}_files.md"
    # Used below to keep the output file from being embedded in itself when
    # the current directory lies inside the searched folder.
    output_real = os.path.normcase(os.path.realpath(output_filename))

    with open(output_filename, "w", encoding="utf-8") as outfile:
        # Write the level one heading
        outfile.write(f"# Files matching '{pattern}' in {folder_name}\n\n")

        # Sort the matches so the generated document is deterministic.
        matches = sorted(glob.glob(os.path.join(folder, pattern), recursive=True))
        for file_path in matches:
            if not os.path.isfile(file_path):
                continue
            if os.path.normcase(os.path.realpath(file_path)) == output_real:
                continue

            rel_path = os.path.relpath(file_path, folder)
            # One heading level per directory depth below the title; Markdown
            # only defines levels 1-6, so deeper files share level 6.
            level = min(rel_path.count(os.path.sep) + 2, 6)
            outfile.write(f"{'#' * level} {rel_path}\n\n")

            # The extension (without the dot) is the code fence language tag.
            file_extension = os.path.splitext(file_path)[1].lstrip(".")

            # Undecodable bytes are replaced rather than aborting the whole
            # run halfway through writing the output.
            with open(file_path, "r", encoding="utf-8", errors="replace") as infile:
                content = infile.read()

            fence = _code_fence(content)
            outfile.write(f"{fence}{file_extension}\n")
            outfile.write(content)
            # The closing fence must start on its own line; add a newline
            # only when the content does not already end with one.
            if content and not content.endswith("\n"):
                outfile.write("\n")
            outfile.write(f"{fence}\n\n")

    print(f"Markdown file '{output_filename}' has been created.")
def main() -> None:
    """Parse command line arguments and call create_markdown.

    Expects two positional arguments: the folder to search and the glob
    pattern to match. If the folder does not exist or is not a directory,
    the parser reports a usage error and exits (status 2) instead of
    producing a markdown file that contains only the title.
    """
    parser = argparse.ArgumentParser(
        description="Create a markdown file from files in a folder (recursively) matching a glob pattern."
    )
    parser.add_argument("folder", help="The folder to search for files")
    parser.add_argument(
        "pattern",
        help="The glob pattern to match files (e.g., *.txt or **/*.py)",
    )
    args = parser.parse_args()

    # Check the same expanded path that create_markdown will search.
    if not os.path.isdir(os.path.expanduser(args.folder)):
        parser.error(f"folder does not exist or is not a directory: {args.folder}")

    create_markdown(args.folder, args.pattern)
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment