Skip to content

Instantly share code, notes, and snippets.

@zeroasterisk
Created May 16, 2024 18:05
Show Gist options
  • Save zeroasterisk/2a043f988c2bc3f72e8373a2c2407811 to your computer and use it in GitHub Desktop.
Save zeroasterisk/2a043f988c2bc3f72e8373a2c2407811 to your computer and use it in GitHub Desktop.
import fnmatch
import os
import re
class CodeToMarkdown:
    """Bundle source files from one or more folders into one markdown file.

    Every selected file becomes a ``## File: <path>`` heading followed by a
    fenced code block whose info string is the file's extension.

    Patterns may be ``fnmatch``-style glob strings (matched against the whole
    path) or compiled regular expressions (searched anywhere in the path).
    Entries of any other type are ignored. Paths are the ones produced by
    ``os.scandir``, so directory paths never end with a path separator.
    """

    def __init__(self, output_filename, src_folders, exclusion_patterns, inclusion_patterns):
        """Store the configuration.

        Args:
            output_filename: Path of the markdown file to write.
            src_folders: Iterable of folder paths to walk recursively.
            exclusion_patterns: Patterns for files/directories to skip.
            inclusion_patterns: Patterns a file must match to be emitted.
        """
        self.output_filename = output_filename
        self.src_folders = src_folders
        self.exclusion_patterns = exclusion_patterns
        self.inclusion_patterns = inclusion_patterns
        # Running count of files written into the markdown output.
        self.included_files = 0

    def process_folder(self, folder):
        """Recursively render every selected file under *folder*.

        Entries are visited in name order so the generated document is
        reproducible (``os.scandir`` itself yields entries in arbitrary
        order). Excluded directories are not descended into.

        Returns:
            The markdown text for all selected files below *folder*.
        """
        # Materialize inside the ``with`` so the scandir handle is released
        # before we recurse into subdirectories.
        with os.scandir(folder) as scan:
            entries = sorted(scan, key=lambda item: item.name)

        sections = []
        for entry in entries:
            if self.is_excluded(entry.path):
                continue
            if entry.is_dir():
                sections.append(self.process_folder(entry.path))
            elif entry.is_file() and self.is_included(entry.path):
                sections.append(self._render_file(entry.path))
                self.included_files += 1
        return "".join(sections)

    def _render_file(self, filepath):
        """Return the markdown section (heading + fenced code) for one file."""
        # Decode explicitly as UTF-8 and substitute undecodable bytes so one
        # odd file cannot abort the whole run or depend on the locale.
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            source = f.read()
        fence = self._fence_for(source)
        ext = self.get_file_extension(filepath)
        return f"## File: {filepath}\n\n{fence}{ext}\n{source}\n{fence}\n\n"

    @staticmethod
    def _fence_for(text):
        """Return the shortest backtick fence (>= 3) that *text* cannot close."""
        fence = "```"
        # A fence that appears inside the file would end the block early;
        # lengthen it until it no longer occurs anywhere in the content.
        while fence in text:
            fence += "`"
        return fence

    @staticmethod
    def _matches_any(filepath, patterns):
        """Return True if *filepath* matches any glob string or compiled regex."""
        for pattern in patterns:
            if isinstance(pattern, str):
                if fnmatch.fnmatch(filepath, pattern):
                    return True
            elif isinstance(pattern, re.Pattern):
                if pattern.search(filepath):
                    return True
        return False

    def is_excluded(self, filepath):
        """Return True if *filepath* matches any exclusion pattern."""
        return self._matches_any(filepath, self.exclusion_patterns)

    def is_included(self, filepath):
        """Return True if *filepath* matches any inclusion pattern."""
        return self._matches_any(filepath, self.inclusion_patterns)

    def get_file_extension(self, filepath):
        """Return the extension of *filepath* without the leading dot.

        Returns an empty string when the path has no extension.
        """
        return os.path.splitext(filepath)[1][1:]

    def generate_markdown(self):
        """Process all source folders, write the markdown file, print a summary.

        The file counter is reset first so the reported count describes this
        run only, even when the method is called more than once.
        """
        self.included_files = 0
        all_content = "".join(
            self.process_folder(folder) for folder in self.src_folders
        )
        with open(self.output_filename, "w", encoding="utf-8") as f:
            f.write(all_content)
        print(f"Markdown file generated: {self.output_filename}")
        print(f"Processed folders: {self.src_folders}")
        print(f"Exclusion patterns: {self.exclusion_patterns}")
        print(f"Inclusion patterns: {self.inclusion_patterns}")
        print(f"Count files incldued: {self.included_files}")
if __name__ == "__main__":
    # Example configuration -- edit these values to suit your own project.
    # (Use "codebase.md" instead if you want a generic output name.)
    src_folders = ["/Users/alanblount/Code/cai-platform/src/python"]
    inclusion_patterns = ["*.py"]
    exclusion_patterns = ["*.pyc", re.compile(r"(\.git|node_modules)/")]
    output_filename = "codebase-to-replicate-for-problem-solver.md"

    converter = CodeToMarkdown(
        output_filename=output_filename,
        src_folders=src_folders,
        exclusion_patterns=exclusion_patterns,
        inclusion_patterns=inclusion_patterns,
    )
    converter.generate_markdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment