Created
May 7, 2024 16:42
-
-
Save WillPapper/717894f5f3edb149183b74b2438359c6 to your computer and use it in GitHub Desktop.
Recursively concatenate all text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import mimetypes

# Specify the folder path containing the text files
folder_path = "./path-goes-here"

# Specify the output file path
output_file = "concatenated_text.txt"


def concatenate_text_files(folder_path, output_file):
    """Recursively concatenate every text file under *folder_path*.

    Each file whose guessed MIME type starts with ``text/`` is written to
    *output_file* as a ``# <relative path>`` heading, a blank line, the
    file's contents, and a ``---`` separator line.

    Args:
        folder_path: Directory to walk (subdirectories included).
        output_file: Path of the file to create or overwrite.

    Returns:
        The number of text files that were written to *output_file*.

    Raises:
        OSError: If the output file or one of the input files cannot be
            opened.
    """
    # The output file may itself live inside folder_path; resolve it once so
    # it can be skipped instead of being read while it is being written.
    output_real = os.path.realpath(output_file)
    written = 0

    # Explicit UTF-8 keeps the result independent of the platform locale.
    with open(output_file, "w", encoding="utf-8") as outfile:
        # Iterate over each directory and subdirectory using os.walk()
        for root, dirs, files in os.walk(folder_path):
            # Sorting in place makes the traversal (and thus the output)
            # deterministic; os.walk honours in-place edits of `dirs`.
            dirs.sort()
            for name in sorted(files):
                file_path = os.path.join(root, name)

                if os.path.realpath(file_path) == output_real:
                    continue

                # Only files whose guessed MIME type is 'text/*' are included
                mime_type, _ = mimetypes.guess_type(file_path)
                if not (mime_type and mime_type.startswith("text/")):
                    continue

                # errors="replace" keeps one badly-encoded file from aborting
                # the whole run with a UnicodeDecodeError.
                with open(
                    file_path, "r", encoding="utf-8", errors="replace"
                ) as infile:
                    # Write the relative path to the file as a heading
                    relative_path = os.path.relpath(file_path, folder_path)
                    outfile.write(f"# {relative_path}\n\n")
                    # Stream line by line rather than loading the whole file
                    outfile.writelines(infile)
                    # Add a line break between files
                    outfile.write("\n\n---\n\n")
                written += 1

    return written


if __name__ == "__main__":
    concatenate_text_files(folder_path, output_file)
    print("Concatenation complete. Output file:", output_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment