shawngraham/mardownify.py

## mardownify.py
import pandas as pd
import os
import argparse

def create_markdown_files(csv_filepath, output_dir):
    """Write one markdown note per row of an OCR results CSV.

    Each note consists of a ``![[<Filename>]]`` embed line, the row's OCR
    text inside a fenced code block, and a trailing ``## encoding`` heading.
    The note is named after the image file with its extension replaced by
    ``.md`` and is always written directly inside ``output_dir``.

    Args:
        csv_filepath: Path to a CSV file with ``Filename`` and ``OCR_Text``
            columns.
        output_dir: Directory that receives the markdown files; it is
            created (including parents) when it does not exist yet.

    Raises:
        KeyError: If the CSV lacks a ``Filename`` or ``OCR_Text`` column.
        ValueError: If a row has no usable ``Filename`` value.
    """
    # Read every cell as literal text. pandas' default inference would turn
    # blank or "NA" cells into NaN (written out as the string "nan") and
    # could rewrite numeric-looking OCR text.
    df = pd.read_csv(csv_filepath, dtype=str, keep_default_na=False)
    # Defensive: normalise any cell that is still missing to an empty string.
    df = df.fillna("")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    rows = zip(df['Filename'], df['OCR_Text'])
    for row_number, (filename, ocr_text) in enumerate(rows, start=1):
        # Only the final path component names the note, so a directory part
        # (or absolute path) in the CSV cannot redirect the write outside
        # output_dir.
        stem = os.path.splitext(os.path.basename(filename))[0]
        if not stem:
            raise ValueError(
                f"Data row {row_number} of {csv_filepath!r} has no usable "
                "'Filename' value"
            )

        # The embed keeps the filename exactly as given in the CSV.
        markdown_content = (
            f"![[{filename}]]\n\n"
            "```\n"
            f"{ocr_text}\n"
            "```\n"
            "## encoding \n"
        )

        markdown_path = os.path.join(output_dir, stem + ".md")
        with open(markdown_path, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)

def main():
    """Command-line entry point.

    Reads two positional arguments, ``csv_filepath`` and ``output_dir``,
    from the command line and passes them to ``create_markdown_files``.
    """
    # Positional arguments in the order they must appear on the command line.
    positional_arguments = (
        ('csv_filepath', "The filepath to the CSV file with OCR results."),
        ('output_dir', "The directory to save the markdown files."),
    )

    cli = argparse.ArgumentParser(description="Convert CSV file with OCR results into markdown files")
    for argument_name, help_text in positional_arguments:
        cli.add_argument(argument_name, help=help_text)

    options = cli.parse_args()

    # Generate the markdown files from the parsed paths.
    create_markdown_files(options.csv_filepath, options.output_dir)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
	import pandas as pd
	import os
	import argparse

	def create_markdown_files(csv_filepath, output_dir):
	    """Write one markdown note per row of an OCR results CSV.

	    Each note consists of a ``![[<Filename>]]`` embed line, the row's OCR
	    text inside a fenced code block, and a trailing ``## encoding`` heading.
	    The note is named after the image file with its extension replaced by
	    ``.md`` and is always written directly inside ``output_dir``.

	    Args:
	        csv_filepath: Path to a CSV file with ``Filename`` and ``OCR_Text``
	            columns.
	        output_dir: Directory that receives the markdown files; it is
	            created (including parents) when it does not exist yet.

	    Raises:
	        KeyError: If the CSV lacks a ``Filename`` or ``OCR_Text`` column.
	        ValueError: If a row has no usable ``Filename`` value.
	    """
	    # Read every cell as literal text. pandas' default inference would turn
	    # blank or "NA" cells into NaN (written out as the string "nan") and
	    # could rewrite numeric-looking OCR text.
	    df = pd.read_csv(csv_filepath, dtype=str, keep_default_na=False)
	    # Defensive: normalise any cell that is still missing to an empty string.
	    df = df.fillna("")

	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs(output_dir, exist_ok=True)

	    rows = zip(df['Filename'], df['OCR_Text'])
	    for row_number, (filename, ocr_text) in enumerate(rows, start=1):
	        # Only the final path component names the note, so a directory part
	        # (or absolute path) in the CSV cannot redirect the write outside
	        # output_dir.
	        stem = os.path.splitext(os.path.basename(filename))[0]
	        if not stem:
	            raise ValueError(
	                f"Data row {row_number} of {csv_filepath!r} has no usable "
	                "'Filename' value"
	            )

	        # The embed keeps the filename exactly as given in the CSV.
	        markdown_content = (
	            f"![[{filename}]]\n\n"
	            "```\n"
	            f"{ocr_text}\n"
	            "```\n"
	            "## encoding \n"
	        )

	        markdown_path = os.path.join(output_dir, stem + ".md")
	        with open(markdown_path, 'w', encoding='utf-8') as md_file:
	            md_file.write(markdown_content)

	def main():
	    """Command-line entry point.

	    Reads two positional arguments, ``csv_filepath`` and ``output_dir``,
	    from the command line and passes them to ``create_markdown_files``.
	    """
	    # Parse command line arguments
	    parser = argparse.ArgumentParser(description="Convert CSV file with OCR results into markdown files")
	    parser.add_argument('csv_filepath', help="The filepath to the CSV file with OCR results.")
	    parser.add_argument('output_dir', help="The directory to save the markdown files.")

	    args = parser.parse_args()

	    # Create markdown files from the CSV
	    create_markdown_files(args.csv_filepath, args.output_dir)

	# Run the command-line entry point only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == '__main__':
	    main()