Skip to content

Instantly share code, notes, and snippets.

@shawngraham
Last active December 15, 2023 17:50
Show Gist options
  • Save shawngraham/cfbb52a22266ba7913f1e275d8784719 to your computer and use it in GitHub Desktop.
Save shawngraham/cfbb52a22266ba7913f1e275d8784719 to your computer and use it in GitHub Desktop.
Split a CSV file into Markdown files, for use with Obsidian.
import pandas as pd
import os
import argparse
def create_markdown_files(csv_filepath, output_dir):
    """Write one markdown note per row of an OCR results CSV.

    Each note contains a ``![[<Filename>]]`` wiki-style embed, the row's
    ``OCR_Text`` inside a fenced code block, and an empty ``## encoding``
    heading. The note is saved in *output_dir* under the row's filename with
    its extension replaced by ``.md``.

    Args:
        csv_filepath: Path to a CSV file that has ``Filename`` and
            ``OCR_Text`` columns.
        output_dir: Directory the notes are written to; it is created
            (including parents) when it does not exist yet.

    Raises:
        KeyError: If the CSV lacks a ``Filename`` or ``OCR_Text`` column.
    """
    # Read every cell as literal text. With pandas' default type inference an
    # empty cell (or OCR text such as "NA" / "NULL") becomes NaN and would be
    # written out as the string "nan", and numeric-looking text such as "007"
    # would be rewritten as 7. fillna('') covers rows that are short a field.
    df = pd.read_csv(csv_filepath, dtype=str, keep_default_na=False).fillna('')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    for filename, ocr_text in zip(df['Filename'], df['OCR_Text']):
        if not filename.strip():
            # Without a filename there is nothing to embed or to name the
            # note after, so the row is skipped instead of crashing.
            continue

        markdown_content = (
            f"![[{filename}]]\n\n"
            "```\n"
            f"{ocr_text}\n"
            "```\n"
            "## encoding \n"
        )

        # Same base name as the image, with a markdown extension.
        markdown_filename = os.path.splitext(filename)[0] + ".md"

        note_path = os.path.join(output_dir, markdown_filename)
        with open(note_path, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)
def main():
    """Command-line entry point.

    Reads two positional arguments, ``csv_filepath`` and ``output_dir``, from
    the command line and passes them to ``create_markdown_files``.
    """
    cli = argparse.ArgumentParser(
        description="Convert CSV file with OCR results into markdown files"
    )

    # Positional arguments, registered in the order they appear on the
    # command line.
    # NOTE: an `images_dir` argument was sketched at one point but is not
    # accepted by this script.
    positional_help = (
        ('csv_filepath', "The filepath to the CSV file with OCR results."),
        ('output_dir', "The directory to save the markdown files."),
    )
    for arg_name, help_text in positional_help:
        cli.add_argument(arg_name, help=help_text)

    options = cli.parse_args()

    # Generate the markdown notes from the parsed paths.
    create_markdown_files(options.csv_filepath, options.output_dir)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment