Last active
December 15, 2023 17:50
-
-
Save shawngraham/cfbb52a22266ba7913f1e275d8784719 to your computer and use it in GitHub Desktop.
Split a CSV file into Markdown files, for use with Obsidian.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import os | |
import argparse | |
def create_markdown_files(csv_filepath, output_dir):
    """Write one Obsidian-style markdown note per row of an OCR results CSV.

    The CSV must provide a ``Filename`` column (the image the note embeds)
    and an ``OCR_Text`` column (the recognised text). Each note is saved in
    *output_dir* under the image's name with its extension replaced by
    ``.md``. It contains an ``![[...]]`` embed of the image, the OCR text in
    a fenced code block, and an empty ``## encoding`` heading.

    Rows whose ``Filename`` cell is empty are skipped. An empty ``OCR_Text``
    cell yields an empty code block.

    Args:
        csv_filepath: Path of the CSV file to read.
        output_dir: Directory that receives the markdown files; it is
            created if it does not exist yet.

    Raises:
        KeyError: If the CSV lacks a ``Filename`` or ``OCR_Text`` column.
    """
    # Read every cell as literal text. With pandas' default type inference an
    # empty OCR cell becomes NaN (and would be written out as the word "nan"),
    # strings such as "NA" are treated as missing, and numeric-looking
    # filenames turn into numbers that os.path.splitext cannot handle.
    df = pd.read_csv(csv_filepath, dtype=str, keep_default_na=False)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    for filename, ocr_text in zip(df['Filename'], df['OCR_Text']):
        if not filename:
            # Without an image name there is nothing to name the note after.
            continue

        # Embed of the source image, the OCR text fenced as code, then a
        # heading left empty for later annotation.
        markdown_content = (
            f"![[{filename}]]\n\n"
            "```\n"
            f"{ocr_text}\n"
            "```\n"
            "## encoding \n"
        )

        # The note shares the image's name, with the extension swapped for .md.
        markdown_filename = os.path.splitext(filename)[0] + ".md"
        note_path = os.path.join(output_dir, markdown_filename)
        with open(note_path, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)
def main():
    """Command-line entry point.

    Takes two positional arguments, the CSV path and the output directory,
    and passes them to ``create_markdown_files``.
    """
    cli = argparse.ArgumentParser(
        description="Convert CSV file with OCR results into markdown files"
    )

    # Positional arguments, in the order they must appear on the command line.
    positional_args = (
        ('csv_filepath', "The filepath to the CSV file with OCR results."),
        ('output_dir', "The directory to save the markdown files."),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)

    options = cli.parse_args()
    create_markdown_files(options.csv_filepath, options.output_dir)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment