Created
September 7, 2022 16:50
-
-
Save femkeklaver/6176ac925a2481078cb30e5986633226 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import glob | |
| import re | |
| from pathlib import Path | |
| from typing import List | |
| from urllib.parse import unquote | |
| from PIL import Image | |
def parse_list(lst: str):
    """Turn a markdown bullet list into a list of item texts.

    Args:
        lst: Text holding one ``- item`` entry per line. Entries may be
            indented (nested bullets).

    Returns:
        One string per line of the stripped input, with the leading bullet
        marker removed. Lines without a marker are kept unchanged apart from
        their indentation.
    """
    items = []
    for line in lst.strip().splitlines():
        # Nested bullets are indented, so drop the indent before looking for
        # the marker instead of blindly cutting two characters.
        text = line.lstrip()
        if text.startswith("-"):
            text = text[1:]
            # Remove only the single separator space of a "- " prefix.
            if text.startswith(" "):
                text = text[1:]
        items.append(text)
    return items
def parse_content(content: str):
    """Parse the body of one markdown section.

    Args:
        content: Raw text found below a section heading.

    Returns:
        The result of ``parse_list`` when the text starts with a bullet
        marker; otherwise a list built from ``process_paragraphs`` over the
        blank-line separated paragraphs, with ``[[...]]`` cross-links removed.
    """
    if content.lstrip().startswith("-"):
        return parse_list(content)
    # Ignore cross-links. The pattern is a raw string so the backslash
    # escapes reach the regex engine without invalid-escape warnings.
    content = re.sub(r"\[\[.+?\]\]", "", content)
    paragraphs = content.split("\n\n")
    return list(process_paragraphs(paragraphs))
def find_categories(regex, content):
    """Collect the sections matched by *regex* into a dict.

    Args:
        regex: Pattern with exactly two groups: the section title and the
            section body.
        content: Markdown text to search (multiline mode).

    Returns:
        Dict mapping each lower-cased title to its body as parsed by
        ``parse_content``. A later section with the same title replaces an
        earlier one.
    """
    sections = {}
    for match in re.finditer(regex, content, re.M):
        heading, body = match.groups()
        sections[heading.lower()] = parse_content(body)
    return sections
def process_pictures(md: str):
    """Find markdown images in *md* and make sure a thumbnail exists for each.

    Every ``![name](file)`` reference is recorded. When
    ``assets/images/<file>`` does not exist yet, the source image is read
    from ``../content/recipes/attachments/<file>``, reduced with
    ``Image.thumbnail((500, 500), ...)`` and saved to that location.

    Args:
        md: Markdown text to scan.

    Returns:
        Dict mapping the image name (alt text) to the URL-unquoted file name.
    """
    # NOTE(review): both groups are greedy, so two images on the same line
    # are captured as a single match. Kept as-is to preserve behaviour.
    regex = r"!\[(.*)\]\((.+)\)"
    result_dict = {}
    for image_name, raw_target in re.findall(regex, md):
        image_file = unquote(raw_target)
        result_dict[image_name] = image_file
        goal_path = Path(f"assets/images/{image_file}")
        if goal_path.is_file():
            # Thumbnail was produced on an earlier run.
            continue
        # The context manager closes the source file handle after saving.
        with Image.open(f"../content/recipes/attachments/{image_file}") as image:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            image.thumbnail((500, 500), Image.LANCZOS)
            # Saving fails if the target folder is missing, so create it.
            goal_path.parent.mkdir(parents=True, exist_ok=True)
            image.save(goal_path)
    return result_dict
def process_paragraphs(paragraphs: List):
    """Yield the useful content of each paragraph.

    For a paragraph containing images, the mapping returned by
    ``process_pictures`` is yielded in place of the text. Other non-empty
    paragraphs are yielded unchanged, and empty ones are skipped.
    """
    for paragraph in paragraphs:
        found = process_pictures(paragraph)
        # Prefer the picture mapping; fall back to the raw paragraph text.
        item = found or paragraph
        if item:
            yield item
def process_tagfile(md: str):
    """Parse a tag file: each ``## heading`` followed by a block of bullet lines.

    Returns:
        The dict produced by ``find_categories`` for that heading/bullet
        pattern.
    """
    # Group 1: heading text. Group 2: the run of (optionally indented)
    # "-" lines directly below it.
    heading_with_bullets = r'## (.+)\n((?: *-.*\n)+)'
    return find_categories(heading_with_bullets, md)
def process_recipe(md: str):
    """Process a recipe in markdown format into a dict.

    ``#`` marks the title (parsed as a string), optionally followed by an
    image on the next line. ``##`` marks a part, whose content is the text
    below it.

    Args:
        md: Full markdown text of one recipe.

    Returns:
        Dict with one entry per ``##`` part (lower-cased heading), plus the
        keys ``title``, ``title_image_title``, ``title_image``, ``tags`` and
        ``pictures``. The three title keys are ``None`` when the text has no
        ``# title`` line; the two image keys are ``None`` when no image
        follows the title.
    """
    result = find_categories(r'## (.*)\n([^#]*)', md)

    title_match = re.search(r'^# (.*)$(?:\n?!\[(.*)\]\((.+)\))?', md, re.M)
    if title_match is None:
        # A file without a title line used to fail with an opaque
        # AttributeError on ``None.groups()``.
        title_fields = (None, None, None)
    else:
        title_fields = title_match.groups()
    result["title"], result["title_image_title"], result["title_image"] = title_fields

    result["tags"] = re.findall(r'#([\w/!]+)', md)
    result["pictures"] = process_pictures(md)
    return result
if __name__ == "__main__":
    # Script entry point: parse every recipe file and pretty-print the result.
    from pprint import pprint

    for filename in glob.glob("../content/recipes/*.md"):
        # Explicit encoding so non-ASCII recipe text is decoded the same way
        # on every platform instead of using the locale default.
        with open(filename, "r", encoding="utf-8") as fi:
            pprint(process_recipe(fi.read()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment