Skip to content

Instantly share code, notes, and snippets.

@femkeklaver
Created September 7, 2022 16:50
Show Gist options
  • Select an option

  • Save femkeklaver/6176ac925a2481078cb30e5986633226 to your computer and use it in GitHub Desktop.

Select an option

Save femkeklaver/6176ac925a2481078cb30e5986633226 to your computer and use it in GitHub Desktop.
import glob
import re
from pathlib import Path
from typing import List
from urllib.parse import unquote
from PIL import Image
def parse_list(lst: str):
    """Turn a markdown bullet list into a list of item strings.

    Each non-blank line of *lst* is one item. Surrounding whitespace and a
    single leading ``-`` marker are removed, so indented bullets
    (``  - item``) are handled the same way as top-level ones.

    Args:
        lst: Raw markdown text of the list. Despite the parameter name this
            is a string, not a ``list``.

    Returns:
        The item texts in document order, without their bullet markers.
    """
    items = []
    for line in lst.splitlines():
        text = line.strip()
        if not text:
            # Blank lines only separate bullets; they are not items.
            continue
        if text.startswith("-"):
            # Remove just the first dash so content such as "-5" survives.
            text = text[1:].lstrip()
        items.append(text)
    return items
def parse_content(content: str):
    """Parse the body of one markdown section.

    A body whose first non-blank character is ``-`` is treated as a bullet
    list and handed to ``parse_list``. Any other body has its ``[[...]]``
    cross-links removed, is split into paragraphs on blank lines, and is
    run through ``process_paragraphs``.

    Args:
        content: The text found below a section heading.

    Returns:
        A list: the bullet items for a list body, otherwise the items
        produced by ``process_paragraphs``.
    """
    if content.lstrip().startswith("-"):
        return parse_list(content)
    # Ignore cross-links. The pattern is a raw string because a backslash
    # followed by "[" is an invalid escape in a normal string literal, which
    # newer Python versions warn about.
    without_links = re.sub(r"\[\[.+?\]\]", "", content)
    paragraphs = without_links.split("\n\n")
    return list(process_paragraphs(paragraphs))
def find_categories(regex, content):
    """Collect every section of *content* that *regex* matches.

    *regex* is searched in multi-line mode and must define two groups: the
    section title and the section body. Titles are lower-cased and used as
    keys; each body is parsed with ``parse_content``. When a title occurs
    more than once, the last occurrence wins.

    Returns:
        A dict mapping lower-cased titles to their parsed bodies.
    """
    matches = re.finditer(regex, content, re.M)
    pairs = (match.groups() for match in matches)
    return {heading.lower(): parse_content(body) for heading, body in pairs}
def process_pictures(md: str):
    """Find the markdown images in *md* and create missing thumbnails.

    Every ``![name](file)`` reference is recorded. For each one, a thumbnail
    of at most 500x500 pixels is written to ``assets/images/<file>`` unless a
    file already exists there. Source images are read from
    ``../content/recipes/attachments/<file>``.

    Args:
        md: Markdown text to scan.

    Returns:
        A dict mapping each image's alt text to its URL-decoded file name.
        Images sharing the same alt text overwrite each other.

    Raises:
        Whatever ``Image.open`` raises when a source image is missing or
        unreadable (for example ``FileNotFoundError``).
    """
    # The alt text stops at the first "]" and the target at its closing ")",
    # so several images on one line are matched separately. One level of
    # balanced parentheses is allowed inside the target so file names such
    # as "photo (1).png" still match.
    regex = r"!\[([^\]\n]*)\]\(((?:[^()\n]|\([^()\n]*\))+)\)"
    result_dict = {}
    for image_name, quoted_file in re.findall(regex, md):
        image_file = unquote(quoted_file)
        result_dict[image_name] = image_file
        goal_path = Path(f"assets/images/{image_file}")
        if goal_path.is_file():
            # Thumbnail already generated on an earlier run.
            continue
        # The context manager closes the source file handle when done.
        with Image.open(f"../content/recipes/attachments/{image_file}") as image:
            # Image.ANTIALIAS was an alias of LANCZOS and was removed in
            # Pillow 10; LANCZOS works on old and new releases.
            image.thumbnail((500, 500), Image.LANCZOS)
            goal_path.parent.mkdir(parents=True, exist_ok=True)
            image.save(goal_path)
    return result_dict
def process_paragraphs(paragraphs: List):
    """Yield one item per non-empty paragraph.

    A paragraph that contains images is replaced by the dict returned from
    ``process_pictures`` (its text is not yielded). Any other non-empty
    paragraph is yielded unchanged, and empty paragraphs are skipped.
    """
    for paragraph in paragraphs:
        found = process_pictures(paragraph)
        # Prefer the picture dict; fall back to the paragraph text itself.
        item = found or paragraph
        if item:
            yield item
def process_tagfile(md: str):
    """Parse a tag file into a dict of its sections.

    A section is a ``## `` heading line directly followed by one or more
    bullet lines (optionally indented, each starting with ``-``). Parsing is
    delegated to ``find_categories``.
    """
    tag_section_pattern = r'## (.+)\n((?: *-.*\n)+)'
    return find_categories(tag_section_pattern, md)
def process_recipe(md: str):
    """Process a recipe in markdown format into a dict.

    ``#``  = title (parsed as a string), optionally followed by a title image
    ``##`` = part (its content is the text below it, up to the next ``#``
    character)

    Args:
        md: The full markdown text of one recipe.

    Returns:
        A dict with one entry per ``##`` part (keyed by lower-cased heading)
        plus the keys ``title``, ``title_image_title``, ``title_image``,
        ``tags`` and ``pictures``. The three title keys are ``None`` when the
        corresponding piece is absent from the markdown.
    """
    result = find_categories(r'## (.*)\n([^#]*)', md)
    title_match = re.search(r'^# (.*)$(?:\n?!\[(.*)\]\((.+)\))?', md, re.M)
    # A file without a level-one heading has no match. Fall back to None for
    # all three fields instead of failing with AttributeError on ``.groups()``.
    title_parts = title_match.groups() if title_match else (None, None, None)
    result["title"], result["title_image_title"], result["title_image"] = title_parts
    # Tags are "#" directly followed by word characters, "/" or "!".
    result["tags"] = re.findall(r'#([\w/!]+)', md)
    result["pictures"] = process_pictures(md)
    return result
if __name__ == "__main__":
    from pprint import pprint

    # Parse every recipe in the content folder and pretty-print the result.
    for filename in glob.glob("../content/recipes/*.md"):
        # Read with an explicit encoding so decoding does not depend on the
        # platform's locale default.
        # NOTE(review): assumes the recipe files are UTF-8 - confirm.
        with open(filename, "r", encoding="utf-8") as fi:
            pprint(process_recipe(fi.read()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment