Skip to content

Instantly share code, notes, and snippets.

@agatan
Created July 14, 2021 12:07
Show Gist options
  • Save agatan/cd19b9ccb946c21b505a92953f51ac56 to your computer and use it in GitHub Desktop.
Save agatan/cd19b9ccb946c21b505a92953f51ac56 to your computer and use it in GitHub Desktop.
from pathlib import Path
from typing import Optional
import re
import html
import urllib.request
import markdownify
class Converter(markdownify.MarkdownConverter):
    """Markdown converter that mirrors remote images into ``./public/i``.

    Every ``<img>`` handled during conversion is downloaded to the local
    ``./public/i`` directory and its ``src`` attribute is rewritten to the
    matching ``/i/<filename>`` path before the parent class converts it.
    """

    def convert_img(self, el, text, convert_as_inline):
        """Download the image referenced by ``el`` and convert it with a local ``src``.

        Args:
            el: The image element. ``el.attrs`` is read for ``src`` and, after a
                successful download, updated in place with the local path.
            text: Passed through unchanged to the parent ``convert_img``.
            convert_as_inline: Passed through unchanged to the parent
                ``convert_img``.

        Returns:
            The result of the parent class's ``convert_img`` for ``el``.

        Note:
            Errors raised while fetching the URL or writing the file are not
            caught here and propagate to the caller.
        """
        src = el.attrs.get("src")
        if not src:
            # Nothing to download; let the parent deal with the element as-is.
            return super().convert_img(el, text, convert_as_inline)
        print(src)

        # Drop any fragment/query so the local filename is just the last
        # path segment of the URL.
        url_path = src.partition("#")[0].partition("?")[0]
        filename = url_path.rsplit("/", 1)[-1]
        if not filename:
            # URL ends with "/" - there is no usable filename to save under,
            # so keep the original remote reference.
            return super().convert_img(el, text, convert_as_inline)

        dst = Path("./public/i") / filename
        # The output directory may not exist yet on a fresh checkout.
        dst.parent.mkdir(parents=True, exist_ok=True)
        with urllib.request.urlopen(src) as webf, dst.open("wb") as fp:
            fp.write(webf.read())

        el.attrs["src"] = "/i/" + filename
        return super().convert_img(el, text, convert_as_inline)
def filepath_to_html(filepath: Path) -> Optional[str]:
    """Convert the HTML body of a post file to Markdown, keeping its header.

    The file is read as UTF-8 and split at the first blank line: everything
    before it is treated as front matter and kept verbatim, everything after
    it is the body. Only bodies that start with ``<`` are converted.

    Args:
        filepath: Path of the post file to read.

    Returns:
        The front matter, a blank line, and the body converted by
        ``Converter``; or ``None`` when the file has no blank-line separator
        or its body does not start with ``<`` (nothing to convert).
    """
    with open(filepath, "r", encoding="utf-8") as fp:
        contents = fp.read()

    # partition() never raises: when there is no blank line the separator and
    # body come back empty, and the file is skipped instead of crashing on
    # tuple unpacking.
    frontmatter, separator, body = contents.partition("\n\n")
    if not separator or not body.startswith("<"):
        return None

    converter = Converter(autolinks=False, heading_style="ATX", strip=["iframe"])
    return frontmatter + "\n\n" + converter.convert(body)
# Matches Markdown links that point at Hatena keyword pages, e.g.
# [C++](http://d.hatena.ne.jp/keyword/C%2B%2B); group 1 is the link text.
# Dots in the host name are escaped so only the literal host matches, and
# both http and https links are accepted.
_HATENA_KEYWORD_LINK = re.compile(
    r"\[([^\]]+)\]\(https?://d\.hatena\.ne\.jp/keyword/[^\)]*\)"
)


def cleanup(s: str) -> str:
    """Replace Hatena keyword links in ``s`` with their plain link text.

    Args:
        s: Markdown text that may contain ``[text](http://d.hatena.ne.jp/keyword/...)``
            links.

    Returns:
        ``s`` with every such link replaced by its ``text``; all other
        content, including links to other hosts, is left unchanged.
    """
    return _HATENA_KEYWORD_LINK.sub(r"\1", s)
def main() -> None:
    """Rewrite every ``*.md`` file under ``./posts/`` in place.

    Each file is passed through ``filepath_to_html``; files for which it
    returns nothing are left untouched. For the rest, Hatena keyword links
    are stripped with ``cleanup``, HTML entities are unescaped, and the
    result overwrites the original file as UTF-8.
    """
    basedir = Path("./posts/")
    for filepath in basedir.glob("*.md"):
        s = filepath_to_html(filepath)
        if not s:
            continue
        s = html.unescape(cleanup(s))
        # write_text opens, writes and closes the file, so no handle is left
        # open for each processed post.
        filepath.write_text(s, encoding="utf-8")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment