Skip to content

Instantly share code, notes, and snippets.

@ttimasdf
Last active July 23, 2022 08:34
Show Gist options
  • Save ttimasdf/a5804aeef47102c948f552254aa2d727 to your computer and use it in GitHub Desktop.
Save ttimasdf/a5804aeef47102c948f552254aa2d727 to your computer and use it in GitHub Desktop.
Hexo to Hugo Article & Images migrator

Hexo to Hugo Migrator

Usage:

  1. cd to the Hexo source/ directory and copy migrator.py there.
  2. If your image-host CDN is mapped to a local directory, change LOCAL_CDN_DOMAIN and p_imgcdn accordingly.
  3. Run ./migrator.py; the pip package requests is the only dependency.
  4. Check the output in _export/.
import re
import shutil
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.parse import urlparse
import requests
# Matches a Markdown image: ![comment](filename "optional title").
# NOTE: the lazy `filename` group stops right before the opening quote of the
# title, so it can carry trailing whitespace; callers must strip it.
REGEX_IMAGE = re.compile(r'!\[(?P<comment>[^\]]*)\]\((?P<filename>.*?)(?=\"|\))(?P<optionalpart>\".*\")?\)')

# Remote hosts whose images may be downloaded; images on any other host are
# left untouched in the exported Markdown.
WHITELIST = []
# Host name of the image CDN whose content is mirrored locally in `p_imgcdn`.
LOCAL_CDN_DOMAIN = "your-cdn.domain"

p_posts = Path("./_posts")
p_imgcdn = Path("../../img-cdn-local")
p_export = Path("./_export")

# Seconds to wait for a whitelisted remote image before giving up on it.
DOWNLOAD_TIMEOUT = 30


def export_image(post, img, fpath, dstdir, index):
    """Place one referenced image into *dstdir* and return its new file name.

    Args:
        post: Path of the Markdown file being migrated (used in messages).
        img: The regex match of the image reference (used in messages).
        fpath: The image reference as written in the post, already stripped.
        dstdir: Page-bundle directory that receives the image.
        index: Position of the reference in the post; becomes the file stem.

    Returns:
        The exported file name (e.g. ``"03.png"``), or ``None`` when the image
        was skipped (host not whitelisted, download failed, file missing).

    Raises:
        FileNotFoundError: If a CDN image has no counterpart in `p_imgcdn`.
    """
    if fpath.startswith(("http://", "https://")):
        url = urlparse(fpath)
        if url.netloc == LOCAL_CDN_DOMAIN:
            src = p_imgcdn / url.path[1:]
            # An explicit raise instead of `assert`, which vanishes under -O.
            if not src.exists():
                raise FileNotFoundError(
                    f"{post}: CDN image {url.path} should exist on {src}"
                )
            print(post, img, "found CDN image")
        elif url.netloc not in WHITELIST:
            print(post, img, "skipped not in whitelist.")
            return None
        else:
            try:
                resp = requests.get(fpath, timeout=DOWNLOAD_TIMEOUT)
                # Without this an HTTP error page would be saved as the image.
                resp.raise_for_status()
            except requests.RequestException as exc:
                print(post, img, "download failed:", exc)
                return None
            # Write straight into the bundle; no temp file is left behind.
            dst = dstdir / f"{index:02d}{Path(url.path).suffix}"
            dst.write_bytes(resp.content)
            print(post, img, "downloaded to", dst)
            return dst.name
    else:
        src = Path(fpath)
        print(post, img, "is local image")

    if not src.exists():
        print(post, img, "not exists")
        return None

    dst = dstdir / f"{index:02d}{src.suffix}"
    shutil.copyfile(src, dst)
    return dst.name


def migrate_post(post, posts_root=None, export_root=None):
    """Convert one Hexo post into a Hugo page bundle.

    The post ``<posts_root>/a/b.md`` becomes ``<export_root>/a/b/index.zh.md``
    and every image that can be resolved is copied next to it as ``NN.<ext>``,
    with the Markdown reference rewritten to that file name. References that
    cannot be resolved are kept exactly as written.

    Args:
        post: Path of the Markdown file, located below *posts_root*.
        posts_root: Root of the Hexo posts; defaults to `p_posts`.
        export_root: Root of the export tree; defaults to `p_export`.

    Returns:
        Path of the written ``index.zh.md``.
    """
    posts_root = p_posts if posts_root is None else posts_root
    export_root = p_export if export_root is None else export_root

    content = post.read_text(encoding="utf-8")
    dstdir = export_root / post.relative_to(posts_root).with_suffix("")
    dstdir.mkdir(parents=True, exist_ok=True)
    dstmd = dstdir / "index.zh.md"

    # Source reference -> exported file name (None when it was skipped), so a
    # repeated image is exported once but rewritten at every occurrence.
    exported = {}
    pieces = []
    cursor = 0
    for i, img in enumerate(REGEX_IMAGE.finditer(content)):
        # Strip: the pattern leaves the blank before a "title" in `filename`.
        fpath = img.group("filename").strip()
        if not fpath:
            print(post, img, "empty")
            continue
        if fpath in exported:
            print(post, img, "duplicated")
        else:
            exported[fpath] = export_image(post, img, fpath, dstdir, i)
        name = exported[fpath]
        if name is None:
            continue

        cmt = img.group("comment")
        opt = img.group("optionalpart")
        opt = f" {opt}" if opt else ""
        # Rewrite by match position, so only this exact reference is touched.
        pieces.append(content[cursor:img.start()])
        pieces.append(f"![{cmt}]({name}{opt})")
        cursor = img.end()
    pieces.append(content[cursor:])

    dstmd.write_text("".join(pieces), encoding="utf-8")
    return dstmd


p_export.mkdir(parents=True, exist_ok=True)
for p in p_posts.rglob("*.md"):
    migrate_post(p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment