Migrating a Ghost blog database to Jekyll
# coding: utf8 | |
""" | |
This script helps to import content from a Ghost blog database to Jekyll. | |
The database is expected to be running on a reachable MySQL host.
See the very end for DB configuration. | |
Quick Usage: | |
pip install -r requirements | |
python import.py | |
Posts will be written into _posts/<filename> | |
Authors will be listed to standard output. | |
""" | |
import MySQLdb | |
import yaml | |
# Module-level lookup tables, keyed by the stringified database id.
# fetch_tags() fills `tags` (id -> tag slug) and fetch_authors() fills
# `authors` (id -> user slug); fetch_posts() reads both.
tags = dict()
authors = dict()
def fetch_authors():
    """Load all Ghost users and print them as a YAML ``authors`` mapping.

    Uses the module-level cursor ``c``. As a side effect, records
    ``str(id) -> slug`` for every user in the module-level ``authors`` dict,
    which fetch_posts() later uses to resolve a post's author.
    """
    c.execute("""SELECT
        id, name, slug, bio, website, location, email
        FROM users ORDER BY id""")

    profiles = {}
    for user_id, name, slug, bio, website, location, email in c.fetchall():
        # Remember the slug under the stringified id for later lookups.
        authors[str(user_id)] = slug
        profiles[slug] = {
            "name": name,
            "bio": bio,
            "website": website,
            "location": location,
            "email": email,
        }

    authors_yaml = {"authors": profiles}
    print("Paste this part into your '_config.yml':")
    print(yaml.dump(authors_yaml, default_flow_style=False))
def fetch_tags():
    """Fill the module-level ``tags`` dict with ``str(id) -> slug``.

    Uses the module-level cursor ``c`` to read every row of the ``tags``
    table, ordered by id.
    """
    c.execute("""SELECT id, slug FROM tags ORDER BY id""")
    tags.update((str(tag_id), slug) for tag_id, slug in c.fetchall())
def clean_text(txt):
    """Fix codepage weirdness for the Unicode characters we used.

    Applies a fixed list of literal substitutions to ``txt`` and returns the
    result. The substitutions run strictly in the order listed, so each one
    sees the output of the ones before it.
    """
    # NOTE(review): several pairs below look identical on both sides, and the
    # two "â–" keys look the same as each other. They may differ only by
    # non-printing characters, or may have been altered in this copy of the
    # file -- confirm against the original bytes before relying on them.
    substitutions = (
        ("’", "’"),
        ("‘", "‘"),
        ("–", "–"),
        ("—", "–"),
        ("―", "—"),
        ("“", "\""),
        ("â€", "\""),
        ("â–ˆ", "█"),
        ("â–‹", "▋"),
        ("â–", "▍"),
        ("â–Ž", "▎"),
        ("â–", "▏"),
        ("â–Š", "▊"),
        ("â–‰", "▉"),
        ("✅", "✅"),
        ("„", "„"),
        ("…", "…"),
        ("Â ", ""),
        ("ä", "ä"),
        ("ö", "ö"),
        ("ü", "ü"),
        ("é", "é"),
    )
    for broken, fixed in substitutions:
        txt = txt.replace(broken, fixed)
    return txt
def fetch_posts():
    """Write every published Ghost post to ``_posts/<date>-<slug>.md``.

    Uses the module-level cursors ``c`` (posts) and ``c2`` (per-post tag
    lookup) and the ``authors`` / ``tags`` dicts, so fetch_authors() and
    fetch_tags() must have run first. Each file consists of YAML front matter
    (title, date, categories, author) followed by the post's plaintext with
    ``/content/images/`` rewritten to ``/assets/``.

    Raises:
        KeyError: if a post references an author id or tag id that is not in
            the ``authors`` / ``tags`` lookup tables.
    """
    import os  # local import: only needed to make sure the output dir exists

    # The output directory may not exist on a fresh Jekyll checkout.
    if not os.path.isdir("_posts"):
        os.makedirs("_posts")

    c.execute("""SELECT
        id, title, slug, plaintext, meta_title, meta_description,
        published_at, updated_at, author_id
        FROM posts WHERE status='published' ORDER BY id""")
    for entry in c.fetchall():
        published_at = str(entry[6])
        # Jekyll post filenames start with the YYYY-MM-DD part of the date.
        filename = published_at[0:10] + "-" + entry[2] + ".md"
        path = "_posts/" + filename

        # Second cursor, so the outer result set on ``c`` is left untouched.
        c2.execute("SELECT tag_id FROM posts_tags WHERE post_id=%s", (entry[0],))
        mytags = [tags[str(t[0])] for t in c2.fetchall()]

        # frontmatter
        frontmatter = {
            "title": clean_text(entry[1]),
            # NOTE(review): stamps the stored timestamp as +0000; presumably
            # Ghost stores published_at in UTC -- confirm.
            "date": published_at + " +0000",
            "categories": mytags,
            "author": authors[str(entry[8])],
        }
        md = "---\n" + yaml.dump(frontmatter) + "---\n\n"

        # text cleanup; a NULL plaintext column becomes an empty body
        text = entry[3] or ""
        text = text.replace("/content/images/", "/assets/")
        md += clean_text(text)

        with open(path, "w") as markdownfile:
            markdownfile.write(md)
if __name__ == "__main__":
    # DB configuration: adjust host/user/passwd/db to match your Ghost database.
    db = MySQLdb.connect(host="localhost", user="ghost", passwd="password", db="ghost")
    try:
        # ``c`` and ``c2`` are module-level names read by the fetch_* functions.
        c = db.cursor()
        c2 = db.cursor()
        # Ask the server to use utf8 for this session.
        c.execute("SET NAMES utf8")
        c.execute("SET CHARSET utf8")
        # Tags and authors first: fetch_posts() looks both up by id.
        fetch_tags()
        fetch_authors()
        fetch_posts()
    finally:
        # Always release the connection, even if the import fails midway.
        db.close()
MySQL-python==1.2.5 | |
PyYAML==3.12 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment