Instantly share code, notes, and snippets.

@gmemstr /import.py forked from marians/import.py
Last active Nov 8, 2017

Embed
What would you like to do?
Migrating a Ghost blog database to Jekyll
# coding: utf8
"""
This script helps to import content from a Ghost blog database to Jekyll.
The database is expected to be running on a reachable MySQL host.
See the very end for DB configuration.
Quick Usage:
pip install -r requirements
python import.py
Posts will be written into _posts/<filename>
Authors will be listed to standard output.
"""
import MySQLdb
import yaml
# Ghost tag id (as a string) -> tag slug; filled by fetch_tags().
tags = {}
# Ghost user id (as a string) -> user slug; filled by fetch_authors().
authors = {}
def fetch_authors():
    """Read all Ghost users and print them as a Jekyll ``authors`` mapping.

    Side effects:
        * fills the module-level ``authors`` dict (user id as string -> slug),
          which is used later to resolve the author of each post;
        * prints a YAML snippet, keyed by user slug, to standard output.

    Uses the module-level cursor ``c``.
    """
    query = (
        "SELECT\n"
        "id, name, slug, bio, website, location, email\n"
        "FROM users ORDER BY id"
    )
    c.execute(query)

    profiles = {}
    for user_id, name, slug, bio, website, location, email in c.fetchall():
        # Remember id -> slug so posts can reference their author by slug.
        authors[str(user_id)] = slug
        profiles[slug] = {
            "name": name,
            "bio": bio,
            "website": website,
            "location": location,
            "email": email,
        }

    snippet = yaml.dump({"authors": profiles}, default_flow_style=False)
    print("Paste this part into your '_config.yml':")
    print(snippet)
def fetch_tags():
    """Populate the module-level ``tags`` dict (tag id as string -> slug).

    Uses the module-level cursor ``c``.
    """
    c.execute("""SELECT id, slug FROM tags ORDER BY id""")
    tags.update((str(tag_id), slug) for tag_id, slug in c.fetchall())
def clean_text(txt):
    """Strip or normalise typographic characters that came out garbled.

    Args:
        txt: The text to clean (a post title or post body).

    Returns:
        ``txt`` with the listed characters/sequences removed, curly double
        quotes turned into ``"`` and no-break spaces turned into plain spaces.
        Regular spaces and all other characters are left untouched.
    """
    replacements = (
        # Multi-character sequences MUST come first: each of them contains an
        # en dash, which is deleted further down. Applied afterwards they
        # could never match and would leave residue in the text.
        ("â–ˆ", ""),
        ("â–‹", ""),
        ("â–Ž", ""),
        ("â–Š", ""),
        ("â–‰", ""),
        # NOTE(review): typographic apostrophes, dashes and the ellipsis are
        # deleted rather than converted to an ASCII equivalent -- confirm
        # that this is intended.
        ("’", ""),
        ("‘", ""),
        ("–", ""),
        ("—", ""),
        ("―", ""),
        ("“", "\""),
        ("”", "\""),
        ("▍", ""),
        ("▏", ""),
        ("✅", ""),
        ("„", ""),
        ("…", ""),
        # The first string is a no-break space (U+00A0), not a regular space.
        # It becomes a plain space so that neighbouring words stay separated.
        (" ", " "),
    )
    for old, new in replacements:
        txt = txt.replace(old, new)
    return txt
def fetch_posts():
    """Export every published Ghost post as a Jekyll Markdown file.

    For each row of ``posts`` with ``status='published'`` a file
    ``_posts/<YYYY-MM-DD>-<slug>.md`` is written. It consists of a YAML front
    matter block (title, date, categories, author) followed by the post's
    plain text, with ``/content/images/`` paths rewritten to ``/assets/``.

    Relies on module-level state: the cursors ``c`` and ``c2``, and the
    ``tags`` / ``authors`` lookups, so ``fetch_tags()`` and
    ``fetch_authors()`` have to run first.

    Raises:
        KeyError: if a post references a tag id or author id that is missing
            from the ``tags`` / ``authors`` lookups.
    """
    import os  # local import: only this function deals with the output folder

    out_dir = "_posts"
    # Create the output folder up front instead of failing on the first
    # open() when the script is run outside an existing Jekyll site.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    c.execute(
        "SELECT "
        "id, title, slug, plaintext, meta_title, meta_description, "
        "published_at, updated_at, author_id "
        "FROM posts WHERE status='published' ORDER BY id"
    )
    for entry in c.fetchall():
        post_id = entry[0]
        title = entry[1]
        slug = entry[2]
        plaintext = entry[3]
        published = str(entry[6])
        author_id = entry[8]

        # File name: the first ten characters of the timestamp (the date
        # part) followed by the post slug.
        path = os.path.join(out_dir, published[0:10] + "-" + slug + ".md")

        # Tags of this post are fetched through the second cursor.
        c2.execute("SELECT tag_id FROM posts_tags WHERE post_id=%s", (post_id,))
        post_tags = [tags[str(row[0])] for row in c2.fetchall()]

        frontmatter = {
            "title": clean_text(title),
            # NOTE(review): the timestamp is labelled as UTC without any
            # conversion -- confirm the database stores UTC.
            "date": published + " +0000",
            "categories": post_tags,
            "author": authors[str(author_id)],
        }

        # A NULL plaintext column yields None; treat it as an empty body
        # instead of crashing on .replace().
        body = (plaintext or "").replace("/content/images/", "/assets/")
        md = "---\n" + yaml.dump(frontmatter) + "---\n\n" + clean_text(body)

        # The file is only written, never read back, hence plain "w".
        with open(path, "w") as markdownfile:
            markdownfile.write(md)
if __name__ == "__main__":
    # DB configuration: adjust host/user/passwd/db to your Ghost database.
    db = MySQLdb.connect(host="localhost", user="ghost", passwd="password", db="ghost")
    try:
        # Module-level cursors used by the fetch_* functions. fetch_posts()
        # runs its per-post tag queries on c2.
        c = db.cursor()
        c2 = db.cursor()
        c.execute("SET NAMES utf8")
        c.execute("SET CHARSET utf8")
        # Order matters: fetch_posts() resolves tag and author ids through
        # the lookups that the first two calls fill.
        fetch_tags()
        fetch_authors()
        fetch_posts()
    finally:
        # Always release the connection, even if the export fails halfway.
        db.close()
MySQL-python==1.2.5
PyYAML==3.12
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment