gmemstr/import.py

## import.py
# coding: utf8

"""
This script helps to import content from a Ghost blog database to Jekyll.

The database is expected to be running on a reachable MySQL host.
See the very end for DB configuration.

Quick Usage:
pip install -r requirements.txt
python import.py

Posts will be written into _posts/<filename>

Authors will be listed to standard output.

"""

import MySQLdb
import yaml

# tag id (as a string) -> tag slug; filled by fetch_tags()
tags = {}
# user id (as a string) -> author slug; filled by fetch_authors()
authors = {}

def fetch_authors():
    """Load all users and print them as an ``authors`` block for _config.yml.

    Reads the ``users`` table through the module-level cursor ``c``, records
    each user in the module-level ``authors`` dict (id as string -> slug) and
    prints a YAML mapping, keyed by slug, holding the user's name, bio,
    website, location and email.
    """
    c.execute("""SELECT
        id, name, slug, bio, website, location, email
        FROM users ORDER BY id""")

    profiles = {}
    for user_id, name, slug, bio, website, location, email in c.fetchall():
        # Posts reference authors by numeric id; the lookup key is the id as text.
        authors[str(user_id)] = slug
        profiles[slug] = dict(
            name=name,
            bio=bio,
            website=website,
            location=location,
            email=email,
        )

    print("Paste this part into your '_config.yml':")
    print(yaml.dump({"authors": profiles}, default_flow_style=False))


def fetch_tags():
    """Fill the module-level ``tags`` dict with tag id (as string) -> slug.

    Reads the ``tags`` table through the module-level cursor ``c``.
    """
    c.execute("""SELECT id, slug FROM tags ORDER BY id""")
    tags.update((str(tag_id), slug) for tag_id, slug in c.fetchall())


# (garbled, repaired) pairs applied in order by clean_text().  A sequence must
# be listed before any shorter sequence that is a prefix of it; otherwise the
# shorter one rewrites the text first and the longer one can never match.
_MOJIBAKE_FIXES = (
    ("â€™", "’"),
    ("â€˜", "‘"),
    ("â€“", "–"),
    ("â€”", "–"),
    ("â€•", "—"),
    ("â€œ", "\""),
    ("â€ž", "„"),
    ("â€¦", "…"),
    ("â–ˆ", "█"),
    ("â–‹", "▋"),
    ("â–Ž", "▎"),
    ("â–Š", "▊"),
    ("â–‰", "▉"),
    ("âœ…", "✅"),
    # In UTF-8-read-as-Windows-1252 text the third byte of these three
    # (0x9D, 0x8D, 0x8F) has no printable glyph, so it is written as an
    # escape instead of relying on an invisible character in this file.
    # NOTE(review): presumably these are the sequences the bare prefixes
    # below were meant to be -- confirm against real data.
    ("â€\x9d", "\""),
    ("â–\x8d", "▍"),
    ("â–\x8f", "▏"),
    # Bare two-character prefixes, kept as a last resort for text whose
    # third character was lost; they map to what they mapped to before.
    ("â€", "\""),
    ("â–", "▍"),
    ("Â ", ""),
    ("Ã¤", "ä"),
    ("Ã¶", "ö"),
    ("Ã¼", "ü"),
    ("Ã©", "é"),
)


def clean_text(txt):
    """Fix codepage weirdness for all the Unicode characters we used.

    Applies every (garbled, repaired) pair of ``_MOJIBAKE_FIXES`` to ``txt``
    in table order and returns the result. Text that contains none of the
    garbled sequences is returned unchanged.
    """
    for garbled, repaired in _MOJIBAKE_FIXES:
        txt = txt.replace(garbled, repaired)
    return txt


def fetch_posts():
    """Write every published post to ``_posts/<date>-<slug>.md``.

    Each file starts with YAML front matter (title, date, categories, author)
    followed by the post's plaintext, with image paths rewritten from
    ``/content/images/`` to ``/assets/``. Title and body are passed through
    ``clean_text``.

    Relies on the module-level cursors ``c`` and ``c2`` and on ``tags`` and
    ``authors`` having been filled by ``fetch_tags`` and ``fetch_authors``.

    Raises:
        KeyError: if a post references an author id or tag id that is missing
            from ``authors`` / ``tags``.
    """
    c.execute("""SELECT
        id, title, slug, plaintext, meta_title, meta_description,
        published_at, updated_at, author_id
        FROM posts WHERE status='published' ORDER BY id""")

    for entry in c.fetchall():
        post_id, title, slug, plaintext = entry[0:4]
        published_at, author_id = entry[6], entry[8]

        # File name is "<YYYY-MM-DD>-<slug>.md": first ten chars of the timestamp.
        path = "_posts/" + str(published_at)[0:10] + "-" + slug + ".md"

        c2.execute("SELECT tag_id FROM posts_tags WHERE post_id=%s", (post_id,))
        mytags = [tags[str(row[0])] for row in c2.fetchall()]

        # frontmatter
        frontmatter = {
            "title": clean_text(title),
            # NOTE(review): a fixed +0000 offset is appended; presumably
            # published_at is stored as UTC -- confirm.
            "date": str(published_at) + " +0000",
            "categories": mytags,
            "author": authors[str(author_id)],
        }
        md = "---\n" + yaml.dump(frontmatter) + "---\n\n"

        ### text cleanup
        # A NULL plaintext column comes back as None; treat it as an empty
        # body instead of failing on .replace().
        text = plaintext or ""
        text = text.replace("/content/images/", "/assets/")
        md += clean_text(text)

        # Write-only: the file is never read back, so plain "w" is enough.
        with open(path, "w") as markdownfile:
            markdownfile.write(md)

if __name__ == "__main__":
    # DB configuration: adjust host, credentials and schema name here.
    db = MySQLdb.connect(host="localhost", user="ghost", passwd="password", db="ghost")
    try:
        # Module-level cursors used by the fetch_* functions: c runs the main
        # queries, c2 is used by fetch_posts for the per-post tag lookup.
        c = db.cursor()
        c2 = db.cursor()

        c.execute("SET NAMES utf8")
        c.execute("SET CHARSET utf8")

        # Tags and authors first: fetch_posts resolves ids through both maps.
        fetch_tags()
        fetch_authors()
        fetch_posts()
    finally:
        # Release the connection even when one of the import steps fails.
        db.close()

## requirements.txt
MySQL-python==1.2.5
PyYAML==3.12
	# coding: utf8

	"""
	This script helps to import content from a Ghost blog database to Jekyll.

	The database is expected to be running on a reachable MySQL host.
	See the very end for DB configuration.

	Quick Usage:
	pip install -r requirements.txt
	python import.py

	Posts will be written into _posts/<filename>

	Authors will be listed to standard output.

	"""

	import MySQLdb
	import yaml

	# tag id (as a string) -> tag slug; filled by fetch_tags()
	tags = {}
	# user id (as a string) -> author slug; filled by fetch_authors()
	authors = {}

	def fetch_authors():
		"""Load all users and print them as an ``authors`` block for _config.yml.

		Reads the ``users`` table through the module-level cursor ``c``, records
		each user in the module-level ``authors`` dict (id as string -> slug) and
		prints a YAML mapping, keyed by slug, holding the user's name, bio,
		website, location and email.
		"""
		c.execute("""SELECT
	id, name, slug, bio, website, location, email
	FROM users ORDER BY id""")

		profiles = {}
		for user_id, name, slug, bio, website, location, email in c.fetchall():
			# Posts reference authors by numeric id; the lookup key is the id as text.
			authors[str(user_id)] = slug
			profiles[slug] = {
				"name": name,
				"bio": bio,
				"website": website,
				"location": location,
				"email": email,
			}

		print("Paste this part into your '_config.yml':")
		print(yaml.dump({"authors": profiles}, default_flow_style=False))


	def fetch_tags():
		"""Fill the module-level ``tags`` dict with tag id (as string) -> slug.

		Reads the ``tags`` table through the module-level cursor ``c``.
		"""
		c.execute("""SELECT id, slug FROM tags ORDER BY id""")
		for tag_id, slug in c.fetchall():
			tags[str(tag_id)] = slug


	# (garbled, repaired) pairs applied in order by clean_text().  A sequence must
	# be listed before any shorter sequence that is a prefix of it; otherwise the
	# shorter one rewrites the text first and the longer one can never match.
	_MOJIBAKE_FIXES = (
		("â€™", "’"),
		("â€˜", "‘"),
		("â€“", "–"),
		("â€”", "–"),
		("â€•", "—"),
		("â€œ", "\""),
		("â€ž", "„"),
		("â€¦", "…"),
		("â–ˆ", "█"),
		("â–‹", "▋"),
		("â–Ž", "▎"),
		("â–Š", "▊"),
		("â–‰", "▉"),
		("âœ…", "✅"),
		# In UTF-8-read-as-Windows-1252 text the third byte of these three
		# (0x9D, 0x8D, 0x8F) has no printable glyph, so it is written as an
		# escape instead of relying on an invisible character in this file.
		# NOTE(review): presumably these are the sequences the bare prefixes
		# below were meant to be -- confirm against real data.
		("â€\x9d", "\""),
		("â–\x8d", "▍"),
		("â–\x8f", "▏"),
		# Bare two-character prefixes, kept as a last resort for text whose
		# third character was lost; they map to what they mapped to before.
		("â€", "\""),
		("â–", "▍"),
		("Â ", ""),
		("Ã¤", "ä"),
		("Ã¶", "ö"),
		("Ã¼", "ü"),
		("Ã©", "é"),
	)


	def clean_text(txt):
		"""Fix codepage weirdness for all the Unicode characters we used.

		Applies every (garbled, repaired) pair of ``_MOJIBAKE_FIXES`` to ``txt``
		in table order and returns the result. Text that contains none of the
		garbled sequences is returned unchanged.
		"""
		for garbled, repaired in _MOJIBAKE_FIXES:
			txt = txt.replace(garbled, repaired)
		return txt


	def fetch_posts():
		"""Write every published post to ``_posts/<date>-<slug>.md``.

		Each file starts with YAML front matter (title, date, categories, author)
		followed by the post's plaintext, with image paths rewritten from
		``/content/images/`` to ``/assets/``. Title and body are passed through
		``clean_text``.

		Relies on the module-level cursors ``c`` and ``c2`` and on ``tags`` and
		``authors`` having been filled by ``fetch_tags`` and ``fetch_authors``.

		Raises:
			KeyError: if a post references an author id or tag id that is missing
				from ``authors`` / ``tags``.
		"""
		c.execute("""SELECT
	id, title, slug, plaintext, meta_title, meta_description,
	published_at, updated_at, author_id
	FROM posts WHERE status='published' ORDER BY id""")

		for entry in c.fetchall():
			post_id, title, slug, plaintext = entry[0:4]
			published_at, author_id = entry[6], entry[8]

			# File name is "<YYYY-MM-DD>-<slug>.md": first ten chars of the timestamp.
			path = "_posts/" + str(published_at)[0:10] + "-" + slug + ".md"

			c2.execute("SELECT tag_id FROM posts_tags WHERE post_id=%s", (post_id,))
			mytags = [tags[str(row[0])] for row in c2.fetchall()]

			# frontmatter
			frontmatter = {
				"title": clean_text(title),
				# NOTE(review): a fixed +0000 offset is appended; presumably
				# published_at is stored as UTC -- confirm.
				"date": str(published_at) + " +0000",
				"categories": mytags,
				"author": authors[str(author_id)],
			}
			md = "---\n" + yaml.dump(frontmatter) + "---\n\n"

			### text cleanup
			# A NULL plaintext column comes back as None; treat it as an empty
			# body instead of failing on .replace().
			text = plaintext or ""
			text = text.replace("/content/images/", "/assets/")
			md += clean_text(text)

			# Write-only: the file is never read back, so plain "w" is enough.
			with open(path, "w") as markdownfile:
				markdownfile.write(md)


	if __name__ == "__main__":
		# DB configuration: adjust host, credentials and schema name here.
		db = MySQLdb.connect(host="localhost", user="ghost", passwd="password", db="ghost")
		try:
			# Module-level cursors used by the fetch_* functions: c runs the main
			# queries, c2 is used by fetch_posts for the per-post tag lookup.
			c = db.cursor()
			c2 = db.cursor()

			c.execute("SET NAMES utf8")
			c.execute("SET CHARSET utf8")

			# Tags and authors first: fetch_posts resolves ids through both maps.
			fetch_tags()
			fetch_authors()
			fetch_posts()
		finally:
			# Release the connection even when one of the import steps fails.
			db.close()