Created
September 20, 2017 08:56
-
-
Save jinie/3718564d90a303276b8ffe5872d26653 to your computer and use it in GitHub Desktop.
Convert a Ghost v1.x JSON export file to Hugo posts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import json | |
import re | |
import argparse | |
import sys | |
import os | |
def read_file(filename):
    """Load a Ghost JSON export from disk.

    Args:
        filename: Path to the Ghost ``.json`` export file.

    Returns:
        The decoded JSON document (the file's top-level JSON value).

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the content is not valid UTF-8 or not valid JSON.
    """
    # Decode explicitly instead of relying on the platform default encoding,
    # which is not UTF-8 everywhere.  'utf-8-sig' reads plain UTF-8 unchanged
    # and also strips a leading byte-order mark, which json would reject.
    with open(filename, 'rt', encoding='utf-8-sig') as f:
        return json.load(f)
def parse_args():
    """Parse the script's command line.

    Returns:
        The ``argparse.Namespace`` produced by the parser, carrying the two
        positional values as ``INPUT_FILE`` and ``OUTPUT_DIR``.
    """
    parser = argparse.ArgumentParser()
    positionals = (
        ('INPUT_FILE', "Input Ghost .json file"),
        ('OUTPUT_DIR', "Output Directory"),
    )
    for dest, description in positionals:
        parser.add_argument(dest, help=description, type=str, action='store')
    return parser.parse_args()
def get_tags(db, postid):
    """Return the names of the tags attached to one post.

    Args:
        db: Parsed Ghost export; reads ``db['db'][0]['data']['tags']`` and
            ``db['db'][0]['data']['posts_tags']``.
        postid: Value matched against each ``posts_tags`` row's ``post_id``.

    Returns:
        List of tag names, in the order the tags appear in the ``tags``
        table.  Empty when the post has no tags.

    Raises:
        KeyError: If an expected table or field is missing from the export.
    """
    data = db['db'][0]['data']
    # A set makes each membership test O(1) instead of rescanning a list of
    # tag ids for every row of the tags table.
    wanted = {
        link['tag_id']
        for link in data['posts_tags']
        if link['post_id'] == postid
    }
    return [tag['name'] for tag in data['tags'] if tag['id'] in wanted]
def get_author(db, author_id):
    """Look up a user's display name by id.

    Args:
        db: Parsed Ghost export; reads ``db['db'][0]['data']['users']``.
        author_id: Value compared against each user's ``id``.

    Returns:
        The ``name`` of the first user whose ``id`` equals ``author_id``,
        or ``None`` when no user matches.
    """
    users = db['db'][0]['data']['users']
    matching_names = (user['name'] for user in users if user['id'] == author_id)
    return next(matching_names, None)
# Inline Markdown link whose target is a site-absolute path:
#   group 1: "[text]("
#   group 2: the target, starting with a single "/" (not "//host/..." and
#            not "/images...")
#   group 3: the closing ")"
# The target is matched with [^)]* rather than a greedy ".*" so that each
# link on a line is matched separately instead of the first match running
# on to the last ")" of the line.
_SITE_LINK_RE = re.compile(r"(\[[^\]]*\]\()(/(?!/|images)[^)]*)(\))")


def fix_links(markdown):
    """Prefix site-absolute Markdown link targets with ``/post``.

    ``[text](/some-slug)`` becomes ``[text](/post/some-slug)``.  Targets
    starting with ``/images``, protocol-relative targets (``//host/...``)
    and targets that do not start with ``/`` are left untouched.

    Args:
        markdown: Markdown text to rewrite.

    Returns:
        The rewritten Markdown text.
    """
    return _SITE_LINK_RE.sub(r"\1/post\2\3", markdown)
def _yaml_quote(value):
    """Render ``value`` as a double-quoted scalar for YAML front matter.

    A JSON string literal is also a valid YAML double-quoted scalar, so
    characters such as ``:``, ``#`` and ``"`` cannot break the front matter.
    """
    return json.dumps(str(value), ensure_ascii=False)


def _post_markdown(post):
    """Return the post's Markdown body with image and link paths rewritten.

    The body is read from the first card of the post's ``mobiledoc`` JSON.
    Returns an empty string when that payload is missing or does not have
    the expected shape, so the post is still exported with front matter only.
    """
    try:
        doc = json.loads(post['mobiledoc'])
        raw = doc['cards'][0][1]['markdown']
        # Point image references at /images instead of /content/images.
        text = raw.replace('/content/images', '/images')
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # Missing key, null/invalid mobiledoc JSON, or an unexpected
        # structure: treat the post as having no body.
        return ''
    # Prefix site-absolute links with /post.
    return fix_links(text)


def get_posts(db):
    """Convert every post in a Ghost export to a Hugo Markdown document.

    Each document is YAML front matter (``title``, ``slug``, optional
    ``author``, ``lastmod``, ``date`` and ``draft``, then ``tags``) followed
    by a blank line and the post's Markdown body.  Title, author and tag
    names are emitted as double-quoted scalars so that punctuation in them
    cannot produce invalid YAML.

    Args:
        db: Parsed Ghost export; posts are read from
            ``db['db'][0]['data']['posts']``.

    Returns:
        Dict mapping each post's slug to its document text.  A later post
        with the same slug replaces an earlier one.

    Raises:
        KeyError: If a post lacks one of the fields read here.
    """
    posts = {}
    for post in db['db'][0]['data']['posts']:
        slug = post['slug']
        author = get_author(db, post['author_id'])
        updated = post['updated_at']
        published = post['published_at']
        tags = get_tags(db, post['id'])

        lines = [
            '---',
            'title: {}'.format(_yaml_quote(post['title'])),
            'slug: {}'.format(slug),
        ]
        # Optional fields are omitted entirely when empty.
        if author:
            lines.append('author: {}'.format(_yaml_quote(author)))
        if updated:
            lines.append('lastmod: {}'.format(updated))
        if published:
            lines.append('date: {}'.format(published))
        if post['status'] == 'draft':
            lines.append('draft: true')
        lines.append(
            'tags: [{}]'.format(', '.join(_yaml_quote(t) for t in tags))
        )
        lines.append('---')

        posts[slug] = '\n'.join(lines) + '\n\n' + _post_markdown(post)
    return posts
def write_posts(outdir, posts):
    """Write each converted post to ``<outdir>/<slug>.markdown``.

    Args:
        outdir: Directory to write into; created (with parents) if missing.
        posts: Dict mapping slug to document text.

    Raises:
        OSError: If the directory cannot be created or a file cannot be
            written.
    """
    # An empty outdir means "current directory"; makedirs('') would raise.
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    for slug, content in posts.items():
        path = os.path.join(outdir, "{}.markdown".format(slug))
        # Encode explicitly: the platform default encoding may be unable to
        # represent non-ASCII characters in the post text.
        with open(path, 'wt', encoding='utf-8') as out:
            out.write(content)
def main():
    """Run the conversion: parse arguments, load, convert, report, write."""
    options = parse_args()
    export = read_file(options.INPUT_FILE)
    converted = get_posts(export)
    # The count is reported before the files are written.
    print('Converted {} posts'.format(len(converted)))
    write_posts(options.OUTPUT_DIR, converted)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment