# imdanielch/converter.py

## converter.py
#!/usr/bin/env python3

'''
Ghost to Gatsby-netlify-cms converter
Will generate blog and pages directory with markdown files
'''
import os
import json
import argparse
# Command-line interface: positional input/output paths plus an optional
# choice of which timestamp field feeds the generated 'date' entry.
parser = argparse.ArgumentParser()
parser.add_argument("input", metavar="<input json file>", help="path to input file (*.json)")
parser.add_argument("output_dir", metavar="<output directory>", help="path to output directory")
parser.add_argument(
    "--date",
    metavar="<reference date>",
    # Reject unknown field names up front instead of failing with a KeyError
    # once the first post is processed.
    choices=("created_at", "updated_at", "published_at"),
    default="updated_at",
    help=(
        "Which field to use when filling 'date' "
        "(created_at, updated_at, published_at) default: updated_at"
    ),
)
args = parser.parse_args()
cwd = os.getcwd()
# os.path.join returns an absolute second argument unchanged, so both relative
# and absolute paths are accepted on the command line.
json_file_path = os.path.join(cwd, args.input)
output_directory = os.path.join(cwd, args.output_dir)

# config
date = args.date
templateKey = 'blog-post'

if not os.path.isfile(json_file_path):
    # SystemExit with a message prints it to stderr and exits with status 1,
    # so a calling shell can detect the failure (quit() exited with 0).
    raise SystemExit('invalid input file, must be ghost-blog exported json')

# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
# relying on the platform's default locale encoding.
with open(json_file_path, "r", encoding="utf-8") as f:
    filedata = f.read()

try:
    jsondata = json.loads(filedata)
except ValueError:
    # json.JSONDecodeError is a ValueError subclass; report malformed input
    # with the same message as a missing file rather than a raw traceback.
    raise SystemExit('invalid input file, must be ghost-blog exported json')

# The converter reads the three tables below from the first "db" entry.
json_posts = jsondata["db"][0]["data"]["posts"]
json_posts_tags = jsondata["db"][0]["data"]["posts_tags"]
json_tags = jsondata["db"][0]["data"]["tags"]


def populate_header(data):
    """populate_header
    generate the YAML front matter block placed at the top of a markdown file

    Reads 'status', 'title', 'featured', 'image' and 'id' from data, plus the
    field named by the module-level ``date`` setting.
    :param data: post data object
    :return: header string, opened and closed by a '---' line
    """
    # A null date in the export would make the concatenation below raise
    # TypeError; fall back to the other timestamps, then to an empty value.
    post_date = data[date]
    if post_date is None:
        post_date = data.get('updated_at') or data.get('created_at') or ''

    image = data['image']
    parts = [
        '---\n',
        "templateKey: " + templateKey + "\n",
        "status: " + data['status'] + "\n",
        # json.dumps produces a double-quoted, escaped scalar, which YAML
        # accepts; a bare title containing ': ' or '#' would otherwise corrupt
        # the front matter.
        "title: " + json.dumps(data['title'], ensure_ascii=False) + '\n',
        "date: " + post_date + '\n',
        "featuredpost: {}\n".format('true' if data['featured'] else 'false'),
        "featuredimage: {}\n".format(str(image) if image is not None else ""),
        "description: \n",
        "tags:\n",
        # get_tags returns ready-made list item lines (or '' when untagged)
        get_tags(data['id']),
        '---\n',
    ]
    return ''.join(parts)


def gen_post_file_name(data, date_field=None):
    """gen_post_file_name
    create file name by concatenating the date part of a timestamp and the post
    slug, and appending the markdown file ending
    :param data: post data object to pull information from
    :param date_field: key of the timestamp to use; defaults to the
        module-level ``date`` setting
    :return: file name string, '<date>_<slug>.md'
    """
    if date_field is None:
        date_field = date
    stamp = data[date_field]
    if stamp is None:
        # A null timestamp has no .split(); fall back to the other
        # timestamps, then to an empty date part.
        stamp = data.get('updated_at') or data.get('created_at') or ''
    # keep only what precedes the 'T' separator of the timestamp
    return "{}_{}.md".format(stamp.split('T')[0], data['slug'])


def get_tags(post_id, posts_tags=None, tags=None):
    """get_tags
    loop through the post/tag pairs to find the tag ids associated with the
    post and render the matching tag slugs as YAML list items
    :param post_id: id of the post, compared by value with each pair's 'post_id'
    :param posts_tags: iterable of {'post_id', 'tag_id'} pairs; defaults to
        the module-level json_posts_tags
    :param tags: iterable of {'id', 'slug'} tag objects; defaults to the
        module-level json_tags
    :return: string with one '  - <slug>' line per tag, '' when none match
    """
    if posts_tags is None:
        posts_tags = json_posts_tags
    if tags is None:
        tags = json_tags

    # Index tags by id once; setdefault keeps the first tag for a duplicated
    # id, matching a first-match linear search.
    tag_by_id = {}
    for tag in tags:
        tag_by_id.setdefault(tag['id'], tag)

    lines = []
    for pair in posts_tags:
        # Compare by value: `is` tests object identity, which fails for string
        # ids and for integers outside the interpreter's small-int cache.
        if pair['post_id'] == post_id and pair['tag_id'] in tag_by_id:
            lines.append("  - {}\n".format(tag_by_id[pair['tag_id']]['slug']))
    return ''.join(lines)


def main():
    """main
    write one markdown file (front matter header + markdown body) per post

    Pages are written to <output>/pages; every other post goes to
    <output>/blogs/published or <output>/blogs/unpublished depending on
    whether its status is 'published'.
    """
    pages_dir = os.path.join(output_directory, 'pages')
    published_dir = os.path.join(output_directory, 'blogs', 'published')
    unpublished_dir = os.path.join(output_directory, 'blogs', 'unpublished')

    for post in json_posts:
        print('processing: {}'.format(post['title']))
        # pull data, populate file data to string based on template;
        # a null body is written as an empty one
        content = populate_header(post) + (post['markdown'] or '')
        # get post file name
        filename = gen_post_file_name(post)
        # check if it's a page or a blog post
        if post['page']:
            os.makedirs(pages_dir, exist_ok=True)
            target_dir = pages_dir
        else:
            # exist_ok handles each directory independently; with a shared
            # try/except an already existing 'published' directory prevented
            # 'unpublished' from ever being created.
            os.makedirs(published_dir, exist_ok=True)
            os.makedirs(unpublished_dir, exist_ok=True)
            if post['status'] == 'published':
                target_dir = published_dir
            else:
                target_dir = unpublished_dir
        # Write UTF-8 explicitly so non-ASCII posts do not depend on the
        # platform's default encoding.
        with open(os.path.join(target_dir, filename), 'w', encoding='utf-8') as f:
            f.write(content)


if __name__ == "__main__":
    main()
	#!/usr/bin/env python3

	'''
	Ghost to Gatsby-netlify-cms converter
	Will generate blog and pages directory with markdown files
	'''
	import os
	import json
	import argparse
	# Command-line interface: positional input/output paths plus an optional
	# choice of which timestamp field feeds the generated 'date' entry.
	parser = argparse.ArgumentParser()
	parser.add_argument("input", metavar="<input json file>", help="path to input file (*.json)")
	parser.add_argument("output_dir", metavar="<output directory>", help="path to output directory")
	parser.add_argument(
	    "--date",
	    metavar="<reference date>",
	    # Reject unknown field names up front instead of failing with a
	    # KeyError once the first post is processed.
	    choices=("created_at", "updated_at", "published_at"),
	    default="updated_at",
	    help=(
	        "Which field to use when filling 'date' "
	        "(created_at, updated_at, published_at) default: updated_at"
	    ),
	)
	args = parser.parse_args()
	cwd = os.getcwd()
	# os.path.join returns an absolute second argument unchanged, so both
	# relative and absolute paths are accepted on the command line.
	json_file_path = os.path.join(cwd, args.input)
	output_directory = os.path.join(cwd, args.output_dir)

	# config
	date = args.date
	templateKey = 'blog-post'

	if not os.path.isfile(json_file_path):
	    # SystemExit with a message prints it to stderr and exits with
	    # status 1, so a calling shell can detect the failure.
	    raise SystemExit('invalid input file, must be ghost-blog exported json')

	# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
	# relying on the platform's default locale encoding.
	with open(json_file_path, "r", encoding="utf-8") as f:
	    filedata = f.read()

	try:
	    jsondata = json.loads(filedata)
	except ValueError:
	    # json.JSONDecodeError is a ValueError subclass; report malformed
	    # input with the same message as a missing file.
	    raise SystemExit('invalid input file, must be ghost-blog exported json')

	# The converter reads the three tables below from the first "db" entry.
	json_posts = jsondata["db"][0]["data"]["posts"]
	json_posts_tags = jsondata["db"][0]["data"]["posts_tags"]
	json_tags = jsondata["db"][0]["data"]["tags"]


	def populate_header(data):
	    """populate_header
	    generate the YAML front matter block placed at the top of a markdown file

	    Reads 'status', 'title', 'featured', 'image' and 'id' from data, plus
	    the field named by the module-level ``date`` setting.
	    :param data: post data object
	    :return: header string, opened and closed by a '---' line
	    """
	    # A null date in the export would make the concatenation below raise
	    # TypeError; fall back to the other timestamps, then to an empty value.
	    post_date = data[date]
	    if post_date is None:
	        post_date = data.get('updated_at') or data.get('created_at') or ''

	    image = data['image']
	    parts = [
	        '---\n',
	        "templateKey: " + templateKey + "\n",
	        "status: " + data['status'] + "\n",
	        # json.dumps produces a double-quoted, escaped scalar, which YAML
	        # accepts; a bare title containing ': ' or '#' would otherwise
	        # corrupt the front matter.
	        "title: " + json.dumps(data['title'], ensure_ascii=False) + '\n',
	        "date: " + post_date + '\n',
	        "featuredpost: {}\n".format('true' if data['featured'] else 'false'),
	        "featuredimage: {}\n".format(str(image) if image is not None else ""),
	        "description: \n",
	        "tags:\n",
	        # get_tags returns ready-made list item lines (or '' when untagged)
	        get_tags(data['id']),
	        '---\n',
	    ]
	    return ''.join(parts)


	def gen_post_file_name(data, date_field=None):
	    """gen_post_file_name
	    create file name by concatenating the date part of a timestamp and the
	    post slug, and appending the markdown file ending
	    :param data: post data object to pull information from
	    :param date_field: key of the timestamp to use; defaults to the
	        module-level ``date`` setting
	    :return: file name string, '<date>_<slug>.md'
	    """
	    if date_field is None:
	        date_field = date
	    stamp = data[date_field]
	    if stamp is None:
	        # A null timestamp has no .split(); fall back to the other
	        # timestamps, then to an empty date part.
	        stamp = data.get('updated_at') or data.get('created_at') or ''
	    # keep only what precedes the 'T' separator of the timestamp
	    return "{}_{}.md".format(stamp.split('T')[0], data['slug'])


	def get_tags(post_id, posts_tags=None, tags=None):
	    """get_tags
	    loop through the post/tag pairs to find the tag ids associated with the
	    post and render the matching tag slugs as YAML list items
	    :param post_id: id of the post, compared by value with each pair's 'post_id'
	    :param posts_tags: iterable of {'post_id', 'tag_id'} pairs; defaults to
	        the module-level json_posts_tags
	    :param tags: iterable of {'id', 'slug'} tag objects; defaults to the
	        module-level json_tags
	    :return: string with one ' - <slug>' line per tag, '' when none match
	    """
	    if posts_tags is None:
	        posts_tags = json_posts_tags
	    if tags is None:
	        tags = json_tags

	    # Index tags by id once; setdefault keeps the first tag for a
	    # duplicated id, matching a first-match linear search.
	    tag_by_id = {}
	    for tag in tags:
	        tag_by_id.setdefault(tag['id'], tag)

	    lines = []
	    for pair in posts_tags:
	        # Compare by value: `is` tests object identity, which fails for
	        # string ids and for integers outside the small-int cache.
	        if pair['post_id'] == post_id and pair['tag_id'] in tag_by_id:
	            lines.append(" - {}\n".format(tag_by_id[pair['tag_id']]['slug']))
	    return ''.join(lines)


	def main():
	    """main
	    write one markdown file (front matter header + markdown body) per post

	    Pages are written to <output>/pages; every other post goes to
	    <output>/blogs/published or <output>/blogs/unpublished depending on
	    whether its status is 'published'.
	    """
	    pages_dir = os.path.join(output_directory, 'pages')
	    published_dir = os.path.join(output_directory, 'blogs', 'published')
	    unpublished_dir = os.path.join(output_directory, 'blogs', 'unpublished')

	    for post in json_posts:
	        print('processing: {}'.format(post['title']))
	        # pull data, populate file data to string based on template;
	        # a null body is written as an empty one
	        content = populate_header(post) + (post['markdown'] or '')
	        # get post file name
	        filename = gen_post_file_name(post)
	        # check if it's a page or a blog post
	        if post['page']:
	            os.makedirs(pages_dir, exist_ok=True)
	            target_dir = pages_dir
	        else:
	            # exist_ok handles each directory independently; with a shared
	            # try/except an already existing 'published' directory
	            # prevented 'unpublished' from ever being created.
	            os.makedirs(published_dir, exist_ok=True)
	            os.makedirs(unpublished_dir, exist_ok=True)
	            if post['status'] == 'published':
	                target_dir = published_dir
	            else:
	                target_dir = unpublished_dir
	        # Write UTF-8 explicitly so non-ASCII posts do not depend on the
	        # platform's default encoding.
	        with open(os.path.join(target_dir, filename), 'w', encoding='utf-8') as f:
	            f.write(content)


	if __name__ == "__main__":
	    main()