imdanielch/ghost-gatsby.py

## ghost-gatsby.py
#!/usr/bin/env python3

"""
Ghost to Gatsby-netlify-cms converter
Will take exported JSON from ghost blog v0.11.x and generate blog and pages
directory with markdown files for use in gatsby-netlify-cms
"""
import os
import json
import argparse

# Command-line handling and loading of the ghost export.
# Everything below runs at import time and defines the module-level settings
# (date, template_key, output_directory) and data (json_posts, json_posts_tags,
# json_tags) that the functions further down read.
parser = argparse.ArgumentParser()
parser.add_argument(
    "input", metavar="<input json file>", help="path to input file (*.json)"
)
parser.add_argument(
    "output_dir", metavar="<output directory>", help="path to output directory"
)
parser.add_argument(
    "--date",
    metavar="<reference date>",
    # Restricting the choices turns a mistyped field name into an immediate
    # usage error instead of a KeyError on the first post processed.
    choices=("created_at", "updated_at", "published_at"),
    default="updated_at",
    help="""
        Which field to use when filling 'date'
        (created_at, updated_at, published_at) default: updated_at
        """,
)
args = parser.parse_args()
cwd = os.getcwd()
# os.path.join leaves absolute arguments untouched, so both relative and
# absolute paths are accepted.
json_file_path = os.path.join(cwd, args.input)
output_directory = os.path.join(cwd, args.output_dir)

# name of the post field used for the front-matter date and file name prefix
date = args.date
template_key = "blog-post"

if not os.path.isfile(json_file_path):
    # SystemExit with a message prints it to stderr and exits with status 1,
    # so callers and shell scripts can detect the failure.
    raise SystemExit("invalid input file, must be ghost-blog exported json")

# Read with an explicit encoding rather than the platform default.
with open(json_file_path, "r", encoding="utf-8") as f:
    filedata = f.read()

try:
    jsondata = json.loads(filedata)
except ValueError as err:
    # json.JSONDecodeError is a ValueError subclass
    raise SystemExit(
        "invalid input file, must be ghost-blog exported json ({})".format(err)
    )

_export_data = jsondata["db"][0]["data"]
json_posts = _export_data["posts"]
json_posts_tags = _export_data["posts_tags"]
json_tags = _export_data["tags"]


def populate_header(data):
    """populate_header
    Build the YAML front-matter block placed at the top of a generated file.

    Reads the module-level ``template_key`` and ``date`` settings and calls
    ``get_tags`` to obtain the tag list for the post.

    :param data: post data object; the ``status``, ``title``, ``featured``,
        ``image`` and ``id`` keys are read, plus the key named by ``date``
    :return: header string, opened and closed by a ``---`` line
    """
    image = data["image"]
    lines = [
        "---",
        "templateKey: {}".format(template_key),
        "status: {}".format(data["status"]),
        # Titles are free text. Emitting them as a double-quoted scalar keeps
        # characters such as ':', '#' or quotes from breaking the YAML; a JSON
        # string literal is a valid YAML double-quoted scalar.
        "title: {}".format(json.dumps(data["title"], ensure_ascii=False)),
        "date: {}".format(data[date]),
        "featuredpost: {}".format("true" if data["featured"] else "false"),
        # a missing image leaves the value empty
        "featuredimage: {}".format(str(image) if image is not None else ""),
        "description:",
        "tags:",
    ]
    # get_tags returns zero or more complete lines, each newline-terminated
    return "\n".join(lines) + "\n" + get_tags(data["id"]) + "---\n"


def gen_post_file_name(data):
    """gen_post_file_name
    Build the markdown file name for a post: the part of the selected date
    field before the first "T", an underscore, the post slug, and ".md".

    The date field is chosen by the module-level ``date`` setting.

    :param data: post data object to pull the date field and slug from
    :return: file name string
    """
    day, _sep, _rest = data[date].partition("T")
    return "{day}_{slug}.md".format(day=day, slug=data["slug"])


def get_tags(post_id):
    """get_tags
    Collect the tags attached to a post as YAML list items.

    Walks the module-level ``json_posts_tags`` rows that belong to
    ``post_id`` and resolves each ``tag_id`` to its slug through
    ``json_tags``. Rows whose tag id has no entry in ``json_tags`` are
    skipped.

    :param post_id: id of the post
    :return: string with one "  - <slug>" line per tag, in ``json_posts_tags``
        order; empty string when the post has no tags
    """
    # setdefault keeps the first slug seen for an id, which is what a
    # front-to-back scan of json_tags would find.
    slug_by_id = {}
    for tag in json_tags:
        slug_by_id.setdefault(tag["id"], tag["slug"])

    # Ids are compared by value: `is` tests object identity, which only
    # happens to hold for small integers that CPython caches.
    return "".join(
        "  - {}\n".format(slug_by_id[pair["tag_id"]])
        for pair in json_posts_tags
        if pair["post_id"] == post_id and pair["tag_id"] in slug_by_id
    )


def main():
    """main
    Write one markdown file per post in the module-level ``json_posts``.

    Posts flagged as pages go to ``<output_directory>/pages``; all other
    posts go to ``<output_directory>/blogs/published`` when their status is
    "published" and to ``<output_directory>/blogs/unpublished`` otherwise.
    A file that already exists under the same name is overwritten.
    """
    pages_dir = os.path.join(output_directory, "pages")
    published_dir = os.path.join(output_directory, "blogs", "published")
    unpublished_dir = os.path.join(output_directory, "blogs", "unpublished")

    for post in json_posts:
        print("processing: {}".format(post["title"]))
        # Front matter followed by the post body; `or ""` keeps a null
        # markdown field from raising on concatenation.
        content = populate_header(post) + (post["markdown"] or "")
        filename = gen_post_file_name(post)

        if post["page"]:
            os.makedirs(pages_dir, exist_ok=True)
            target_dir = pages_dir
        else:
            # Each directory is created independently: with a shared
            # try/except, an already existing "published" directory would
            # prevent "unpublished" from ever being created.
            os.makedirs(published_dir, exist_ok=True)
            os.makedirs(unpublished_dir, exist_ok=True)
            if post["status"] == "published":
                target_dir = published_dir
            else:
                target_dir = unpublished_dir

        # explicit encoding so non-ASCII content does not depend on the
        # platform default
        with open(os.path.join(target_dir, filename), "w", encoding="utf-8") as f:
            f.write(content)


# Call main() only when this file is executed directly.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3

	"""
	Ghost to Gatsby-netlify-cms converter
	Will take exported JSON from ghost blog v0.11.x and generate blog and pages
	directory with markdown files for use in gatsby-netlify-cms
	"""
	import os
	import json
	import argparse

	# Command-line handling and loading of the ghost export.
	# Defines the settings (date, template_key, output_directory) and data
	# (json_posts, json_posts_tags, json_tags) that the functions below read.
	parser = argparse.ArgumentParser()
	parser.add_argument(
	    "input", metavar="<input json file>", help="path to input file (*.json)"
	)
	parser.add_argument(
	    "output_dir", metavar="<output directory>", help="path to output directory"
	)
	parser.add_argument(
	    "--date",
	    metavar="<reference date>",
	    # Restricting the choices turns a mistyped field name into an immediate
	    # usage error instead of a KeyError on the first post processed.
	    choices=("created_at", "updated_at", "published_at"),
	    default="updated_at",
	    help="""
	        Which field to use when filling 'date'
	        (created_at, updated_at, published_at) default: updated_at
	        """,
	)
	args = parser.parse_args()
	cwd = os.getcwd()
	# os.path.join leaves absolute arguments untouched, so both relative and
	# absolute paths are accepted.
	json_file_path = os.path.join(cwd, args.input)
	output_directory = os.path.join(cwd, args.output_dir)

	# name of the post field used for the front-matter date and file name prefix
	date = args.date
	template_key = "blog-post"

	if not os.path.isfile(json_file_path):
	    # SystemExit with a message prints it to stderr and exits with status 1.
	    raise SystemExit("invalid input file, must be ghost-blog exported json")

	# Read with an explicit encoding rather than the platform default.
	with open(json_file_path, "r", encoding="utf-8") as f:
	    filedata = f.read()

	try:
	    jsondata = json.loads(filedata)
	except ValueError as err:
	    # json.JSONDecodeError is a ValueError subclass
	    raise SystemExit(
	        "invalid input file, must be ghost-blog exported json ({})".format(err)
	    )

	_export_data = jsondata["db"][0]["data"]
	json_posts = _export_data["posts"]
	json_posts_tags = _export_data["posts_tags"]
	json_tags = _export_data["tags"]


	def populate_header(data):
	    """populate_header
	    Build the YAML front-matter block placed at the top of a generated file.

	    Reads the ``template_key`` and ``date`` settings and calls ``get_tags``
	    to obtain the tag list for the post.

	    :param data: post data object; the ``status``, ``title``, ``featured``,
	        ``image`` and ``id`` keys are read, plus the key named by ``date``
	    :return: header string, opened and closed by a ``---`` line
	    """
	    image = data["image"]
	    lines = [
	        "---",
	        "templateKey: {}".format(template_key),
	        "status: {}".format(data["status"]),
	        # Titles are free text. Emitting them as a double-quoted scalar keeps
	        # characters such as ':', '#' or quotes from breaking the YAML.
	        "title: {}".format(json.dumps(data["title"], ensure_ascii=False)),
	        "date: {}".format(data[date]),
	        "featuredpost: {}".format("true" if data["featured"] else "false"),
	        # a missing image leaves the value empty
	        "featuredimage: {}".format(str(image) if image is not None else ""),
	        "description:",
	        "tags:",
	    ]
	    # get_tags returns zero or more complete lines, each newline-terminated
	    return "\n".join(lines) + "\n" + get_tags(data["id"]) + "---\n"


	def gen_post_file_name(data):
	    """gen_post_file_name
	    Build the markdown file name for a post: the part of the selected date
	    field before the first "T", an underscore, the post slug, and ".md".

	    The date field is chosen by the ``date`` setting.

	    :param data: post data object to pull the date field and slug from
	    :return: file name string
	    """
	    return "{}_{}.md".format(data[date].split("T")[0], data["slug"])


	def get_tags(post_id):
	    """get_tags
	    Collect the tags attached to a post as YAML list items.

	    Walks the ``json_posts_tags`` rows that belong to ``post_id`` and
	    resolves each ``tag_id`` to its slug through ``json_tags``. Rows whose
	    tag id has no entry in ``json_tags`` are skipped.

	    :param post_id: id of the post
	    :return: string with one " - <slug>" line per tag, in ``json_posts_tags``
	        order; empty string when the post has no tags
	    """
	    # setdefault keeps the first slug seen for an id, which is what a
	    # front-to-back scan of json_tags would find.
	    slug_by_id = {}
	    for tag in json_tags:
	        slug_by_id.setdefault(tag["id"], tag["slug"])

	    # Ids are compared by value: `is` tests object identity, which only
	    # happens to hold for small integers that CPython caches.
	    return "".join(
	        " - {}\n".format(slug_by_id[pair["tag_id"]])
	        for pair in json_posts_tags
	        if pair["post_id"] == post_id and pair["tag_id"] in slug_by_id
	    )


	def main():
	    """main
	    Write one markdown file per post in ``json_posts``.

	    Posts flagged as pages go to ``<output_directory>/pages``; all other
	    posts go to ``<output_directory>/blogs/published`` when their status is
	    "published" and to ``<output_directory>/blogs/unpublished`` otherwise.
	    A file that already exists under the same name is overwritten.
	    """
	    pages_dir = os.path.join(output_directory, "pages")
	    published_dir = os.path.join(output_directory, "blogs", "published")
	    unpublished_dir = os.path.join(output_directory, "blogs", "unpublished")

	    for post in json_posts:
	        print("processing: {}".format(post["title"]))
	        # Front matter followed by the post body; `or ""` keeps a null
	        # markdown field from raising on concatenation.
	        content = populate_header(post) + (post["markdown"] or "")
	        filename = gen_post_file_name(post)

	        if post["page"]:
	            os.makedirs(pages_dir, exist_ok=True)
	            target_dir = pages_dir
	        else:
	            # Each directory is created independently: with a shared
	            # try/except, an already existing "published" directory would
	            # prevent "unpublished" from ever being created.
	            os.makedirs(published_dir, exist_ok=True)
	            os.makedirs(unpublished_dir, exist_ok=True)
	            if post["status"] == "published":
	                target_dir = published_dir
	            else:
	                target_dir = unpublished_dir

	        # explicit encoding so non-ASCII content does not depend on the
	        # platform default
	        with open(os.path.join(target_dir, filename), "w", encoding="utf-8") as f:
	            f.write(content)


	# Call main() only when this file is executed directly.
	if __name__ == "__main__":
	    main()