Skip to content

Instantly share code, notes, and snippets.

@mattbierner
Last active December 5, 2018 15:00
Show Gist options
  • Save mattbierner/91d90806fc6d3b414498 to your computer and use it in GitHub Desktop.
Save mattbierner/91d90806fc6d3b414498 to your computer and use it in GitHub Desktop.
Automatic migration of ghost images to Jekyll
<figure class="image">
{% comment %}
  Renders an image wrapped in a link to the same image, with an optional caption.
  Include parameters:
    file        - image file name; resolved under /content/<post>/, where <post>
                  is page.path with the leading '_posts/' removed and everything
                  from the first '.' onwards dropped.
    url         - image source used as-is when `file` is not given.
    description - used as the alt text and, when present, as the caption.
{% endcomment %}
{% capture image_src %}{% if include.file %}/content/{{ page.path | remove_first:'_posts/' | split:'.' | first }}/{{ include.file }}{% else %}{{ include.url }}{% endif %}{% endcapture %}
<a href="{{ image_src }}">
<img src="{{ image_src }}" alt="{{ include.description }}" />
</a>
{% if include.description %}
<figcaption>{{ include.description }}</figcaption>
{% endif %}
</figure>
"""
Second step of the Ghost to Jekyll conversion.
The instructions here: http://import.jekyllrb.com/docs/ghost/
cover how to port post content from Ghost to Jekyll, but they do not cover
migrating image assets uploaded to Ghost. This script handles downloading all
such images into Jekyll and converting the post image links to use the new,
local image files.
Example usage:
cd my_jekyll_blog
python migrate_ghost_images.py _posts --site http://my-super-blog.com
Writes all the images under a folder called `content`, with each post having its
own directory in `content`:
/content/2016-01-12-my-post/image1.png
The current ghost blog must be running for the image download part of this script to work.
`replace_image_tags_with_includes` rewrites all the markdown image tags to use the `image.html` template to render the images themselves.
"""
import argparse
import os
import re
import urllib
from distutils.dir_util import mkpath
# Matches any markdown image tag of the form `![description](target)`.
# Group 1 captures the description, group 2 captures the target.
ALL_IMAGE_RE = re.compile(r"\!\[([^\]]*)\]\(([^\)]+)\)")
# Same shape as ALL_IMAGE_RE, but only matches targets that start with '/'.
LOCAL_IMAGE_RE = re.compile(r"\!\[([^\]]*)\]\((/[^\)]+)\)")
def download_image(postname, path, output_dir):
    """Download an image to a file.

    Args:
        postname: Name of the post the image belongs to. Not used by the
            download itself; kept so existing callers keep working.
        path: Full URL of the image to fetch.
        output_dir: Directory to store the image in. The file is named after
            the last '/'-separated segment of ``path``.
    """
    try:
        # Python 3 moved urlretrieve into urllib.request.
        from urllib.request import urlretrieve
    except ImportError:
        # Python 2 exposes it directly on the urllib module.
        from urllib import urlretrieve

    output_file = path.rsplit('/', 1)[-1]
    output_path = os.path.join(output_dir, output_file)
    urlretrieve(path, output_path)
def replace_image_tags_with_includes(match):
    """Turn one matched markdown image tag into an `image.html` include.

    Args:
        match: Regex match whose group 1 is the image description and whose
            group 2 is the image target.

    Returns:
        The Liquid include tag, referencing only the final '/'-separated
        segment of the target as the file name.
    """
    description, target = match.group(1, 2)
    # Everything after the last '/' (or the whole target when there is none).
    file_name = target.rpartition('/')[2]
    template = '{{% include image.html file="{1}" description="{0}" %}}'
    return template.format(description, file_name)
def process_post(site, path, postname, post_content, local_only=False):
    """Download a post's images and rewrite its image tags.

    Args:
        site: Base URL of the running blog. It is prefixed to every image
            target that starts with '/'.
        path: Path of the post file. Not used here; kept for callers.
        postname: Name of the post; images are stored under
            ``content/<postname>``.
        post_content: Markdown text of the post.
        local_only: When true, only image tags whose target starts with '/'
            are downloaded and rewritten.

    Returns:
        The post content with every handled image tag replaced by an
        `image.html` include, or the content unchanged when no image tag
        matched.

    Raises:
        ValueError: If an image target starts with '/' but no ``site`` was
            supplied to resolve it against.
    """
    image_re = LOCAL_IMAGE_RE if local_only else ALL_IMAGE_RE
    matches = list(image_re.finditer(post_content))
    if not matches:
        return post_content

    # Only create the per-post directory once we know there is an image.
    output_dir = os.path.join('content', postname)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for image in matches:
        image_path = image.group(2)
        if image_path.startswith('/'):
            if not site:
                raise ValueError(
                    "post {0!r} references site-relative image {1!r} "
                    "but no site url was given".format(postname, image_path))
            image_path = site + image_path
        # Pass the post name explicitly rather than relying on a global.
        download_image(postname, image_path, output_dir)

    # Rewrite image includes
    return image_re.sub(replace_image_tags_with_includes, post_content)
def process_post_file(site, filename, path, local_only=False):
    """Rewrite one post file from `_posts` in place.

    Args:
        site: Base URL of the running blog, forwarded to `process_post`.
        filename: File name of the post; its extension is stripped to form
            the post name.
        path: Path of the post file to read and overwrite.
        local_only: Forwarded to `process_post`.
    """
    postname, _extension = os.path.splitext(filename)

    with open(path, 'r') as source:
        original = source.read()

    rewritten = process_post(site, path, postname, original,
                             local_only=local_only)

    # The file is always written back, even when nothing changed.
    with open(path, 'w') as target:
        target.write(rewritten)
# Only parse arguments and touch files when run as a script, so importing
# this module has no side effects.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Download images from ghost posts.')
    parser.add_argument('path', help='path to `_posts` directory to process')
    parser.add_argument('--site', dest='site',
                        help="Current url to running instance of blog. Used to grab site relative images.")
    parser.add_argument('--local_only', dest='local_only', action='store_true',
                        default=False,
                        help="Only download images stored at an absolute url for the blog?")
    args = parser.parse_args()

    for filename in os.listdir(args.path):
        post_path = os.path.join(args.path, filename)
        # Directories and other non-regular entries cannot be opened as posts.
        if not os.path.isfile(post_path):
            continue
        process_post_file(
            args.site,
            filename,
            post_path,
            local_only=args.local_only)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment