Skip to content

Instantly share code, notes, and snippets.

@crepererum
Created March 3, 2019 13:36
Show Gist options
  • Save crepererum/137de687af0f9d8301e907e735d2f516 to your computer and use it in GitHub Desktop.
from collections import defaultdict
import gzip
import json
import os
import os.path
import urllib.request
# Path of the JSON export that the script reads.
EXPORT_FILE = "crepererum-net.ghost.2019-02-23.json"

# Directory below which one sub-directory per post slug is created.
OUT_PATH = "out"

# Prefix put in front of image paths to build their download URL.
SITE_URL = "https://crepererum.net"

# Page skeleton filled in through ``str.format`` with the keys ``title``,
# ``date``, ``tags`` and ``markdown``; the front matter sits between the two
# ``+++`` lines and the post body follows it.
TEMPLATE = "\n".join((
    '+++',
    'title = "{title}"',
    'date = {date}',
    '[taxonomies]',
    'categories = ["blog"]',
    'tags = [{tags}]',
    '+++',
    '{markdown}',
    '',
))
def get_markdown(post):
    """Return the Markdown source stored in a post, or ``None``.

    The post's ``mobiledoc`` field is parsed as JSON. Only documents that
    consist of exactly one ``card-markdown`` card are supported; for every
    other layout ``None`` is returned so the caller can skip the post.

    Args:
        post: Post record; ``post['mobiledoc']`` must be a JSON string whose
            top-level object has a ``'cards'`` list.

    Returns:
        The ``'markdown'`` entry of the single card's payload, or ``None``
        when the document is not a single well-formed markdown card.
    """
    mobiledoc = json.loads(post['mobiledoc'])
    cards = mobiledoc['cards']
    if len(cards) != 1:
        return None

    card = cards[0]
    # A card is a ``[type, payload]`` pair. Validate explicitly rather than
    # with ``assert`` (stripped under ``python -O``) so that a single
    # unsupported post is skipped instead of aborting the whole conversion.
    if len(card) != 2:
        return None
    ctype, cdata = card
    if ctype != 'card-markdown':
        return None
    return cdata['markdown']
def download_uncompressed(url, target):
    """Download ``url`` and store its (decompressed) body at ``target``.

    Args:
        url: URL handed to ``urllib.request.urlopen``.
        target: Path of the file to write; it is overwritten if it exists.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If ``target`` cannot be written.
    """
    # Use the response as a context manager so it is closed as soon as the
    # body has been read, instead of lingering until garbage collection.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    # A body starting with the gzip magic bytes (1f 8b) is decompressed so
    # the stored file holds the plain content.
    if data.startswith(b'\x1f\x8b'):
        data = gzip.decompress(data)

    with open(target, 'wb') as fp:
        fp.write(data)
def fetch_images(md, basedir):
    """Download the images referenced in ``md`` and rewrite their links.

    Every occurrence of a path starting with ``/content/images`` is
    downloaded from ``SITE_URL`` into ``basedir`` and replaced in the text by
    its bare file name, so the Markdown points at the local copy.

    Args:
        md: Markdown text of a post.
        basedir: Existing directory that receives the downloaded files.

    Returns:
        The Markdown text with all matched image paths replaced by their
        file names.
    """
    marker = '/content/images'
    terminators = ')"'
    search_from = 0
    while True:
        # Continue after the previous replacement instead of rescanning the
        # whole text; a replaced file name cannot contain the marker.
        start = md.find(marker, search_from)
        if start == -1:
            break

        # The path ends at the first ')' or '"', at whitespace (e.g. before
        # a Markdown image title), or at the end of the text. Falling back
        # to the end of the text avoids failing when no terminator follows.
        end = start
        while (
            end < len(md)
            and md[end] not in terminators
            and not md[end].isspace()
        ):
            end += 1

        path = md[start:end]
        basename = os.path.basename(path)
        outpath = os.path.join(basedir, basename)
        url = SITE_URL + path
        print(f' fetch {url}')
        download_uncompressed(url, outpath)

        md = md[:start] + basename + md[end:]
        search_from = start + len(basename)
    return md
def get_date(post):
    """Return the part of ``post['published_at']`` before the first space.

    For a timestamp written as ``<date> <time>`` this is the date; a value
    without a space is returned unchanged.

    Args:
        post: Post record with a string ``'published_at'`` entry.

    Returns:
        The leading, space-free part of the timestamp string.
    """
    published_at = post['published_at']
    return published_at.partition(' ')[0]
def process_post(post, tag_dict):
slug = post['slug']
print(f'converting {slug}:')
status = post['status']
if status != 'published':
print(' skipping (not published)')
return
pid = post['id']
tags = tag_dict.get(pid, [])
title = post['title']
md = get_markdown(post)
if md is None:
print('skipped (not markdown)')
return
date = get_date(post)
basedir = os.path.join(
OUT_PATH,
slug,
)
try:
os.makedirs(basedir)
except FileExistsError:
pass
md = fetch_images(md, basedir)
index_md = TEMPLATE.format(
markdown=md,
date=date,
title=title,
tags=', '.join(
f'"{t}"'
for t in sorted(tags)
)
)
with open(os.path.join(basedir, 'index.md'), 'w') as fp:
fp.write(index_md)
print(' done')
def process_tags(db0_data):
    """Build a mapping from post id to the names of its tags.

    Args:
        db0_data: Mapping with a ``'tags'`` list (records with ``'id'`` and
            ``'name'``) and a ``'posts_tags'`` list (records with
            ``'post_id'`` and ``'tag_id'``).

    Returns:
        A plain ``dict`` ``{post_id: [tag_name, ...]}``. Tag names appear in
        the order of the ``'posts_tags'`` records; posts without any
        relation have no entry.

    Raises:
        KeyError: If a relation refers to a tag id that is not listed in
            ``'tags'``.
    """
    tag_names = {tag['id']: tag['name'] for tag in db0_data['tags']}

    tag_dict = defaultdict(list)
    for relation in db0_data['posts_tags']:
        tag_dict[relation['post_id']].append(tag_names[relation['tag_id']])

    # Hand out a plain dict: looking up a missing post id on it does not
    # create an entry the way it would on the defaultdict.
    return dict(tag_dict)
# Script body: load the JSON export, resolve the tags of every post and
# convert the posts one by one.
# JSON text is UTF-8, so decode it explicitly instead of using the locale's
# default encoding.
with open(EXPORT_FILE, encoding='utf-8') as fp:
    data = json.load(fp)

# Only the first entry of the export's 'db' list is processed.
db0 = data['db'][0]
db0_data = db0['data']
posts = db0_data['posts']
tag_dict = process_tags(db0_data)

for post in posts:
    process_post(post, tag_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment