# Source: GitHub gist bitbybyte/3c87290a08ca2329bcb1edadea9bafb2 (last active October 8, 2024 00:01).
#!/usr/bin/env python
"""
Create a reasonable Markdown export from a Cohost post JSON in your exported profile.

Usage: cohost-post-json-to-markdown.py './project/bitto/posts/published/123456-example/post.json' '/home/bitto/cohost-exports'

Post will be written to '/home/bitto/cohost-exports/123456.markdown'
"""
import json
import os
import sys
__version = "0.1" | |
__author = "bitto" | |
__license = "MIT" | |
IMAGE_TEMPLATE = '' | |
AUDIO_TEMPLATE = ''' | |
### {1} - {2} | |
<audio controls> | |
<source src="{0}" /> | |
</audio> | |
''' | |
def convert_attachment_row(block: dict) -> list:
    """Convert every attachment of an "attachment-row" block.

    Args:
        block: A post block whose "attachments" entry is a list of
            attachment blocks, each in the shape convert_attachment accepts.

    Returns:
        A list with one converted string per attachment, in the original order.

    Raises:
        KeyError: If the block has no "attachments" entry.
        Exception: Whatever convert_attachment raises for an attachment it
            cannot convert.
    """
    return [convert_attachment(attachment) for attachment in block["attachments"]]
def convert_attachment(block: dict) -> str:
    """Render a single "attachment" block using the module's templates.

    Args:
        block: A post block whose "attachment" entry is a dict with a "kind"
            of "image" or "audio" and a "fileURL".

    Returns:
        IMAGE_TEMPLATE filled with (fileURL, altText) for images, or
        AUDIO_TEMPLATE filled with (fileURL, artist, title) for audio.

    Raises:
        KeyError: If "attachment" or "fileURL" is missing.
        ValueError: If the attachment kind is neither "image" nor "audio".
    """
    attachment = block["attachment"]
    kind = attachment.get("kind")

    if kind == "image":
        # Descriptive fields are treated as optional: a missing or null value
        # becomes an empty string instead of a KeyError or the text "None".
        alt = attachment.get("altText") or ""
        return IMAGE_TEMPLATE.format(attachment["fileURL"], alt)

    if kind == "audio":
        artist = attachment.get("artist") or ""
        title = attachment.get("title") or ""
        return AUDIO_TEMPLATE.format(attachment["fileURL"], artist, title)

    # ValueError is still an Exception, so existing broad handlers keep working.
    raise ValueError(f"Attachment type not supported: {kind}")
if __name__ == "__main__":
    # Command-line entry point: read one exported post.json and write
    # "<out_dir>/<postId>.markdown" containing a front-matter header followed
    # by each post block converted in order.
    if len(sys.argv) != 3:
        sys.exit(f"Usage: {sys.argv[0]} [post_json] [out_dir]")

    post_json_path = sys.argv[1]
    out_dir = sys.argv[2]

    # Read with an explicit encoding and close the handle promptly; relying on
    # the locale default can fail on non-ASCII post text.
    with open(post_json_path, encoding="utf-8") as post_file:
        post_json = json.load(post_file)

    post_id = post_json["postId"]
    post_title = post_json["headline"]
    post_date = post_json["publishedAt"]

    # exist_ok=True already covers the "directory exists" case.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"{post_id}.markdown")

    # Write to the file directly instead of rebinding sys.stdout, so the file
    # is flushed and closed even if a block fails to convert.
    with open(out_path, "w", encoding="utf-8") as out:
        # Jekyll/Hugo/etc header
        print("---", file=out)
        print(f"title: {post_title}", file=out)
        print(f"date: {post_date}", file=out)
        print("---\n", file=out)

        for block in post_json["blocks"]:
            block_type = block.get("type")
            if not block_type:
                raise Exception(f"No block type found for block in post: {post_title}")
            elif block_type == "markdown":
                print(block["markdown"]["content"], file=out)
            elif block_type == "attachment":
                print(convert_attachment(block), file=out)
            elif block_type == "attachment-row":
                print(*convert_attachment_row(block), sep="\n", file=out)
            else:
                raise Exception(f"Block type not supported: {block_type}")
            print(file=out)  # Line break
# End of script. (The GitHub sign-in/comment footer captured with the page is not part of the file.)