Skip to content

Instantly share code, notes, and snippets.

@manton
Last active April 28, 2023 19:40
Show Gist options
  • Save manton/946ddbb74b5d0948053d7e23793ead77 to your computer and use it in GitHub Desktop.
Save manton/946ddbb74b5d0948053d7e23793ead77 to your computer and use it in GitHub Desktop.
# Mastodon ActivityStreams (.tar.gz) file to Blog Archive (.bar) converter
# by Manton Reece
# MIT license
require 'rubygems'
require 'zip'
require 'zlib'
require 'minitar'
require 'json'
require 'fileutils'
# you may need to install a few gems like:
# gem install rubyzip   (this is the gem that provides `require 'zip'` / Zip::File)
# gem install minitar
# gem install fileutils
# Exactly two arguments are expected: the Mastodon .tar.gz to read and the
# .bar file to write. Anything else prints a usage hint and stops the script.
unless ARGV.length == 2
  puts "", "Usage: ruby mastodon_to_blog_archive.rb /path/mastodon.tar.gz /path/export.bar", ""
  return
end
# Take the input/output paths from the command line and create a scratch
# folder (with a random numeric suffix) holding two sub-folders: one for the
# extracted Mastodon data and one for the blog files that will be zipped.
mastodon_path, bar_path = ARGV
temp_folder = "blog_archive_temp_#{rand(10000...50000)}"
temp_mastodon_folder = File.join(temp_folder, "mastodon")
temp_blog_folder = File.join(temp_folder, "blog")
FileUtils.mkdir_p([temp_mastodon_folder, temp_blog_folder])
puts "Converting from #{File.basename(mastodon_path)} to #{File.basename(bar_path)}..."
# Accumulators filled in by the later steps: the raw Mastodon activities, the
# converted JSON Feed items, the Microformats HTML fragments, and the list of
# files that end up in the .bar archive.
mastodon_posts = []
jsonfeed_posts = []
mf2_posts = []
files_to_zip = []
# unzip the Mastodon posts
# Walks every entry of the .tar.gz: outbox.json is parsed into mastodon_posts,
# and anything under media_attachments is copied into the blog folder and
# queued in files_to_zip. The block form of GzipReader.open plus the ensure
# below close both readers even if extraction raises part-way through.
Zlib::GzipReader.open(mastodon_path) do |tar_gz|
  tar = Archive::Tar::Minitar::Reader.new(tar_gz)
  begin
    # absolute form of the blog folder, used to reject entries that would be
    # written outside of it
    blog_root = File.expand_path(temp_blog_folder)
    tar.each do |entry|
      # directory entries carry no file data; opening them as files would fail
      next if entry.directory?
      if entry.name.include?("outbox.json")
        # extract so we can read posts
        json_path = File.join(temp_mastodon_folder, "outbox.json")
        File.open(json_path, 'wb') { |file| file.write(entry.read) }
        # an outbox without "orderedItems" is treated as containing no posts
        mastodon_posts = JSON.parse(IO.read(json_path))["orderedItems"] || []
      elsif entry.name.include?("media_attachments")
        # extract to blog folder
        media_path = File.join(temp_blog_folder, entry.name)
        # entry names come from the archive; skip any (e.g. containing "..")
        # that resolve to a location outside the blog folder
        unless File.expand_path(media_path).start_with?(blog_root + File::SEPARATOR)
          puts "Skipping media file outside of archive folder: " + entry.name
          next
        end
        FileUtils.mkdir_p(File.dirname(media_path))
        puts "Extracting media file: " + File.basename(media_path)
        File.open(media_path, 'wb') { |file| file.write(entry.read) }
        files_to_zip << media_path
      end
    end
  ensure
    tar.close
  end
end
puts "Found #{mastodon_posts.size} posts in Mastodon archive."
# re-structure the posts for Blog Archive Format
# Each "Create" activity becomes one JSON Feed item (appended to
# jsonfeed_posts) and one h-entry HTML fragment (appended to mf2_posts).
# Activities of any other type are skipped.
mastodon_posts.each do |post|
  next unless post["type"] == "Create"
  obj = post["object"]
  post_id = obj["id"]
  post_url = obj["url"]
  post_published = obj["published"]
  # to_s / || [] tolerate a post that has no "content" or no "attachment" key
  html = obj["content"].to_s
  post_attachments = obj["attachment"] || []
  # add images to HTML
  post_attachments.each do |a|
    next unless a["type"] == "Document"
    attachment_url = a["url"]
    media_type = a["mediaType"].to_s
    # <video> and <audio> are not void elements: without an end tag the markup
    # that follows would be parsed as their fallback content. "controls" is
    # added so the media can actually be played from the page.
    if media_type.include?("image/")
      html += "<img src=\"#{attachment_url}\">"
    elsif media_type.include?("video/")
      html += "<video src=\"#{attachment_url}\" controls></video>"
    elsif media_type.include?("audio/")
      html += "<audio src=\"#{attachment_url}\" controls></audio>"
    end
  end
  # relative path
  html = html.gsub("src=\"/media_attachments/", "src=\"media_attachments/")
  jsonfeed_posts << {
    id: post_id,
    url: post_url,
    date_published: post_published,
    content_html: html
  }
  # Microformats version: content followed by a permalink wrapping the timestamp
  mf2_post = "<div class=\"h-entry\"><div class=\"e-content\">" +
    html +
    "<div><a href=\"#{post_url}\" class=\"u-url\">" +
    "<time datetime=\"#{post_published}\" class=\"dt-published\">#{post_published}</time>" +
    "</a></div>" +
    "</div></div>"
  mf2_posts << mf2_post
end
# save the JSON Feed
# feed.json holds the converted posts under "items" together with the
# JSON Feed version URL.
puts "Saving posts to JSON Feed..."
jsonfeed_path = File.join(temp_blog_folder, "feed.json")
feed = { version: "https://jsonfeed.org/version/1.1", items: jsonfeed_posts }
File.write(jsonfeed_path, feed.to_json)
# save an HTML file with Microformats
# index.html wraps all h-entry fragments (newline-separated) in one h-feed div.
puts "Saving posts to HTML with Microformats..."
html_path = File.join(temp_blog_folder, "index.html")
entries_html = mf2_posts.join("\n")
File.write(html_path, "<html><body><div class=\"h-feed\">#{entries_html}</div></body></html>")
# delete the old file if it looks like a .bar archive
# File.exist? is used because File.exists? was removed in Ruby 3.2. The ".bar"
# check looks only at the file name, so a ".bar" that merely appears in a
# parent directory name cannot cause an unrelated file to be deleted.
if File.exist?(bar_path) && !File.directory?(bar_path) && File.basename(bar_path).include?(".bar")
  File.delete(bar_path)
end
files_to_zip << jsonfeed_path
files_to_zip << html_path
# zip the .bar
# Entries are stored relative to the blog folder, e.g. "feed.json",
# "index.html" and the extracted media files.
blog_prefix = temp_blog_folder + "/"
Zip::File.open(bar_path, Zip::File::CREATE) do |zipfile|
  files_to_zip.each do |filepath|
    # strip only the leading blog-folder prefix, never a later occurrence
    entry_name = filepath.start_with?(blog_prefix) ? filepath[blog_prefix.length..-1] : filepath
    zipfile.add(entry_name, filepath)
  end
end
# cleanup
# Remove the scratch folder and everything extracted into it; errors during
# removal are ignored (force).
puts "Cleaning up temporary folder #{temp_folder}..."
FileUtils.rm_rf(temp_folder)
puts "Done! Created new file in Blog Archive Format: #{bar_path}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment