# manton/mastodon_to_blog_archive.rb

## mastodon_to_blog_archive.rb
# Mastodon ActivityStreams (.tar.gz) file to Blog Archive (.bar) converter
# by Manton Reece
# MIT license

require 'rubygems'
require 'zip'
require 'zlib'
require 'minitar'
require 'json'
require 'fileutils'

# you may need to install a few gems like:
#   gem install rubyzip   (this is the gem behind `require 'zip'`)
#   gem install minitar
#   gem install fileutils (usually unnecessary: fileutils ships with Ruby)

# Builds the HTML tag that embeds one ActivityStreams attachment.
#   attachment - one item of a post's "attachment" list
# Returns "" for anything that is not an image, video or audio "Document".
def attachment_html(attachment)
	return "" unless attachment.is_a?(Hash) && attachment["type"] == "Document"

	attachment_url = attachment["url"]
	# to_s so an attachment without a mediaType is skipped instead of raising
	media_type = attachment["mediaType"].to_s
	if media_type.include?("image/")
		"<img src=\"#{attachment_url}\">"
	elsif media_type.include?("video/")
		# unlike <img>, <video> and <audio> need a closing tag: left open, the
		# markup that follows is parsed as the player's fallback content
		"<video src=\"#{attachment_url}\" controls></video>"
	elsif media_type.include?("audio/")
		"<audio src=\"#{attachment_url}\" controls></audio>"
	else
		""
	end
end

# Returns the post's content followed by a tag for each attachment, with
# "/media_attachments/" sources rewritten to paths relative to the archive root.
#   obj - the "object" hash of a "Create" activity
def post_html(obj)
	# "attachment" may be missing, a list, or a single hash
	attachments = [obj["attachment"]].flatten.compact
	html = obj["content"].to_s + attachments.map { |a| attachment_html(a) }.join
	html.gsub("src=\"/media_attachments/", "src=\"media_attachments/")
end

# Wraps one post's HTML in h-entry markup with a permalink and publish date.
def mf2_entry_html(post_url, post_published, html)
	"<div class=\"h-entry\"><div class=\"e-content\">" +
		html +
		"<div><a href=\"#{post_url}\" class=\"u-url\">" +
		"<time datetime=\"#{post_published}\" class=\"dt-published\">#{post_published}</time>" +
		"</a></div>" +
		"</div></div>"
end

# Re-structures outbox items for Blog Archive Format.
#   mastodon_posts - the "orderedItems" array from outbox.json
# Returns [jsonfeed_posts, mf2_posts]: JSON Feed item hashes and h-entry HTML
# strings, one of each per "Create" activity. Other activity types are skipped.
def convert_posts(mastodon_posts)
	jsonfeed_posts = []
	mf2_posts = []

	mastodon_posts.each do |post|
		next unless post["type"] == "Create"

		obj = post["object"]
		# skip activities whose object is not an embedded hash
		next unless obj.is_a?(Hash)

		html = post_html(obj)
		jsonfeed_posts << {
			id: obj["id"],
			url: obj["url"],
			date_published: obj["published"],
			content_html: html
		}
		mf2_posts << mf2_entry_html(obj["url"], obj["published"], html)
	end

	[jsonfeed_posts, mf2_posts]
end

# Reads the Mastodon .tar.gz, parsing outbox.json and copying media files
# into temp_blog_folder under their archive-relative names.
# Returns [mastodon_posts, media_paths].
def extract_mastodon_archive(mastodon_path, temp_mastodon_folder, temp_blog_folder)
	mastodon_posts = []
	media_paths = []

	# block form closes the gzip stream even if extraction raises
	Zlib::GzipReader.open(mastodon_path) do |tar_gz|
		tar = Archive::Tar::Minitar::Reader.new(tar_gz)
		begin
			tar.each do |entry|
				# directory entries have no content to extract
				next if entry.directory?

				if entry.name.include?("outbox.json")
					# extract so we can read posts
					json_path = File.join(temp_mastodon_folder, "outbox.json")
					File.open(json_path, 'wb') { |file| file.write(entry.read) }
					mastodon_posts = JSON.parse(File.read(json_path))["orderedItems"] || []
				elsif entry.name.include?("media_attachments")
					# entry names come from the archive; never let one climb
					# out of the temporary folder
					if entry.name.split("/").include?("..")
						puts "Skipping media file with unsafe path: " + entry.name
						next
					end

					# extract to blog folder
					media_path = File.join(temp_blog_folder, entry.name)
					FileUtils.mkdir_p(File.dirname(media_path))
					puts "Extracting media file: " + File.basename(media_path)
					File.open(media_path, 'wb') { |file| file.write(entry.read) }
					media_paths << media_path
				end
			end
		ensure
			tar.close
		end
	end

	[mastodon_posts, media_paths]
end

# Converts the Mastodon export at mastodon_path into a Blog Archive at bar_path.
# Work happens in a randomly named temporary folder under the current
# directory, which is removed again even when a step fails.
def convert_mastodon_archive(mastodon_path, bar_path)
	# set up some paths
	temp_folder = "blog_archive_temp_" + rand(10000...50000).to_s
	temp_mastodon_folder = File.join(temp_folder, "mastodon")
	temp_blog_folder = File.join(temp_folder, "blog")

	begin
		FileUtils.mkdir_p(temp_mastodon_folder)
		FileUtils.mkdir_p(temp_blog_folder)

		puts "Converting from #{File.basename(mastodon_path)} to #{File.basename(bar_path)}..."

		mastodon_posts, files_to_zip = extract_mastodon_archive(mastodon_path, temp_mastodon_folder, temp_blog_folder)
		puts "Found #{mastodon_posts.size} posts in Mastodon archive."

		jsonfeed_posts, mf2_posts = convert_posts(mastodon_posts)

		# save the JSON Feed
		puts "Saving posts to JSON Feed..."
		jsonfeed_info = {
			version: "https://jsonfeed.org/version/1.1",
			items: jsonfeed_posts
		}
		jsonfeed_path = File.join(temp_blog_folder, "feed.json")
		File.write(jsonfeed_path, jsonfeed_info.to_json)

		# save an HTML file with Microformats
		puts "Saving posts to HTML with Microformats..."
		html_path = File.join(temp_blog_folder, "index.html")
		html_header = "<html><body><div class=\"h-feed\">"
		html_footer = "</div></body></html>"
		File.write(html_path, html_header + mf2_posts.join("\n") + html_footer)

		# delete the old file if it looks like a .bar archive; the extension is
		# checked so a ".bar" elsewhere in the path does not count
		# (File.exist? because File.exists? is gone as of Ruby 3.2)
		if File.exist?(bar_path) && !File.directory?(bar_path) && File.extname(bar_path) == ".bar"
			File.delete(bar_path)
		end

		files_to_zip << jsonfeed_path
		files_to_zip << html_path

		# zip the .bar, naming entries relative to the blog folder
		Zip::File.open(bar_path, Zip::File::CREATE) do |zipfile|
			files_to_zip.each do |filepath|
				zipfile.add(filepath.sub(temp_blog_folder + "/", ""), filepath)
			end
		end
	ensure
		# cleanup
		puts "Cleaning up temporary folder #{temp_folder}..."
		FileUtils.rm_r(temp_folder, force: true)
	end

	puts "Done! Created new file in Blog Archive Format: #{bar_path}"
end

if ARGV.length == 2
	convert_mastodon_archive(ARGV[0], ARGV[1])
else
	puts ""
	puts "Usage: ruby mastodon_to_blog_archive.rb /path/mastodon.tar.gz /path/export.bar"
	puts ""
end
	# Mastodon ActivityStreams (.tar.gz) file to Blog Archive (.bar) converter
	# by Manton Reece
	# MIT license

	require 'rubygems'
	require 'zip'
	require 'zlib'
	require 'minitar'
	require 'json'
	require 'fileutils'

	# you may need to install a few gems like:
	#   gem install rubyzip   (this is the gem behind `require 'zip'`)
	#   gem install minitar
	#   gem install fileutils (usually unnecessary: fileutils ships with Ruby)

	# Builds the HTML tag that embeds one ActivityStreams attachment.
	#   attachment - one item of a post's "attachment" list
	# Returns "" for anything that is not an image, video or audio "Document".
	def attachment_html(attachment)
		return "" unless attachment.is_a?(Hash) && attachment["type"] == "Document"

		attachment_url = attachment["url"]
		# to_s so an attachment without a mediaType is skipped instead of raising
		media_type = attachment["mediaType"].to_s
		if media_type.include?("image/")
			"<img src=\"#{attachment_url}\">"
		elsif media_type.include?("video/")
			# unlike <img>, <video> and <audio> need a closing tag: left open, the
			# markup that follows is parsed as the player's fallback content
			"<video src=\"#{attachment_url}\" controls></video>"
		elsif media_type.include?("audio/")
			"<audio src=\"#{attachment_url}\" controls></audio>"
		else
			""
		end
	end

	# Returns the post's content followed by a tag for each attachment, with
	# "/media_attachments/" sources rewritten to paths relative to the archive root.
	#   obj - the "object" hash of a "Create" activity
	def post_html(obj)
		# "attachment" may be missing, a list, or a single hash
		attachments = [obj["attachment"]].flatten.compact
		html = obj["content"].to_s + attachments.map { |a| attachment_html(a) }.join
		html.gsub("src=\"/media_attachments/", "src=\"media_attachments/")
	end

	# Wraps one post's HTML in h-entry markup with a permalink and publish date.
	def mf2_entry_html(post_url, post_published, html)
		"<div class=\"h-entry\"><div class=\"e-content\">" +
			html +
			"<div><a href=\"#{post_url}\" class=\"u-url\">" +
			"<time datetime=\"#{post_published}\" class=\"dt-published\">#{post_published}</time>" +
			"</a></div>" +
			"</div></div>"
	end

	# Re-structures outbox items for Blog Archive Format.
	#   mastodon_posts - the "orderedItems" array from outbox.json
	# Returns [jsonfeed_posts, mf2_posts]: JSON Feed item hashes and h-entry HTML
	# strings, one of each per "Create" activity. Other activity types are skipped.
	def convert_posts(mastodon_posts)
		jsonfeed_posts = []
		mf2_posts = []

		mastodon_posts.each do |post|
			next unless post["type"] == "Create"

			obj = post["object"]
			# skip activities whose object is not an embedded hash
			next unless obj.is_a?(Hash)

			html = post_html(obj)
			jsonfeed_posts << {
				id: obj["id"],
				url: obj["url"],
				date_published: obj["published"],
				content_html: html
			}
			mf2_posts << mf2_entry_html(obj["url"], obj["published"], html)
		end

		[jsonfeed_posts, mf2_posts]
	end

	# Reads the Mastodon .tar.gz, parsing outbox.json and copying media files
	# into temp_blog_folder under their archive-relative names.
	# Returns [mastodon_posts, media_paths].
	def extract_mastodon_archive(mastodon_path, temp_mastodon_folder, temp_blog_folder)
		mastodon_posts = []
		media_paths = []

		# block form closes the gzip stream even if extraction raises
		Zlib::GzipReader.open(mastodon_path) do |tar_gz|
			tar = Archive::Tar::Minitar::Reader.new(tar_gz)
			begin
				tar.each do |entry|
					# directory entries have no content to extract
					next if entry.directory?

					if entry.name.include?("outbox.json")
						# extract so we can read posts
						json_path = File.join(temp_mastodon_folder, "outbox.json")
						File.open(json_path, 'wb') { |file| file.write(entry.read) }
						mastodon_posts = JSON.parse(File.read(json_path))["orderedItems"] || []
					elsif entry.name.include?("media_attachments")
						# entry names come from the archive; never let one climb
						# out of the temporary folder
						if entry.name.split("/").include?("..")
							puts "Skipping media file with unsafe path: " + entry.name
							next
						end

						# extract to blog folder
						media_path = File.join(temp_blog_folder, entry.name)
						FileUtils.mkdir_p(File.dirname(media_path))
						puts "Extracting media file: " + File.basename(media_path)
						File.open(media_path, 'wb') { |file| file.write(entry.read) }
						media_paths << media_path
					end
				end
			ensure
				tar.close
			end
		end

		[mastodon_posts, media_paths]
	end

	# Converts the Mastodon export at mastodon_path into a Blog Archive at bar_path.
	# Work happens in a randomly named temporary folder under the current
	# directory, which is removed again even when a step fails.
	def convert_mastodon_archive(mastodon_path, bar_path)
		# set up some paths
		temp_folder = "blog_archive_temp_" + rand(10000...50000).to_s
		temp_mastodon_folder = File.join(temp_folder, "mastodon")
		temp_blog_folder = File.join(temp_folder, "blog")

		begin
			FileUtils.mkdir_p(temp_mastodon_folder)
			FileUtils.mkdir_p(temp_blog_folder)

			puts "Converting from #{File.basename(mastodon_path)} to #{File.basename(bar_path)}..."

			mastodon_posts, files_to_zip = extract_mastodon_archive(mastodon_path, temp_mastodon_folder, temp_blog_folder)
			puts "Found #{mastodon_posts.size} posts in Mastodon archive."

			jsonfeed_posts, mf2_posts = convert_posts(mastodon_posts)

			# save the JSON Feed
			puts "Saving posts to JSON Feed..."
			jsonfeed_info = {
				version: "https://jsonfeed.org/version/1.1",
				items: jsonfeed_posts
			}
			jsonfeed_path = File.join(temp_blog_folder, "feed.json")
			File.write(jsonfeed_path, jsonfeed_info.to_json)

			# save an HTML file with Microformats
			puts "Saving posts to HTML with Microformats..."
			html_path = File.join(temp_blog_folder, "index.html")
			html_header = "<html><body><div class=\"h-feed\">"
			html_footer = "</div></body></html>"
			File.write(html_path, html_header + mf2_posts.join("\n") + html_footer)

			# delete the old file if it looks like a .bar archive; the extension is
			# checked so a ".bar" elsewhere in the path does not count
			# (File.exist? because File.exists? is gone as of Ruby 3.2)
			if File.exist?(bar_path) && !File.directory?(bar_path) && File.extname(bar_path) == ".bar"
				File.delete(bar_path)
			end

			files_to_zip << jsonfeed_path
			files_to_zip << html_path

			# zip the .bar, naming entries relative to the blog folder
			Zip::File.open(bar_path, Zip::File::CREATE) do |zipfile|
				files_to_zip.each do |filepath|
					zipfile.add(filepath.sub(temp_blog_folder + "/", ""), filepath)
				end
			end
		ensure
			# cleanup
			puts "Cleaning up temporary folder #{temp_folder}..."
			FileUtils.rm_r(temp_folder, force: true)
		end

		puts "Done! Created new file in Blog Archive Format: #{bar_path}"
	end

	if ARGV.length == 2
		convert_mastodon_archive(ARGV[0], ARGV[1])
	else
		puts ""
		puts "Usage: ruby mastodon_to_blog_archive.rb /path/mastodon.tar.gz /path/export.bar"
		puts ""
	end