Skip to content

Instantly share code, notes, and snippets.

@skullvalanche
Last active July 19, 2023 20:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skullvalanche/4d377f489d853e6c2c5d9c9fa74e8f99 to your computer and use it in GitHub Desktop.
Save skullvalanche/4d377f489d853e6c2c5d9c9fa74e8f99 to your computer and use it in GitHub Desktop.
Convert a Mastodon output.json file into HTML
#!/usr/bin/env python
import sys
import json
import datetime
def get_content_values_from_json(json_file_path):
    """Extract HTML fragments for every post found in a JSON export file.

    The file is expected to hold a JSON object with an ``orderedItems`` list.
    For each item whose ``object`` carries a ``content`` value, three strings
    are appended to the result, in this order: a ``pubDate`` div built from
    ``published``, a ``source`` div linking to ``atomUri``, and the raw
    ``content`` value itself.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        A flat list of HTML strings (three per post), in file order. The list
        is empty when ``orderedItems`` is missing or contains no usable post.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 text; decoding it as ISO-8859-1 turns every non-ASCII
    # character (accents, emoji) into mojibake.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    content_values = []
    for item in data.get('orderedItems', []):
        try:
            post = item.get('object', {})
            content = post.get('content', None)
            url = post.get('atomUri', None)
            publish_date = post.get('published', None)
        except AttributeError:
            # The item or its 'object' is not a dict (for example a plain
            # string or null), so there is no post body to extract.
            continue

        if content is None:
            continue

        content_values.append(f"<div class='pubDate'>{publish_date}</div>")
        content_values.append(
            f"<div class='source'><a href='{url}'>{url}</a></div>")
        content_values.append(content)
    return content_values
def write_to_html_file(content_values):
    """Write the given HTML fragments into a timestamped HTML file.

    The file is created in the current working directory and named
    ``output-<YYYY-MM-DD_HH-MM-SS>.html`` using the local time of the call.
    Each fragment is written on its own line inside ``<body>``.

    Args:
        content_values: Iterable of HTML strings to place in the body.

    Returns:
        The name of the file that was written.

    Raises:
        OSError: If the file cannot be created or written.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"output-{timestamp}.html"
    # The document opens <html> (it is closed at the end) and declares its
    # charset so browsers decode the UTF-8 bytes written below correctly.
    header = """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
.pubDate {
font-size: 1.6em;
}
.source {
font-size: 0.9em;
font-style: italic;
}
p {
font-family: Arial, Helvetica, sans-serif;
}
</style>
</head>
"""
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # characters such as emoji and would not match the declared charset.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(header)
        file.write("<body>\n")
        for content in content_values:
            file.write(f"{content}\n")
        file.write("</body>\n")
        file.write("</html>\n")
    return filename
# Command-line entry point: convert the JSON file named by the single
# argument into an HTML file and report the name of the file written.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # so a wrong invocation is reported as a failure to the shell.
        sys.exit("Usage: python script_name.py <json_file_path>")

    json_file_path = sys.argv[1]
    content_values = get_content_values_from_json(json_file_path)
    output = write_to_html_file(content_values)
    print(f"Extraction complete. Output written to {output}")
@skullvalanche
Copy link
Author

This script takes the output.json file which is part of a standard Mastodon archive export and converts it to a readable/searchable HTML file.

PREREQUISITES:

  • you must have Python 3.9 or higher installed
  • you must know how to use a command line

USAGE:

  1. Download this file to the same folder as your output.json file.
  2. Open a command prompt.
  3. cd to the folder where you saved this script and the output.json file.
  4. Run the command python mastodon2html.py output.json

The script will output an HTML file, which you should be able to open with a standard web browser.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment