Last active
July 19, 2023 20:49
-
-
Save skullvalanche/4d377f489d853e6c2c5d9c9fa74e8f99 to your computer and use it in GitHub Desktop.
Convert a Mastodon output.json file into HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import json | |
import datetime | |
def get_content_values_from_json(json_file_path):
    """Collect HTML fragments for every post found in a JSON export file.

    The file is expected to hold a JSON object with an ``orderedItems``
    list. For each item whose ``object`` is a mapping with a non-null
    ``content`` value, three strings are appended to the result, in order:
    a ``pubDate`` div (from ``published``), a ``source`` div linking to
    ``atomUri``, and the raw ``content`` itself.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        A flat list of HTML strings, three per extracted post. The list is
        empty when ``orderedItems`` is missing or contains no usable items.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; decoding it as ISO-8859-1 turned every
    # non-ASCII character (accents, emoji) into mojibake. errors='replace'
    # preserves the old property that decoding itself never raises.
    with open(json_file_path, 'r', encoding='utf-8', errors='replace') as file:
        data = json.load(file)

    content_values = []
    for item in data.get('orderedItems', []):
        try:
            post = item.get('object', {})
            content = post.get('content')
            url = post.get('atomUri')
            publish_date = post.get('published')
        except AttributeError:
            # The item, or its 'object', is not a mapping (for example a
            # plain string or null), so there is nothing to extract.
            continue

        if content is None:
            continue

        content_values.append(f"<div class='pubDate'>{publish_date}</div>")
        content_values.append(
            f"<div class='source'><a href='{url}'>{url}</a></div>")
        content_values.append(content)
    return content_values
def write_to_html_file(content_values):
    """Write the given HTML fragments into a timestamped HTML page.

    The page is created in the current working directory and named
    ``output-<YYYY-MM-DD_HH-MM-SS>.html`` using the local time of the call.
    Each fragment is written on its own line inside ``<body>``.

    Args:
        content_values: Iterable of HTML strings to place in the body.

    Returns:
        The name of the file that was written.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"output-{timestamp}.html"
    # The opening <html> tag pairs with the closing tag written below, and
    # the charset declaration tells the browser how to decode the file.
    header = """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
.pubDate {
    font-size: 1.6em;
}
.source {
    font-size: 0.9em;
    font-style: italic;
}
p {
    font-family: Arial, Helvetica, sans-serif;
}
</style>
</head>
"""
    # Explicit UTF-8: the platform default encoding (e.g. cp1252) cannot
    # represent every character a post may contain and would raise
    # UnicodeEncodeError part-way through the write.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(header)
        file.write("<body>\n")
        file.writelines(f"{content}\n" for content in content_values)
        file.write("</body>\n")
        file.write("</html>\n")
    return filename
# Command-line entry point: expects exactly one argument, the path to the
# JSON file to convert.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        # Extract the fragments, write the page, then report where it went.
        output = write_to_html_file(
            get_content_values_from_json(sys.argv[1]))
        print(f"Extraction complete. Output written to {output}")
    else:
        print("Usage: python script_name.py <json_file_path>")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script takes the `output.json` file, which is part of a standard Mastodon archive export, and converts it to a readable/searchable HTML file.

PREREQUISITES:

- Python 3.6 or newer (the script uses f-strings).
- The `output.json` file from your Mastodon archive export.

USAGE:

1. `cd` to the folder where you saved this script and the `output.json` file.
2. Run `python mastodon2html.py output.json`

The script will output an HTML file, which you should be able to open with a standard web browser.