Skip to content

Instantly share code, notes, and snippets.

@glensc
Forked from multun/message_json_parser.py
Last active June 27, 2021 12:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glensc/3ef3590f8e3c4bed0550b99e40220c32 to your computer and use it in GitHub Desktop.
Save glensc/3ef3590f8e3c4bed0550b99e40220c32 to your computer and use it in GitHub Desktop.
Pretty-print a Facebook message.json export, fixing up its broken text encoding
#!/usr/bin/env python3
import sys
import json
import warnings
from datetime import datetime
def fixup_str(text):
    """Repair a string whose UTF-8 bytes were mis-decoded as Latin-1.

    The text is encoded back to bytes as Latin-1 and those bytes are then
    decoded as UTF-8.

    Args:
        text: The string to repair.

    Returns:
        The repaired string. If *text* cannot make the round trip (it holds a
        character outside Latin-1, or the resulting bytes are not valid
        UTF-8), it is returned unchanged.
    """
    try:
        return text.encode('latin1').decode('utf8')
    except UnicodeError:
        # Covers both UnicodeEncodeError and UnicodeDecodeError: the string
        # is not Latin-1 mojibake, so leave it alone rather than abort.
        return text
def fixup_list(l):
    """Return a new list holding ``fixup(item)`` for every item of *l*, in order."""
    return list(map(fixup, l))
def fixup_dict(dct):
    """Return a new dict with every key and value of *dct* repaired.

    Keys go through ``fixup_str``; values go through ``fixup``.
    """
    repaired = {}
    for key, value in dct.items():
        # Repair the key before the value, one pair at a time.
        new_key = fixup_str(key)
        repaired[new_key] = fixup(value)
    return repaired
def fixup(e):
    """Repair *e* according to its type.

    Dicts, lists and strings are handed to ``fixup_dict``, ``fixup_list`` and
    ``fixup_str`` respectively; any other value is returned as-is.
    """
    # Checked in this order; the first matching type wins.
    handlers = (
        (dict, fixup_dict),
        (list, fixup_list),
        (str, fixup_str),
    )
    for kind, repair in handlers:
        if isinstance(e, kind):
            return repair(e)
    return e
def filter_messages(messages):
    """Lazily yield, in order, the messages whose ``'type'`` equals ``'Generic'``."""
    yield from (entry for entry in messages if entry['type'] == 'Generic')
def format_message(message):
    """Format one message dict as a ``timestamp<TAB>sender<TAB>content`` line.

    Content is chosen in this order: the unsent marker, the text ``content``,
    then the first entry of the first non-empty attachment list (photos,
    gifs, audio_files, videos, files), then the sticker.

    Args:
        message: A message dict. ``sender_name`` and ``timestamp_ms`` are
            required; every other key is optional.

    Returns:
        The formatted line, or ``None`` when the message has no content this
        function knows how to render.
    """
    sender = message['sender_name']
    # The value is divided by 1000 to get seconds; fromtimestamp() returns a
    # naive datetime in the local timezone.
    timestamp = datetime.fromtimestamp(message['timestamp_ms'] / 1000)

    # (message key, label) pairs, in priority order.
    attachment_kinds = (
        ('photos', 'Photo'),
        ('gifs', 'Gif'),
        ('audio_files', 'Audio'),
        ('videos', 'Video'),
        ('files', 'File'),
    )

    # .get(): a message without the 'is_unsent' key is treated as sent.
    if message.get('is_unsent'):
        content = "<Unsent Message>"
    elif 'content' in message:
        content = message['content']
    else:
        for key, label in attachment_kinds:
            attachments = message.get(key)
            # Skip missing *and* empty lists so [0] can never fail.
            if attachments:
                content = f"{label}: <{attachments[0]['uri']}>"
                break
        else:
            if 'sticker' not in message:
                return None
            content = "Sticker: <{0}>".format(message['sticker']['uri'])

    return f'{timestamp}\t{sender}\t{content}'
def main(args):
    """Load message JSON files and print every renderable message.

    Args:
        args: File names to read. When empty, ``message_1.json`` in the
            current directory is used.

    The files are read in reverse argument order and each file's
    ``'messages'`` list is reversed before being appended.
    NOTE(review): presumably the files list newest messages first and this
    produces chronological output; confirm against a real export.

    Messages that ``format_message`` cannot render are reported through
    ``warnings.warn`` and skipped.
    """
    if not args:
        args = ['message_1.json']

    messages = []
    for filename in reversed(args):
        # Explicit encoding so decoding does not depend on the platform's
        # locale default.
        with open(filename, 'r', encoding='utf-8') as f:
            chunk = fixup(json.load(f)['messages'])
        chunk.reverse()
        messages.extend(chunk)

    for message in filter_messages(messages):
        formatted = format_message(message)
        if not formatted:
            warnings.warn(f"Skipped {message}")
            continue
        print(formatted)
# When executed as a script, pass the command-line arguments (without the
# program name) to main().
if __name__ == '__main__':
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment