Skip to content

Instantly share code, notes, and snippets.

@fallenby fallenby/hangouts.py
Last active Oct 17, 2018

Embed
What would you like to do?
Convert/Parse Google Takeout/Export Data Hangouts/Chat into individual conversations
# Run this in the same directory as the Hangouts.json file generated by Google Takeout / Data Export tool.
# python3 hangouts.py
import json
import datetime
import os
import shutil
import re
# Directory that receives one text file per conversation.
chat_dir = "hangouts"

# Never mix fresh output with the results of an earlier run: either remove the
# existing path (with the user's consent) or stop before anything is written.
if os.path.exists(chat_dir):
    print("Hangouts directory already exists. Should I remove it before proceeding? y/n")
    # strip() so that an answer such as "y " is still understood as "yes".
    if input().strip().lower() == "y":
        print("Removing the hangouts directory.")
        if os.path.isdir(chat_dir) and not os.path.islink(chat_dir):
            shutil.rmtree(chat_dir)
        else:
            # rmtree() refuses plain files and symlinks; remove those directly.
            os.remove(chat_dir)
    else:
        print("Exiting.")
        # quit() is injected by the site module for interactive sessions and
        # is absent under `python -S`; SystemExit is always available and
        # yields the same exit status (0).
        raise SystemExit(0)
class User:
    """A participant record: an identifier paired with a display name."""

    def __init__(self, id, name):
        self.id, self.name = id, name

    def getId(self):
        """Return the identifier given at construction."""
        return self.id

    def getName(self):
        """Return the current display name."""
        return self.name

    def setName(self, name):
        """Replace the display name with *name*."""
        self.name = name
class Message:
    """A single chat message with a display-ready timestamp.

    Args:
        id: Identifier of the message (stored as-is).
        sender_id: Identifier of the sender (stored as-is).
        sender_name: Display name used when rendering the message.
        timestamp: Microseconds since 1970-01-01 00:00:00, as an int or a
            numeric string. NOTE(review): assumed to be UTC - confirm against
            the export format.
        text: Message body.
        utc_offset_hours: Hours added to the epoch time to obtain the
            displayed wall-clock time. Defaults to 2 (GMT+2), which is the
            offset this script has always applied.
    """

    def __init__(self, id, sender_id, sender_name, timestamp, text, utc_offset_hours=2):
        self.id = id
        self.sender_id = sender_id
        self.sender_name = sender_name
        # The value counts microseconds from the Unix epoch (1970), not from
        # the WebKit epoch (1601); the result is a naive datetime that is
        # already shifted by the requested offset.
        self.timestamp = datetime.datetime(1970, 1, 1) + datetime.timedelta(
            microseconds=int(timestamp), hours=utc_offset_hours
        )
        self.text = text

    def display(self):
        """Return the message as "[YYYY-MM-DD HH:MM:SS] sender: text"."""
        return "[{}] {}: {}".format(
            self.timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            self.sender_name,
            self.text,
        )
class ConversationSet:
    """All conversations of one export, stored in a dict keyed by conversation id."""

    def __init__(self):
        self.conversations = dict()

    def getParticipantNameById(self, id):
        """Return a display name for participant *id*, or "Unknown".

        Conversations are searched in insertion order. Entries whose name is
        empty are skipped: participants registered without a name carry an
        empty placeholder, and returning it would hide a real name stored in
        another conversation.
        """
        for conversation in self.conversations.values():
            participant = conversation.getParticipantById(id)
            if participant is not None and participant.name:
                return participant.name
        return "Unknown"

    def addConversationParticipants(self, id, json_participant_data):
        """Create conversation *id* and register its participants.

        Any conversation already stored under *id* is replaced. Each item of
        *json_participant_data* must provide ``["id"]["gaia_id"]``; an
        optional ``"fallback_name"`` becomes the participant's name, otherwise
        an empty placeholder is stored.
        """
        conversation = Conversation(id)
        self.conversations[id] = conversation
        for participant in json_participant_data:
            conversation.addParticipant(
                participant["id"]["gaia_id"],
                participant.get("fallback_name", ""),
            )

    def addConversationEvents(self, id, json_participant_data, json_event_data):
        """Refresh participant names of conversation *id* and add its messages.

        Participants without a ``"fallback_name"`` are named from whatever any
        conversation in the set knows about them. Only events whose
        ``"event_type"`` is ``"REGULAR_CHAT_MESSAGE"`` are stored; the message
        text is the concatenation of the ``"text"`` of its segments.

        Raises:
            KeyError: if *id* was never registered through
                addConversationParticipants, or if an event lacks one of the
                required keys.
        """
        conversation = self.conversations[id]

        for participant in json_participant_data:
            p_id = participant["id"]["gaia_id"]
            if "fallback_name" in participant:
                name = participant["fallback_name"]
            else:
                name = self.getParticipantNameById(p_id)
            conversation.setParticipantName(p_id, name)

        for event in json_event_data:
            if event["event_type"] != "REGULAR_CHAT_MESSAGE":
                continue

            # Messages without a "segment" list (presumably attachment-only;
            # verify against the export) yield empty text. Segments without
            # a "text" key contribute nothing rather than raising KeyError.
            segments = event["chat_message"]["message_content"].get("segment", [])
            text = "".join(segment.get("text", "") for segment in segments)

            # The sender may be missing from this conversation's participant
            # list; fall back to the set-wide lookup (which ends at "Unknown")
            # rather than dereferencing None.
            sender_id = event["sender_id"]["gaia_id"]
            sender = conversation.getParticipantById(sender_id)
            if sender is not None:
                sender_name = sender.name
            else:
                sender_name = self.getParticipantNameById(sender_id)

            conversation.addMessage(
                event["event_id"],
                sender_id,
                sender_name,
                event["timestamp"],
                text,
            )

    def getConversations(self):
        """Return the stored conversations as a new list, in insertion order."""
        return list(self.conversations.values())

    def getConversationById(self, id):
        """Return the conversation stored under *id*, or None if there is none."""
        # Conversations are keyed by their own id, so a dict lookup replaces
        # the former linear scan over every conversation.
        return self.conversations.get(id)
class Conversation:
    """One conversation: its id, its participants keyed by id, and its messages."""

    def __init__(self, id):
        self.id = id
        self.participants = {}
        self.messages = []

    # --- identity -------------------------------------------------------

    def getId(self):
        """Return the conversation id."""
        return self.id

    # --- participants ---------------------------------------------------

    def addParticipant(self, id, name):
        """Register a participant; an id that is already present is left untouched."""
        if id in self.participants:
            return
        self.participants[id] = User(id, name)

    def setParticipantName(self, id, name):
        """Rename participant *id*; unknown ids are silently ignored."""
        try:
            participant = self.participants[id]
        except KeyError:
            return
        participant.setName(name)

    def getParticipantById(self, id):
        """Return the participant stored under *id*, or None."""
        return self.participants.get(id)

    def getParticipants(self):
        """Return the participants as a new list, in insertion order."""
        return [*self.participants.values()]

    def participantCount(self):
        """Return how many participants are registered."""
        return len(self.participants)

    # --- messages -------------------------------------------------------

    def addMessage(self, id, sender_id, sender_name, timestamp, text):
        """Build a Message from the arguments and append it to the history."""
        message = Message(id, sender_id, sender_name, timestamp, text)
        self.messages.append(message)

    def getMessages(self):
        """Return the list of messages held by this conversation."""
        return self.messages
def get_valid_filename(s):
    """Return *s* reduced to characters that are safe in a file name.

    The value is converted to ``str``, surrounding whitespace is trimmed,
    every remaining space becomes an underscore, and any character that is
    not a word character, a dash or a dot is dropped.
    """
    # Adapted from Django:
    # https://github.com/django/django/blob/master/django/utils/text.py#L218
    trimmed = str(s).strip()
    underscored = "_".join(trimmed.split(" "))
    unsafe_chars = re.compile(r'(?u)[^-\w.]')
    return unsafe_chars.sub('', underscored)
print("Processing Hangouts.json ..")

# JSON is UTF-8 by specification; without an explicit encoding, open() uses
# the platform locale and can fail on non-ASCII chat text.
with open('Hangouts.json', 'r', encoding='utf-8') as f:
    hangouts_dict = json.load(f)

conversations = ConversationSet()

# Pass 1: register every conversation and its participants, so that names are
# known set-wide before any message is attributed.
for hangout in hangouts_dict["conversations"]:
    if "conversation" in hangout:
        conversations.addConversationParticipants(
            hangout["conversation"]["conversation_id"]["id"],
            hangout["conversation"]["conversation"]["participant_data"]
        )

# Pass 2: fill in missing participant names and load the chat messages.
for hangout in hangouts_dict["conversations"]:
    if "conversation" in hangout:
        conversations.addConversationEvents(
            hangout["conversation"]["conversation_id"]["id"],
            hangout["conversation"]["conversation"]["participant_data"],
            hangout["events"]
        )

os.makedirs(chat_dir)

# Write one text file per conversation, named after its participants.
for c in conversations.getConversations():
    participants = [p.name for p in c.getParticipants()]
    if not participants:
        f_name = "-"
    elif len(participants) == 1:
        f_name = participants[0]
    else:
        f_name = " and ".join(participants)

    # Several conversations can share the same participants. Keep counting
    # (_2, _3, ...) until an unused name is found, so that no earlier file is
    # overwritten.
    c_path = os.path.join(chat_dir, get_valid_filename(f_name) + ".txt")
    duplicate = 2
    while os.path.isfile(c_path):
        c_path = os.path.join(
            chat_dir,
            get_valid_filename(f_name + "_" + str(duplicate)) + ".txt",
        )
        duplicate += 1

    # The context manager closes the file even if a write fails; UTF-8 keeps
    # emoji and other non-ASCII text writable on every platform.
    with open(c_path, "w", encoding="utf-8") as c_file:
        for m in c.getMessages():
            c_file.write(m.display() + "\n")

print("Done. Check the hangouts directory for chat output files.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.