Skip to content

Instantly share code, notes, and snippets.

@purneshwar
Forked from naptar/hangouts.py
Created April 28, 2020 23:28
Show Gist options
  • Save purneshwar/f53bd07beb23bc06a47579e473ef91fe to your computer and use it in GitHub Desktop.
Save purneshwar/f53bd07beb23bc06a47579e473ef91fe to your computer and use it in GitHub Desktop.
Convert/Parse Google Takeout/Export Data Hangouts/Chat into individual conversations
# Splits a Google Takeout / Data Export "Hangouts.json" file into one text file
# per conversation, written to a new "hangouts" directory.
# Usage: run from the directory that contains Hangouts.json:
#     python3 hangouts.py
import json
import datetime
import os
import shutil
import re
# Directory (relative to the current working directory) that receives one
# text file per conversation.
chat_dir = "hangouts"

# Never silently mix new output with the results of a previous run: ask the
# user whether the existing directory may be deleted, otherwise stop.
if os.path.exists(chat_dir):
    print("Hangouts directory already exists. Should I remove it before proceeding? y/n")
    # strip() so an answer such as "y " is still accepted.
    if input().strip().lower() == "y":
        print("Removing the hangouts directory.")
        shutil.rmtree(chat_dir)
    else:
        print("Exiting.")
        # quit() is injected by the `site` module for interactive use and is
        # not guaranteed to exist; raising SystemExit always works.
        raise SystemExit
class User:
    """A chat participant: an identifier paired with a display name."""

    def __init__(self, id, name):
        # Both values are stored exactly as given; no validation is applied.
        self.id, self.name = id, name

    def getId(self):
        """Return the participant's identifier."""
        return self.id

    def getName(self):
        """Return the participant's current display name."""
        return self.name

    def setName(self, name):
        """Replace the participant's display name with *name*."""
        self.name = name
class Message:
    """A single chat message with its sender and a local-time timestamp."""

    def __init__(self, id, sender_id, sender_name, timestamp, text,
                 utc_offset_hours=2):
        """Build a message from raw export values.

        Args:
            id: Identifier of the event the message came from.
            sender_id: Identifier of the sending participant.
            sender_name: Display name of the sending participant.
            timestamp: Microseconds since the Unix epoch (1970-01-01 UTC),
                as an int or a decimal string.
            text: Message body.
            utc_offset_hours: Hours added to the UTC time to obtain the
                displayed local time. Defaults to 2 (GMT+2), the offset the
                script has always used; fractional values are accepted.

        Raises:
            ValueError: If *timestamp* cannot be converted with ``int()``.
        """
        self.id = id
        self.sender_id = sender_id
        self.sender_name = sender_name
        # Epoch microseconds -> naive UTC datetime -> shifted to local time.
        epoch = datetime.datetime(1970, 1, 1)
        self.timestamp = (
            epoch
            + datetime.timedelta(microseconds=int(timestamp))
            + datetime.timedelta(hours=utc_offset_hours)
        )
        self.text = text

    def display(self):
        """Return the message as ``[YYYY-MM-DD HH:MM:SS] sender: text``."""
        return "[" + self.timestamp.strftime('%Y-%m-%d %H:%M:%S') + "] " + self.sender_name + ": " + self.text
class ConversationSet:
    """All conversations of an export, keyed by conversation id.

    Intended use is two passes over the export: first
    ``addConversationParticipants`` for every conversation, then
    ``addConversationEvents``. The second pass can then borrow a
    participant's name from any other conversation that knows it.
    """

    def __init__(self):
        # Maps conversation id -> Conversation.
        self.conversations = dict()

    def getParticipantNameById(self, id):
        """Return the first non-empty name known for participant *id*.

        Every conversation is searched. Participants registered without a
        name carry an empty string, so empty names are skipped instead of
        ending the search. Returns ``"Unknown"`` when no name is found.
        """
        for conversation in self.conversations.values():
            participant = conversation.getParticipantById(id)
            if participant is not None and participant.name:
                return participant.name
        return "Unknown"

    def addConversationParticipants(self, id, json_participant_data):
        """Create conversation *id* and register its participants.

        Each entry of *json_participant_data* must provide
        ``["id"]["gaia_id"]``; ``"fallback_name"`` is optional and an empty
        name is stored when it is absent. An existing conversation with the
        same id is replaced.
        """
        conversation = Conversation(id)
        self.conversations[id] = conversation
        for participant in json_participant_data:
            conversation.addParticipant(
                participant["id"]["gaia_id"],
                participant.get("fallback_name", ""),
            )

    def addConversationEvents(self, id, json_participant_data, json_event_data):
        """Resolve participant names and add the messages of conversation *id*.

        Only events whose ``"event_type"`` is ``"REGULAR_CHAT_MESSAGE"`` are
        stored; the text of a message is the concatenation of its segments'
        ``"text"`` values.

        Raises:
            KeyError: If *id* was never registered through
                ``addConversationParticipants``.
        """
        conversation = self.conversations[id]

        for participant in json_participant_data:
            p_id = participant["id"]["gaia_id"]
            # A missing or empty name is looked up across all conversations.
            name = participant.get("fallback_name") or self.getParticipantNameById(p_id)
            conversation.setParticipantName(p_id, name)

        for event in json_event_data:
            if event["event_type"] != "REGULAR_CHAT_MESSAGE":
                continue
            content = event["chat_message"]["message_content"]
            # Some messages have no segments, and a segment is not required
            # to carry "text"; both contribute nothing to the message body.
            text = "".join(
                segment.get("text", "") for segment in content.get("segment", [])
            )
            sender_id = event["sender_id"]["gaia_id"]
            sender = conversation.getParticipantById(sender_id)
            if sender is not None and sender.name:
                sender_name = sender.name
            else:
                # The sender is not listed in this conversation's participant
                # data (or has no name): fall back to the global lookup
                # instead of failing on ``None.name``.
                sender_name = self.getParticipantNameById(sender_id)
            conversation.addMessage(
                event["event_id"],
                sender_id,
                sender_name,
                event["timestamp"],
                text,
            )

    def getConversations(self):
        """Return the stored conversations as a new list, in insertion order."""
        return list(self.conversations.values())

    def getConversationById(self, id):
        """Return the conversation stored under *id*, or ``None``."""
        return self.conversations.get(id)
class Conversation:
    """One chat thread: its id, its participants and its messages in order."""

    def __init__(self, id):
        self.messages = []
        # Maps participant id -> User.
        self.participants = {}
        self.id = id

    def getId(self):
        """Return the conversation id."""
        return self.id

    def participantCount(self):
        """Return how many participants are registered."""
        return len(self.participants)

    def getParticipants(self):
        """Return the registered participants as a new list."""
        return [*self.participants.values()]

    def getParticipantById(self, id):
        """Return the participant registered under *id*, or ``None``."""
        return self.participants.get(id)

    def addParticipant(self, id, name):
        """Register a participant; an id that is already present is left as is."""
        if id in self.participants:
            return
        self.participants[id] = User(id, name)

    def setParticipantName(self, id, name):
        """Rename participant *id*; unknown ids are ignored."""
        member = self.participants.get(id)
        if member is not None:
            member.setName(name)

    def getMessages(self):
        """Return the list of messages (the internal list, not a copy)."""
        return self.messages

    def addMessage(self, id, sender_id, sender_name, timestamp, text):
        """Append a new Message built from the given values."""
        entry = Message(id, sender_id, sender_name, timestamp, text)
        self.messages.append(entry)
def get_valid_filename(s):
    """Turn *s* into a string that is safe to use as a file name.

    The value is converted to ``str``, surrounding whitespace is removed,
    inner spaces become underscores, and every character that is not a word
    character, a dash or a dot is dropped.
    """
    # https://github.com/django/django/blob/master/django/utils/text.py#L218
    trimmed = str(s).strip()
    underscored = trimmed.replace(' ', '_')
    disallowed = re.compile(r'(?u)[^-\w.]')
    return disallowed.sub('', underscored)
print("Processing Hangouts.json ..")
# Read with an explicit encoding so the result does not depend on the
# platform's default (JSON interchange text is UTF-8).
with open('Hangouts.json', 'r', encoding='utf-8') as f:
    hangouts_dict = json.load(f)

conversations = ConversationSet()

# Pass 1: register every conversation with its participants, so that names
# can be resolved across conversations during pass 2.
for hangout in hangouts_dict["conversations"]:
    if "conversation" in hangout:
        conversations.addConversationParticipants(
            hangout["conversation"]["conversation_id"]["id"],
            hangout["conversation"]["conversation"]["participant_data"]
        )

# Pass 2: fill in participant names and attach the chat messages.
for hangout in hangouts_dict["conversations"]:
    if "conversation" in hangout:
        conversations.addConversationEvents(
            hangout["conversation"]["conversation_id"]["id"],
            hangout["conversation"]["conversation"]["participant_data"],
            # A conversation without any events is treated as empty.
            hangout.get("events", [])
        )

os.makedirs(chat_dir)
for c in conversations.getConversations():
    participants = [p.name for p in c.getParticipants()]
    # File name: all names joined, the single name, or "-" with nobody.
    if len(participants) > 1:
        f_name = " and ".join(participants)
    elif participants:
        f_name = participants[0]
    else:
        f_name = "-"

    # Several conversations can share the same participants. Keep counting
    # (_2, _3, ...) until the name is free so no earlier file is overwritten.
    base_name = get_valid_filename(f_name)
    c_path = os.path.join(chat_dir, base_name + ".txt")
    suffix = 2
    while os.path.isfile(c_path):
        c_path = os.path.join(chat_dir, base_name + "_" + str(suffix) + ".txt")
        suffix += 1

    # The with-block closes the file even if writing a message fails.
    with open(c_path, "w", encoding="utf-8") as c_file:
        for m in c.getMessages():
            c_file.write(m.display() + "\n")

print("Done. Check the hangouts directory for chat output files.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment