Skip to content

Instantly share code, notes, and snippets.

@naptar
Last active January 12, 2022 00:54
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save naptar/7c67bd755632f7a0e5a0990424875d5b to your computer and use it in GitHub Desktop.
Save naptar/7c67bd755632f7a0e5a0990424875d5b to your computer and use it in GitHub Desktop.
Convert/Parse Google Takeout/Export Data Hangouts/Chat into individual conversations
# Run this in the same directory as the Hangouts.json file generated by Google Takeout / Data Export tool.
# python3 hangouts.py
import json
import datetime
import os
import shutil
import re
# Name of the output directory (relative to the current working directory)
# that receives one text file per conversation.
chat_dir = "hangouts"

# Never silently clobber a previous export: ask before deleting it.
if os.path.exists(chat_dir):
    print("Hangouts directory already exists. Should I remove it before proceeding? y/n")
    # strip() so that an answer such as "y " or " Y" is still accepted.
    if input().strip().lower() == "y":
        print("Removing the hangouts directory.")
        if os.path.isdir(chat_dir):
            shutil.rmtree(chat_dir)
        else:
            # A plain file with the same name would make rmtree() fail.
            os.remove(chat_dir)
    else:
        print("Exiting.")
        # quit() is a helper installed by the site module for interactive
        # sessions; raising SystemExit works in every execution mode.
        raise SystemExit
class User:
    """A chat participant, identified by an id and carrying a display name."""

    def __init__(self, id, name):
        self.id, self.name = id, name

    def getId(self):
        """Return the participant id given at construction."""
        return self.id

    def getName(self):
        """Return the current display name."""
        return self.name

    def setName(self, name):
        """Replace the display name."""
        self.name = name
class Message:
    """A single chat message with a display-ready timestamp.

    Args:
        id: Identifier of the message/event.
        sender_id: Identifier of the sending participant.
        sender_name: Display name of the sender.
        timestamp: Integer (or numeric string) interpreted as microseconds
            elapsed since 1970-01-01 00:00:00.
        text: Message body.
        utc_offset_hours: Hours added to the converted timestamp to obtain
            the displayed local time. Defaults to 2 (GMT+2), which was the
            previously hard-coded value.

    Raises:
        ValueError: If *timestamp* cannot be converted with ``int()``.
    """

    def __init__(self, id, sender_id, sender_name, timestamp, text, utc_offset_hours=2):
        self.id = id
        self.sender_id = sender_id
        self.sender_name = sender_name
        # The timestamp counts microseconds from the Unix epoch (1970-01-01),
        # not from the WebKit epoch; the offset shifts it to local time.
        # The resulting datetime is naive (carries no tzinfo).
        epoch = datetime.datetime(1970, 1, 1)
        self.timestamp = (
            epoch
            + datetime.timedelta(microseconds=int(timestamp))
            + datetime.timedelta(hours=utc_offset_hours)
        )
        self.text = text

    def display(self):
        """Return the message as ``[YYYY-MM-DD HH:MM:SS] sender: text``."""
        return "[{}] {}: {}".format(
            self.timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            self.sender_name,
            self.text,
        )
class ConversationSet:
    """Collection of conversations, stored in a dict keyed by conversation id."""

    def __init__(self):
        self.conversations = dict()

    def getParticipantNameById(self, id):
        """Return the first non-empty name known for participant *id*.

        Every stored conversation is consulted. Empty names are skipped
        because addConversationParticipants() stores "" as a placeholder
        when a participant has no "fallback_name"; returning that
        placeholder would hide a real name recorded in another conversation.

        Returns:
            The participant's name, or "Unknown" if no conversation holds a
            non-empty name for *id*.
        """
        for conversation in self.conversations.values():
            participant = conversation.getParticipantById(id)
            if participant is not None and participant.name:
                return participant.name
        return "Unknown"

    def addConversationParticipants(self, id, json_participant_data):
        """Create conversation *id* and register its participants.

        Any conversation previously stored under *id* is replaced.

        Args:
            id: Conversation id used as the dictionary key.
            json_participant_data: Iterable of dicts, each read as
                ``participant["id"]["gaia_id"]`` plus an optional
                ``"fallback_name"``.
        """
        conversation = Conversation(id)
        self.conversations[id] = conversation
        for participant in json_participant_data:
            # "" marks "name not known yet"; addConversationEvents() fills it in.
            conversation.addParticipant(
                participant["id"]["gaia_id"],
                participant.get("fallback_name", ""),
            )

    def addConversationEvents(self, id, json_participant_data, json_event_data):
        """Resolve participant names and add the chat messages of conversation *id*.

        Only events whose "event_type" is "REGULAR_CHAT_MESSAGE" are stored;
        the text of a message is the concatenation of its segments' "text".

        Args:
            id: Id of a conversation previously created with
                addConversationParticipants().
            json_participant_data: Same structure as accepted by
                addConversationParticipants().
            json_event_data: Iterable of event dicts.

        Raises:
            KeyError: If no conversation is stored under *id*.
        """
        conversation = self.conversations[id]

        for participant in json_participant_data:
            p_id = participant["id"]["gaia_id"]
            if "fallback_name" in participant:
                name = participant["fallback_name"]
            else:
                # Borrow the name from any conversation that knows it.
                name = self.getParticipantNameById(p_id)
            conversation.setParticipantName(p_id, name)

        for event in json_event_data:
            if event["event_type"] != "REGULAR_CHAT_MESSAGE":
                continue
            segments = event["chat_message"]["message_content"].get("segment", [])
            # Tolerate segments that carry no "text" key.
            text = "".join(segment.get("text", "") for segment in segments)

            sender_id = event["sender_id"]["gaia_id"]
            sender = conversation.getParticipantById(sender_id)
            if sender is not None:
                sender_name = sender.name
            else:
                # The sender is not listed in this conversation's participant
                # data; look the name up elsewhere instead of crashing.
                sender_name = self.getParticipantNameById(sender_id)

            conversation.addMessage(
                event["event_id"],
                sender_id,
                sender_name,
                event["timestamp"],
                text,
            )

    def getConversations(self):
        """Return a new list of all stored conversations, in insertion order."""
        return list(self.conversations.values())

    def getConversationById(self, id):
        """Return the conversation stored under *id*, or None if there is none."""
        # Conversations are keyed by their own id, so a direct lookup
        # replaces the former linear scan.
        return self.conversations.get(id)
class Conversation:
    """One chat thread: its id, its participants keyed by id, and its messages."""

    def __init__(self, id):
        self.id = id
        self.participants = dict()
        self.messages = []

    def getId(self):
        """Return the conversation id."""
        return self.id

    def participantCount(self):
        """Return how many participants are registered."""
        return len(self.participants)

    def getParticipants(self):
        """Return a new list holding every registered participant."""
        return [self.participants[key] for key in self.participants]

    def getParticipantById(self, id):
        """Return the participant stored under *id*, or None when absent."""
        return self.participants.get(id, None)

    def addParticipant(self, id, name):
        """Register a participant; an already registered id is left untouched."""
        if id in self.participants:
            return
        self.participants[id] = User(id, name)

    def setParticipantName(self, id, name):
        """Rename participant *id*; ids that are not registered are ignored."""
        if id not in self.participants:
            return
        self.participants[id].setName(name)

    def addMessage(self, id, sender_id, sender_name, timestamp, text):
        """Build a Message from the arguments and append it to the thread."""
        message = Message(id, sender_id, sender_name, timestamp, text)
        self.messages.append(message)

    def getMessages(self):
        """Return the list of messages in the order they were added."""
        return self.messages
def get_valid_filename(s):
    """Return *s* reduced to characters that are safe in a file name.

    The value is converted to a string, surrounding whitespace is removed
    and spaces become underscores; afterwards every character that is not
    a word character, a dash or a dot is dropped.
    """
    # https://github.com/django/django/blob/master/django/utils/text.py#L218
    disallowed = re.compile(r'(?u)[^-\w.]')
    underscored = str(s).strip().replace(' ', '_')
    return disallowed.sub('', underscored)
print("Processing Hangouts.json ..")
# Read the export explicitly as UTF-8: with the locale's default codec
# (e.g. ASCII) json.load() fails with UnicodeDecodeError on non-ASCII text.
with open('Hangouts.json', 'r', encoding='utf-8') as f:
    hangouts_dict = json.load(f)

conversations = ConversationSet()

# Pass 1: register the participants of every conversation, so that names
# can be looked up across conversations during the second pass.
for hangout in hangouts_dict["conversations"]:
    if "conversation" in hangout:
        conversations.addConversationParticipants(
            hangout["conversation"]["conversation_id"]["id"],
            hangout["conversation"]["conversation"]["participant_data"]
        )

# Pass 2: resolve missing participant names and collect the chat messages.
for hangout in hangouts_dict["conversations"]:
    if "conversation" in hangout:
        conversations.addConversationEvents(
            hangout["conversation"]["conversation_id"]["id"],
            hangout["conversation"]["conversation"]["participant_data"],
            # Treat an entry without an "events" key as having no events.
            hangout.get("events", [])
        )

os.makedirs(chat_dir)

# Write one text file per conversation, named after its participants.
for c in conversations.getConversations():
    participants = [p.name for p in c.getParticipants()]
    f_name = " and ".join(participants) if participants else "-"

    # An empty sanitised name would produce a bare ".txt" file.
    base_name = get_valid_filename(f_name) or "-"
    file_path = os.path.join(chat_dir, base_name + ".txt")
    # Several conversations can share the same participants: keep counting
    # (_2, _3, ...) until a free name is found instead of overwriting.
    suffix = 2
    while os.path.isfile(file_path):
        file_path = os.path.join(chat_dir, "{}_{}.txt".format(base_name, suffix))
        suffix += 1

    # UTF-8 output so non-ASCII messages do not raise UnicodeEncodeError;
    # the with-block closes the file even if a write fails.
    with open(file_path, "w", encoding="utf-8") as c_file:
        c_file.writelines(m.display() + "\n" for m in c.getMessages())

print("Done. Check the hangouts directory for chat output files.")
@chopsticks321
Copy link

It is showing an error
Traceback (most recent call last):
File "/Users/Desktop/hangouts/hangouts.py", line 147, in
with open('Hangouts.json', 'r') as f:
IOError: [Errno 2] No such file or directory: 'Hangouts.json'

@chopsticks321
Copy link

can you help

@chopsticks321
Copy link

Traceback (most recent call last):
File "/Users/Downloads/Takeout-2/Hangouts/hangouts.py", line 148, in
hangouts_dict = json.load(f)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/json/init.py", line 265, in load
return loads(fp.read(),
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/encodings/ascii.py", line 26, in decode
return codecs.ascii_decode(input, self.errors)[0]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 7578: ordinal not in range(128)

having this error @fallenby

@naptar
Copy link
Author

naptar commented Oct 2, 2020

Hi there @kroy1200. I think I got this script from another place and put it here for safekeeping, though I don't think I wrote it myself. It could be that the format of the JSON file has changed, and this script no longer works, since this was uploaded in 2018.

@chopsticks321
Copy link

I don't know what I am doing wrong. A few months back it worked, and now it doesn't. Anyway, thanks for the help. Much appreciated.

@naptar
Copy link
Author

naptar commented Oct 2, 2020

Hmm, I'm not sure. Sorry that I can't be of much help - I am out of touch with regard to hangouts/takeout and that sort of thing. From your error it seems that there is an invalid data field in the JSON file. You could open it manually and then navigate to position 7578 to see what it is.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment