Skip to content

Instantly share code, notes, and snippets.

@lsloan
Created April 19, 2019 15:51
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lsloan/4b0d82b3f8a0388b89ea443ebe0b743b to your computer and use it in GitHub Desktop.
Save lsloan/4b0d82b3f8a0388b89ea443ebe0b743b to your computer and use it in GitHub Desktop.
Parse hangouts.json from Google Takeout, via https://paste.jay2k1.com/view/5fcebdfe
<?php
/*
(in this version, I added support for more message types and offer both plaintext and HTML message format)
This is a function that transforms the JSON you get from Google Takeout when you export your Hangouts history
into a PHP array which can be used to further manipulate the data.
A use case is my hangouts parser at http://hangoutparser.jay2k1.com/ -- a description can be seen at
http://blog.jay2k1.com/2014/11/10/how-to-export-and-backup-your-google-hangouts-chat-history/
You feed the function with the JSON, and in return you get a nice array holding all the conversations.
As a parameter, it expects JSON text (you can get this for example by using file_get_contents('yourfile.json'))
It returns an array in this format:
$array[0..N] array of conversations
$array[0..N][name] conversation name. afaik only group chats can have one
$array[0..N][type] conversation type. can be either STICKY_ONE_TO_ONE or GROUP
$array[0..N][msgcount] message count for that conversation
$array[0..N][members] array of conversation members where key = sender ID and value = sender name
$array[0..N][messages] array of messages
$array[0..N][messages][0..N] array with message details
$array[0..N][messages][0..N][timestamp] timestamp of message in unixtime (actually, unixtime plus six more digits)
$array[0..N][messages][0..N][datetime] timestamp of message in YYYY-MM-DD HH:MM:SS format
$array[0..N][messages][0..N][sender_id] google's chat ID of the message's sender
$array[0..N][messages][0..N][sender] name of the message's sender (the "from")
$array[0..N][messages][0..N][event_type] type of message/event. can be RENAME_CONVERSATION, HANGOUT_EVENT, REGULAR_CHAT_MESSAGE, ADD_USER, REMOVE_USER, SMS, OTR_MODIFICATION, VOICEMAIL and maybe more...
$array[0..N][messages][0..N][message] the actual message text
$array[0..N][messages][0..N][message_html] HTML version of message text, if applicable (links are clickable, images are embedded etc)
So you could call it like this: $my_conversations = hangoutsToArray(file_get_contents('/tmp/hangouts.json'));
*/
/**
 * Replaces the graphical emoji that Hangouts substitutes for typed emoticons
 * by their ASCII equivalents.
 *
 * List of emoji codes taken from https://aprescott.com/posts/hangouts-emoji
 *
 * @param string $string text that may contain emoji (expected to be UTF-8)
 * @return string the text with every known emoji replaced; if the PCRE engine
 *                rejects the input (e.g. it is not valid UTF-8) the input is
 *                returned unchanged instead of NULL
 */
function replaceSmileys($string) {
    // Unicode code point (hex) => ASCII emoticon. Keeping both in one map
    // guarantees that patterns and replacements can never get out of step.
    $emoticons = array(
        '1F41D' => '-<@%',   // honeybee
        '1F435' => ':(|)',   // monkey face
        '1F437' => ':(:)',   // pig face
        '1F473' => '(]:{',   // man with turban
        '1F494' => '</3',    // broken heart
        '1F49C' => '<3',     // purple heart
        '1F4A9' => '~@~',    // pile of poo
        '1F600' => ':D',     // grinning face
        '1F601' => '^_^',    // grinning face with smiling eyes
        '1F602' => 'XD',     // XD
        '1F603' => ':)',     // smiling face with open mouth
        '1F604' => '=D',     // smiling face with open mouth and smiling eyes
        '1F605' => '^_^;;',  // smiling face with open mouth and cold sweat
        '1F607' => 'O:)',    // smiling face with halo
        '1F608' => '}:)',    // smiling face with horns
        '1F609' => ';)',     // winking face
        '1F60E' => 'B-)',    // smiling face with sunglasses
        '1F610' => ':|',     // neutral face
        '1F611' => '-_-',    // expressionless face
        '1F613' => 'o_o;',   // face with cold sweat
        '1F614' => 'u_u',    // pensive face
        '1F615' => ':/',     // confused face
        '1F616' => ':S',     // confounded face
        '1F617' => ':*',     // kissing face
        '1F618' => ';*',     // face throwing a kiss
        '1F61B' => ':P',     // face with stuck-out tongue
        '1F61C' => ';P',     // face with stuck-out tongue and winking eye
        '1F61E' => ':(',     // disappointed face
        '1F621' => '>.<',    // pouting face
        '1F622' => ":'(",    // crying face
        '1F623' => '>_<',    // persevering face
        '1F626' => 'D:',     // frowning face with open mouth
        '1F62E' => ':o',     // face with open mouth
        '1F632' => ':O',     // astonished face
        '1F634' => '-_-Zzz', // sleeping face
        '1F635' => 'x_x',    // dizzy face
        '1F638' => ':3',     // grinning cat face with smiling eyes
    );

    $patterns = array();
    foreach (array_keys($emoticons) as $codepoint) {
        $patterns[] = '/\x{'.$codepoint.'}/u';
    }

    $result = preg_replace($patterns, array_values($emoticons), $string);

    // preg_replace() yields NULL on a PCRE error (with the /u modifier that
    // includes malformed UTF-8); keep the original text rather than lose it.
    return $result === null ? $string : $result;
}
/**
 * Transforms the JSON text of a Google Takeout Hangouts export into an array
 * of conversations.
 *
 * Every conversation holds the keys 'type', 'msgcount', 'name', 'members'
 * (chat ID => display name) and 'messages' (sorted by ascending timestamp).
 * Every message holds 'timestamp', 'datetime', 'sender_id', 'sender',
 * 'event_type' and 'message'; regular chat messages additionally get
 * 'message_html' when their HTML rendering differs from the plain text.
 *
 * Text placed into 'message_html' is HTML-escaped, because chat content is
 * untrusted input. NOTE(review): link targets are escaped but their URL scheme
 * is not validated; filter schemes such as "javascript:" before rendering if
 * that matters for your use case.
 *
 * @param string $json JSON text, e.g. file_get_contents('hangouts.json')
 * @return array list of conversations; an empty array when the JSON cannot be
 *               decoded or contains no 'conversation_state' list
 */
function hangoutsToArray($json) {
    // set the desired timestamp format here
    // the default is 'Y-m-d H:i:s' which is YYYY-MM-DD HH:mm:ss.
    $timestamp_format = 'Y-m-d H:i:s';
    ////////////////////////////////////////////////////////////

    // decode JSON and extract the useful part; bail out on unusable input
    // instead of calling sizeof() on null
    $decoded = json_decode($json, true);
    if (!is_array($decoded) || !isset($decoded['conversation_state']) || !is_array($decoded['conversation_state'])) {
        return array();
    }
    $rawconvos = array_values($decoded['conversation_state']);

    // escapes text for use in HTML element content and quoted attributes
    $escape = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
    };

    // concatenates the 'text' of all message segments of an event (SMS, VOICEMAIL)
    $joinSegments = function ($event) {
        $text = '';
        if (isset($event['chat_message']['message_content']['segment'])) {
            foreach ($event['chat_message']['message_content']['segment'] as $segment) {
                if (isset($segment['text'])) {
                    $text .= $segment['text'];
                }
            }
        }
        return $text;
    };

    $return = array();
    // loop through conversations
    foreach ($rawconvos as $i => $rawconvo) {
        // first, get metadata
        $in_conv = $rawconvo['conversation_state']['conversation'];
        $in_event = (isset($rawconvo['conversation_state']['event']) && is_array($rawconvo['conversation_state']['event']))
            ? array_values($rawconvo['conversation_state']['event'])
            : array();
        $pdata = (isset($in_conv['participant_data']) && is_array($in_conv['participant_data']))
            ? $in_conv['participant_data']
            : array();

        // conversation participants: chat ID => display name
        $members = array();
        foreach ($pdata as $participant) {
            $id = $participant['id']['chat_id'];
            // use "unknown_<chat_id>" as name if they don't have a fallback_name
            $members[$id] = isset($participant['fallback_name']) ? $participant['fallback_name'] : 'unknown_'.$id;
        }

        // loop through messages/events
        $messages = array();
        foreach ($in_event as $event) {
            $senderId = $event['sender_id']['chat_id'];
            $message = array(
                'timestamp' => $event['timestamp'],
                // the timestamp is in microseconds; its first 10 digits are the unix time in seconds
                'datetime' => date($timestamp_format, (int) substr($event['timestamp'], 0, 10)),
                'sender_id' => $senderId,
                // fall back to the sender's own ID (not a leftover participant ID) when the sender is no member
                'sender' => isset($members[$senderId]) ? $members[$senderId] : 'unknown_'.$senderId,
                'event_type' => $event['event_type'],
            );

            switch ($message['event_type']) {
                case 'RENAME_CONVERSATION':
                    $newname = $event['conversation_rename']['new_name'];
                    $oldname = $event['conversation_rename']['old_name'];
                    $message['message'] = 'changed conversation name '.($oldname != '' ? 'from \''.$oldname.'\' ' : '').'to \''.$newname.'\'';
                    break;

                case 'HANGOUT_EVENT':
                    switch ($event['hangout_event']['event_type']) {
                        case 'START_HANGOUT':
                            $message['message'] = 'started a video chat';
                            break;
                        case 'END_HANGOUT':
                            $message['message'] = 'ended a video chat';
                            break;
                        default:
                            $message['message'] = $event['hangout_event']['event_type'];
                    }
                    break;

                case 'REGULAR_CHAT_MESSAGE':
                    $msg = '';
                    $msghtml = '';
                    $content = isset($event['chat_message']['message_content']) ? $event['chat_message']['message_content'] : array();
                    if (isset($content['segment'])) {
                        // join message segments together
                        foreach ($content['segment'] as $segment) {
                            if (isset($segment['text'])) {
                                // Replace emoji before escaping so that emoticons such as "<3" get escaped too.
                                $htmltext = $escape(replaceSmileys($segment['text']));
                                if ($segment['type'] == 'TEXT' || $segment['type'] == 'LINE_BREAK') {
                                    $msg .= $segment['text'];
                                    $msghtml .= str_replace("\n", '<br>', $htmltext);
                                } else if ($segment['type'] == 'LINK') {
                                    $target = isset($segment['link_data']['link_target']) ? $segment['link_data']['link_target'] : $segment['text'];
                                    $msg .= $segment['text'];
                                    $msghtml .= '<a href="'.$escape($target).'" target="_blank">'.$htmltext.'</a>';
                                }
                            }
                        }
                    } else if (isset($content['attachment'])) {
                        // handle attachments (only looked at when the message has no text segments)
                        foreach ($content['attachment'] as $att) {
                            if (isset($att['embed_item']['type'][0]) && $att['embed_item']['type'][0] == 'PLUS_PHOTO') {
                                $imgurl = $att['embed_item']['embeds.PlusPhoto.plus_photo']['url'];
                                $msg .= $imgurl;
                                $msghtml .= '<a href="'.$escape($imgurl).'" target="_blank"><img src="'.$escape($imgurl).'" alt="attached image" style="max-width:100%"></a>';
                            }
                        }
                    }
                    // replace unicode emoticon characters by smileys
                    $message['message'] = replaceSmileys($msg);
                    // only provide an HTML version when it actually differs from the plain text
                    if ($message['message'] !== $msghtml) {
                        $message['message_html'] = $msghtml;
                    }
                    break;

                case 'ADD_USER':
                    // only the first participant of the membership change is reported
                    $newuserid = $event['membership_change']['participant_id'][0]['chat_id'];
                    $newusername = isset($members[$newuserid]) ? $members[$newuserid] : 'unknown_'.$newuserid;
                    $message['message'] = 'added user \''.$newusername.'\' to conversation';
                    break;

                case 'REMOVE_USER':
                    // only the first participant of the membership change is reported
                    $newuserid = $event['membership_change']['participant_id'][0]['chat_id'];
                    $newusername = isset($members[$newuserid]) ? $members[$newuserid] : 'unknown_'.$newuserid;
                    $message['message'] = 'removed user \''.$newusername.'\' from conversation';
                    break;

                case 'SMS':
                    $message['message'] = replaceSmileys($joinSegments($event));
                    break;

                case 'OTR_MODIFICATION':
                    $message['message'] = 'unknown OTR_MODIFICATION';
                    break;

                case 'VOICEMAIL':
                    $message['message'] = replaceSmileys("new voicemail:\n".$joinSegments($event));
                    break;

                default:
                    // unhandled event type: still provide the documented 'message' key
                    $message['message'] = '';
            }

            $messages[] = $message;
        }

        // sort messages by timestamp because the export does not list them in order;
        // compare instead of subtracting so the 16-digit values cannot overflow the int result
        usort($messages, function ($a, $b) {
            if ($a['timestamp'] == $b['timestamp']) {
                return 0;
            }
            return ($a['timestamp'] < $b['timestamp']) ? -1 : 1;
        });

        $return[$i] = array(
            'type' => $in_conv['type'],
            'msgcount' => sizeof($in_event),
            'name' => isset($in_conv['name']) ? $in_conv['name'] : '',
            'members' => $members,
            'messages' => $messages,
        );
    }
    return $return;
}
?>
<?php
/*
(in this version, I added support for more message types and offer both plaintext and HTML message format)
This is a function that transforms the JSON you get from Google Takeout when you export your Hangouts history
into a PHP array which can be used to further manipulate the data.
A use case is my hangouts parser at http://hangoutparser.jay2k1.com/ -- a description can be seen at
http://blog.jay2k1.com/2014/11/10/how-to-export-and-backup-your-google-hangouts-chat-history/
You feed the function with the JSON, and in return you get a nice array holding all the conversations.
As a parameter, it expects JSON text (you can get this for example by using file_get_contents('yourfile.json'))
It returns an array in this format:
$array[0..N] array of conversations
$array[0..N][name] conversation name. afaik only group chats can have one
$array[0..N][type] conversation type. can be either STICKY_ONE_TO_ONE or GROUP
$array[0..N][msgcount] message count for that conversation
$array[0..N][members] array of conversation members where key = sender ID and value = sender name
$array[0..N][messages] array of messages
$array[0..N][messages][0..N] array with message details
$array[0..N][messages][0..N][timestamp] timestamp of message in unixtime (actually, unixtime plus six more digits)
$array[0..N][messages][0..N][datetime] timestamp of message in YYYY-MM-DD HH:MM:SS format
$array[0..N][messages][0..N][sender_id] google's chat ID of the message's sender
$array[0..N][messages][0..N][sender] name of the message's sender (the "from")
$array[0..N][messages][0..N][event_type] type of message/event. can be RENAME_CONVERSATION, HANGOUT_EVENT, REGULAR_CHAT_MESSAGE, ADD_USER, REMOVE_USER, SMS, OTR_MODIFICATION, VOICEMAIL and maybe more...
$array[0..N][messages][0..N][message] the actual message text
$array[0..N][messages][0..N][message_html] HTML version of message text, if applicable (links are clickable, images are embedded etc)
So you could call it like this: $my_conversations = hangoutsToArray(file_get_contents('/tmp/hangouts.json'));
*/
/**
 * Replaces the graphical emoji that Hangouts substitutes for typed emoticons
 * by their ASCII equivalents.
 *
 * List of emoji codes taken from https://aprescott.com/posts/hangouts-emoji
 *
 * @param string $string text that may contain emoji (expected to be UTF-8)
 * @return string the text with every known emoji replaced; if the PCRE engine
 *                rejects the input (e.g. it is not valid UTF-8) the input is
 *                returned unchanged instead of NULL
 */
function replaceSmileys($string) {
    // Unicode code point (hex) => ASCII emoticon. Keeping both in one map
    // guarantees that patterns and replacements can never get out of step.
    $emoticons = array(
        '1F41D' => '-<@%',   // honeybee
        '1F435' => ':(|)',   // monkey face
        '1F437' => ':(:)',   // pig face
        '1F473' => '(]:{',   // man with turban
        '1F494' => '</3',    // broken heart
        '1F49C' => '<3',     // purple heart
        '1F4A9' => '~@~',    // pile of poo
        '1F600' => ':D',     // grinning face
        '1F601' => '^_^',    // grinning face with smiling eyes
        '1F602' => 'XD',     // XD
        '1F603' => ':)',     // smiling face with open mouth
        '1F604' => '=D',     // smiling face with open mouth and smiling eyes
        '1F605' => '^_^;;',  // smiling face with open mouth and cold sweat
        '1F607' => 'O:)',    // smiling face with halo
        '1F608' => '}:)',    // smiling face with horns
        '1F609' => ';)',     // winking face
        '1F60E' => 'B-)',    // smiling face with sunglasses
        '1F610' => ':|',     // neutral face
        '1F611' => '-_-',    // expressionless face
        '1F613' => 'o_o;',   // face with cold sweat
        '1F614' => 'u_u',    // pensive face
        '1F615' => ':/',     // confused face
        '1F616' => ':S',     // confounded face
        '1F617' => ':*',     // kissing face
        '1F618' => ';*',     // face throwing a kiss
        '1F61B' => ':P',     // face with stuck-out tongue
        '1F61C' => ';P',     // face with stuck-out tongue and winking eye
        '1F61E' => ':(',     // disappointed face
        '1F621' => '>.<',    // pouting face
        '1F622' => ":'(",    // crying face
        '1F623' => '>_<',    // persevering face
        '1F626' => 'D:',     // frowning face with open mouth
        '1F62E' => ':o',     // face with open mouth
        '1F632' => ':O',     // astonished face
        '1F634' => '-_-Zzz', // sleeping face
        '1F635' => 'x_x',    // dizzy face
        '1F638' => ':3',     // grinning cat face with smiling eyes
    );

    $patterns = array();
    foreach (array_keys($emoticons) as $codepoint) {
        $patterns[] = '/\x{'.$codepoint.'}/u';
    }

    $result = preg_replace($patterns, array_values($emoticons), $string);

    // preg_replace() yields NULL on a PCRE error (with the /u modifier that
    // includes malformed UTF-8); keep the original text rather than lose it.
    return $result === null ? $string : $result;
}
/**
 * Transforms the JSON text of a Google Takeout Hangouts export into an array
 * of conversations.
 *
 * Every conversation holds the keys 'type', 'msgcount', 'name', 'members'
 * (chat ID => display name) and 'messages' (sorted by ascending timestamp).
 * Every message holds 'timestamp', 'datetime', 'sender_id', 'sender',
 * 'event_type' and 'message'; regular chat messages additionally get
 * 'message_html' when their HTML rendering differs from the plain text.
 *
 * Text placed into 'message_html' is HTML-escaped, because chat content is
 * untrusted input. NOTE(review): link targets are escaped but their URL scheme
 * is not validated; filter schemes such as "javascript:" before rendering if
 * that matters for your use case.
 *
 * @param string $json JSON text, e.g. file_get_contents('hangouts.json')
 * @return array list of conversations; an empty array when the JSON cannot be
 *               decoded or contains no 'conversation_state' list
 */
function hangoutsToArray($json) {
    // set the desired timestamp format here
    // the default is 'Y-m-d H:i:s' which is YYYY-MM-DD HH:mm:ss.
    $timestamp_format = 'Y-m-d H:i:s';
    ////////////////////////////////////////////////////////////

    // decode JSON and extract the useful part; bail out on unusable input
    // instead of calling sizeof() on null
    $decoded = json_decode($json, true);
    if (!is_array($decoded) || !isset($decoded['conversation_state']) || !is_array($decoded['conversation_state'])) {
        return array();
    }
    $rawconvos = array_values($decoded['conversation_state']);

    // escapes text for use in HTML element content and quoted attributes
    $escape = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
    };

    // concatenates the 'text' of all message segments of an event (SMS, VOICEMAIL)
    $joinSegments = function ($event) {
        $text = '';
        if (isset($event['chat_message']['message_content']['segment'])) {
            foreach ($event['chat_message']['message_content']['segment'] as $segment) {
                if (isset($segment['text'])) {
                    $text .= $segment['text'];
                }
            }
        }
        return $text;
    };

    $return = array();
    // loop through conversations
    foreach ($rawconvos as $i => $rawconvo) {
        // first, get metadata
        $in_conv = $rawconvo['conversation_state']['conversation'];
        $in_event = (isset($rawconvo['conversation_state']['event']) && is_array($rawconvo['conversation_state']['event']))
            ? array_values($rawconvo['conversation_state']['event'])
            : array();
        $pdata = (isset($in_conv['participant_data']) && is_array($in_conv['participant_data']))
            ? $in_conv['participant_data']
            : array();

        // conversation participants: chat ID => display name
        $members = array();
        foreach ($pdata as $participant) {
            $id = $participant['id']['chat_id'];
            // use "unknown_<chat_id>" as name if they don't have a fallback_name
            $members[$id] = isset($participant['fallback_name']) ? $participant['fallback_name'] : 'unknown_'.$id;
        }

        // loop through messages/events
        $messages = array();
        foreach ($in_event as $event) {
            $senderId = $event['sender_id']['chat_id'];
            $message = array(
                'timestamp' => $event['timestamp'],
                // the timestamp is in microseconds; its first 10 digits are the unix time in seconds
                'datetime' => date($timestamp_format, (int) substr($event['timestamp'], 0, 10)),
                'sender_id' => $senderId,
                // fall back to the sender's own ID (not a leftover participant ID) when the sender is no member
                'sender' => isset($members[$senderId]) ? $members[$senderId] : 'unknown_'.$senderId,
                'event_type' => $event['event_type'],
            );

            switch ($message['event_type']) {
                case 'RENAME_CONVERSATION':
                    $newname = $event['conversation_rename']['new_name'];
                    $oldname = $event['conversation_rename']['old_name'];
                    $message['message'] = 'changed conversation name '.($oldname != '' ? 'from \''.$oldname.'\' ' : '').'to \''.$newname.'\'';
                    break;

                case 'HANGOUT_EVENT':
                    switch ($event['hangout_event']['event_type']) {
                        case 'START_HANGOUT':
                            $message['message'] = 'started a video chat';
                            break;
                        case 'END_HANGOUT':
                            $message['message'] = 'ended a video chat';
                            break;
                        default:
                            $message['message'] = $event['hangout_event']['event_type'];
                    }
                    break;

                case 'REGULAR_CHAT_MESSAGE':
                    $msg = '';
                    $msghtml = '';
                    $content = isset($event['chat_message']['message_content']) ? $event['chat_message']['message_content'] : array();
                    if (isset($content['segment'])) {
                        // join message segments together
                        foreach ($content['segment'] as $segment) {
                            if (isset($segment['text'])) {
                                // Replace emoji before escaping so that emoticons such as "<3" get escaped too.
                                $htmltext = $escape(replaceSmileys($segment['text']));
                                if ($segment['type'] == 'TEXT' || $segment['type'] == 'LINE_BREAK') {
                                    $msg .= $segment['text'];
                                    $msghtml .= str_replace("\n", '<br>', $htmltext);
                                } else if ($segment['type'] == 'LINK') {
                                    $target = isset($segment['link_data']['link_target']) ? $segment['link_data']['link_target'] : $segment['text'];
                                    $msg .= $segment['text'];
                                    $msghtml .= '<a href="'.$escape($target).'" target="_blank">'.$htmltext.'</a>';
                                }
                            }
                        }
                    } else if (isset($content['attachment'])) {
                        // handle attachments (only looked at when the message has no text segments)
                        foreach ($content['attachment'] as $att) {
                            if (isset($att['embed_item']['type'][0]) && $att['embed_item']['type'][0] == 'PLUS_PHOTO') {
                                $imgurl = $att['embed_item']['embeds.PlusPhoto.plus_photo']['url'];
                                $msg .= $imgurl;
                                $msghtml .= '<a href="'.$escape($imgurl).'" target="_blank"><img src="'.$escape($imgurl).'" alt="attached image" style="max-width:100%"></a>';
                            }
                        }
                    }
                    // replace unicode emoticon characters by smileys
                    $message['message'] = replaceSmileys($msg);
                    // only provide an HTML version when it actually differs from the plain text
                    if ($message['message'] !== $msghtml) {
                        $message['message_html'] = $msghtml;
                    }
                    break;

                case 'ADD_USER':
                    // only the first participant of the membership change is reported
                    $newuserid = $event['membership_change']['participant_id'][0]['chat_id'];
                    $newusername = isset($members[$newuserid]) ? $members[$newuserid] : 'unknown_'.$newuserid;
                    $message['message'] = 'added user \''.$newusername.'\' to conversation';
                    break;

                case 'REMOVE_USER':
                    // only the first participant of the membership change is reported
                    $newuserid = $event['membership_change']['participant_id'][0]['chat_id'];
                    $newusername = isset($members[$newuserid]) ? $members[$newuserid] : 'unknown_'.$newuserid;
                    $message['message'] = 'removed user \''.$newusername.'\' from conversation';
                    break;

                case 'SMS':
                    $message['message'] = replaceSmileys($joinSegments($event));
                    break;

                case 'OTR_MODIFICATION':
                    $message['message'] = 'unknown OTR_MODIFICATION';
                    break;

                case 'VOICEMAIL':
                    $message['message'] = replaceSmileys("new voicemail:\n".$joinSegments($event));
                    break;

                default:
                    // unhandled event type: still provide the documented 'message' key
                    $message['message'] = '';
            }

            $messages[] = $message;
        }

        // sort messages by timestamp because the export does not list them in order;
        // compare instead of subtracting so the 16-digit values cannot overflow the int result
        usort($messages, function ($a, $b) {
            if ($a['timestamp'] == $b['timestamp']) {
                return 0;
            }
            return ($a['timestamp'] < $b['timestamp']) ? -1 : 1;
        });

        $return[$i] = array(
            'type' => $in_conv['type'],
            'msgcount' => sizeof($in_event),
            'name' => isset($in_conv['name']) ? $in_conv['name'] : '',
            'members' => $members,
            'messages' => $messages,
        );
    }
    return $return;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment