Created
November 24, 2012 20:06
-
-
Save mavam/4141216 to your computer and use it in GitHub Desktop.
Facebook Chat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##!
##! A Facebook analysis script.
##!
##! The script parses the HTTP body of Facebook JSON messages and reconstructs
##! a stream of chat messages from it.
##!
##! Since Facebook switched to HTTPS only, this script no longer works. You may
##! use it for inspiration or instructional purposes.
##!
##! For details, see my blog post:
##! http://matthias.vallentin.net/blog/2011/06/analyzing-facebook-webchat-sessions-with-bro/
##!
# TODO:
# - Add more message types.
# - Parse other non-chat messages as well and establish the notion of a
#   session. To this end, we can use the actual closing message from Facebook
#   itself, which looks like this:
#       for (;;);{"t":"msg","c":"p_1111111111","s":18,"ms":[{
#       "id":111111111111111, "window_id":1111111111,
#       "type":"close_chat"}]}
# Load the companion "bodies" script. It presumably declares the two HTTP
# options redefined below and raises http_body_complete — TODO confirm.
@load bodies
# Switch the reply-body hook on.
redef HTTP::hook_reply_bodies = T;
# Limit the hook to hosts of the form <digits>.channel.facebook.com.
redef HTTP::hook_host_pattern = /[0-9]+\.channel\.facebook\.com/;
# Everything below lives in the Facebook namespace; the export block lists the
# identifiers that are visible outside the module.
module Facebook;
export {
## The logging stream identifier for this script.
redef enum Log::ID += { LOG };
## The columns of the Facebook log stream. Every field carries &log.
type Info: record {
timestamp: string &log; ##< Timestamp of the chat message.
chat_from: string &log; ##< Name of the sender.
chat_to: string &log; ##< Name of the recipient.
chat_msg: string &log; ##< Text of the chat message.
};
## The types of AJAX messages.
type MessageType: enum {
CHAT ##< A webchat message.
};
## A chat message.
type ChatMessage: record
{
msg_type: MessageType; ##< Message type.
timestamp: string; ##< Message timestamp.
from: string; ##< Name of the sender.
to: string; ##< Name of the recipient.
text: string; ##< The actual message.
};
## Event handed to the logging framework as the stream's $ev when the stream
## is created; presumably raised once per written record — TODO confirm.
global log_facebook: event(rec: Info);
}
# Register the Facebook log stream when Bro starts.
event bro_init()
	{
	# Describe the stream first: Info supplies the columns and log_facebook
	# is attached as the stream's event.
	local stream: Log::Stream = [$columns=Info, $ev=log_facebook];

	Log::create_stream(Facebook::LOG, stream);
	}
## Extract integer (or quoted string) value from a key:value (or key:"value").
##
## str: A single ``key:value`` or ``key:"value"`` fragment.
##
## Returns: Everything after the first ``:`` with one leading and one trailing
##          double quote removed, or an empty string if *str* has no ``:``.
function extract_value(str: string) : string
	{
	# Split at the first ':' only, so colons inside the value are preserved.
	local parts = split1(str, /:/);

	# Without a separator there is no second element; indexing it would
	# raise a runtime error, so report "no value" instead.
	if ( 2 !in parts )
		return "";

	# Strip the surrounding quotes of a string value; bare integers pass
	# through unchanged.
	local s = sub(parts[2], /^\"/, "");	#"
	return sub(s, /\"$/, "");	#"
	}
## Extract the text between the last two double quotes.
##
## str: The string to search.
##
## Returns: The contents of the last double-quoted span found in *str*, or an
##          empty string if no such span is found.
function extract_last_quoted(str: string) : string
	{
	local q = find_last(str, /\"([^\"]|\\\")*\"/);	# "

	# Splitting the match on the quote character puts the quoted text into
	# the second element.
	local parts = split(q, /\"/);	# "

	# If nothing matched there is no second element; indexing it would
	# raise a runtime error, so return an empty string instead.
	if ( 2 !in parts )
		return "";

	# NOTE(review): the split happens on every double quote, so a match that
	# contains an escaped quote would be cut at that quote — confirm whether
	# such messages need to be supported.
	return parts[2];
	}
## Create a webchat message from JSON data.
##
## data: The raw JSON text of a Facebook chat message.
##
## Returns: A ChatMessage of type CHAT whose string fields are filled from the
##          keys found in *data*; a field whose key is not found is left as an
##          empty string.
function parse_fb_message(data: string) : ChatMessage
	{
	# Initialize every field up front. Reading a record field that was never
	# assigned is a runtime error, and not every key is guaranteed to appear
	# in the input.
	local msg: ChatMessage = [$msg_type=CHAT, $timestamp="", $from="",
	                          $to="", $text=""];

	# Split on `,"`, i.e. the boundary between one value and the next key.
	local array = split(data, /,\"/);	# "

	for ( i in array )
		{
		local val = array[i];

		if ( strstr(val, "time\":") > 0 )
			msg$timestamp = extract_value(val);
		else if ( strstr(val, "from_name\":\"") > 0 )
			msg$from = extract_value(val);
		else if ( strstr(val, "to_name\":\"") > 0 )
			msg$to = extract_value(val);
		else if ( strstr(val, "\"msg\":{\"text\":\"") > 0 )
			msg$text = extract_last_quoted(val);
		}

	return msg;
	}
## Inspect a completed HTTP body and log it if it is a Facebook chat message.
##
## c: The connection whose HTTP state carries the body.
event http_body_complete(c: connection)
	{
	# Without HTTP state or a body there is nothing to inspect; accessing
	# an absent field would abort the handler with a runtime error.
	if ( ! c?$http || ! c$http?$body )
		return;

	local body = c$http$body;

	# Only consider chat messages for now.
	if ( /^for \(;;\);\{\"t\":\"msg\".*text\":\"/ !in body )	#"
		return;

	local msg = parse_fb_message(body);

	# Copy the parsed message into the log record, field by field.
	local i: Info;
	i$timestamp = msg$timestamp;
	i$chat_from = msg$from;
	i$chat_to = msg$to;
	i$chat_msg = msg$text;

	Log::write(Facebook::LOG, i);
	}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment