Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@sethhall
Created July 24, 2013 04:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sethhall/6068165 to your computer and use it in GitHub Desktop.
Save sethhall/6068165 to your computer and use it in GitHub Desktop.
Rip titles out of web pages and add them to a title field in http.log
@load base/protocols/http
module HTTPTitleRipper;

export {
	## The depth to search for titles in HTTP response bodies. It is
	## compared against the offset of each body chunk: once a chunk's
	## offset exceeds this value, the title search for that file stops.
	## Declared &redef so a site can tune it from its own scripts, e.g.
	## ``redef HTTPTitleRipper::search_depth = 50000;``.
	const search_depth: count = 10000 &redef;

	redef record HTTP::Info += {
		## A title from the webpage. Left unset when no title was
		## found in the response body.
		title: string &log &optional;
	};
}
event http_body_chunk(f: fa_file, data: string, off: count)
	{
	# Splitting on an opening or closing <title> tag (any letter case) yields
	# more than two pieces only when at least two such tags are in this chunk;
	# the second piece is then the text between the first two tags.
	local pieces = split(data, /<\/?[tT][iI][tT][lL][eE]>/);
	local have_title = |pieces| > 2;

	# Keep listening while no title has turned up and this chunk's offset
	# has not yet exceeded the search depth.
	if ( ! have_title && off <= search_depth )
		return;

	if ( have_title )
		{
		# Record the title on every connection of this file that carries
		# HTTP state.
		for ( cid in f$conns )
			{
			if ( ! f$conns[cid]?$http )
				next;

			f$conns[cid]$http$title = pieces[2];
			}
		}

	# Reaching this point means a title was found or the search depth was
	# exceeded; in both cases stop receiving chunks for this file.
	Files::remove_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
	}
event file_new(f: fa_file)
	{
	# Only files whose source is HTTP are of interest.
	if ( f$source != "HTTP" )
		return;

	# Skip files flagged as coming from the originator side.
	if ( f$is_orig )
		return;

	# Have each chunk of this file's data delivered to http_body_chunk.
	Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
	}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment