Created
July 24, 2013 04:57
-
-
Save sethhall/6068165 to your computer and use it in GitHub Desktop.
Rip titles out of web pages and add them to a title field in http.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@load base/protocols/http | |
module HTTPTitleRipper;

export {
	## How far into an HTTP response body (as a chunk offset) to keep
	## searching for a title before giving up. Marked &redef so that a
	## site can tune it without editing this script.
	const search_depth = 10000 &redef;

	redef record HTTP::Info += {
		## Title extracted from the web page, when one was found.
		## Written to http.log.
		title: string &log &optional;
	};
}
# Chunk handler installed by the file_new handler via the data-event analyzer.
#
# f:    the file the chunk belongs to.
# data: the content of this chunk.
# off:  the offset of this chunk, compared against search_depth.
#
# The chunk is split on opening/closing title tags (any letter case). More
# than two pieces means at least two tags were seen in this chunk, and the
# piece between the first two is stored as the title. Only a single chunk is
# examined at a time, so a title that straddles two chunks is not detected.
event http_body_chunk(f: fa_file, data: string, off: count)
	{
	local parts = split(data, /<\/?[tT][iI][tT][lL][eE]>/);
	local found = |parts| > 2;

	# fa_file$conns is an &optional field; make sure it is set before
	# iterating so a file without connections cannot trigger a runtime error.
	if ( found && f?$conns )
		{
		for ( id in f$conns )
			{
			# Only connections that carry HTTP state can take the title.
			if ( f$conns[id]?$http )
				f$conns[id]$http$title = parts[2];
			}
		}

	# We remove after either getting a title or exceeding the search depth.
	if ( found || off > search_depth )
		Files::remove_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
	}
# Attach the chunk handler to each new file whose source is "HTTP" and
# whose is_orig flag is false; every other file is left untouched.
event file_new(f: fa_file)
	{
	# Bail out early for files this script does not care about.
	if ( f$source != "HTTP" || f$is_orig )
		return;

	Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
	}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment