# sethhall/http-title-ripper.bro

## http-title-ripper.bro
@load base/protocols/http

module HTTPTitleRipper;

export {
	## The depth to search for titles in HTTP response bodies. This value
	## is compared against the chunk offset handed to the body-chunk
	## handler; once the offset exceeds it, searching stops for that file.
	## Declared &redef so that it can be tuned from site policy.
	const search_depth = 10000 &redef;

	redef record HTTP::Info += {
		## A title from the webpage: the text found between the first two
		## title tags seen in a response body chunk. Logged when present.
		title: string &log &optional;
	};
}

# Data-event handler for HTTP response bodies. Looks for a title element
# inside the chunk and, when found, stores its text in the HTTP record of
# every connection attached to the file.
event http_body_chunk(f: fa_file, data: string, off: count)
	{
	# Break the chunk apart at every opening or closing title tag (matched
	# in any letter case). More than two pieces means at least two tags
	# were present, in which case the piece at index 2 is used as the title.
	local pieces = split(data, /<\/?[tT][iI][tT][lL][eE]>/);
	local found_title = |pieces| > 2;

	if ( found_title )
		{
		for ( cid in f$conns )
			{
			# Only connections carrying HTTP state can hold a title.
			if ( ! f$conns[cid]?$http )
				next;

			f$conns[cid]$http$title = pieces[2];
			}
		}

	# Detach this handler once a title has been captured or the chunk
	# offset has gone past the configured search depth.
	if ( found_title || off > search_depth )
		Files::remove_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
	}

# Attaches the title-searching data-event analyzer to each new file whose
# source is "HTTP" and whose is_orig flag is false.
event file_new(f: fa_file)
	{
	# Anything that is not HTTP, or that has is_orig set, is left alone.
	if ( f$source != "HTTP" || f$is_orig )
		return;

	Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
	}
	@load base/protocols/http

	module HTTPTitleRipper;

	export {
		## The depth to search for titles in HTTP response bodies. This
		## value is compared against the chunk offset handed to the
		## body-chunk handler; once the offset exceeds it, searching stops
		## for that file. Declared &redef so that it can be tuned from
		## site policy.
		const search_depth = 10000 &redef;

		redef record HTTP::Info += {
			## A title from the webpage: the text found between the first
			## two title tags seen in a response body chunk. Logged when
			## present.
			title: string &log &optional;
		};
	}

	# Data-event handler for HTTP response bodies. Looks for a title element
	# inside the chunk and, when found, stores its text in the HTTP record
	# of every connection attached to the file.
	event http_body_chunk(f: fa_file, data: string, off: count)
		{
		# Split the chunk at every opening or closing title tag (matched in
		# any letter case). More than two parts means at least two tags were
		# present, in which case the part at index 2 is used as the title.
		local parts = split(data, /<\/?[tT][iI][tT][lL][eE]>/);

		# |parts| is the size operator; evaluate the test once and reuse it.
		local found_title = |parts| > 2;

		if ( found_title )
			{
			for ( id in f$conns )
				{
				# Only connections carrying HTTP state can hold a title.
				if ( f$conns[id]?$http )
					{
					f$conns[id]$http$title = parts[2];
					}
				}
			}

		# We remove after either getting a title or exceeding the search depth.
		if ( found_title || off > search_depth )
			Files::remove_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
		}

	# Attaches the title-searching data-event analyzer to each new file
	# whose source is "HTTP" and whose is_orig flag is false.
	event file_new(f: fa_file)
		{
		# Anything that is not HTTP, or that has is_orig set, is left alone.
		if ( f$source != "HTTP" || f$is_orig )
			return;

		Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, [$chunk_event=http_body_chunk]);
		}