Last active
November 6, 2015 19:01
-
-
Save robison/6f518c55a7176d273d37 to your computer and use it in GitHub Desktop.
Lua/LPEG grammar for parsing modsec_audit.log files, generated by mod_security
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This Source Code Form is subject to the terms of the Mozilla Public | |
-- License, v. 2.0. If a copy of the MPL was not distributed with this | |
-- file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
--[[ | |
Parses modsec_audit.log formatted message into Heka message Fields. | |
Config: | |
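- hostname_keep (boolean, defaults to false)
Always preserve the original 'Hostname' field set by Logstreamer's 'hostname' configuration setting.
NOTE: the decoder code in this file never reads this option and never assigns the message Hostname, so the setting currently has no effect.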
- msg_type (string, optional default "modsec.audit") | |
Sets the message 'Type' header to the specified value. | |
- payload_keep (bool, default false)
If true, keep the original Payload in the new message. Because this version of the decoder only does light parsing, setting this to 'true' is highly recommended.
*Example Heka Configuration* | |
.. code-block:: ini | |
[ModsecAuditLogInput] | |
type = "LogstreamerInput" | |
splitter = "ModsecAuditLogSplitter" | |
decoder = "ModsecAuditLogDecoder" | |
file_match = "modsec_audit.log" | |
log_directory = "/var/log/httpd" | |
[ModsecAuditLogSplitter] | |
type = "RegexSplitter" | |
delimiter = '(--[A-Fa-f0-9]{8}-Z--\n)' | |
delimiter_eol = true | |
[ModsecAuditLogDecoder] | |
type = "SandboxDecoder" | |
filename = "lua_decoders/modsec_audit.lua" | |
[ModsecAuditLogDecoder.config] | |
payload_keep = true | |
*Example Heka Message* | |
2011/06/26 18:45:49 | |
:Timestamp: 2011-07-26 04:45:49 +0000
:Type: modsec.audit | |
:Hostname: www.example.com | |
:Pid: 0 | |
:Uuid: b6971999-f9fa-4009-abb5-69f89be6918b | |
:Logger: ModsecAuditLogInput | |
:Payload: --5f3acc73-A-- | |
[26/Jul/2011:11:45:49 +0700] Ti5GfNJW71wAAC63D4YAAABU 1.2.3.4 12446 5.6.7.8 80 | |
--5f3acc73-B-- | |
POST /newthread.php?do=postthread&f=8 HTTP/1.1 | |
Host: www.example.com | |
User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1 | |
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 | |
Accept-Language: en-us | |
Accept-Encoding: gzip, deflate | |
Accept-Charset: UTF-8,* | |
Keep-Alive: 115 | |
Connection: keep-alive | |
Referer: http://www.example.com/newthread.php?do=newthread&f=8 | |
Cookie: some cookie | |
Content-Type: application/x-www-form-urlencoded | |
Content-Length: 24390 | |
--5f3acc73-C-- | |
subject=some subject&message=link to a site thats banned such as http://www.example.com | |
--5f3acc73-F-- | |
HTTP/1.1 403 Forbidden | |
Content-Length: 278 | |
Keep-Alive: timeout=15, max=100 | |
Connection: Keep-Alive | |
Content-Type: text/html; charset=iso-8859-1 | |
--5f3acc73-H-- | |
Message: [file "/etc/httpd/modsecurity.d/30_asl_antispam.conf"] [line "52"] [id "300001"] [rev "23"] [msg "Atomicorp.com WAF Rules: Blacklist Spam Domain"] [data ""] [severity "CRITICAL"] Access denied with code 403 (phase 2). Matched phrase "www.example.com" at ARGS:message. | |
Action: Intercepted (phase 2) | |
Apache-Handler: php5-script | |
Stopwatch: 1311655548998047 492700 (405774* 492191 -) | |
WAF: ModSecurity for Apache/2.5.13 ( http://www.modsecurity.org/); 201107251315. | |
Server: Apache/2.2.18 (CentOS) | |
--5f3acc73-Z-- | |
--]] | |
local l = require 'lpeg' | |
local clf = require 'common_log_format' | |
local util = require 'util' | |
l.locale(l) | |
local C, Cg, Ct, P, R, S = l.C, l.Cg, l.Ct, l.P, l.R, l.S | |
local print, space, xdigit = l.print, l.space, l.xdigit | |
-- Basic lexical building blocks shared by every section of the grammar.

-- A single line terminator: two-character forms are tried first so that
-- "\n\r" and "\r\n" are consumed as one terminator, then a lone "\n" or "\r".
local eol = P"\n\r" + P"\r\n" + S"\n\r"

-- One or more printable characters, captured (no terminator consumed).
local cline = C(print^1)

-- A captured run of printable characters followed by a line terminator.
-- Empty lines do not match, which is what ends a `line^1` repetition.
local line = cline * eol

-- ModSecurity boundary id: eight (or more) hexadecimal digits.
local msid = xdigit^8
--- Build the pattern for a section boundary line such as "--5f3acc73-B--".
-- @param char value identifying the section (e.g. "A"); converted to a
--             pattern by LPeg
-- @return LPeg pattern matching "--<msid>-<char>--" followed by one line
--         terminator; it produces no captures
local function delimiter(char)
    local boundary = P"--" * msid * P"-" * P(char) * P"--"
    return boundary * eol
end
--- Copy every key/value pair of t1 into t2.
-- Keys already present in t2 are overwritten by the value from t1.
-- @param t1 source table (left unmodified)
-- @param t2 destination table (modified in place)
-- @return t2
local function merge(t1, t2)
    local key, value = next(t1)
    while key ~= nil do
        t2[key] = value
        key, value = next(t1, key)
    end
    return t2
end
-- Grammar for one complete audit log record (sections A .. Z).

-- Section A is a single line parsed with an Apache log format string;
-- the request line at the top of section B is parsed the same way.
local section_a_grammar = clf.build_apache_grammar("%t %L %a %{remote}p %A %{local}p")
local request_grammar = clf.build_apache_grammar("%m %U %H")

--- Build the pattern for a section whose body is kept as one raw text blob.
-- @param letter section letter used in the boundary line (e.g. "C")
-- @param field  name of the field that receives the section text
-- @return LPeg pattern: boundary line, one or more non-empty lines stored
--         under `field`, then any trailing line terminators
local function text_section(letter, field)
    return delimiter(letter) * Cg(C(line^1), field) * eol^0
end

-- Mandatory sections. Each of A and B yields a table; section B merges the
-- parsed request-line table into the table holding "request_headers".
local section_a = eol^0 * delimiter("A") * section_a_grammar * eol^1 * eol^0
local section_b = (delimiter("B") * request_grammar * eol
                   * Ct(Cg(C(line^1), "request_headers"))) / merge * eol^0

-- Optional sections.
local section_c = text_section("C", "request_body")
local section_e = text_section("E", "intended_response_body")
-- Section F keeps its first line (no terminator) separately as "response".
local section_f = delimiter("F") * Cg(C(cline), "response") * eol
                  * Cg(C(line^1), "response_headers") * eol^0
local section_h = text_section("H", "audit_log_trailer")
local section_i = text_section("I", "multipart_req_body")
local section_k = text_section("K", "matched_rules")

-- End-of-record marker, followed by at most two line terminators.
local footer = "--" * P(msid) * "-" * "Z" * "--" * eol^-2

-- The optional sections are accepted in whatever order they appear in the
-- record. Each alternative starts with a distinct boundary letter, so records
-- in the previously required order (C, E, F, H, I, K) parse exactly as before,
-- while records that place I where C would be are no longer rejected.
local optional_sections = Ct((section_c + section_i + section_e
                              + section_f + section_h + section_k)^0)

-- Kept local so it does not leak into the global environment; functions
-- defined later in this file still see it as an upvalue.
-- The A/B tables are merged first, then merged into the optional-section
-- table, which becomes the single capture returned by a successful match.
local grammar = ((section_a * section_b) / merge * optional_sections * footer) / merge
-- Decoder configuration.
-- The message Type is read from the documented "msg_type" option first; the
-- "type" key is still honoured so existing configurations keep working.
local msg_type = read_config("msg_type") or read_config("type") or "modsec.audit"
-- When truthy, the original Payload is copied into the decoded message.
local payload_keep = read_config("payload_keep")

-- Message template reused for every decoded record; only Type is fixed here,
-- the remaining headers are filled in (or left unset) per message.
local msg = {
    Timestamp = nil,
    EnvVersion = nil,
    Hostname = nil,
    Type = msg_type,
    Payload = nil,
    Fields = nil,
    Severity = nil
}
--- Decode one audit log record taken from the message Payload.
-- Matches the payload against `grammar`, moves the parsed `time` value into
-- the message Timestamp, stores the remaining captures as Fields and injects
-- the resulting message.
-- @return 0 on success; -1 plus an error string when the payload does not
--         match the grammar or when injecting the message fails
function process_message()
    local data = read_message("Payload")
    local fields = grammar:match(data)
    if not fields then
        return -1, "Failed to match grammar"
    end

    if payload_keep then
        msg.Payload = data
    end

    -- `msg` is shared between calls, so always overwrite Timestamp: a record
    -- without a parsed time must not inherit the previous record's value.
    msg.Timestamp = fields.time
    fields.time = nil
    msg.Fields = fields

    -- Surface the underlying error instead of discarding it.
    local ok, err = pcall(inject_message, msg)
    if not ok then
        return -1, "Failed to inject message: " .. tostring(err)
    end
    return 0
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment