Skip to content

Instantly share code, notes, and snippets.

@robison
Last active November 6, 2015 19:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robison/6f518c55a7176d273d37 to your computer and use it in GitHub Desktop.
Save robison/6f518c55a7176d273d37 to your computer and use it in GitHub Desktop.
Lua/LPEG grammar for parsing the modsec_audit.log files generated by mod_security
-- This Source Code Form is subject to the terms of the Mozilla Public
-- License, v. 2.0. If a copy of the MPL was not distributed with this
-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
--[[
Parses modsec_audit.log formatted message into Heka message Fields.
Config:
- hostname_keep (boolean, defaults to false)
Always preserve the original 'Hostname' field set by Logstreamer's 'hostname' configuration setting. Note: the decoder code in this file never reads this setting and never assigns 'Hostname'.
- msg_type (string, optional, default "modsec.audit")
Sets the message 'Type' header to the specified value. Note: the decoder code reads this value from the config key 'type'.
- payload_keep (boolean, defaults to false)
If true, maintain the original Payload in the new message. Because this version of the decoder does only light parsing, setting this to 'true' is highly recommended.
*Example Heka Configuration*
.. code-block:: ini
[ModsecAuditLogInput]
type = "LogstreamerInput"
splitter = "ModsecAuditLogSplitter"
decoder = "ModsecAuditLogDecoder"
file_match = "modsec_audit.log"
log_directory = "/var/log/httpd"
[ModsecAuditLogSplitter]
type = "RegexSplitter"
delimiter = '(--[A-Fa-f0-9]{8}-Z--\n)'
delimiter_eol = true
[ModsecAuditLogDecoder]
type = "SandboxDecoder"
filename = "lua_decoders/modsec_audit.lua"
[ModsecAuditLogDecoder.config]
payload_keep = true
*Example Heka Message*
2011/06/26 18:45:49
:Timestamp: 2011-07-26 04:45:49 +0000
:Type: modsec.audit
:Hostname: www.example.com
:Pid: 0
:Uuid: b6971999-f9fa-4009-abb5-69f89be6918b
:Logger: ModsecAuditLogInput
:Payload: --5f3acc73-A--
[26/Jul/2011:11:45:49 +0700] Ti5GfNJW71wAAC63D4YAAABU 1.2.3.4 12446 5.6.7.8 80
--5f3acc73-B--
POST /newthread.php?do=postthread&f=8 HTTP/1.1
Host: www.example.com
User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: en-us
Accept-Encoding: gzip, deflate
Accept-Charset: UTF-8,*
Keep-Alive: 115
Connection: keep-alive
Referer: http://www.example.com/newthread.php?do=newthread&f=8
Cookie: some cookie
Content-Type: application/x-www-form-urlencoded
Content-Length: 24390
--5f3acc73-C--
subject=some subject&message=link to a site thats banned such as http://www.example.com
--5f3acc73-F--
HTTP/1.1 403 Forbidden
Content-Length: 278
Keep-Alive: timeout=15, max=100
Connection: Keep-Alive
Content-Type: text/html; charset=iso-8859-1
--5f3acc73-H--
Message: [file "/etc/httpd/modsecurity.d/30_asl_antispam.conf"] [line "52"] [id "300001"] [rev "23"] [msg "Atomicorp.com WAF Rules: Blacklist Spam Domain"] [data ""] [severity "CRITICAL"] Access denied with code 403 (phase 2). Matched phrase "www.example.com" at ARGS:message.
Action: Intercepted (phase 2)
Apache-Handler: php5-script
Stopwatch: 1311655548998047 492700 (405774* 492191 -)
WAF: ModSecurity for Apache/2.5.13 ( http://www.modsecurity.org/); 201107251315.
Server: Apache/2.2.18 (CentOS)
--5f3acc73-Z--
--]]
local l = require 'lpeg'
local clf = require 'common_log_format'
local util = require 'util'
l.locale(l)
local C, Cg, Ct, P, R, S = l.C, l.Cg, l.Ct, l.P, l.R, l.S
local print, space, xdigit = l.print, l.space, l.xdigit
-- Line terminator. The two-character forms are tried first so that a pair is
-- consumed as a single terminator; otherwise a lone "\n" or "\r" is accepted.
local eol = P"\n\r" + P"\r\n" + S"\n\r"
-- A captured run of one or more characters from the locale `print` class,
-- without any terminator.
local cline = C(print^1)
-- The same captured run, followed by a line terminator.
local line = cline * eol
-- Record id used in section boundaries: eight or more hexadecimal digits
-- (LPeg's `^8` means "at least eight").
local msid = xdigit^8
-- Builds the pattern for a section boundary line: "--", the record id
-- (`msid`), "-", the section letter given in `char`, "--", and finally a line
-- terminator (`eol`).
local function delimiter(char)
    local marker = P"--" * msid * P"-" * P(char) * P"--"
    return marker * eol
end
-- Copies every key/value pair of t1 into t2 and returns t2.
-- On a key clash the value from t1 replaces the one in t2; t1 is not modified.
local function merge(t1, t2)
    for key, value in next, t1 do
        t2[key] = value
    end
    return t2
end
-- Section A body: one line parsed with an Apache log-format grammar. The
-- format string uses the Apache directives for time, request log id, client
-- address, remote port, local address and local port.
-- NOTE(review): the field names produced are decided by common_log_format;
-- process_message relies on a `time` field being among them.
local section_a_grammar = clf.build_apache_grammar("%t %L %a %{remote}p %A %{local}p")
-- First line of section B: Apache directives for method, URL path, protocol.
local request_grammar = clf.build_apache_grammar("%m %U %H")

-- Each section pattern is: boundary line, section content, then any number of
-- trailing line terminators. `merge` is applied to the table captures, so both
-- grammars above are assumed to yield a table capture (TODO confirm against
-- common_log_format).
local section_a = eol^0 * P(delimiter("A") * section_a_grammar * eol^1) * eol^0
-- Request line fields are merged into a table that also holds the raw header
-- text under `request_headers`.
local section_b = P(delimiter("B") * (request_grammar * eol * Ct(Cg(C(line^1), "request_headers"))) / merge) * eol^0
-- The remaining sections keep their content as one raw text field each.
local section_c = P(delimiter("C") * Cg(C(line^1), "request_body")) * eol^0
local section_e = P(delimiter("E") * Cg(C(line^1), "intended_response_body")) * eol^0
-- Section F: the first line goes to `response`, the rest to `response_headers`.
local section_f = P(delimiter("F") * Cg(C(cline), "response") * eol * Cg(C(line^1), "response_headers")) * eol^0
local section_h = P(delimiter("H") * Cg(C(line^1), "audit_log_trailer")) * eol^0
local section_i = P(delimiter("I") * Cg(C(line^1), "multipart_req_body")) * eol^0
local section_k = P(delimiter("K") * Cg(C(line^1), "matched_rules")) * eol^0
-- Closing "Z" boundary, followed by at most two line terminators.
local footer = "--" * P(msid) * "-" * "Z" * "--" * eol^-2

-- Whole record: sections A and B are mandatory; C, E, F, H, I and K are each
-- optional but must appear in exactly this order. All captured fields end up
-- merged into a single table.
-- Declared `local` so the pattern does not leak into the global environment;
-- process_message, defined later in this file, picks it up as an upvalue.
local grammar = ((section_a * section_b / merge)
    * Ct(section_c^-1 * section_e^-1 * section_f^-1 * section_h^-1 * section_i^-1 * section_k^-1)
    * footer) / merge
-- Message type. The header comment documents this setting as `msg_type`, so
-- that key is consulted first; `type` is still honoured so existing
-- configurations keep working. Falls back to "modsec.audit".
local msg_type = read_config("msg_type") or read_config("type") or "modsec.audit"
-- When truthy, process_message copies the raw record into the output Payload.
local payload_keep = read_config("payload_keep")

-- Message table handed to inject_message by process_message. It is created
-- once and reused for every record; only Type is fixed here, the nil entries
-- simply list the headers that may be filled in.
local msg = {
    Timestamp = nil,
    EnvVersion = nil,
    Hostname = nil,
    Type = msg_type,
    Payload = nil,
    Fields = nil,
    Severity = nil
}
-- Decodes one audit-log record taken from the message Payload.
--
-- Matches the payload against `grammar`, moves the parsed `time` field into
-- the message Timestamp, stores the remaining parsed fields in Fields and
-- injects the resulting message.
--
-- Returns 0 on success, or -1 plus an error string when the payload cannot be
-- parsed or the message cannot be injected.
function process_message()
    local data = read_message("Payload")
    -- grammar:match raises on a non-string subject; report a decode failure
    -- instead of letting that error escape.
    if type(data) ~= "string" then
        return -1, "Failed to match grammar"
    end

    local fields = grammar:match(data)
    if not fields then
        return -1, "Failed to match grammar"
    end

    if payload_keep then
        msg.Payload = data
    end

    -- `msg` is shared between calls, so assign Timestamp unconditionally:
    -- a record without a `time` field must not inherit the previous record's
    -- timestamp.
    msg.Timestamp = fields.time
    fields.time = nil
    msg.Fields = fields

    -- Keep the underlying error text so injection failures can be diagnosed.
    local ok, err = pcall(inject_message, msg)
    if not ok then
        return -1, "Failed to inject message: " .. tostring(err)
    end
    return 0
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment