Created
February 25, 2016 19:35
-
-
Save p0pr0ck5/69a1cd6ce08e1ca53cda to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use JSON; | |
my @valid_directives = qw(SecRule SecAction SecDefaultAction); | |
sub valid_line { | |
my ($line) = @_; | |
# the directive must be the first element in the line | |
# so if this does not match our whitelist we can't process it | |
return if ! grep { $line =~ m/^$_ / } @valid_directives; | |
return 1; | |
} | |
sub clean_input { | |
my ($fh) = @_; | |
my (@lines, @line_buf, $multi_line); | |
while (<$fh>) { | |
my $line = $_; | |
chomp $line; | |
# ignore comments and blank lines | |
next if $line =~ m/^\s*$/; | |
next if $line =~ m/^\s*#/; | |
# trim whitespace | |
$line =~ s/^\s*|\s*$//; | |
# merge multi-line directives | |
# ex. | |
# | |
# SecRule \ | |
# ARGS \ | |
# "foo" \ | |
# "t:none, \ | |
# block, \ | |
# phase:1, \ | |
# setvar:tx.foo=bar, \ | |
# expirevar:tx.foo=60" | |
# | |
if ($line =~ m/\s*\\\s*$/) { | |
# strip the multi-line ecape and surrounding whitespace | |
$line =~ s/\s*\\\s*$//; | |
push @line_buf, $line; | |
} else { | |
# either the end of a multi line directive or a standalone line | |
# push the buffer to the return array and clear the buffer | |
push @line_buf, $line; | |
my $final_line = join ' ', @line_buf; | |
push @lines, $final_line if valid_line($final_line); | |
@line_buf = (); | |
} | |
} | |
return @lines; | |
} | |
# take a line and return an array of tokens representing various rule parts | |
sub tokenize { | |
my ($line) = @_; | |
my @tokens; | |
# so... this sucks | |
# we have to make a few assumptions about our line | |
# - tokens are whitespace separated | |
# - tokens must be quoted with " if they contain spaces | |
# - " chars within tokens must be escaped with \ | |
my $re_quoted = qr/^"(.*?(?<!\\))"/; | |
my $re_unquoted = qr/([^ ]+)/; | |
# walk the given string and grab the next token | |
# which may be either quoted or unquoted | |
# from there, push the token to our list of fields | |
# and strip it from the input line | |
while ($line =~ $re_quoted || $line =~ $re_unquoted) { | |
my $token = $1; | |
push @tokens, $token; | |
$line =~ s/"?\Q$token\E"?//; | |
$line =~ s/^\s*//; | |
} | |
return @tokens; | |
} | |
# take an array of rule parts and return a hashref of parsed tokens | |
sub parse_tokens { | |
my (@tokens) = @_; | |
my ($entry, $directive, $vars, $operator, $opts); | |
$entry = {}; | |
$directive = shift @tokens; | |
if ($directive eq 'SecRule') { | |
$vars = shift @tokens; | |
$operator = shift @tokens; | |
} | |
$opts = shift @tokens; | |
die "Uh oh! We shouldn't have any fields left but we still have @tokens\n" | |
if @tokens; | |
$entry->{directive} = $directive; | |
$entry->{vars} = parse_vars($vars) if $vars; | |
$entry->{operator} = parse_operator($operator) if $operator; | |
$entry->{opts} = parse_options($opts) if $opts; | |
return $entry; | |
} | |
sub parse_vars { | |
my ($raw_var) = @_; | |
my @vars = split '\|', $raw_var; | |
my @parsed_vars; | |
for my $var (@vars) { | |
# variables may take a few forms | |
# ! and & are optional metacharacters (mutually exclusive) | |
# an optional ':foo' element may also exist | |
my ($var, @rest) = split ':', $var; | |
my $modifier = substr $var, 0, 1; | |
my $specific = join ':', @rest; | |
my $parsed = {}; | |
$parsed->{variable} = $var; | |
$parsed->{specific} = $specific if $specific; | |
if ($modifier =~ m/[&!]/) { | |
$parsed->{modifier} = $modifier; | |
} | |
push @parsed_vars, $parsed; | |
} | |
return \@parsed_vars; | |
} | |
sub parse_operator { | |
my ($raw_operator) = @_; | |
# operators may be defined by the @ symbol | |
# if one isnt' defined, 'rx' is the default | |
# everything following in this token is the pattern | |
# | |
# using a regex here makes the parser a little more flexible | |
# we could split on space, but that breaks if the operator | |
# is not single space separated from the pattern, and splitting | |
# on \s+ isn't possible because that could break the pattern | |
# when joining back together | |
$raw_operator =~ m/^\s*(?:(\!)?\@([a-zA-Z]+)\s+)?(.*)$/; | |
my $negated = $1; | |
my $operator = $2 ? $2 : 'rx'; | |
my $pattern = $3; | |
my $parsed = {}; | |
$parsed->{negated} = $negated if $negated; | |
$parsed->{operator} = $operator; | |
$parsed->{pattern} = $pattern; | |
return $parsed; | |
} | |
sub parse_options { | |
my ($raw_options) = @_; | |
my (@tokens, @parsed_options, @opt_buf, $sentinal); | |
my @split_options = split ',', $raw_options; | |
# options may take one of a few forms | |
# standalone: deny | |
# express a value: phase:1 | |
# express a quoted value: msg:'foo bar' | |
# | |
# because the quoted value in something like msg or logdata | |
# may have commas, we cant simply split on comma | |
# so we need to loop through and piece together tokens | |
while (@split_options) { | |
# take a chunk and add it the buffer array | |
# once we know we've reached the end of an | |
# option, we'll put the buffer elements | |
# back together and add it to the final array | |
my $chunk = shift @split_options; | |
push @opt_buf, $chunk; | |
# we're done chaining together chunks if: | |
# | |
# - we didnt have the potential to split | |
# meaning that the chunk didnt have a : or ' | |
# and that the first member of the buffer | |
# did contain a ' | |
# | |
# OR | |
# | |
# - we could have split but we know we're done | |
# (we know this if the last member of the chunk is a ') | |
$sentinal = 1 if (($chunk !~ m/'/ || $chunk !~ m/:/) && | |
! (scalar @opt_buf > 1 && $opt_buf[0] =~ m/'/)) || | |
$chunk =~ m/'$/; | |
if ($sentinal) { | |
push @tokens, join ',', @opt_buf; | |
@opt_buf = (); | |
$sentinal = 0; | |
} | |
} | |
# great, now that we have proper tokens | |
# we can split any potential key value pairs | |
# and add them to the final array | |
for my $token (@tokens) { | |
my ($opt, @value) = split /:/, $token; | |
my $parsed = {}; | |
$parsed->{opt} = $opt; | |
$parsed->{value} = strip_encap_quotes(join ':', @value) if @value; | |
push @parsed_options, $parsed; | |
} | |
return \@parsed_options; | |
} | |
sub strip_encap_quotes { | |
my ($line) = @_; | |
$line =~ s/^(['"])(.*)\1$/$2/; | |
return $line; | |
} | |
sub main { | |
my $fh = *STDIN; | |
my @cleaned_lines = clean_input($fh); | |
my @parsed_lines = map { parse_tokens(tokenize($_)) } @cleaned_lines; | |
print to_json(\@parsed_lines); | |
} | |
main(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment