Skip to content

Instantly share code, notes, and snippets.

@p0pr0ck5
Created February 25, 2016 19:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save p0pr0ck5/69a1cd6ce08e1ca53cda to your computer and use it in GitHub Desktop.
Save p0pr0ck5/69a1cd6ce08e1ca53cda to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use JSON;
my @valid_directives = qw(SecRule SecAction SecDefaultAction);
sub valid_line {
my ($line) = @_;
# the directive must be the first element in the line
# so if this does not match our whitelist we can't process it
return if ! grep { $line =~ m/^$_ / } @valid_directives;
return 1;
}
sub clean_input {
my ($fh) = @_;
my (@lines, @line_buf, $multi_line);
while (<$fh>) {
my $line = $_;
chomp $line;
# ignore comments and blank lines
next if $line =~ m/^\s*$/;
next if $line =~ m/^\s*#/;
# trim whitespace
$line =~ s/^\s*|\s*$//;
# merge multi-line directives
# ex.
#
# SecRule \
# ARGS \
# "foo" \
# "t:none, \
# block, \
# phase:1, \
# setvar:tx.foo=bar, \
# expirevar:tx.foo=60"
#
if ($line =~ m/\s*\\\s*$/) {
# strip the multi-line ecape and surrounding whitespace
$line =~ s/\s*\\\s*$//;
push @line_buf, $line;
} else {
# either the end of a multi line directive or a standalone line
# push the buffer to the return array and clear the buffer
push @line_buf, $line;
my $final_line = join ' ', @line_buf;
push @lines, $final_line if valid_line($final_line);
@line_buf = ();
}
}
return @lines;
}
# take a line and return an array of tokens representing various rule parts
sub tokenize {
my ($line) = @_;
my @tokens;
# so... this sucks
# we have to make a few assumptions about our line
# - tokens are whitespace separated
# - tokens must be quoted with " if they contain spaces
# - " chars within tokens must be escaped with \
my $re_quoted = qr/^"(.*?(?<!\\))"/;
my $re_unquoted = qr/([^ ]+)/;
# walk the given string and grab the next token
# which may be either quoted or unquoted
# from there, push the token to our list of fields
# and strip it from the input line
while ($line =~ $re_quoted || $line =~ $re_unquoted) {
my $token = $1;
push @tokens, $token;
$line =~ s/"?\Q$token\E"?//;
$line =~ s/^\s*//;
}
return @tokens;
}
# take an array of rule parts and return a hashref of parsed tokens
sub parse_tokens {
my (@tokens) = @_;
my ($entry, $directive, $vars, $operator, $opts);
$entry = {};
$directive = shift @tokens;
if ($directive eq 'SecRule') {
$vars = shift @tokens;
$operator = shift @tokens;
}
$opts = shift @tokens;
die "Uh oh! We shouldn't have any fields left but we still have @tokens\n"
if @tokens;
$entry->{directive} = $directive;
$entry->{vars} = parse_vars($vars) if $vars;
$entry->{operator} = parse_operator($operator) if $operator;
$entry->{opts} = parse_options($opts) if $opts;
return $entry;
}
sub parse_vars {
my ($raw_var) = @_;
my @vars = split '\|', $raw_var;
my @parsed_vars;
for my $var (@vars) {
# variables may take a few forms
# ! and & are optional metacharacters (mutually exclusive)
# an optional ':foo' element may also exist
my ($var, @rest) = split ':', $var;
my $modifier = substr $var, 0, 1;
my $specific = join ':', @rest;
my $parsed = {};
$parsed->{variable} = $var;
$parsed->{specific} = $specific if $specific;
if ($modifier =~ m/[&!]/) {
$parsed->{modifier} = $modifier;
}
push @parsed_vars, $parsed;
}
return \@parsed_vars;
}
sub parse_operator {
my ($raw_operator) = @_;
# operators may be defined by the @ symbol
# if one isnt' defined, 'rx' is the default
# everything following in this token is the pattern
#
# using a regex here makes the parser a little more flexible
# we could split on space, but that breaks if the operator
# is not single space separated from the pattern, and splitting
# on \s+ isn't possible because that could break the pattern
# when joining back together
$raw_operator =~ m/^\s*(?:(\!)?\@([a-zA-Z]+)\s+)?(.*)$/;
my $negated = $1;
my $operator = $2 ? $2 : 'rx';
my $pattern = $3;
my $parsed = {};
$parsed->{negated} = $negated if $negated;
$parsed->{operator} = $operator;
$parsed->{pattern} = $pattern;
return $parsed;
}
sub parse_options {
my ($raw_options) = @_;
my (@tokens, @parsed_options, @opt_buf, $sentinal);
my @split_options = split ',', $raw_options;
# options may take one of a few forms
# standalone: deny
# express a value: phase:1
# express a quoted value: msg:'foo bar'
#
# because the quoted value in something like msg or logdata
# may have commas, we cant simply split on comma
# so we need to loop through and piece together tokens
while (@split_options) {
# take a chunk and add it the buffer array
# once we know we've reached the end of an
# option, we'll put the buffer elements
# back together and add it to the final array
my $chunk = shift @split_options;
push @opt_buf, $chunk;
# we're done chaining together chunks if:
#
# - we didnt have the potential to split
# meaning that the chunk didnt have a : or '
# and that the first member of the buffer
# did contain a '
#
# OR
#
# - we could have split but we know we're done
# (we know this if the last member of the chunk is a ')
$sentinal = 1 if (($chunk !~ m/'/ || $chunk !~ m/:/) &&
! (scalar @opt_buf > 1 && $opt_buf[0] =~ m/'/)) ||
$chunk =~ m/'$/;
if ($sentinal) {
push @tokens, join ',', @opt_buf;
@opt_buf = ();
$sentinal = 0;
}
}
# great, now that we have proper tokens
# we can split any potential key value pairs
# and add them to the final array
for my $token (@tokens) {
my ($opt, @value) = split /:/, $token;
my $parsed = {};
$parsed->{opt} = $opt;
$parsed->{value} = strip_encap_quotes(join ':', @value) if @value;
push @parsed_options, $parsed;
}
return \@parsed_options;
}
sub strip_encap_quotes {
my ($line) = @_;
$line =~ s/^(['"])(.*)\1$/$2/;
return $line;
}
sub main {
my $fh = *STDIN;
my @cleaned_lines = clean_input($fh);
my @parsed_lines = map { parse_tokens(tokenize($_)) } @cleaned_lines;
print to_json(\@parsed_lines);
}
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment