Created
March 28, 2018 15:14
-
-
Save rfdrake/ab275d6eac1aa86bba10814021994da3 to your computer and use it in GitHub Desktop.
Proof of concept Marpa::R2 parser for ISC dhcpd.leases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
use strict;
use warnings;

use Data::Dumper;
use Marpa::R2;
# SLIF (Marpa::R2 Scanless) grammar source for ISC dhcpd.leases files.
#
# The grammar recognises three kinds of top-level block -- lease, host and
# failover-peer state -- each holding a list of statements separated by
# semicolons.  Every rule uses the ::array action, so the parse value is a
# tree of nested array references.  The here-document is single-quoted, so
# backslashes in the grammar text reach Marpa unmodified.
my $DSL = <<'END_DSL';
# Longest acceptable token matching: only lexemes the parser can accept at
# the current position take part in lexing.
lexeme default = latm => 1
:discard ~ Whitespace
:discard ~ Comment
:default ::= action => ::array
# :default ::= action => [name,values]

# ---- Structural (G1) rules ----
start ::= Block+
Block ::= Host | Lease | Failover
Lease ::= 'lease' IPAddr (OpenCurly) Statements (CloseCurly)
Host ::= 'host' Hostname (OpenCurly) Statements (CloseCurly)
Failover ::= 'failover peer' QuotedString 'state' (OpenCurly) FailoverStatements (CloseCurly)

Statements ::= Statement* separator => SemiColon
Statement ::= LeaseOperation Number Date Time
    | BindingStateWord BindingState
    | 'fixed-address' IPAddr
    | 'hardware' HardwareType Hex
    | 'uid' QuotedString
    | 'client-hostname' QuotedString
    | ('set') SetOptions ('=') QuotedString
    | ('option') Options Value
    | 'dynamic'

FailoverStatements ::= FailoverStatement* separator => SemiColon
FailoverStatement ::= 'my state communications-interrupted at' Number Date Time
    | 'partner state normal at' Number Date Time

Value ::= Hex | QuotedString

# ---- Lexical (L0) rules ----
SetOptions ~ 'ddns-txt' | 'vendor-class-identifier' | 'ddns-fwd-name'
Options ~ 'agent.circuit-id' | 'agent.remote-id'
BindingStateWord ~ 'next binding state' | 'rewind binding state' | 'binding state'
BindingState ~ 'free' | 'active' | 'expired' | 'released' | 'backup' | 'abandoned'
Date ~ [\d][\d][\d][\d] '/' [\d][\d] '/' [\d][\d]
Time ~ [\d][\d] ':' [\d][\d] ':' [\d][\d]
IPAddr ~ Digits '.' Digits '.' Digits '.' Digits
LeaseOperation ~ 'starts' | 'ends' | 'tstp' | 'cltt' | 'tsfp' | 'atsfp'
HardwareType ~ 'ethernet' | 'token-ring' | 'unknown-' Digits

# A double-quoted string: a backslash escapes any following character, and
# an unescaped character may be anything except vertical whitespace, a
# double quote or a backslash (\x5c).
QuotedString ~ ["] NotQuotes ["]
NotQuotes ~ NotQuote*
NotQuote ~ '\' [\d\D]
NotQuote ~ [^\v"\x5c]

Number ~ Digits
Digits ~ [\d]+
Hex ~ [\dA-Fa-f:]+
OpenCurly ~ '{'
CloseCurly ~ '}'
SemiColon ~ ';'
Hostname ~ [\w]+

# Discarded input: whitespace, and '#' comments ended by a CR or LF.
Whitespace ~ [\s]+
Comment ~ '#' CommentChars VertSpaceChar
CommentChars ~ [^\r\n]*
VertSpaceChar ~ [\r\n]
END_DSL
# Compile the SLIF grammar once; the same grammar object is handed to every
# recognizer created below.
my $GRAMMAR = Marpa::R2::Scanless::G->new({
    source => \$DSL,
});

# Parse each file named on the command line and dump its parse value.
for my $path (@ARGV) {
    open my $fh, '<', $path or die "Can't open file $path: $!";

    # Slurp the whole file; the recognizer takes its input as one string.
    # In slurp mode an empty file yields '' rather than undef, so only a
    # genuine read failure is reported here.
    my $buffer = do { local $/; <$fh> };
    die "Couldn't read file $path: $!" unless defined $buffer;
    close $fh;

    # A Scanless recognizer accepts only a single read() call, so build a
    # fresh one per file instead of reusing one recognizer for all of @ARGV.
    my $re = Marpa::R2::Scanless::R->new({
        grammar           => $GRAMMAR,
        semantics_package => 'main',
    });
    $re->read(\$buffer);

    # value() returns a reference to the parse result, or undef when the
    # input produced no parse.
    my $value = $re->value;
    die "No parse found for file $path\n" unless defined $value;

    print Dumper $value;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I decided to try parsing the dhcpd.leases file with Marpa because it's a simpler format than dhcpd.conf.
It ended up being slow and I don't know if there are good ways to make it faster.
Here is an example on an 11 MB file (with Data::Dumper commented out so it's just reading/parsing):
Here is an example of a regex parser on the same file: