Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sample CSS Parser using Marpa::XS
use strict;
use warnings;
use feature ':5.10';
use Marpa::XS;
use Data::Dumper;
#
# a simple CSS snippet for testing
#
my $text = q{
@charset "ebcdic";
// ignored comment -- not CSS compatible, however
@import blabla("asdf.css");
div.class {
font-size: 13px;
font: "Helvetica Regular";
}
/* processed comment */
};
my %symbol_for_char = (
'{' => 'OPEN_CURLY', '}' => 'CLOSE_CURLY',
'(' => 'OPEN_PAREN', ')' => 'CLOSE_PAREN',
'[' => 'OPEN_BRACKET', ']' => 'CLOSE_BRACKET',
';' => 'SEMICOLON', ':' => 'COLON',
',' => 'COMMA', '.' => 'DOT',
'*' => 'SPLASH', '#' => 'HASH',
'/' => 'SLASH', '>' => 'GT',
'+' => 'PLUS',
);
#
# Phase 1 -- sequentially scan terminals
#
my @tokens;
sub t {
push @tokens, [ @_ ];
# must return empty string, because we might be inside a replacement
return '';
}
while (length($text)) {
no warnings;
$text =~ s{\A \s* // .*? ^}{}xms and next;
$text =~ s{\A \s* (/\* .*? \*/) \s*}{ t COMMENT => $1 }exms and next;
$text =~ s{\A \s* \@([a-z]+) \s*}{ t "AT_\U$1" }exms and next;
$text =~ s{\A \s* !important \s*}{ t 'IMPORTANT' }exms and next;
$text =~ s{\A \s* (\#[0-9a-fA-F]{3}(?:[0-9a-fA-F]{3})?) \s*}{ t HEXCOLOR => $1 }exms and next;
$text =~ s{\A \s* (['"]) ((?:[^\\\1] | \\.)*?) \1 \s*}{ t STRING => $2 }exms and next;
$text =~ s{\A \s* ([~|]?=) \s*}{ t ATTR_CMP => $1 }exms and next;
$text =~ s{\A \s+ }{ t 'SPACE' }exms and next;
$text =~ s{\A \s* (-?[_a-zA-Z][-_a-zA-Z0-9]*)}{t IDENT => $1}exms and next;
$text =~ s{\A \s* (\.\d+ | \d+ (?:\.\d*)?) \s* (%|em|ex|px|cm|mm|pt|pc|deg|rad|grad|ms|s|hz|khz)? \s*}{ t NUMBER => "$1$2" }exms and next;
my $char = substr($text,0,1,'');
if (exists($symbol_for_char{$char})) {
t $symbol_for_char{$char};
} else {
t CHAR => $char;
}
}
#
# phase 2 -- define grammar
#
my $grammar = Marpa::XS::Grammar->new({
start => 'css',
actions => 'main',
default_action => 'do_default',
terminals => [qw(
OPEN_CURLY CLOSE_CURLY OPEN_PAREN CLOSE_PAREN OPEN_BRACKET CLOSE_BRACKET
SPACE SEMICOLON COLON COMMA SPLASH DOT HASH ATTR_CMP GT PLUS
STRING IDENT HEXCOLOR NUMBER
AT_CHARSET AT_IMPORT AT_MEDIA AT_PAGE IMPORTANT
)],
rules => [
{
lhs => 'css',
rhs => [qw(optional_space charset
optional_space imports
optional_space css_content
optional_space)],
},
{ lhs => 'charset' },
{ lhs => 'charset', rhs => [qw(AT_CHARSET STRING SEMICOLON)] },
{ lhs => 'imports' },
{
lhs => 'imports',
rhs => [qw(AT_IMPORT expression SEMICOLON)],
},
{ lhs => 'css_content', rhs => ['ruleset_media_page'], min => 0 },
{ lhs => 'ruleset_media_page', rhs => [qw(ruleset)], },
{ lhs => 'ruleset_media_page', rhs => [qw(media)] },
{ lhs => 'ruleset_media_page', rhs => [qw(page)] },
{
lhs => 'ruleset',
rhs => [qw(optional_space selectors
optional_space OPEN_CURLY
optional_space declarations
optional_space CLOSE_CURLY)],
},
{ lhs => 'declarations', rhs => [qw(declaration optional_space SEMICOLON declarations)], },
{ lhs => 'declarations', rhs => [qw(declaration)], min => 0 },
{
lhs => 'declaration',
rhs => [qw(optional_space property
optional_space COLON
optional_space expression
optional_space prio)] },
{ lhs => 'prio', rhs => [qw(IMPORTANT)] },
{ lhs => 'prio' },
{ lhs => 'selectors', rhs => [qw(selector COMMA selectors)] },
{ lhs => 'selectors', rhs => [qw(selector)] },
{ lhs => 'selector', rhs => [qw(element specializers)] },
{ lhs => 'selector', rhs => [qw(specializer)], min => 1 },
{ lhs => 'selector', rhs => [qw(selector divider selector)] },
{ lhs => 'element', rhs => [qw(IDENT)] },
{ lhs => 'element', rhs => [qw(SPLASH)] },
{ lhs => 'divider', rhs => [qw(SPACE)] },
{ lhs => 'divider', rhs => [qw(GT)] },
{ lhs => 'divider', rhs => [qw(PLUS)] },
{ lhs => 'specializers', rhs => [qw(specializer)], min => 0 },
{ lhs => 'specializer', rhs => [qw(DOT IDENT)] },
{ lhs => 'specializer', rhs => [qw(HASH IDENT)] },
{ lhs => 'specializer', rhs => [qw(COLON ident_or_function)] },
{ lhs => 'specializer', rhs => [qw(OPEN_BRACKET attribute CLOSE_BRACKET)] },
{ lhs => 'attribute', rhs => [qw(IDENT)] },
{ lhs => 'attribute', rhs => [qw(IDENT ATTR_CMP ident_or_string)] },
{ lhs => 'property', rhs => [qw(expression)] },
{ lhs => 'media', rhs => [qw(AT_MEDIA idents OPEN_CURLY ruleset CLOSE_CURLY)] },
{ lhs => 'page', rhs => [qw(AT_PAGE pseudo_page OPEN_CURLY declarations CLOSE_CURLY)] },
{ lhs => 'pseudo_page', rhs => [qw(COLON idents)] },
{ lhs => 'pseudo_page' },
{ lhs => 'expression', rhs => ['STRING'] },
{ lhs => 'expression', rhs => ['NUMBER'] },
{ lhs => 'expression', rhs => ['IDENT'] },
{ lhs => 'expression', rhs => ['HEXCOLOR'] },
{ lhs => 'expression', rhs => ['function_call'] },
{ lhs => 'expression', rhs => [qw(OPEN_PAREN expression CLOSE_PAREN)] },
{ lhs => 'ident_or_string', rhs => [qw(IDENT)] },
{ lhs => 'ident_or_string', rhs => [qw(STRING)] },
{ lhs => 'ident_or_function', rhs => [qw(IDENT)] },
{ lhs => 'ident_or_function', rhs => [qw(function_call)] },
{ lhs => 'idents', rhs => [qw(IDENT)] },
{ lhs => 'idents', rhs => [qw(IDENT COMMA idents)] },
{ lhs => 'function_call', rhs => [qw(IDENT OPEN_PAREN expression CLOSE_PAREN)] },
{ lhs => 'optional_space', rhs => [qw(SPACE)], min => 0 },
],
});
$grammar->precompute;
#
# phase 3 -- parse our tokens
#
my $rec = Marpa::XS::Recognizer->new( { grammar => $grammar } );
foreach my $token (@tokens) {
if ($token->[0] eq 'COMMENT') {
# process comments in a different way
} elsif (defined $rec->read( @$token )) {
say "reading Token: @$token";
} else {
die "Error reading Token: @$token";
};
}
say Data::Dumper->Dump([$rec->value], ['value']);
#
# some actions for testing
#
sub charset {
say Data::Dumper->Dump([ [@_] ],[ 'Charset' ]);
return;
}
sub imports {
say Data::Dumper->Dump([ [@_] ],[ 'Imports' ]);
return;
}
sub function_call {
say Data::Dumper->Dump([ [@_] ],[ 'Function_Call' ]);
return "$_[1]($_[3])";
}
sub expression {
say Data::Dumper->Dump([ [@_] ],[ 'Expression' ]);
return $_[1];
}
# default action if specific action is not defined
sub do_default {
say Data::Dumper->Dump([ [@_] ],[ 'default' ]);
return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.