Skip to content

Instantly share code, notes, and snippets.

@jirutka
Created September 3, 2013 21:18
Show Gist options
  • Save jirutka/6429757 to your computer and use it in GitHub Desktop.
Save jirutka/6429757 to your computer and use it in GitHub Desktop.
Pegdown plugin that demonstrates how to “hide” any inline or block rule defined in the Pegdown Parser class.
import java.util.ArrayList;
import java.util.List;
import org.parboiled.Rule;
import org.parboiled.matchers.FirstOfMatcher;
import org.parboiled.matchers.Matcher;
import org.pegdown.Parser;
import org.pegdown.ast.InlineHtmlNode;
import org.pegdown.plugins.InlinePluginParser;
/**
 * Pluggable parser for Pegdown that parses an inline XHTML/XML.
 *
 * <p>This is intended to replace the Inline HTML feature of the Pegdown parser which is too lenient
 * and so very hard to escape correctly in renderer. XML syntax is more strict and so more suitable
 * for an inline (X)HTML in the Markdown syntax.</p>
 *
 * <p>There are some restrictions and one relaxation:</p>
 * <ul>
 * <li>Only tags, attributes and comments are supported.</li>
 * <li>No new lines are permitted inside tags.</li>
 * <li>An attribute value may be left unquoted when it contains neither whitespace nor a
 *     '&gt;' character.</li>
 * </ul>
 */
public class InlineXHtmlPegdownPluginParser extends Parser implements InlinePluginParser
{
    /**
     * Creates the plugin parser with all Pegdown extensions enabled, a parsing timeout of
     * 1000 ms and the default parse runner provider.
     */
    public InlineXHtmlPegdownPluginParser()
    {
        // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit '1'.
        super(ALL, 1000L, DefaultParseRunnerProvider);
    }

    /**
     * Returns the inline rules contributed by this plugin.
     *
     * @return {@link #InlineXHtml()} followed by {@link #HideInlineHtmlRule()}, in this order
     */
    @Override
    public Rule[] inlinePluginRules()
    {
        // HideInlineHtmlRule must be after InlineXHtml and before InlineHtml
        return new Rule[]{ InlineXHtml(), HideInlineHtmlRule() };
    }

    /**
     * Rule that "hides" the {@link Parser#InlineHtml() InlineHtml} rule from the Pegdown parser.
     *
     * <p>This rule tests {@link Parser#InlineHtml() InlineHtml} against an input (as a lookahead)
     * and when it succeeds, then it tries all of the subrules from the
     * {@link Parser#NonLinkInline() NonLinkInline} except the {@code InlineHtml} rule. This means
     * that the parser never reaches the {@code InlineHtml} rule and an inline HTML is parsed as
     * a normal text.</p>
     *
     * <p>Why so complicated? Parsing of an inline HTML cannot be simply disabled in Pegdown, only
     * suppressed, i.e. HTML is parsed and then dropped. The {@link Parser} class cannot be smoothly
     * subclassed (see <a href="https://github.com/sirthias/pegdown/issues/54">#54</a>) so there is
     * no straightforward way to override the {@code InlineHtml} rule. Therefore this is
     * probably the most elegant way to do it without patching the Pegdown parser.</p>
     *
     * @return a rule that matches any {@code NonLinkInline} alternative other than
     *         {@code InlineHtml}, but only at positions where {@code InlineHtml} would match
     */
    public Rule HideInlineHtmlRule()
    {
        // NOTE(review): assumes NonLinkInline() is built as a FirstOf rule; if a Pegdown
        // version constructs it differently this cast fails - confirm on upgrade.
        FirstOfMatcher nonLinkInline = (FirstOfMatcher) NonLinkInline();
        List<Matcher> matchers = new ArrayList<Matcher>();

        // Copy all matchers but InlineHtml.
        // NOTE(review): relies on the child matcher carrying the label "InlineHtml"
        // (presumably derived from the rule method name) - verify against parboiled.
        for (Matcher matcher : nonLinkInline.getChildren()) {
            if (! "InlineHtml".equals(matcher.getLabel())) {
                matchers.add(matcher);
            }
        }

        return NodeSequence(
            Test(InlineHtml()),
            // This is basically NonLinkInline rule but without InlineHtml
            FirstOf(matchers.toArray())
        );
    }

    /**
     * Rule for an inline XHTML/XML.
     *
     * <p>Matches either an HTML comment or an {@link #XmlTag() XML tag} and pushes an
     * {@link InlineHtmlNode} holding the matched text.</p>
     *
     * @return the rule matching one inline comment or tag
     */
    public Rule InlineXHtml()
    {
        return NodeSequence(
            FirstOf(
                HtmlComment(), // comments are same in XML and HTML, so reuse it
                XmlTag()
            ), push(new InlineHtmlNode(match()))
        );
    }

    /**
     * Rule for an XML tag.
     * It must not contain new lines.
     *
     * <p>Accepts an opening, closing ({@code </name>}) or self-closing ({@code <name/>}) tag:
     * an alphanumeric name followed by zero or more {@link #XmlAttribute() attributes}.</p>
     *
     * @return the rule matching a single tag
     */
    public Rule XmlTag()
    {
        return Sequence(
            '<', Optional('/'),
            OneOrMore(
                Alphanumeric()
            ),
            Sp(),
            ZeroOrMore(
                XmlAttribute(),
                Sp()
            ),
            Optional('/'), '>'
        );
    }

    /**
     * Rule for an XML attribute.
     *
     * <p>The name consists of alphanumeric characters, '-' and '_'. The value is mandatory and
     * may be left unquoted when it contains neither whitespace nor a '&gt;' character.</p>
     *
     * @return the rule matching a single {@code name=value} pair
     */
    public Rule XmlAttribute()
    {
        return Sequence(
            OneOrMore(
                FirstOf(Alphanumeric(), '-', '_')
            ),
            Sp(), '=', Sp(),
            FirstOf(
                Quoted(),
                // Non-quoted value is not valid in XML, but we may not be so strict
                OneOrMore(
                    TestNot('>'),
                    Nonspacechar()
                )
            )
        );
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment