Created
September 3, 2013 21:18
-
-
Save jirutka/6429757 to your computer and use it in GitHub Desktop.
Pegdown plugin that demonstrates how to “hide” any inline or block rule defined in the Pegdown Parser class.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
import java.util.List; | |
import org.parboiled.Rule; | |
import org.parboiled.matchers.FirstOfMatcher; | |
import org.parboiled.matchers.Matcher; | |
import org.pegdown.Parser; | |
import org.pegdown.ast.InlineHtmlNode; | |
import org.pegdown.plugins.InlinePluginParser; | |
/** | |
* Pluggable parser for Pegdown that parses inline XHTML/XML.
* | |
* <p>This is intended to replace the Inline HTML feature of the Pegdown parser which is too lenient | |
* and so very hard to escape correctly in renderer. XML syntax is more strict and so more suitable | |
* for an inline (X)HTML in the Markdown syntax.</p> | |
* | |
* <p>There are some restrictions and one relaxation:</p> | |
* <ul> | |
* <li>Only tags, attributes and comments are supported.</li> | |
* <li>No new lines are permitted inside tags.</li> | |
* <li>An attribute value may be left unquoted when it contains no spaces or '>' characters.</li>
* </ul> | |
*/ | |
public class InlineXHtmlPegdownPluginParser extends Parser implements InlinePluginParser | |
{ | |
public InlineXHtmlPegdownPluginParser() | |
{ | |
super(ALL, 1000l, DefaultParseRunnerProvider); | |
} | |
@Override | |
public Rule[] inlinePluginRules() | |
{ | |
// HideInlineHtmlRule must be after InlineXHtml and before InlineHtml | |
return new Rule[]{ InlineXHtml(), HideInlineHtmlRule() }; | |
} | |
/** | |
* Rule that "hides" the {@link Parser#InlineHtml() InlineHtml} rule from the Pegdown parser. | |
* | |
* <p>This rule tests {@link Parser#HtmlTag() HtmlTag} against an input and when it succeeds, | |
* then it tries all of the subrules from the {@link Parser#NonLinkInline() NonLinkInline} | |
* except the {@code InlineHtml} rule. This means that the parser never reaches the | |
* {@code InlineHtml} rule and an inline HTML is parsed as a normal text.</p> | |
* | |
* <p>Why so complicated? Parsing of an inline HTML cannot be simply disabled in Pegdown, only | |
* suppressed, i.e. HTML is parsed and then dropped. The {@link Parser} class cannot be smoothly | |
* subclassed (see <a href="https://github.com/sirthias/pegdown/issues/54"> #54</a>) so there's | |
* no a straightforward way how to override the {@code InlineHtml} rule. Therefore this is | |
* probably the most elegant way how to do it without patching the Pegdown parser.</p> | |
*/ | |
public Rule HideInlineHtmlRule() | |
{ | |
FirstOfMatcher nonLinkInline = (FirstOfMatcher) NonLinkInline(); | |
List<Matcher> matchers = new ArrayList<Matcher>(); | |
// Copy all matchers but InlineHtml | |
for (Matcher matcher : nonLinkInline.getChildren()) { | |
if (! "InlineHtml".equals(matcher.getLabel())) { | |
matchers.add(matcher); | |
} | |
} | |
return NodeSequence( | |
Test(InlineHtml()), | |
// This is basically NonLinkInline rule but without InlineHtml | |
FirstOf(matchers.toArray()) | |
); | |
} | |
/** | |
* Rule for an inline XHTML/XML. | |
*/ | |
public Rule InlineXHtml() | |
{ | |
return NodeSequence( | |
FirstOf( | |
HtmlComment(), //comments are same in XML and HTML, so reuse it | |
XmlTag() | |
), push(new InlineHtmlNode(match())) | |
); | |
} | |
/** | |
* Rule for an XML tag. | |
* It must not contain new lines. | |
*/ | |
public Rule XmlTag() | |
{ | |
return Sequence( | |
'<', Optional('/'), | |
OneOrMore( | |
Alphanumeric() | |
), | |
Sp(), | |
ZeroOrMore( | |
XmlAttribute(), | |
Sp() | |
), | |
Optional('/'), '>' | |
); | |
} | |
/** | |
* Rule for a XML attribute. | |
* An attribute value may not be quoted when it doesn't contain spaces or '>' characters. | |
*/ | |
public Rule XmlAttribute() | |
{ | |
return Sequence( | |
OneOrMore( | |
FirstOf(Alphanumeric(), '-', '_') | |
), | |
Sp(), '=', Sp(), | |
FirstOf( | |
Quoted(), | |
// Non-quoted value is not valid in XML, but we may not be so strict | |
OneOrMore( | |
TestNot('>'), | |
Nonspacechar() | |
) | |
) | |
); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment