Skip to content

Instantly share code, notes, and snippets.

@honnix
Created April 13, 2012 09:59
Show Gist options
  • Save honnix/2375506 to your computer and use it in GitHub Desktop.
Save honnix/2375506 to your computer and use it in GitHub Desktop.
Simple XPath-like parser built with Scala parser combinators
// _[@type=="baoc" and ts10[@value=="1" and a==2] and ts20]
package com.honnix.xml.transformer
import scala.xml.NodeSeq
import scala.util.parsing.combinator.JavaTokenParsers
/** Factory for [[XPathParser]] instances, so callers can omit `new`. */
object XPathParser {

  /**
   * Creates a parser for one conditional path.
   *
   * @param conditionalPath the path expression that the parser will evaluate
   * @param topSelector     how the outermost identifier of the path is looked up in the input
   *                        nodes (for example a child lookup or a descendant lookup)
   * @return a new parser bound to `conditionalPath`
   */
  def apply(
      conditionalPath: String,
      topSelector: (NodeSeq, String) => NodeSeq): XPathParser = {
    val parser = new XPathParser(conditionalPath, topSelector)
    parser
  }
}
/**
 * Evaluates a small XPath-like filter expression against a `NodeSeq`.
 *
 * Grammar, as implemented by the parsers below:
 * {{{
 * expr            ::= ident "[" term "]"
 * term            ::= subTerm { "or" subTerm }
 * subTerm         ::= conditionalNode { "and" conditionalNode }
 * conditionalNode ::= expr | selector | condition | "(" term ")"
 * selector        ::= ident "\" ( selector | condition )
 * condition       ::= ident [ op right ]
 * op              ::= "==" | "!=" | ">=" | "<=" | ">" | "<"
 * right           ::= floatingPointNumber | stringLiteral
 * }}}
 *
 * Example: `_[@type=="baoc" and ts10[@value=="1" and a==2] and ts20]`
 *
 * Identifiers may start with `@`; they are handed unchanged to the `\` projection of
 * `NodeSeq`, which treats such names as attribute lookups.
 *
 * @param conditionalPath the expression to parse and evaluate
 * @param topSelector     selects the nodes named by the identifier of an `expr` from the nodes
 *                        being examined, before the bracketed predicate is applied
 */
class XPathParser(conditionalPath: String, topSelector: (NodeSeq, String) => NodeSeq)
    extends JavaTokenParsers {

  /** `ident[term]`: the nodes picked by `topSelector` that satisfy the bracketed predicate. */
  def expr: Parser[NodeSeq => NodeSeq] = ident ~ ("[" ~> term <~ "]") ^^ {
    case name ~ predicate =>
      (nodes: NodeSeq) => topSelector(nodes, name).filter(predicate)
  }

  /** Disjunction: true when at least one of the `or`-separated sub terms holds. */
  def term: Parser[NodeSeq => Boolean] = subTerm ~ rep("or" ~> subTerm) ^^ {
    case first ~ rest =>
      (nodes: NodeSeq) => first(nodes) || rest.exists(_(nodes))
  }

  /** Conjunction: true when every `and`-separated conditional node holds. */
  def subTerm: Parser[NodeSeq => Boolean] = conditionalNode ~ rep("and" ~> conditionalNode) ^^ {
    case first ~ rest =>
      (nodes: NodeSeq) => first(nodes) && rest.forall(_(nodes))
  }

  /**
   * One operand of `and` / `or`. Node-selecting alternatives (`expr`, `selector`) count as true
   * when they select at least one node.
   */
  def conditionalNode: Parser[NodeSeq => Boolean] = (
      expr ^^ (select => (nodes: NodeSeq) => select(nodes).nonEmpty)
    | selector ^^ (select => (nodes: NodeSeq) => select(nodes).nonEmpty)
    | condition
    | "(" ~> term <~ ")"
  )

  /**
   * A backslash-separated path: the children called `ident` that satisfy whatever follows the
   * backslash (a further selector or a condition).
   */
  def selector: Parser[NodeSeq => NodeSeq] = (ident <~ "\\") ~ (
      selector ^^ (select => (nodes: NodeSeq) => select(nodes).nonEmpty)
    | condition
  ) ^^ {
    case name ~ predicate =>
      (nodes: NodeSeq) => (nodes \ name).filter(predicate)
  }

  /**
   * `ident`, optionally followed by an operator and a literal.
   *
   *  - Without an operator the condition holds when `ident` selects anything.
   *  - With a string literal, the text of the selected nodes is compared as a string.
   *  - With a numeric literal, the text is converted to `Float` first; empty or non-numeric
   *    text makes the condition false instead of raising `NumberFormatException`.
   */
  def condition: Parser[NodeSeq => Boolean] = ident ~ opt(op ~ right) ^^ {
    case field ~ None =>
      (x: NodeSeq) => (x \ field).nonEmpty
    case field ~ Some(operator ~ (value: String)) =>
      val holds = stringComparison(operator)
      (x: NodeSeq) => holds((x \ field).text, value)
    case field ~ Some(operator ~ (value: Float)) =>
      val holds = floatComparison(operator)
      (x: NodeSeq) => toFloatOption((x \ field).text).exists(holds(_, value))
  }

  /** String comparison for one of the six operators accepted by `op`. */
  private def stringComparison(operator: String): (String, String) => Boolean = operator match {
    case "==" => _ == _
    case "!=" => _ != _
    case ">"  => _ > _
    case "<"  => _ < _
    case ">=" => _ >= _
    case "<=" => _ <= _
  }

  /** Numeric comparison for one of the six operators accepted by `op`. */
  private def floatComparison(operator: String): (Float, Float) => Boolean = operator match {
    case "==" => _ == _
    case "!=" => _ != _
    case ">"  => _ > _
    case "<"  => _ < _
    case ">=" => _ >= _
    case "<=" => _ <= _
  }

  /** Converts `text` to a `Float`, or `None` when it is empty or not a number. */
  private def toFloatOption(text: String): Option[Float] =
    try Some(text.toFloat)
    catch { case _: NumberFormatException => None }

  /**
   * Comparison operator. Only the six operators handled by `condition` are accepted, so an
   * unsupported combination such as `=` or `<>` is a parse failure rather than a `MatchError`.
   * Two-character operators are listed first so that `>=` is not read as `>`.
   */
  def op: Parser[String] = """==|!=|>=|<=|>|<""".r

  /** Right-hand literal: a number (as `Float`) or a string literal with its quotes removed. */
  def right: Parser[Any] = (
      floatingPointNumber ^^ (_.toFloat)
    | stringLiteral ^^ (x => x.substring(1, x.length - 1))
  )

  /** Identifier, optionally prefixed with `@`. */
  override def ident: Parser[String] = """@?[a-zA-Z_]\w*""".r

  /**
   * Parses `conditionalPath` and applies it to `nodeSeq`.
   *
   * @param nodeSeq the nodes to evaluate the path against
   * @return the selected nodes, or `null` when `conditionalPath` cannot be parsed
   */
  def parse(nodeSeq: NodeSeq): NodeSeq =
    parseAll(expr, conditionalPath) match {
      case Success(select, _) => select(nodeSeq)
      case _: NoSuccess       => null // existing contract: null signals an unparsable path
    }
}
/**
 * Helpers around `scala.xml`: copying of node trees, conversions between DOM documents and
 * `Elem`, and implicit syntax that adds `!`, `\%` and `\\%` to nodes and node sequences.
 */
object XmlTransformer {

  /** Copies every node of `nodeSeq` with [[copyNode]]. */
  private def copyNodeSeq(nodeSeq: NodeSeq): NodeSeq =
    nodeSeq.map(n => copyNode(n))

  /**
   * Recursively copies `node`. Elements are rebuilt around copied children; `Text` and `PCData`
   * are re-created from their text. Any other kind of node is returned as-is instead of `null`,
   * so the copied tree never contains null children.
   */
  private def copyNode(node: Node): Node = node match {
    case e: Elem =>
      // copy keeps every field of the element other than the children supplied here.
      e.copy(child = e.child.map(n => copyNode(n)))
    case t: Text =>
      Text(t.text)
    case d: PCData =>
      PCData(d.text)
    case other =>
      other
  }

  /** Converts a DOM document to an `Elem` by serializing it and loading the result. */
  implicit def domToElem(dom: Document): Elem = {
    val charWriter = new CharArrayWriter
    TransformerFactory.newInstance.newTransformer.transform(
      new DOMSource(dom),
      new StreamResult(charWriter))
    XML.load(new CharArrayReader(charWriter.toCharArray))
  }

  /**
   * Converts an `Elem` to a DOM document by writing it out as text and handing that text to
   * `XMLUtil.getDocument`. The result type is left inferred: it is whatever that helper returns.
   */
  implicit def elemToDom(elem: Elem) = {
    val writer = new StringWriter
    XML.write(writer, elem, "UTF-8", false, null)
    XMLUtil.getDocument(writer.toString)
  }

  /** Operations added to a single `Node` by [[nodeWrapper]]. */
  final class NodeOps(node: Node) {
    /** Returns a copy of the node. */
    def !(): Node = copyNode(node)
  }

  /** Operations added to a `NodeSeq` by [[nodeSeqWrapper]]. */
  final class NodeSeqOps(nodeSeq: NodeSeq) {
    /** Returns a copy of the sequence; an empty sequence yields `NodeSeq.Empty`. */
    def !(): NodeSeq = if (nodeSeq.isEmpty) NodeSeq.Empty else copyNodeSeq(nodeSeq)

    /** Evaluates `conditionalPath`, resolving its outermost identifier among the children. */
    def \%(conditionalPath: String): NodeSeq =
      XPathParser(conditionalPath, (nodes: NodeSeq, label: String) => nodes \ label)
        .parse(nodeSeq)

    /** Evaluates `conditionalPath`, resolving its outermost identifier among all descendants. */
    def \\%(conditionalPath: String): NodeSeq =
      XPathParser(conditionalPath, (nodes: NodeSeq, label: String) => nodes \\ label)
        .parse(nodeSeq)
  }

  // Named wrapper classes replace the former anonymous structural types, whose members could
  // only be invoked reflectively.
  implicit def nodeWrapper(node: Node): NodeOps = new NodeOps(node)

  implicit def nodeSeqWrapper(nodeSeq: NodeSeq): NodeSeqOps = new NodeSeqOps(nodeSeq)
}
/**
 * Base for transformers that are written against `Elem` but exposed in terms of `Document`.
 */
trait XmlTransformer {

  /**
   * The element-level transformation, supplied by the implementing class.
   *
   * @param input the element form of the incoming document
   * @return the transformed element
   */
  protected def doTransform(input: Elem): Elem

  /**
   * Converts `input` to an `Elem`, runs [[doTransform]] on it and converts the result back.
   *
   * @param input the document to transform
   * @return the transformed document
   */
  def transform(input: Document): Document = {
    val source: Elem = XmlTransformer.domToElem(input)
    val transformed: Elem = doTransform(source)
    XmlTransformer.elemToDom(transformed)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment