Skip to content

Instantly share code, notes, and snippets.

@seralf
Forked from datagraph/Grammar.scala
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seralf/2966ce1432ab692d9268 to your computer and use it in GitHub Desktop.
Save seralf/2966ce1432ab692d9268 to your computer and use it in GitHub Desktop.
/*
* A parser combinator SPARQL grammar for Scala.
* Written in November 2009 by Arto Bendiken <http://ar.to/>
*
* This is free and unencumbered software released into the public domain.
* For more information, please refer to <http://unlicense.org/>
*/
package org.datagraph.sparql
import java.io.FileReader
import scala.util.parsing.combinator._
import scala.collection.mutable.ListBuffer
/**
 * A parser-combinator grammar for SPARQL queries.
 *
 * The `// [n]` markers give the production number of each rule in the W3C
 * grammar referenced below. Parse results are untyped trees assembled from
 * `List`s, tuples, `Option`s, `Symbol`s and `String`s; node lists start with a
 * tag symbol such as `'SELECT` or `'FILTER`.
 *
 * Terminals are matched with single regular expressions wherever possible:
 * `RegexParsers` skips whitespace before every literal and regex, so a
 * terminal assembled from several smaller parsers would wrongly accept
 * whitespace inside the token.
 *
 * Known limitations: keywords are matched case-sensitively, prefixed names and
 * blank node labels only support ASCII characters, and several productions
 * still return raw `~` structures (see the TODO/FIXME notes).
 *
 * @see http://www.w3.org/TR/rdf-sparql-query/#grammar
 */
object Grammar extends RegexParsers {

  // Regex source for an escape sequence: a backslash followed by one of t b n r f \ " '
  private val Echar = """\\[tbnrf\\"']"""

  // Regex sources for prefix and local-name tokens (ASCII subset only).
  // The final character may not be '.', which regex backtracking enforces.
  // TODO: add the non-ASCII PN_CHARS_BASE ranges of the specification.
  private val PnPrefix = """[A-Za-z](?:[A-Za-z0-9_.-]*[A-Za-z0-9_-])?"""
  private val PnLocal = """[A-Za-z0-9_](?:[A-Za-z0-9_.-]*[A-Za-z0-9_-])?"""

  /** Removes `n` delimiter characters from both ends of a matched token. */
  private def unquote(token: String, n: Int): String =
    token.substring(n, token.length - n)

  /**
   * Parses every file named on the command line as a SPARQL query and prints
   * the resulting `ParseResult`.
   *
   * @param args paths of the query files to parse
   */
  def main(args: Array[String]): Unit = {
    for (arg <- args) {
      val reader = new FileReader(arg)
      // Close the file even when parsing throws.
      try println(parseAll(Query, reader))
      finally reader.close()
    }
  }

  // [1] Entry point: prologue followed by one of the four query forms.
  def Query: Parser[List[Any]] =
    Prologue ~ (SelectQuery | ConstructQuery | DescribeQuery | AskQuery) ^^ {
      case List() ~ query => query
      case prolog ~ query => prolog ++ query
    }

  // [2] Optional BASE declaration followed by any number of PREFIX declarations.
  def Prologue: Parser[List[Any]] =
    opt(BaseDecl) ~ rep(PrefixDecl) ^^ {
      case None ~ List() => List()
      case base ~ List() => List(base)
      case None ~ prefixes => List(prefixes)
      case base ~ prefixes => List(base, prefixes)
    }

  // [3]
  def BaseDecl: Parser[List[Any]] =
    "BASE" ~ IRI_REF ^^ {
      case _ ~ iri => List('BASE, iri)
    }

  // [4]
  def PrefixDecl: Parser[List[Any]] =
    "PREFIX" ~ PNAME_NS ~ IRI_REF ^^ {
      case _ ~ ns ~ iri => List('PREFIX, ns, iri)
    }

  // [5] The short result forms are used when no DISTINCT/REDUCED and no
  // dataset clause is present.
  def SelectQuery: Parser[List[Any]] =
    "SELECT" ~ opt("DISTINCT" | "REDUCED") ~ ("*" | rep1(Var)) ~ rep(DatasetClause) ~ WhereClause ~ SolutionModifier ^^ {
      case _ ~ None ~ vs ~ List() ~ w ~ None => List('SELECT, vs, w)
      case _ ~ None ~ vs ~ List() ~ w ~ s => List('SELECT, vs, w, s)
      case _ ~ dr ~ vs ~ d ~ w ~ s => List('SELECT, dr, vs, d, w, s)
    }

  // [6]
  def ConstructQuery: Parser[List[Any]] =
    "CONSTRUCT" ~ ConstructTemplate ~ rep(DatasetClause) ~ WhereClause ~ SolutionModifier ^^ {
      case _ => List('CONSTRUCT) // TODO
    }

  // [7]
  def DescribeQuery: Parser[List[Any]] =
    "DESCRIBE" ~ ("*" | rep1(VarOrIRIref)) ~ rep(DatasetClause) ~ opt(WhereClause) ~ SolutionModifier ^^ {
      case _ => List('DESCRIBE) // TODO
    }

  // [8]
  def AskQuery: Parser[List[Any]] =
    "ASK" ~ rep(DatasetClause) ~ WhereClause ^^ {
      case _ => List('ASK) // TODO
    }

  // [9]
  def DatasetClause: Parser[List[Any]] =
    "FROM" ~> (DefaultGraphClause | NamedGraphClause) ^^ (List('FROM, _))

  // [10]
  def DefaultGraphClause: Parser[Any] =
    SourceSelector

  // [11]
  def NamedGraphClause: Parser[List[Any]] =
    "NAMED" ~> SourceSelector ^^ (List('NAMED, _))

  // [12]
  def SourceSelector: Parser[Any] =
    IRIref

  // [13] The WHERE keyword itself is optional.
  def WhereClause: Parser[List[Any]] =
    opt("WHERE") ~> GroupGraphPattern ^^ (List('WHERE, _))

  // [14] Yields None when neither ORDER BY nor LIMIT/OFFSET is present.
  def SolutionModifier: Parser[Any] =
    opt(OrderClause) ~ opt(LimitOffsetClauses) ^^ {
      case None ~ None => None
      case xy => xy
    }

  // [15] LIMIT and OFFSET may appear in either order.
  def LimitOffsetClauses: Parser[Any] =
    (LimitClause ~ opt(OffsetClause)) | (OffsetClause ~ opt(LimitClause))

  // [16]
  def OrderClause: Parser[List[Any]] =
    "ORDER" ~ "BY" ~ rep1(OrderCondition) ^^ {
      case _ ~ _ ~ condition => List('ORDERBY, condition)
    }

  // [17]
  def OrderCondition: Parser[Any] =
    (("ASC" | "DESC") ~ BrackettedExpression) | (Constraint | Var)

  // [18]
  def LimitClause: Parser[List[Any]] =
    "LIMIT" ~> INTEGER ^^ (List('LIMIT, _))

  // [19]
  def OffsetClause: Parser[List[Any]] =
    "OFFSET" ~> INTEGER ^^ (List('OFFSET, _))

  // [20] A leading triples block followed by any number of non-triple
  // patterns, each optionally followed by a further triples block.
  def GroupGraphPattern: Parser[List[Any]] =
    "{" ~> opt(TriplesBlock) ~ rep(groupElement) <~ "}" ^^ {
      case Some(triples) ~ List() => triples
      case triples ~ rest => List(triples, rest.flatten) // FIXME
    }

  // One `(GraphPatternNotTriples | Filter) '.'? TriplesBlock?` element of [20].
  // The explicit parentheses matter: `a <~ b ~> c` groups as `a <~ (b ~> c)`,
  // which would parse the trailing triples block but throw its result away.
  private def groupElement: Parser[List[Any]] =
    ((GraphPatternNotTriples | Filter) <~ opt(".")) ~ opt(TriplesBlock) ^^ {
      case pattern ~ None => List(pattern)
      case pattern ~ Some(triples) => List(pattern, triples)
    }

  // [21]
  def TriplesBlock: Parser[List[Any]] =
    TriplesSameSubject ~ opt("." ~ opt(TriplesBlock)) ^^ {
      case a ~ None => List('BGP) ++ a
      case a ~ b => List('BGP, a, b) // FIXME
    }

  // [22]
  def GraphPatternNotTriples: Parser[Any] =
    OptionalGraphPattern | GroupOrUnionGraphPattern | GraphGraphPattern

  // [23]
  def OptionalGraphPattern: Parser[List[Any]] =
    "OPTIONAL" ~> GroupGraphPattern ^^ (List('OPTIONAL, _))

  // [24]
  def GraphGraphPattern: Parser[List[Any]] =
    "GRAPH" ~ VarOrIRIref ~ GroupGraphPattern ^^ {
      case _ ~ varOrIRI ~ bgp => List('GRAPH, varOrIRI, bgp)
    }

  // [25]
  def GroupOrUnionGraphPattern: Parser[Any] =
    GroupGraphPattern ~ rep("UNION" ~ GroupGraphPattern)

  // [26]
  def Filter: Parser[List[Any]] =
    "FILTER" ~> Constraint ^^ (List('FILTER, _))

  // [27]
  def Constraint: Parser[Any] =
    BrackettedExpression | BuiltInCall | FunctionCall

  // [28]
  def FunctionCall: Parser[Any] =
    IRIref ~ ArgList

  // [29]
  def ArgList: Parser[Any] =
    NIL | ("(" ~> repsep(Expression, ",") <~ ")")

  // [30]
  def ConstructTemplate: Parser[Any] =
    "{" ~> opt(ConstructTriples) <~ "}"

  // [31]
  def ConstructTriples: Parser[Any] =
    TriplesSameSubject ~ opt("." ~> opt(ConstructTriples))

  // [32]
  def TriplesSameSubject: Parser[List[Any]] =
    TriplesSameSubject1 | TriplesSameSubject2

  // First alternative of [32]: expands `s p1 o1, o2 ; p2 o3` into one
  // (subject, predicate, object) tuple per object.
  def TriplesSameSubject1: Parser[List[(Any, Any, Any)]] =
    (VarOrTerm ~ PropertyListNotEmpty) ^^ {
      case subject ~ properties =>
        for ((predicate, objects) <- properties; obj <- objects)
          yield (subject, predicate, obj)
    }

  // Second alternative of [32]: a collection or blank node property list as subject.
  def TriplesSameSubject2: Parser[List[Any]] =
    (TriplesNode ~ PropertyList) ^^ {
      case a ~ b => List(a, b) // FIXME
    }

  // [33] At least one `Verb ObjectList` pair (rep1sep, not repsep, so that an
  // empty property list is rejected); a single trailing ';' is tolerated.
  def PropertyListNotEmpty: Parser[List[(Any, List[Any])]] =
    rep1sep(Verb ~ ObjectList ^^ { case p ~ olist => (p, olist) }, ";") <~ opt(";")

  // [34]
  def PropertyList: Parser[Any] =
    opt(PropertyListNotEmpty)

  // [35] One or more comma-separated objects.
  def ObjectList: Parser[List[Any]] =
    rep1sep(Object, ",")

  // [36]
  def Object: Parser[Any] =
    GraphNode

  // [37]
  def Verb: Parser[Any] =
    VarOrIRIref | "a" // TODO: "a" => rdf:type

  // [38]
  def TriplesNode: Parser[Any] =
    Collection | BlankNodePropertyList

  // [39]
  def BlankNodePropertyList: Parser[Any] =
    "[" ~> PropertyListNotEmpty <~ "]"

  // [40]
  def Collection: Parser[Any] =
    "(" ~> rep1(GraphNode) <~ ")"

  // [41]
  def GraphNode: Parser[Any] =
    VarOrTerm | TriplesNode

  // [42]
  def VarOrTerm: Parser[Any] =
    Var | GraphTerm

  // [43]
  def VarOrIRIref: Parser[Any] =
    Var | IRIref

  // [44] Both variable spellings yield the bare variable-name Symbol. Because
  // `^^` binds tighter than `|`, a transformation written after `VAR1 | VAR2`
  // without parentheses would apply to VAR2 only.
  def Var: Parser[Any] =
    VAR1 | VAR2

  // [45]
  def GraphTerm: Parser[Any] =
    IRIref | RDFLiteral | NumericLiteral | BooleanLiteral | BlankNode | NIL

  // [46]
  def Expression: Parser[Any] =
    ConditionalOrExpression

  // [47]
  def ConditionalOrExpression: Parser[Any] =
    ConditionalAndExpression ~ rep("||" ~ ConditionalAndExpression)

  // [48]
  def ConditionalAndExpression: Parser[Any] =
    ValueLogical ~ rep("&&" ~ ValueLogical)

  // [49]
  def ValueLogical: Parser[Any] =
    RelationalExpression

  // [50] "<" and ">" are listed before "<=" and ">="; this works because a
  // failed alternative backtracks to the next one.
  def RelationalExpression: Parser[Any] =
    NumericExpression ~ opt(
      ("=" ~ NumericExpression) |
      ("!=" ~ NumericExpression) |
      ("<" ~ NumericExpression) |
      (">" ~ NumericExpression) |
      ("<=" ~ NumericExpression) |
      (">=" ~ NumericExpression))

  // [51]
  def NumericExpression: Parser[Any] =
    AdditiveExpression

  // [52]
  def AdditiveExpression: Parser[Any] =
    MultiplicativeExpression ~ rep(
      ("+" ~ MultiplicativeExpression) |
      ("-" ~ MultiplicativeExpression) |
      NumericLiteralPositive |
      NumericLiteralNegative)

  // [53]
  def MultiplicativeExpression: Parser[Any] =
    UnaryExpression ~ rep(("*" ~ UnaryExpression) | ("/" ~ UnaryExpression))

  // [54]
  def UnaryExpression: Parser[Any] =
    "!" ~ PrimaryExpression |
    "+" ~ PrimaryExpression |
    "-" ~ PrimaryExpression |
    PrimaryExpression

  // [55]
  def PrimaryExpression: Parser[Any] =
    BrackettedExpression | BuiltInCall | IRIrefOrFunction | RDFLiteral | NumericLiteral | BooleanLiteral | Var

  // [56]
  def BrackettedExpression: Parser[Any] =
    "(" ~> Expression <~ ")"

  // [57] Every call yields a list headed by a tag symbol naming the function,
  // followed by all of its arguments.
  def BuiltInCall: Parser[Any] =
    builtIn1("STR", 'STR) |
    builtIn1("LANG", 'LANG) |
    builtIn2("LANGMATCHES", 'LANGMATCHES) |
    builtIn1("DATATYPE", 'DATATYPE) |
    ("BOUND" ~> "(" ~> Var <~ ")" ^^ (List('BOUND, _))) |
    builtIn2("sameTerm", 'sameTerm) |
    builtIn1("isIRI", 'isIRI) |
    builtIn1("isURI", 'isURI) |
    builtIn1("isBLANK", 'isBLANK) |
    builtIn1("isLITERAL", 'isLITERAL) |
    RegexExpression

  // `keyword '(' Expression ')'`, yielding List(tag, argument).
  private def builtIn1(keyword: String, tag: Symbol): Parser[List[Any]] =
    literal(keyword) ~> "(" ~> Expression <~ ")" ^^ (arg => List(tag, arg))

  // `keyword '(' Expression ',' Expression ')'`, yielding List(tag, first, second).
  private def builtIn2(keyword: String, tag: Symbol): Parser[List[Any]] =
    literal(keyword) ~> "(" ~> Expression ~ ("," ~> Expression) <~ ")" ^^ {
      case first ~ second => List(tag, first, second)
    }

  // [58] Yields List('REGEX, text, pattern) plus the flags expression when present.
  def RegexExpression: Parser[Any] =
    "REGEX" ~> "(" ~> Expression ~ ("," ~> Expression) ~ opt("," ~> Expression) <~ ")" ^^ {
      case text ~ pattern ~ None => List('REGEX, text, pattern)
      case text ~ pattern ~ Some(flags) => List('REGEX, text, pattern, flags)
    }

  // [59]
  def IRIrefOrFunction: Parser[Any] =
    IRIref ~ opt(ArgList)

  // [60]
  def RDFLiteral: Parser[Any] =
    String ~ opt(LANGTAG | ("^^" ~> IRIref))

  // [61]
  def NumericLiteral: Parser[Any] =
    NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative

  // [62] Alternation is ordered, so the longest form has to be tried first;
  // with INTEGER first, "1.5" would stop after the "1".
  def NumericLiteralUnsigned: Parser[Any] =
    DOUBLE | DECIMAL | INTEGER

  // [63] Longest form first, as in [62].
  def NumericLiteralPositive: Parser[Any] =
    DOUBLE_POSITIVE | DECIMAL_POSITIVE | INTEGER_POSITIVE

  // [64] Longest form first, as in [62].
  def NumericLiteralNegative: Parser[Any] =
    DOUBLE_NEGATIVE | DECIMAL_NEGATIVE | INTEGER_NEGATIVE

  // [65]
  def BooleanLiteral: Parser[Any] =
    "true" | "false"

  // [66] The long (triple-quoted) forms are tried first; otherwise the short
  // forms would match the first two quotes of ''' or """ as an empty string.
  def String: Parser[Any] =
    STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2 | STRING_LITERAL1 | STRING_LITERAL2

  // [67]
  def IRIref: Parser[Any] =
    IRI_REF | PrefixedName

  // [68]
  def PrefixedName: Parser[Any] =
    PNAME_LN | PNAME_NS

  // [69]
  def BlankNode: Parser[Any] =
    BLANK_NODE_LABEL | ANON

  // [70] Yields the IRI text without the enclosing angle brackets. Excluded
  // inside the brackets: < > " { } | ^ ` \ and the characters U+0000..U+0020.
  def IRI_REF: Parser[Any] =
    """<[^<>"{}|^`\\\x00-\x20]*>""".r ^^ (unquote(_, 1))

  // [71] Matched as one token so that no whitespace is accepted before the
  // colon. Yields Some(prefix), or None for the empty prefix ":".
  def PNAME_NS: Parser[Any] =
    ("(?:" + PnPrefix + ")?:").r ^^ { token =>
      Some(token.dropRight(1)).filter(_.nonEmpty)
    }

  // [72] Matched as one token; yields `prefixOption ~ localName`.
  def PNAME_LN: Parser[Any] =
    ("(?:" + PnPrefix + ")?:" + PnLocal).r ^^ { token =>
      val colon = token.indexOf(':')
      new ~(Some(token.take(colon)).filter(_.nonEmpty), token.drop(colon + 1))
    }

  // [73]
  def BLANK_NODE_LABEL: Parser[Any] =
    "_:" ~> PN_LOCAL

  // [74]
  def VAR1: Parser[Symbol] =
    "?" ~> VARNAME

  // [75]
  def VAR2: Parser[Symbol] =
    "$" ~> VARNAME

  // [76]
  def LANGTAG: Parser[Any] =
    "@" ~> "[a-zA-Z]+".r ~ rep("-" ~ "[a-zA-Z0-9]+".r)

  // [77] A digit sequence that does not fit into an Int is reported as a parse
  // failure instead of escaping as a NumberFormatException.
  def INTEGER: Parser[Int] =
    "[0-9]+".r ^? (
      { case digits if BigInt(digits).isValidInt => digits.toInt },
      digits => "integer literal out of Int range: " + digits
    )

  // [78] Yields the lexical form as a String. At least one digit is required
  // after the dot, so "1." stays an INTEGER followed by the '.' that ends a
  // triple.
  def DECIMAL: Parser[Any] =
    """[0-9]*\.[0-9]+""".r

  // [79] Yields the lexical form as a String; the exponent is mandatory.
  def DOUBLE: Parser[Any] =
    """(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)[eE][+-]?[0-9]+""".r

  // [80]
  def INTEGER_POSITIVE: Parser[Int] =
    "+" ~> INTEGER

  // [81]
  def DECIMAL_POSITIVE: Parser[Any] =
    "+" ~> DECIMAL

  // [82]
  def DOUBLE_POSITIVE: Parser[Any] =
    "+" ~> DOUBLE

  // [83]
  def INTEGER_NEGATIVE: Parser[Int] =
    "-" ~> INTEGER ^^ (_ * -1)

  // [84]
  def DECIMAL_NEGATIVE: Parser[Any] =
    "-" ~ DECIMAL // TODO

  // [85]
  def DOUBLE_NEGATIVE: Parser[Any] =
    "-" ~ DOUBLE // TODO

  // [86]
  def EXPONENT: Parser[Any] =
    "[eE]".r ~ "[+-]?".r ~ "[0-9]+".r

  // [87] Single-quoted string on one line. Yields the text between the quotes;
  // escape sequences are kept verbatim, not decoded.
  def STRING_LITERAL1: Parser[String] =
    ("""'[^'\\\n\r]*(?:""" + Echar + """[^'\\\n\r]*)*'""").r ^^ (unquote(_, 1))

  // [88] Double-quoted string on one line (\x22 is the double quote). Yields
  // the text between the quotes; escape sequences are kept verbatim.
  def STRING_LITERAL2: Parser[String] =
    ("""\x22[^\x22\\\n\r]*(?:""" + Echar + """[^\x22\\\n\r]*)*\x22""").r ^^ (unquote(_, 1))

  // [89] '''-delimited string that may span lines. One or two consecutive
  // quotes are allowed inside when followed by a non-quote character or an
  // escape sequence. Yields the text between the delimiters.
  def STRING_LITERAL_LONG1: Parser[String] =
    ("""'''[^'\\]*(?:(?:""" + Echar + """|'{1,2}(?:""" + Echar + """|[^'\\]))[^'\\]*)*'''""").r ^^ (unquote(_, 3))

  // [90] Same as [89] with double quotes as delimiters.
  def STRING_LITERAL_LONG2: Parser[String] =
    ("""\x22{3}[^\x22\\]*(?:(?:""" + Echar + """|\x22{1,2}(?:""" + Echar + """|[^\x22\\]))[^\x22\\]*)*\x22{3}""").r ^^ (unquote(_, 3))

  // [91] A backslash followed by one of t b n r f \ " '
  def ECHAR: Parser[Any] =
    "\\" ~ """[tbnrf\\"']""".r

  // [92]
  def NIL: Parser[Any] =
    "(" ~ rep(WS) ~ ")"

  // [93]
  def WS: Parser[Any] =
    """\s+""".r // FIXME?

  // [94]
  def ANON: Parser[Any] =
    "[" ~ rep(WS) ~ "]"

  // [95] ASCII letters only; TODO stands in for the remaining character ranges.
  def PN_CHARS_BASE: Parser[Any] =
    "[A-Z]".r | "[a-z]".r | TODO

  // [96]
  def PN_CHARS_U: Parser[Any] =
    PN_CHARS_BASE | "_"

  // [97]
  def VARNAME: Parser[Symbol] =
    """[a-zA-Z_]\w*""".r ^^ (Symbol(_)) // FIXME

  // [98]
  def PN_CHARS: Parser[Any] =
    PN_CHARS_U | "-" | "[0-9]".r | TODO

  // [99] Matched as one regex: a greedy `rep` of name characters followed by a
  // mandatory final name character can never succeed, because `rep` does not
  // give characters back.
  def PN_PREFIX: Parser[Any] =
    PnPrefix.r

  // [100] Matched as one regex for the same reason as [99].
  def PN_LOCAL: Parser[Any] =
    PnLocal.r

  // Placeholder for the parts of the grammar that are not implemented yet;
  // matches the literal text "TODO".
  def TODO: Parser[String] = "TODO".r
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment