Skip to content

Instantly share code, notes, and snippets.

@datagraph
Created April 2, 2010 23:12
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save datagraph/353854 to your computer and use it in GitHub Desktop.
Save datagraph/353854 to your computer and use it in GitHub Desktop.
A parser combinator SPARQL grammar for Scala (work-in-progress).
/*
* A parser combinator SPARQL grammar for Scala.
* Written in November 2009 by Arto Bendiken <http://ar.to/>
*
* This is free and unencumbered software released into the public domain.
* For more information, please refer to <http://unlicense.org/>
*/
package org.datagraph.sparql
import java.io.FileReader
import scala.util.parsing.combinator._
import scala.collection.mutable.ListBuffer
/**
 * A parser-combinator grammar for SPARQL 1.0 queries (work in progress).
 *
 * Every production of the W3C grammar is exposed as a parser whose name and
 * `// [n]` marker match the rule in the specification. Parse results are
 * loosely typed, s-expression-like lists tagged with symbols such as
 * `Symbol("SELECT")`; several result shapes are still provisional (see the
 * TODO/FIXME notes).
 *
 * Known limitations:
 *  - keywords are matched case-sensitively;
 *  - names only cover the ASCII subset of PN_CHARS_BASE;
 *  - `RegexParsers` skips whitespace before every literal and regex, so
 *    productions assembled from several tokens (e.g. `PNAME_LN`, `LANGTAG`,
 *    `VAR1`) tolerate whitespace between their parts.
 *
 * @see http://www.w3.org/TR/rdf-sparql-query/#grammar
 */
object Grammar extends RegexParsers {

  /**
   * Parses each file named in `args` as a SPARQL query and prints the
   * resulting `ParseResult`.
   *
   * @param args paths of the query files to parse
   */
  def main(args: Array[String]): Unit = {
    for (arg <- args) {
      val reader = new FileReader(arg)
      // Close the reader even if parsing throws, so no file handle leaks.
      try println(parseAll(Query, reader))
      finally reader.close()
    }
  }

  // [1]
  /** A whole query: the prologue entries (if any) followed by the query form. */
  def Query: Parser[List[Any]] =
    Prologue ~ (SelectQuery | ConstructQuery | DescribeQuery | AskQuery) ^^ {
      case Nil ~ query => query
      case prolog ~ query => prolog ++ query
    }

  // [2]
  /** Optional BASE declaration followed by any number of PREFIX declarations. */
  def Prologue: Parser[List[Any]] =
    opt(BaseDecl) ~ rep(PrefixDecl) ^^ {
      case None ~ Nil => Nil
      // Unwrap the Option so the declaration itself, not Some(...), is emitted.
      case Some(base) ~ Nil => List(base)
      case None ~ prefixes => List(prefixes)
      case Some(base) ~ prefixes => List(base, prefixes)
    }

  // [3]
  def BaseDecl: Parser[List[Any]] =
    "BASE" ~> IRI_REF ^^ (iri => List(Symbol("BASE"), iri))

  // [4]
  def PrefixDecl: Parser[List[Any]] =
    "PREFIX" ~> PNAME_NS ~ IRI_REF ^^ {
      case ns ~ iri => List(Symbol("PREFIX"), ns, iri)
    }

  // [5]
  /** SELECT query; the short result forms omit absent modifiers and datasets. */
  def SelectQuery: Parser[List[Any]] =
    "SELECT" ~> opt("DISTINCT" | "REDUCED") ~ ("*" | rep1(Var)) ~ rep(DatasetClause) ~ WhereClause ~ SolutionModifier ^^ {
      case None ~ vs ~ Nil ~ w ~ None => List(Symbol("SELECT"), vs, w)
      case None ~ vs ~ Nil ~ w ~ s => List(Symbol("SELECT"), vs, w, s)
      case dr ~ vs ~ d ~ w ~ s => List(Symbol("SELECT"), dr, vs, d, w, s)
    }

  // [6]
  def ConstructQuery: Parser[List[Any]] =
    "CONSTRUCT" ~ ConstructTemplate ~ rep(DatasetClause) ~ WhereClause ~ SolutionModifier ^^^
      List(Symbol("CONSTRUCT")) // TODO

  // [7]
  def DescribeQuery: Parser[List[Any]] =
    "DESCRIBE" ~ ("*" | rep1(VarOrIRIref)) ~ rep(DatasetClause) ~ opt(WhereClause) ~ SolutionModifier ^^^
      List(Symbol("DESCRIBE")) // TODO

  // [8]
  def AskQuery: Parser[List[Any]] =
    "ASK" ~ rep(DatasetClause) ~ WhereClause ^^^
      List(Symbol("ASK")) // TODO

  // [9]
  def DatasetClause: Parser[List[Any]] =
    "FROM" ~> (DefaultGraphClause | NamedGraphClause) ^^ (graph => List(Symbol("FROM"), graph))

  // [10]
  def DefaultGraphClause: Parser[Any] =
    SourceSelector

  // [11]
  def NamedGraphClause: Parser[List[Any]] =
    "NAMED" ~> SourceSelector ^^ (graph => List(Symbol("NAMED"), graph))

  // [12]
  def SourceSelector: Parser[Any] =
    IRIref

  // [13]
  def WhereClause: Parser[List[Any]] =
    opt("WHERE") ~> GroupGraphPattern ^^ (pattern => List(Symbol("WHERE"), pattern))

  // [14]
  /** Yields `None` when neither an ORDER BY nor a LIMIT/OFFSET clause is present. */
  def SolutionModifier: Parser[Any] =
    opt(OrderClause) ~ opt(LimitOffsetClauses) ^^ {
      case None ~ None => None
      case modifiers => modifiers
    }

  // [15]
  def LimitOffsetClauses: Parser[Any] =
    (LimitClause ~ opt(OffsetClause)) | (OffsetClause ~ opt(LimitClause))

  // [16]
  def OrderClause: Parser[List[Any]] =
    "ORDER" ~> "BY" ~> rep1(OrderCondition) ^^ (conditions => List(Symbol("ORDERBY"), conditions))

  // [17]
  def OrderCondition: Parser[Any] =
    (("ASC" | "DESC") ~ BrackettedExpression) | (Constraint | Var)

  // [18]
  def LimitClause: Parser[List[Any]] =
    "LIMIT" ~> INTEGER ^^ (count => List(Symbol("LIMIT"), count))

  // [19]
  def OffsetClause: Parser[List[Any]] =
    "OFFSET" ~> INTEGER ^^ (count => List(Symbol("OFFSET"), count))

  // [20]
  /**
   * A brace-delimited group. When the group holds only a leading triples
   * block, that block is returned directly; otherwise the result is
   * `List(leadingTriples, rest)` where `rest` lists, in source order, each
   * non-triples pattern followed by the triples block that trails it (if any).
   */
  def GroupGraphPattern: Parser[List[Any]] = {
    // `~` (not `<~`) keeps the triples block that follows a FILTER/OPTIONAL/...;
    // with `<~` the whole `"."? TriplesBlock?` tail was parsed but discarded.
    def continuation: Parser[List[Any]] =
      (GraphPatternNotTriples | Filter) ~ (opt(".") ~> opt(TriplesBlock)) ^^ {
        case pattern ~ triples => pattern :: triples.toList
      }
    "{" ~> opt(TriplesBlock) ~ (rep(continuation) ^^ (_.flatten)) <~ "}" ^^ {
      case Some(triples) ~ Nil => triples
      case triples ~ rest => List(triples, rest) // FIXME
    }
  }

  // [21]
  def TriplesBlock: Parser[List[Any]] =
    TriplesSameSubject ~ opt("." ~ opt(TriplesBlock)) ^^ {
      case triples ~ None => Symbol("BGP") :: triples
      case triples ~ more => List(Symbol("BGP"), triples, more) // FIXME
    }

  // [22]
  def GraphPatternNotTriples: Parser[Any] =
    OptionalGraphPattern | GroupOrUnionGraphPattern | GraphGraphPattern

  // [23]
  def OptionalGraphPattern: Parser[List[Any]] =
    "OPTIONAL" ~> GroupGraphPattern ^^ (pattern => List(Symbol("OPTIONAL"), pattern))

  // [24]
  def GraphGraphPattern: Parser[List[Any]] =
    "GRAPH" ~> VarOrIRIref ~ GroupGraphPattern ^^ {
      case varOrIRI ~ bgp => List(Symbol("GRAPH"), varOrIRI, bgp)
    }

  // [25]
  def GroupOrUnionGraphPattern: Parser[Any] =
    GroupGraphPattern ~ rep("UNION" ~ GroupGraphPattern)

  // [26]
  def Filter: Parser[List[Any]] =
    "FILTER" ~> Constraint ^^ (constraint => List(Symbol("FILTER"), constraint))

  // [27]
  def Constraint: Parser[Any] =
    BrackettedExpression | BuiltInCall | FunctionCall

  // [28]
  def FunctionCall: Parser[Any] =
    IRIref ~ ArgList

  // [29]
  def ArgList: Parser[Any] =
    NIL | ("(" ~> repsep(Expression, ",") <~ ")")

  // [30]
  def ConstructTemplate: Parser[Any] =
    "{" ~> opt(ConstructTriples) <~ "}"

  // [31]
  def ConstructTriples: Parser[Any] =
    TriplesSameSubject ~ opt("." ~> opt(ConstructTriples))

  // [32]
  def TriplesSameSubject: Parser[List[Any]] =
    TriplesSameSubject1 | TriplesSameSubject2

  /** Expands `subject p1 o1, o2; p2 o3` into one (s, p, o) tuple per object. */
  def TriplesSameSubject1: Parser[List[(Any, Any, Any)]] =
    VarOrTerm ~ PropertyListNotEmpty ^^ {
      case subject ~ properties =>
        for ((predicate, objects) <- properties; obj <- objects)
          yield (subject, predicate, obj)
    }

  def TriplesSameSubject2: Parser[List[Any]] =
    TriplesNode ~ PropertyList ^^ {
      case node ~ properties => List(node, properties) // FIXME
    }

  // [33]
  /** One or more `Verb ObjectList` pairs separated by ";", with an optional trailing ";". */
  def PropertyListNotEmpty: Parser[List[(Any, List[Any])]] =
    // rep1sep: the production must match at least one pair (repsep allowed none).
    rep1sep(Verb ~ ObjectList ^^ { case p ~ olist => (p, olist) }, ";") <~ opt(";")

  // [34]
  def PropertyList: Parser[Any] =
    opt(PropertyListNotEmpty)

  // [35]
  /** One or more objects separated by ",". */
  def ObjectList: Parser[List[Any]] =
    rep1sep(Object, ",")

  // [36]
  def Object: Parser[Any] =
    GraphNode

  // [37]
  def Verb: Parser[Any] =
    VarOrIRIref | "a" // TODO: "a" => rdf:type

  // [38]
  def TriplesNode: Parser[Any] =
    Collection | BlankNodePropertyList

  // [39]
  def BlankNodePropertyList: Parser[Any] =
    "[" ~> PropertyListNotEmpty <~ "]"

  // [40]
  def Collection: Parser[Any] =
    "(" ~> rep1(GraphNode) <~ ")"

  // [41]
  def GraphNode: Parser[Any] =
    VarOrTerm | TriplesNode

  // [42]
  def VarOrTerm: Parser[Any] =
    Var | GraphTerm

  // [43]
  def VarOrIRIref: Parser[Any] =
    Var | IRIref

  // [44]
  /** A variable in either `?name` or `$name` form, tagged as `(Symbol("VAR"), name)`. */
  def Var: Parser[Any] =
    // Parenthesised: `^^` binds tighter than `|`, so without the parentheses
    // only the `$name` form was tagged.
    (VAR1 | VAR2) ^^ (name => (Symbol("VAR"), name))

  // [45]
  def GraphTerm: Parser[Any] =
    IRIref | RDFLiteral | NumericLiteral | BooleanLiteral | BlankNode | NIL

  // [46]
  def Expression: Parser[Any] =
    ConditionalOrExpression

  // [47]
  def ConditionalOrExpression: Parser[Any] =
    ConditionalAndExpression ~ rep("||" ~ ConditionalAndExpression)

  // [48]
  def ConditionalAndExpression: Parser[Any] =
    ValueLogical ~ rep("&&" ~ ValueLogical)

  // [49]
  def ValueLogical: Parser[Any] =
    RelationalExpression

  // [50]
  def RelationalExpression: Parser[Any] =
    NumericExpression ~
      opt(("=" ~ NumericExpression) |
        ("!=" ~ NumericExpression) |
        ("<" ~ NumericExpression) |
        (">" ~ NumericExpression) |
        ("<=" ~ NumericExpression) |
        (">=" ~ NumericExpression))

  // [51]
  def NumericExpression: Parser[Any] =
    AdditiveExpression

  // [52]
  def AdditiveExpression: Parser[Any] =
    MultiplicativeExpression ~
      rep(("+" ~ MultiplicativeExpression) |
        ("-" ~ MultiplicativeExpression) |
        NumericLiteralPositive |
        NumericLiteralNegative)

  // [53]
  def MultiplicativeExpression: Parser[Any] =
    UnaryExpression ~ rep(("*" ~ UnaryExpression) | ("/" ~ UnaryExpression))

  // [54]
  def UnaryExpression: Parser[Any] =
    ("!" ~ PrimaryExpression) |
      ("+" ~ PrimaryExpression) |
      ("-" ~ PrimaryExpression) |
      PrimaryExpression

  // [55]
  def PrimaryExpression: Parser[Any] =
    BrackettedExpression | BuiltInCall | IRIrefOrFunction | RDFLiteral | NumericLiteral | BooleanLiteral | Var

  // [56]
  def BrackettedExpression: Parser[Any] =
    "(" ~> Expression <~ ")"

  // [57]
  /**
   * A built-in function call, returned as `List(Symbol(name), arg...)` so the
   * function name and every argument survive in the result.
   */
  def BuiltInCall: Parser[Any] =
    unaryCall("STR") |
      unaryCall("LANG") |
      binaryCall("LANGMATCHES") |
      unaryCall("DATATYPE") |
      (("BOUND" ~ "(") ~> Var <~ ")" ^^ (v => List(Symbol("BOUND"), v))) |
      binaryCall("sameTerm") |
      unaryCall("isIRI") |
      unaryCall("isURI") |
      unaryCall("isBLANK") |
      unaryCall("isLITERAL") |
      RegexExpression

  /** `name(Expression)`, yielding `List(Symbol(name), argument)`. */
  private def unaryCall(name: String): Parser[List[Any]] =
    (literal(name) ~ "(") ~> Expression <~ ")" ^^ (arg => List(Symbol(name), arg))

  /** `name(Expression, Expression)`, yielding `List(Symbol(name), first, second)`. */
  private def binaryCall(name: String): Parser[List[Any]] =
    (literal(name) ~ "(") ~> Expression ~ ("," ~> Expression) <~ ")" ^^ {
      case first ~ second => List(Symbol(name), first, second)
    }

  // [58]
  /** `REGEX(text, pattern[, flags])`, yielding `List(Symbol("REGEX"), text, pattern[, flags])`. */
  def RegexExpression: Parser[Any] =
    ("REGEX" ~ "(") ~> Expression ~ ("," ~> Expression) ~ opt("," ~> Expression) <~ ")" ^^ {
      case text ~ pattern ~ flags => List(Symbol("REGEX"), text, pattern) ++ flags.toList
    }

  // [59]
  def IRIrefOrFunction: Parser[Any] =
    IRIref ~ opt(ArgList)

  // [60]
  def RDFLiteral: Parser[Any] =
    String ~ opt(LANGTAG | ("^^" ~> IRIref))

  // [61]
  def NumericLiteral: Parser[Any] =
    NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative

  // [62]
  // Longest form first: `|` commits to the first alternative that succeeds,
  // so trying INTEGER first would consume only the "1" of "1.5".
  def NumericLiteralUnsigned: Parser[Any] =
    DOUBLE | DECIMAL | INTEGER

  // [63]
  def NumericLiteralPositive: Parser[Any] =
    DOUBLE_POSITIVE | DECIMAL_POSITIVE | INTEGER_POSITIVE

  // [64]
  def NumericLiteralNegative: Parser[Any] =
    DOUBLE_NEGATIVE | DECIMAL_NEGATIVE | INTEGER_NEGATIVE

  // [65]
  def BooleanLiteral: Parser[Any] =
    "true" | "false"

  // [66]
  // Long (triple-quoted) forms first: a short-form parser would otherwise
  // match the first two quotes of ''' or """ as an empty string.
  def String: Parser[Any] =
    STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2 | STRING_LITERAL1 | STRING_LITERAL2

  // [67]
  def IRIref: Parser[Any] =
    IRI_REF | PrefixedName

  // [68]
  def PrefixedName: Parser[Any] =
    PNAME_LN | PNAME_NS

  // [69]
  def BlankNode: Parser[Any] =
    BLANK_NODE_LABEL | ANON

  // [70]
  /** An IRI in angle brackets; yields the text between `<` and `>`. */
  def IRI_REF: Parser[Any] =
    // One atomic regex: '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'
    """<[^<>"{}|^`\\\x00-\x20]*>""".r ^^ (iri => stripDelimiters(iri, 1))

  // [71]
  def PNAME_NS: Parser[Any] =
    opt(PN_PREFIX) <~ ":"

  // [72]
  def PNAME_LN: Parser[Any] =
    PNAME_NS ~ PN_LOCAL

  // [73]
  def BLANK_NODE_LABEL: Parser[Any] =
    "_:" ~> PN_LOCAL

  // [74]
  def VAR1: Parser[Symbol] =
    "?" ~> VARNAME

  // [75]
  def VAR2: Parser[Symbol] =
    "$" ~> VARNAME

  // [76]
  def LANGTAG: Parser[Any] =
    "@" ~> "[a-zA-Z]+".r ~ rep("-" ~ "[a-zA-Z0-9]+".r)

  // [77]
  /** An unsigned integer; fails (rather than throws) when the value does not fit an `Int`. */
  def INTEGER: Parser[Int] =
    regex("[0-9]+".r) ^? (
      { case digits if BigInt(digits).isValidInt => digits.toInt },
      digits => "integer literal out of range: " + digits
    )

  // [78]
  /**
   * A decimal, yielded as its source text. A fraction digit is required (the
   * 1.0 rule also allows "1."), so the "." that terminates a triple such as
   * `?s ?p 1.` is not swallowed.
   */
  def DECIMAL: Parser[Any] =
    """[0-9]*\.[0-9]+""".r

  // [79]
  /** A double (mantissa with mandatory exponent), yielded as its source text. */
  def DOUBLE: Parser[Any] =
    """(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)[eE][+-]?[0-9]+""".r

  // [80]
  def INTEGER_POSITIVE: Parser[Int] =
    "+" ~> INTEGER

  // [81]
  def DECIMAL_POSITIVE: Parser[Any] =
    "+" ~> DECIMAL

  // [82]
  def DOUBLE_POSITIVE: Parser[Any] =
    "+" ~> DOUBLE

  // [83]
  def INTEGER_NEGATIVE: Parser[Int] =
    "-" ~> INTEGER ^^ (value => -value)

  // [84]
  def DECIMAL_NEGATIVE: Parser[Any] =
    "-" ~ DECIMAL // TODO

  // [85]
  def DOUBLE_NEGATIVE: Parser[Any] =
    "-" ~ DOUBLE // TODO

  // [86]
  def EXPONENT: Parser[Any] =
    """[eE][+-]?[0-9]+""".r

  // String literals are matched by a single regex each so that whitespace
  // inside the quotes is preserved; the result is the raw text between the
  // delimiters (escape sequences are NOT decoded). \x22 is the double quote.

  // [87]
  def STRING_LITERAL1: Parser[String] =
    """'(?:[^'\\\n\r]|\\[tbnrf\\"'])*'""".r ^^ (text => stripDelimiters(text, 1))

  // [88]
  def STRING_LITERAL2: Parser[String] =
    """\x22(?:[^\x22\\\n\r]|\\[tbnrf\\\x22'])*\x22""".r ^^ (text => stripDelimiters(text, 1))

  // [89]
  def STRING_LITERAL_LONG1: Parser[String] =
    """'''(?:(?:'|'')?(?:[^'\\]|\\[tbnrf\\"']))*'''""".r ^^ (text => stripDelimiters(text, 3))

  // [90]
  def STRING_LITERAL_LONG2: Parser[String] =
    """\x22\x22\x22(?:(?:\x22|\x22\x22)?(?:[^\x22\\]|\\[tbnrf\\\x22']))*\x22\x22\x22""".r ^^
      (text => stripDelimiters(text, 3))

  /** Drops `width` characters from both ends of `text`. */
  private def stripDelimiters(text: String, width: Int): String =
    text.substring(width, text.length - width)

  // [91]
  /** An escape sequence: a backslash followed by one of t b n r f \ " '. */
  def ECHAR: Parser[Any] =
    """\\[tbnrf\\"']""".r

  // [92]
  def NIL: Parser[Any] =
    "(" ~ rep(WS) ~ ")"

  // [93]
  def WS: Parser[Any] =
    """\s+""".r // FIXME?

  // [94]
  def ANON: Parser[Any] =
    "[" ~ rep(WS) ~ "]"

  // [95]
  def PN_CHARS_BASE: Parser[Any] =
    "[A-Za-z]".r // TODO: non-ASCII ranges of the specification

  // [96]
  def PN_CHARS_U: Parser[Any] =
    PN_CHARS_BASE | "_"

  // [97]
  def VARNAME: Parser[Symbol] =
    """\w+""".r ^^ (name => Symbol(name)) // TODO: non-ASCII name characters

  // [98]
  def PN_CHARS: Parser[Any] =
    PN_CHARS_U | "-" | "[0-9]".r // TODO: non-ASCII ranges of the specification

  // [99]
  /** A prefix name; may contain "." but neither starts nor ends with one. */
  def PN_PREFIX: Parser[Any] =
    // Single regex: a greedy rep(...) ~ PN_CHARS can never match, because the
    // repetition consumes the final character and parsers do not backtrack into it.
    """[A-Za-z](?:[A-Za-z0-9_.-]*[A-Za-z0-9_-])?""".r

  // [100]
  /** A local name; may start with a digit and contain ".", but does not end with ".". */
  def PN_LOCAL: Parser[Any] =
    """[A-Za-z0-9_](?:[A-Za-z0-9_.-]*[A-Za-z0-9_-])?""".r

  /** Placeholder parser matching the literal text "TODO"; kept for compatibility. */
  def TODO: Parser[String] = "TODO".r
}
@danyaljj
Copy link

Nice work! Wish you'd finish this and document its usage.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment