Skip to content

Instantly share code, notes, and snippets.

@rgchris
Last active December 6, 2020 19:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rgchris/f9f070c15afb64022d1361f191baa16c to your computer and use it in GitHub Desktop.
Save rgchris/f9f070c15afb64022d1361f191baa16c to your computer and use it in GitHub Desktop.
SQL Lexer
/* Token grammar for the SQL lexer. A Statement is a flat sequence of line
   breaks, blank runs, single-character punctuation and Value tokens. */
Statement ::= (Newline | Whitespace | '(' | ')' | ',' | '.' | Value | ';')*
/* The Value alternatives are listed in the order they are tried. */
Value ::= Comment-Line | Comment | String-Single | String-Double | Literal | Variable | Word | Number | Misc
/* A line comment runs up to, but does not include, the next LF or CR. */
Comment-Line ::= ('--' | '#') [^#xA#xD]*
Comment ::= '/*' ( [^*] | '*'+ [^*/] )* '*'* '*/'
/* Inside a quoted string the quote may be escaped with a backslash or by
   doubling it; a backslash may only be followed by a backslash or the quote. */
String-Single ::= "'" ([^'\]+ | '\\' | "\'" | "''")* "'"
String-Double ::= '"' ([^"\]+ | '\\' | '\"' | '""')* '"'
Literal ::= '`' [^`]+ '`'
/* Variable names admit lower-case letters only (plus '.' and '_'). */
Variable ::= '@' '@'? ([a-z] | '.' | '_')+ /* include '$' here? */
Word ::= [A-Za-z] [0-9_A-Za-z]* /* include '$' here? */
Number ::= '-'? [0-9]+ ('.' [0-9]+)? ([Ee] '-'? [0-9]+)?
/* Misc is any run of characters that are not tab, LF, CR, space or one of
   the punctuation/quote characters  " ' ( ) , . ; `  */
Misc ::= [^#x9#xA#xD "'(),.;`]+
/* LF, or CR optionally followed by LF. */
Newline ::= #xA | #xD #xA?
Whitespace ::= (#x9 | #x20)+
Rebol [
Title: "SQL Lexer"
Date: 24-Jul-2020
Author: "Christopher Ross-Gill"
]
; LEXER bundles the character classes and PARSE rules used to tokenize SQL
; text. The STATEMENT rule rewrites the input series in place: every token
; matched by VALUE is replaced with the text «type»«original-text», line
; endings are rewritten to a single line feed, and runs of blanks are
; replaced with the result of DETAB.
lexer: make object! [
; -- Character classes --
; space and tab
whitespace: charset " ^-"
; single-character punctuation and quote characters
delimiters: charset {"'(),.;`}
; everything that is not a blank, a delimiter, a line feed or a carriage
; return; used for the catch-all "misc" token
chars: complement union union whitespace delimiters charset "^/^M"
digit: charset "0123456789"
lower-alpha: charset [#"a" - #"z"]
upper-alpha: charset [#"A" - #"Z"]
alpha: union lower-alpha upper-alpha
; The following classes are "anything except the character(s) that end or
; escape the construct they are named after".
word-literal: complement charset "`"
comment-line: complement charset "^/^M"
comment: complement charset "*"
quoted-single: complement charset {'\}
quoted-double: complement charset {"\}
; -- State shared between the rules and their parenthesized actions --
; TYPE is set by whichever VALUE alternative matched.
type: _
; PART holds the text most recently captured with COPY.
part: _
; statement terminator matched by the STATEMENT rule
delimiter: ";"
; VALUE matches exactly one token. The alternatives are tried in order, so
; the more specific forms must stay ahead of the catch-all at the bottom.
value: [
"--" any comment-line
(type: 'comment-line)
|
; block comment: a "*" is only consumed when it is not followed by "/"
"/*" any [some comment | #"*" not #"/"] "*/"
(type: 'comment)
|
#"#" any comment-line ; Used by MySQL
(type: 'comment-hash)
|
; quoted strings accept \\ , a backslash-escaped quote, or a doubled quote;
; any other use of a backslash makes the alternative fail
#"'" any [some quoted-single | "\\" | "\'" | "''"] #"'"
(type: 'string-single)
|
#"^"" any [some quoted-double | "\\" | {\"} | {""}] #"^""
(type: 'string-double)
|
#"`" some word-literal #"`"
(type: 'literal)
|
; NOTE(review): only lower-case letters are accepted after the "@" prefix.
; PARSE-SQL runs this rule with PARSE/CASE, so a name such as @Foo does not
; match here and falls through to the "misc" alternative -- confirm that
; this is intended.
#"@" opt #"@" some [some lower-alpha | #"." | #"_"] ; include "$" here?
(type: 'variable)
|
alpha any [some alpha | digit | #"_"] ; include #"$" here?
(type: 'word)
|
; optional sign, integer part, optional fraction, optional exponent
opt #"-" some digit
opt ["." some digit]
opt [[#"e" | #"E"] opt #"-" some digit]
(type: 'number)
|
; Catch-all: capture the run into PART and classify it by its exact text.
; NOTE(review): CHARS also contains letters and digits, so a run such as
; "<10" written without a blank is captured as one token.
copy part some chars ; misc
(
; NOTE(review): the last case relies on this dialect's SWITCH accepting
; `default [...]` as its fall-through branch -- TODO confirm.
type: switch part [
"*" ['star]
"+" "-" "/" ['math]
"=" "!=" ">=" ">" "<=" "<>" "<" ['operator]
default ['other]
]
)
]
; STATEMENT consumes the whole input, rewriting it as it goes.
statement: [
any [
; a line feed is kept; CR or CR LF is replaced with a single line feed
#"^/" | change [#"^M" opt #"^/"] #"^/"
|
; a run of blanks is replaced with DETAB applied to that run
change copy part some whitespace (detab part)
|
delimiter
|
#"(" | #")"
|
#"," | #"."
|
; a token is replaced with «type»«text»
change copy part value (rejoin ["«" form type "»«" part "»"])
|
; matches at the end of the input, so the fallback below is not tried there
end
|
; Fallback for input none of the alternatives above accept; for example a
; quote or backtick that does not start a complete string/literal, since
; those characters are excluded from CHARS.
(print "SKIP: Should Not Happen") ?? skip
]
]
]
parse-sql: func [
    "Lex a copy of STATEMENT and return the annotated copy"
    statement [text!] "SQL source text; the argument itself is not modified"
    <local> buffer position
][
    ; The lexer rewrites its input in place, so work on a private copy.
    buffer: copy statement

    ; The parse result is asserted to be a position at the tail of the
    ; buffer, i.e. the rules are expected to have consumed all of the input.
    assert [
        all [
            position: parse/case buffer lexer/statement
            tail? position
        ]
    ]

    ; Hand back the rewritten text from its first character.
    head position
]
; Demo: lex a sample statement and print the annotated result. The sample
; contains a star, a backtick literal, an operator, a number, the ";"
; delimiter, two "@" names (one starting with an upper-case letter) and a
; trailing "--" comment.
probe parse-sql {Select * From `something` Where x < 10; @Foo @bar -- Test}
Red [
Title: "SQL Lexer"
Date: 24-Jul-2020
Author: "Christopher Ross-Gill"
]
; LEXER bundles the character classes and PARSE rules used to tokenize SQL
; text. The STATEMENT rule rewrites the input series in place: every token
; matched by VALUE is replaced with the text «type»«original-text», line
; endings are rewritten to a single line feed, and runs of blanks are
; replaced with the result of DETAB.
lexer: make object! [
    ; -- Character classes --
    whitespace: charset " ^-"           ; space and tab
    delimiters: charset {"'(),.;`}

    ; Everything that is not a blank, a delimiter, a line feed or a carriage
    ; return; used for the catch-all "misc" token.
    chars: complement union union whitespace delimiters charset "^/^M"

    digit: charset "0123456789"
    lower-alpha: charset [#"a" - #"z"]
    upper-alpha: charset [#"A" - #"Z"]
    alpha: union lower-alpha upper-alpha

    ; "Anything except the character(s) that end or escape the construct".
    word-literal: complement charset "`"
    comment-line: complement charset "^/^M"
    comment: complement charset "*"
    quoted-single: complement charset {'\}
    quoted-double: complement charset {"\}

    ; -- State shared between the rules and their parenthesized actions --
    type: none      ; set by whichever VALUE alternative matched
    part: none      ; text most recently captured with COPY

    delimiter: ";"  ; statement terminator matched by the STATEMENT rule

    ; VALUE matches exactly one token. The alternatives are tried in order,
    ; so the more specific forms must stay ahead of the catch-all at the end.
    value: [
        "--" any comment-line
        (type: 'comment-line)
        |
        ; block comment: a "*" is only consumed when not followed by "/"
        "/*" any [some comment | #"*" not #"/"] "*/"
        (type: 'comment)
        |
        #"#" any comment-line ; Used by MySQL
        (type: 'comment-hash)
        |
        ; quoted strings accept \\ , a backslash-escaped quote, or a doubled
        ; quote; any other use of a backslash makes the alternative fail
        #"'" any [some quoted-single | "\\" | "\'" | "''"] #"'"
        (type: 'string-single)
        |
        #"^"" any [some quoted-double | "\\" | {\"} | {""}] #"^""
        (type: 'string-double)
        |
        #"`" some word-literal #"`"
        (type: 'literal)
        |
        ; NOTE(review): only lower-case letters are accepted after the "@"
        ; prefix. PARSE-SQL runs this rule with PARSE/CASE, so a name such as
        ; @Foo does not match here and falls through to the "misc"
        ; alternative -- confirm that this is intended.
        #"@" opt #"@" some [some lower-alpha | #"." | #"_"] ; include "$" here?
        (type: 'variable)
        |
        alpha any [some alpha | digit | #"_"] ; include #"$" here?
        (type: 'word)
        |
        ; optional sign, integer part, optional fraction, optional exponent
        opt #"-" some digit
        opt ["." some digit]
        opt [[#"e" | #"E"] opt #"-" some digit]
        (type: 'number)
        |
        ; Catch-all: capture the run into PART and classify it by its text.
        copy part some chars ; misc
        (
            ; Red's SWITCH has no `default` keyword inside the cases block;
            ; the fall-through branch must be supplied through the /default
            ; refinement, otherwise an unmatched token leaves TYPE as none.
            type: switch/default part [
                "*" ['star]
                "+" "-" "/" ['math]
                "=" "!=" ">=" ">" "<=" "<>" "<" ['operator]
            ][
                'other
            ]
        )
    ]

    ; STATEMENT consumes the whole input, rewriting it as it goes.
    statement: [
        any [
            ; a line feed is kept; CR or CR LF becomes a single line feed
            #"^/" | change [#"^M" opt #"^/"] #"^/"
            |
            ; a run of blanks is replaced with DETAB applied to that run
            ; NOTE(review): assumes DETAB is defined in the host runtime
            change copy part some whitespace (detab part)
            |
            delimiter
            |
            #"(" | #")"
            |
            #"," | #"."
            |
            ; A token is replaced with «type»«text». Every VALUE alternative
            ; assigns a word to TYPE, so it can be formed directly.
            change copy part value (rejoin ["«" form type "»«" part "»"])
            |
            ; matches at the end of the input, so the fallback is not tried
            end
            |
            ; Fallback for input none of the alternatives above accept; for
            ; example a quote or backtick that does not start a complete
            ; string/literal, since those are excluded from CHARS.
            ; NOTE(review): `??` is kept from the original -- confirm that it
            ; is accepted inside a Red PARSE rule.
            (print "SKIP: Should Not Happen") ?? skip
        ]
    ]
]
parse-sql: func [
    "Lex a copy of STATEMENT; returns the annotated copy, or none when the parse fails"
    statement [string!] "SQL source text; the argument itself is not modified"
    /local annotated
][
    ; The lexer rewrites its input in place, so work on a private copy.
    annotated: copy statement

    ; Only hand the rewritten text back when the rules accepted all of it.
    if parse/case annotated lexer/statement [annotated]
]
; Demo: lex a sample statement and print what PARSE-SQL returns for it.
probe parse-sql {Select * From `something` Where x < 10; @Foo @bar -- Test}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment