Created
October 18, 2011 21:15
-
-
Save stt/1296753 to your computer and use it in GitHub Desktop.
Jison rules for turning boolean expressions into SQL clauses
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Jison rules for parsing simple boolean expressions | |
* (c)2011, <samuli@tuomola.net> | |
* | |
* Usage: npm install jison && jison sqlaus.jison && node test-sqlaus.js | |
* | |
* Meant primarily for generating SQL clauses for m:m table structure. | |
* | |
* Depends on shared scope to provide.. | |
* functions: | |
* yy.join (takes an array and returns a string, for quoting), | |
* yy.synonyms (takes a string and returns an array, for alternative tags) | |
* strings: | |
* pregroup, postgroup, prefix, and_suffix, or_suffix and likecol. | |
* (If and_suffix contains ?-character it's replaced with number of values.) | |
* | |
* Note: Tags can only appear within groups and logic operators exclude other | |
* operators from appearing in the same group, so (fast&car|lambo) is illegal. | |
* Also nested groups are not allowed. | |
* | |
* Example 1: we want videos about sport cars, only some have tags with models | |
* while others don't, so we could search.. ( fast & car )|( ferrari | lambo ) | |
* | |
* Example 2: with a backend that knows about synonymous tags more abstract | |
* searches can be made with tilde, e.g (~bike) can find bicycle and cycle | |
* | |
* Example 3: string surrounded with single-quotes is interpreted to mean a | |
* LIKE-query which are handled as their own group (meaning they can't be mixed | |
* in tag group) and queries against a separate column specified by yy.likecol | |
* So "(bike) & (!'%trick%')" could search for bike tags w/o "trick" in title, | |
* or "('%formula%' | '%race%')" to look for texts containing alternate words. | |
* Note: Spaces are preserved within single-quotes. All but single-quote within | |
* the search text is allowed, to search for single-quote provide it twice. | |
* Hint: likecol could be a combination of columns e.g. "title||description". | |
*/ | |
/* grammar */ | |
%lex | |
%% | |
\'(\'\'|[^'])+\' return 'LIKE'; | |
\s+ /* skip whitespace */ | |
[-a-zA-Z0-9]+ return 'TAG'; | |
"&" return '&'; | |
"|" return '|'; | |
"(" return '('; | |
")" return ')'; | |
"!" return '!'; | |
"~" return '~'; | |
<<EOF>> return 'EOF'; | |
. return 'FAIL'; | |
/lex | |
%start query | |
/* language */ | |
%% | |
query | |
: expr | |
| expr EOF { return $expr; } | |
| EOF { return null; } | |
; | |
andtags | |
: tags '&' tags { $$ = $1; $$ = $$.concat($3); } | |
| andtags '&' tags { $$ = $andtags; $$ = $$.concat($tags); } | |
; | |
ortags | |
: tags '|' tags { $$ = $1; $$ = $$.concat($3); } | |
| ortags '|' tags { $$ = $ortags; $$ = $$.concat($tags); } | |
; | |
tags | |
: TAG { $$ = [$TAG]; } | |
| '~' TAG { $$ = yy.synonyms($TAG); } | |
; | |
likes | |
: like '&' like { $$ = $1 + " AND " + $3; } | |
| likes '&' like { $$ = $1 + " AND " + $3; } | |
| like '|' like { $$ = $1 + " OR " + $3; } | |
| likes '|' like { $$ = $1 + " OR " + $3; } | |
; | |
like | |
: LIKE { $$ = yy.likecol +" LIKE "+$LIKE; } | |
| '!' LIKE { $$ = yy.likecol +" NOT LIKE "+$LIKE; } | |
; | |
// different types of groups | |
group | |
: '(' andtags ')' { | |
$$ = yy.prefix +" "+ yy.join($andtags); | |
if($andtags.length > 1) | |
$$ += yy.and_suffix.replace('?', $andtags.length); | |
} | |
| '(' ortags ')' { $$ = yy.prefix +" "+ yy.join($ortags) + yy.or_suffix; } | |
| '(' tags ')' { $$ = yy.prefix +" "+ yy.join($tags) + yy.or_suffix; } | |
; | |
// wrap groups with SQL provided in shared scope | |
wrap | |
: group { $$ = yy.pregroup +" "+ $group + yy.postgroup; } | |
| '!' group { $$ = yy.pregroup +" NOT "+ $group + yy.postgroup; } | |
| '(' likes ')' { $$ = "("+ $likes + ")"; } | |
| '(' like ')' { $$ = $like; } | |
; | |
// relations between groups | |
expr | |
: wrap | |
| wrap '&' wrap { $$ = $1 +" AND "+ $3; } | |
| wrap '|' wrap { $$ = $1 +" OR "+ $3; } | |
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var parser = require('./sqlaus').parser; | |
var assert = require("assert"); | |
/* | |
-- .schema for an sqlite db that could be used with the queries generated in the tests | |
CREATE TABLE video (id integer primary key autoincrement, title text, url text unique); | |
CREATE TABLE video_tag (video_id references video(id), tag references tag(tag), primary key (video_id,tag)); | |
CREATE TABLE tag (tag text primary key); | |
CREATE TABLE synonym (id integer, tag references tag(tag), primary key (id,tag)); | |
*/ | |
parser.yy = { | |
join: function(r) { return "IN ('"+ r.join("','") +"')"; }, | |
synonyms: function(t) { | |
// in real world this would be db backed | |
var b = ['bike','bicycle','cycle']; | |
var c = ['car','ride','automobile']; | |
if(b.indexOf(t) >= 0) return b; | |
if(c.indexOf(t) >= 0) return c; | |
return [t]; | |
}, | |
pregroup: "id", | |
prefix: "IN (select video_id from video_tag where tag", | |
and_suffix: " group by video_id having count(distinct tag)=?", | |
or_suffix: "", | |
postgroup: ")", | |
likecol: "title" | |
}; | |
exports["test: (and) and not(or)"] = function () { | |
var result = parser.parse("(a&b&c) & !(c|d)"); | |
var expected = "id IN (select video_id from video_tag where tag IN ('a','b','c') "+ | |
"group by video_id having count(distinct tag)=3) AND "+ | |
"id NOT IN (select video_id from video_tag where tag IN ('c','d'))"; | |
assert.equal(result, expected, "Damn"); | |
} | |
exports["test: (or synonym)"] = function () { | |
var result = parser.parse("(~trike | ~bike)"); | |
var expected = "id IN (select video_id from video_tag where tag IN ('trike','bike','bicycle','cycle'))"; | |
assert.equal(result, expected, "Damn"); | |
} | |
exports["test: (like)"] = function () { | |
var result = parser.parse("('bla%bla')"); | |
var expected = "title LIKE 'bla%bla'"; | |
assert.equal(result, expected, "Damn"); | |
} | |
exports["test like: special chars & preserve whitespace"] = function () { | |
var result = parser.parse("('bla%blö\n €!' & !'f o o')"); | |
var expected = "(title LIKE 'bla%blö\n €!' AND title NOT LIKE 'f o o')"; | |
assert.equal(result, expected, "Damn"); | |
} | |
exports["test like: escaped quotes"] = function () { | |
var result = parser.parse("('tim o''reilly')|(tag)"); | |
var expected = "title LIKE 'tim o''reilly' OR id IN (select video_id from video_tag where tag IN ('tag'))"; | |
assert.equal(result, expected, "Damn"); | |
} | |
if(require.main === module) { | |
require("test").run(exports); | |
//console.log(parser.parse(process.argv[2])); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Added support for LIKE conditions and renamed the scripts from tagsql to sqlaus (since it's no more just about tags).
In case there's going to be some opensource project based on this parser I'll probably move these to a real repository.