Skip to content

Instantly share code, notes, and snippets.

@stt
Created October 18, 2011 21:15
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stt/1296753 to your computer and use it in GitHub Desktop.
Save stt/1296753 to your computer and use it in GitHub Desktop.
Jison rules for turning boolean expressions into SQL clauses
/*
* Jison rules for parsing simple boolean expressions
* (c)2011, <samuli@tuomola.net>
*
* Usage: npm install jison && jison sqlaus.jison && node test-sqlaus.js
*
* Meant primarily for generating SQL clauses for m:m table structure.
*
* Depends on shared scope to provide..
* functions:
* yy.join (takes an array and returns a string, for quoting),
* yy.synonyms (takes a string and returns an array, for alternative tags)
* strings:
* pregroup, postgroup, prefix, and_suffix, or_suffix and likecol.
* (If and_suffix contains ?-character it's replaced with number of values.)
*
* Note: Tags can only appear within groups and logic operators exclude other
* operators from appearing in the same group, so (fast&car|lambo) is illegal.
* Also nested groups are not allowed.
*
* Example 1: we want videos about sport cars, only some have tags with models
* while others don't, so we could search.. ( fast & car )|( ferrari | lambo )
*
* Example 2: with a backend that knows about synonymous tags more abstract
* searches can be made with tilde, e.g (~bike) can find bicycle and cycle
*
* Example 3: string surrounded with single-quotes is interpreted to mean a
* LIKE-query which are handled as their own group (meaning they can't be mixed
* in tag group) and queries against a separate column specified by yy.likecol
* So "(bike) & (!'%trick%')" could search for bike tags w/o "trick" in title,
* or "('%formula%' | '%race%')" to look for texts containing alternate words.
* Note: Spaces are preserved within single-quotes. All but single-quote within
* the search text is allowed, to search for single-quote provide it twice.
* Hint: likecol could be a combination of columns e.g. "title||description".
*/
/* grammar */
%lex
%%
\'(\'\'|[^'])+\' return 'LIKE';
\s+ /* skip whitespace */
[-a-zA-Z0-9]+ return 'TAG';
"&" return '&';
"|" return '|';
"(" return '(';
")" return ')';
"!" return '!';
"~" return '~';
<<EOF>> return 'EOF';
. return 'FAIL';
/lex
%start query
/* language */
%%
query
: expr
| expr EOF { return $expr; }
| EOF { return null; }
;
andtags
: tags '&' tags { $$ = $1; $$ = $$.concat($3); }
| andtags '&' tags { $$ = $andtags; $$ = $$.concat($tags); }
;
ortags
: tags '|' tags { $$ = $1; $$ = $$.concat($3); }
| ortags '|' tags { $$ = $ortags; $$ = $$.concat($tags); }
;
tags
: TAG { $$ = [$TAG]; }
| '~' TAG { $$ = yy.synonyms($TAG); }
;
likes
: like '&' like { $$ = $1 + " AND " + $3; }
| likes '&' like { $$ = $1 + " AND " + $3; }
| like '|' like { $$ = $1 + " OR " + $3; }
| likes '|' like { $$ = $1 + " OR " + $3; }
;
like
: LIKE { $$ = yy.likecol +" LIKE "+$LIKE; }
| '!' LIKE { $$ = yy.likecol +" NOT LIKE "+$LIKE; }
;
// different types of groups
group
: '(' andtags ')' {
$$ = yy.prefix +" "+ yy.join($andtags);
if($andtags.length > 1)
$$ += yy.and_suffix.replace('?', $andtags.length);
}
| '(' ortags ')' { $$ = yy.prefix +" "+ yy.join($ortags) + yy.or_suffix; }
| '(' tags ')' { $$ = yy.prefix +" "+ yy.join($tags) + yy.or_suffix; }
;
// wrap groups with SQL provided in shared scope
wrap
: group { $$ = yy.pregroup +" "+ $group + yy.postgroup; }
| '!' group { $$ = yy.pregroup +" NOT "+ $group + yy.postgroup; }
| '(' likes ')' { $$ = "("+ $likes + ")"; }
| '(' like ')' { $$ = $like; }
;
// relations between groups
expr
: wrap
| wrap '&' wrap { $$ = $1 +" AND "+ $3; }
| wrap '|' wrap { $$ = $1 +" OR "+ $3; }
;
var parser = require('./sqlaus').parser;
var assert = require("assert");
/*
-- .schema for an sqlite db that could be used with the queries generated in the tests
CREATE TABLE video (id integer primary key autoincrement, title text, url text unique);
CREATE TABLE video_tag (video_id references video(id), tag references tag(tag), primary key (video_id,tag));
CREATE TABLE tag (tag text primary key);
CREATE TABLE synonym (id integer, tag references tag(tag), primary key (id,tag));
*/
parser.yy = {
join: function(r) { return "IN ('"+ r.join("','") +"')"; },
synonyms: function(t) {
// in real world this would be db backed
var b = ['bike','bicycle','cycle'];
var c = ['car','ride','automobile'];
if(b.indexOf(t) >= 0) return b;
if(c.indexOf(t) >= 0) return c;
return [t];
},
pregroup: "id",
prefix: "IN (select video_id from video_tag where tag",
and_suffix: " group by video_id having count(distinct tag)=?",
or_suffix: "",
postgroup: ")",
likecol: "title"
};
exports["test: (and) and not(or)"] = function () {
var result = parser.parse("(a&b&c) & !(c|d)");
var expected = "id IN (select video_id from video_tag where tag IN ('a','b','c') "+
"group by video_id having count(distinct tag)=3) AND "+
"id NOT IN (select video_id from video_tag where tag IN ('c','d'))";
assert.equal(result, expected, "Damn");
}
exports["test: (or synonym)"] = function () {
var result = parser.parse("(~trike | ~bike)");
var expected = "id IN (select video_id from video_tag where tag IN ('trike','bike','bicycle','cycle'))";
assert.equal(result, expected, "Damn");
}
exports["test: (like)"] = function () {
var result = parser.parse("('bla%bla')");
var expected = "title LIKE 'bla%bla'";
assert.equal(result, expected, "Damn");
}
exports["test like: special chars & preserve whitespace"] = function () {
var result = parser.parse("('bla%blö\n €!' & !'f o o')");
var expected = "(title LIKE 'bla%blö\n €!' AND title NOT LIKE 'f o o')";
assert.equal(result, expected, "Damn");
}
exports["test like: escaped quotes"] = function () {
var result = parser.parse("('tim o''reilly')|(tag)");
var expected = "title LIKE 'tim o''reilly' OR id IN (select video_id from video_tag where tag IN ('tag'))";
assert.equal(result, expected, "Damn");
}
if(require.main === module) {
require("test").run(exports);
//console.log(parser.parse(process.argv[2]));
}
@stt
Copy link
Author

stt commented Oct 18, 2011

At first I was like..

function tagAnd(tags) {
var andSql = tagOr(tags).replace(/)$/, " group by video_id having count(distinct tag)=?)");
return andSql.replace("?", tags.length);
}
function tagOr(tags) {
var orSql = "(select video_id from video_tag vt where tag IN (?))";
return orSql.replace("?","'"+tags.join("','")+"'");
}
function handleAjax(req, res) {
var qstr = url.parse(req.url, true).query['q'];
var tagre = /([a-z-0-9]+)/gi;
var tags = qstr.match(tagre);
var tagSql = (/&/.test(qstr) ? tagAnd(tags) : tagOr(tags));
// ...
}

But then I serious'd. :)

@stt
Copy link
Author

stt commented Nov 4, 2011

Added support for LIKE conditions and renamed the scripts from tagsql to sqlaus (since it's no more just about tags).
In case there's going to be some opensource project based on this parser I'll probably move these to a real repository.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment