Skip to content

Instantly share code, notes, and snippets.

@deoxxa
Created November 6, 2016 22:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save deoxxa/95eb3561a4e04e51f764e43b769c0163 to your computer and use it in GitHub Desktop.
Save deoxxa/95eb3561a4e04e51f764e43b769c0163 to your computer and use it in GitHub Desktop.
// @flow
/* eslint-disable no-use-before-define */
// Token type tags. Kept as plain string constants (not an enum) so they can
// be compared and printed directly; several double as values in the
// `literals` lookup table below.
const TOKEN_AND = 'TOKEN_AND';
const TOKEN_ATOM = 'TOKEN_ATOM';
const TOKEN_BANG = 'TOKEN_BANG';
const TOKEN_COLON = 'TOKEN_COLON';
const TOKEN_CURLY_CLOSE = 'TOKEN_CURLY_CLOSE';
const TOKEN_CURLY_OPEN = 'TOKEN_CURLY_OPEN';
const TOKEN_ERROR = 'TOKEN_ERROR';
const TOKEN_EXISTS = 'TOKEN_EXISTS';
const TOKEN_MINUS = 'TOKEN_MINUS';
const TOKEN_MISSING = 'TOKEN_MISSING';
const TOKEN_NUMBER = 'TOKEN_NUMBER';
const TOKEN_OR = 'TOKEN_OR';
const TOKEN_PAREN_CLOSE = 'TOKEN_PAREN_CLOSE';
const TOKEN_PAREN_OPEN = 'TOKEN_PAREN_OPEN';
const TOKEN_PLUS = 'TOKEN_PLUS';
const TOKEN_SQUARE_CLOSE = 'TOKEN_SQUARE_CLOSE';
const TOKEN_SQUARE_OPEN = 'TOKEN_SQUARE_OPEN';
const TOKEN_STAR = 'TOKEN_STAR';
const TOKEN_STRING = 'TOKEN_STRING';
const TOKEN_TILDE = 'TOKEN_TILDE';
const TOKEN_TO = 'TOKEN_TO';
const TOKEN_WS = 'TOKEN_WS';
// Maps fixed source text to its token type. Word-like keys ('AND', 'OR',
// 'TO', '_missing_', '_exists_') are looked up by lexAtom after a full
// identifier has been scanned; single-character keys are looked up by lex
// via `literals[str[offset]]`.
// NOTE(review): the two-character keys '&&' and '||' are only reachable if
// lex also checks two-character substrings — verify against lex's literal
// case, which as written indexes with a single character.
const literals = {
'&&': TOKEN_AND,
'AND': TOKEN_AND,
'!': TOKEN_BANG,
':': TOKEN_COLON,
'}': TOKEN_CURLY_CLOSE,
'{': TOKEN_CURLY_OPEN,
'OR': TOKEN_OR,
'||': TOKEN_OR,
')': TOKEN_PAREN_CLOSE,
'(': TOKEN_PAREN_OPEN,
'+': TOKEN_PLUS,
']': TOKEN_SQUARE_CLOSE,
'[': TOKEN_SQUARE_OPEN,
'*': TOKEN_STAR,
'~': TOKEN_TILDE,
'TO': TOKEN_TO,
'_missing_': TOKEN_MISSING,
'_exists_': TOKEN_EXISTS,
};
type Token = { type: string, text: string, start: number, end: number };
type LexResult = { token: Token, consumed: number };
// limited to 1000 iterations in case there's an infinite loop bug
function lexAll(str: string): Array<Token> {
const arr = [];
let offset = 0;
for (let i = 0; i < 1000; i++) {
const res = lex(str, offset);
if (!res) {
break;
}
const { token, consumed } = res;
arr.push(token);
offset += consumed;
}
return arr;
}
function lex(str: string, offset: number): LexResult|null {
if (str.length === offset) {
return null;
}
switch (true) {
case /^\s/.test(str.substr(offset)):
return lexWhitespace(str, offset);
case /^"/.test(str.substr(offset)):
return lexString(str, offset);
case /^[-0123456789]/.test(str.substr(offset)):
return lexNumberOrMinus(str, offset);
case /^[A-Za-z_]/.test(str.substr(offset)):
return lexAtom(str, offset);
case !!literals[str[offset]]:
return {
token: {
type: literals[str[offset]],
text: str[offset],
start: offset,
end: offset + 1,
},
consumed: 1,
};
default:
return {
token: {
type: TOKEN_ERROR,
text: '',
start: offset,
end: offset,
},
consumed: str.length - offset,
};
}
}
function lexWhitespace(str: string, offset: number): LexResult {
const m = /^(\s+)/.exec(str.substr(offset));
return {
token: {
type: TOKEN_WS,
text: m[1],
start: offset,
end: offset + m[1].length,
},
consumed: m[1].length,
};
}
function lexString(str: string, offset: number): LexResult|null {
let i = 0;
if (str[offset + i] !== '"') {
return null;
}
i++;
for (; offset + i < str.length; i++) {
if (str[offset + i] === '\\') {
i++;
continue;
}
if (str[offset + i] === '"') {
break;
}
}
if (str[offset + i] !== '"') {
return {
token: {
type: TOKEN_ERROR,
text: '',
start: offset + i,
end: offset + i,
},
consumed: str.length - offset,
};
}
i++;
const raw = str.substr(offset, i);
let text = null;
try { text = JSON.parse(raw); } catch (e) { /* nothing */ }
return {
token: {
type: TOKEN_STRING,
text: String(text),
raw: raw,
start: offset,
end: offset + i,
},
consumed: i,
};
}
function lexNumberOrMinus(str: string, offset: number): LexResult {
const m = /^(-?([0-9]+(\.[0-9]+)?)?)/.exec(str.substr(offset));
if (m[1] === '-') {
return {
token: {
type: TOKEN_MINUS,
text: '-',
start: offset,
end: offset,
},
consumed: 1,
};
}
return {
token: {
type: TOKEN_NUMBER,
text: m[1],
start: offset,
end: offset + m[1].length,
},
consumed: m[1].length,
};
}
function lexAtom(str: string, offset: number): LexResult {
const m = /^[A-Za-z_][A-Za-z_0-9\.]*/.exec(str.substr(offset));
const text = m[0];
const type = literals[text] || TOKEN_ATOM;
return {
token: {
type: type,
text: text,
start: offset,
end: offset + text.length,
},
consumed: text.length,
};
}
export function tokenise(str: string) {
return lexAll(str);
}
function describeToken(t: ?Token): string {
if (!t) {
return 'EOF';
}
return `${t.type} @ ${t.start}`;
}
// Builds (but does not throw) the standard "unexpected token" parse error.
function failed(token, ...types) {
  const expected = types.join(', ');
  return new Error(`expected one of ${expected}; got ${describeToken(token)}`);
}
// Asserts that `token` exists and is one of `types`; throws the standard
// parse error otherwise.
// BUG FIX: a missing token (undefined, e.g. input ending mid-expression)
// previously crashed with a TypeError reading `token.type`; the falsy guard
// now routes it through failed/describeToken, which reports 'got EOF'.
function expect(token, ...types) {
  if (!token || !types.includes(token.type)) {
    throw failed(token, ...types);
  }
}
// Result of any parse step: the node built plus the number of tokens eaten.
type ParseResult = {
ast: Node,
consumed: number,
};
// '( ... )' — an explicit parenthesised group of expressions.
type GroupNode = {
type: 'group',
nodes: Array<Node>,
};
// '_exists_:field' / '_missing_:field'.
type ExistenceNode = {
type: 'existence',
modifier: string,
target: string,
};
// 'field:<condition>'.
type TargetedNode = {
type: 'targeted',
target: string,
condition: Node,
};
// A lone unquoted word.
type BareNode = {
type: 'bare',
text: string,
};
// A quoted string.
type StringNode = {
type: 'string',
text: string,
};
// '[x TO y]' — the endpoints are kept as raw tokens, not nodes.
type RangeNode = {
type: 'range',
left: Token,
right: Token,
};
// 'a AND b' / 'a OR b'.
type BooleanNode = {
type: 'boolean',
operator: string,
left: Node,
right: Node,
};
// Two adjacent expressions with no explicit operator between them.
type ImplicitGroupNode = {
type: 'implicitGroup',
left: Node,
right: Node,
};
// Produced for empty input (or an empty group body).
type EmptyNode = {
type: 'empty',
};
// Union of every AST node kind the parser can produce.
export type Node
= GroupNode
| ExistenceNode
| TargetedNode
| BareNode
| StringNode
| RangeNode
| BooleanNode
| ImplicitGroupNode
| EmptyNode;
function parseGroup(tokens) {
const nodes: Array<Node> = [];
expect(tokens[0], TOKEN_PAREN_OPEN);
let i = 1;
let last = null;
for (; i < tokens.length; i++) {
last = parseImplicitGroup(tokens.slice(i), last, TOKEN_PAREN_CLOSE);
const { ast, consumed } = last;
nodes.push(ast);
i += consumed;
if (tokens[i].type === TOKEN_PAREN_CLOSE) {
break;
}
}
expect(tokens[i], TOKEN_PAREN_CLOSE);
return {
ast: {
type: 'group',
nodes,
},
consumed: i + 2,
};
}
// Parses '_exists_:field' or '_missing_:field' into an 'existence' node.
// Always consumes exactly three tokens: modifier, colon, field atom.
function parseExistence(tokens) {
  expect(tokens[0], TOKEN_EXISTS, TOKEN_MISSING);
  expect(tokens[1], TOKEN_COLON);
  expect(tokens[2], TOKEN_ATOM);
  const [modifierToken, , targetToken] = tokens;
  return {
    ast: {
      type: 'existence',
      modifier: modifierToken.text,
      target: targetToken.text,
    },
    consumed: 3,
  };
}
function parseTargeted(tokens): ParseResult {
expect(tokens[0], TOKEN_ATOM);
expect(tokens[1], TOKEN_COLON);
const condition = parseSingle(tokens.slice(2));
return {
ast: {
type: 'targeted',
target: tokens[0].text,
condition: condition.ast,
},
consumed: 2 + condition.consumed,
};
}
function parseBare(tokens): ParseResult {
expect(tokens[0], TOKEN_ATOM);
return {
ast: {
type: 'bare',
text: tokens[0].text,
},
consumed: 1,
};
}
function parseString(tokens): ParseResult {
expect(tokens[0], TOKEN_STRING);
return {
ast: {
type: 'string',
text: tokens[0].text,
},
consumed: 1,
};
}
function parseRange(tokens): ParseResult {
expect(tokens[0], TOKEN_SQUARE_OPEN);
expect(tokens[1], TOKEN_ATOM, TOKEN_NUMBER, TOKEN_STRING, TOKEN_STAR);
expect(tokens[2], TOKEN_TO);
expect(tokens[3], TOKEN_ATOM, TOKEN_NUMBER, TOKEN_STRING, TOKEN_STAR);
expect(tokens[4], TOKEN_SQUARE_CLOSE);
return {
ast: {
type: 'range',
left: tokens[1],
right: tokens[3],
},
consumed: 5,
};
}
function parseSingle(tokens): ParseResult {
switch (tokens[0].type) {
case TOKEN_ATOM:
return parseBare(tokens);
case TOKEN_STRING:
return parseString(tokens);
case TOKEN_SQUARE_OPEN:
return parseRange(tokens);
default:
throw failed(tokens[0], TOKEN_ATOM, TOKEN_STRING, TOKEN_SQUARE_OPEN);
}
}
function parseBooleanPair(tokens: Array<Token>, left: ParseResult, ignore: ?string): ParseResult {
const right = parseImplicitGroup(tokens.slice(1), null, ignore);
return {
ast: {
type: 'boolean',
operator: tokens[0].text,
left: left.ast,
right: right.ast,
},
consumed: left.consumed + 1 + right.consumed,
};
}
function parseImplicitGroup(tokens: Array<Token>, inputLeft: ParseResult|null, ignore: ?string): ParseResult {
let left = inputLeft;
if (tokens.length === 0 || tokens[0].type === ignore) {
if (left === null) {
return {
ast: { type: 'empty' },
consumed: 0,
};
}
return left;
}
let right: ?ParseResult = null;
switch (tokens[0].type) {
case TOKEN_EXISTS:
case TOKEN_MISSING:
right = parseExistence(tokens);
break;
case TOKEN_ATOM:
if (tokens.length > 1 && tokens[1].type === 'TOKEN_COLON') {
right = parseTargeted(tokens);
break;
} else {
right = parseBare(tokens);
break;
}
case TOKEN_STRING:
right = parseString(tokens);
break;
case TOKEN_PAREN_OPEN:
right = parseGroup(tokens);
break;
case TOKEN_AND:
case TOKEN_OR:
if (!left) {
throw new Error('boolean operator must be preceeded by another expression');
}
right = parseBooleanPair(tokens, left, ignore);
left = null;
break;
default:
throw failed(tokens[0], TOKEN_EXISTS, TOKEN_MISSING, TOKEN_ATOM, TOKEN_STRING, TOKEN_PAREN_OPEN);
}
if (!left) {
return parseImplicitGroup(tokens.slice(right.consumed), right, ignore);
}
return parseImplicitGroup(tokens.slice(right.consumed), {
ast: {
type: 'implicitGroup',
left: left.ast,
right: right.ast,
},
consumed: left.consumed + right.consumed,
}, ignore);
}
export function parse(str: string): Node {
return parseImplicitGroup(tokenise(str).filter((e) => e.type !== TOKEN_WS), null, null).ast;
}
export function format(node: Node): string {
switch (node.type) {
case 'group':
return [
'(',
...node.nodes.map(format).join('').split('\n').map((e) => ' ' + e),
')',
].join('\n');
case 'existence':
return `${node.modifier}:${node.target}`;
case 'targeted':
return `${node.target}:${format(node.condition)}`;
case 'bare':
return node.text;
case 'string':
return JSON.stringify(node.text);
case 'range':
return `[${node.left.text} TO ${node.right.text}]`;
case 'boolean':
return `${format(node.left)} ${node.operator} ${format(node.right)}`;
case 'implicitGroup':
return `${format(node.left)}\n${format(node.right)}`;
case 'emptyNode':
return '';
default: return '';
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment