Created
February 3, 2017 00:47
-
-
Save pygy/4fa68245989585c22791de4b12a24bcc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// compose-regexp supplies the RegExp-building helpers used by the lexer below.
var c = require("compose-regexp")
/**
 * Turn a matcher description into a matcher function.
 *
 * A matcher function is called as `(subject, index, state)`; it sets
 * `state.success` and returns the index after the match, or the index it was
 * given when it fails.
 *
 * @param {*} matcher - A matcher function (returned untouched), or a value
 *   that is handed to `c.either` (e.g. a RegExp or a string).
 * @returns {Function} A matcher function.
 * @throws {TypeError} When `matcher` is a plain object.
 */
function normalize(matcher) {
  if (typeof matcher === 'function') return matcher
  // A plain object cannot be compiled here: grammar() needs an entry name and
  // a build callback, neither of which is available at this point.
  if (({}).toString.call(matcher) === '[object Object]') {
    throw new TypeError('normalize: a plain object is not a valid matcher; use grammar(entry, build)')
  }
  var flags = 'g'
  if (matcher instanceof RegExp) flags += matcher.flags.replace('g', '')
  // Assumes c.either builds an alternation and c.flags applies the flags --
  // TODO confirm against compose-regexp. The trailing empty capture group can
  // always match at `lastIndex`, so exec() never scans forward past `index`.
  var compiled = c.flags(flags,
    c.either(
      matcher,
      /()/
    )
  )
  return function(subject, index, state) {
    compiled.lastIndex = index
    var res = compiled.exec(subject)
    // The last capture is '' only when the empty fallback group matched,
    // i.e. when the real matcher did not match at `index`.
    if (res == null || res[res.length - 1] === '') {
      state.success = false
      return index
    } else {
      state.success = true
      return index + res[0].length
    }
  }
}
/**
 * Build a matcher that applies every given matcher in order, each one
 * starting where the previous one stopped.
 *
 * Accepts any number of matchers; each is passed through normalize().
 *
 * @returns {Function} A matcher `(subject, index, state)`. It returns the
 *   index after the last matcher when all succeed, otherwise the index it was
 *   called with. `state.success` reflects the outcome; with no matchers it
 *   succeeds without consuming anything.
 */
function sequence() {
  var matchers = [].map.call(arguments, normalize)
  return function(subject, index, state) {
    var start = index
    state.success = true
    // The loop condition stops at the first matcher that clears state.success.
    for (var i = 0; i < matchers.length && state.success; i++) {
      index = matchers[i](subject, index, state)
    }
    return state.success ? index : start
  }
}
/**
 * Build a matcher that tries the given matchers in order and keeps the first
 * one that succeeds.
 *
 * Accepts any number of matchers; each is passed through normalize().
 *
 * @returns {Function} A matcher `(subject, index, state)`. It returns the
 *   index produced by the first successful alternative, or the index it was
 *   called with (and `state.success === false`) when none succeeds.
 */
function either() {
  var matchers = [].map.call(arguments, normalize)
  return function(subject, index, state) {
    var start = index
    state.success = false
    for (var i = 0; i < matchers.length; i++) {
      // Each alternative receives the index returned by the previous one;
      // this relies on failing matchers handing back the index they were given.
      index = matchers[i](subject, index, state)
      if (state.success) return index
    }
    return start
  }
}
// positive lookahead
/**
 * Build a matcher that runs `matcher` without consuming input.
 *
 * @param {*} matcher - Anything accepted by normalize().
 * @returns {Function} A matcher `(subject, index, state)` that always returns
 *   the index it was given; `state.success` is whatever `matcher` set.
 */
function lookFor(matcher) {
  matcher = normalize(matcher)
  return function(subject, index, state) {
    // Only the success flag is of interest; the returned index is discarded.
    matcher(subject, index, state)
    return index
  }
}
// negative lookahead
/**
 * Build a matcher that succeeds exactly when `matcher` fails, without
 * consuming input.
 *
 * @param {*} matcher - Anything accepted by normalize().
 * @returns {Function} A matcher `(subject, index, state)` that always returns
 *   the index it was given and leaves `state.success` inverted relative to
 *   what `matcher` set.
 */
function not(matcher) {
  matcher = normalize(matcher)
  return function(subject, index, state) {
    matcher(subject, index, state)
    state.success = !state.success
    return index
  }
}
// ?
/**
 * Build a matcher that makes `matcher` optional.
 *
 * @param {*} matcher - Anything accepted by normalize().
 * @returns {Function} A matcher `(subject, index, state)` that returns
 *   whatever index `matcher` returned and always sets `state.success` to true.
 */
function maybe(matcher) {
  matcher = normalize(matcher)
  return function(subject, index, state) {
    index = matcher(subject, index, state)
    // A failed optional match still counts as a success.
    state.success = true
    return index
  }
}
// the Kleene star*
/**
 * Build a matcher that applies `matcher` repeatedly, zero or more times.
 *
 * @param {*} matcher - Anything accepted by normalize().
 * @returns {Function} A matcher `(subject, index, state)` that returns the
 *   index after the last successful repetition and always sets
 *   `state.success` to true.
 */
function zeroplus(matcher) {
  matcher = normalize(matcher)
  return function(subject, index, state) {
    for (;;) {
      var next = matcher(subject, index, state)
      // Stop on failure, and also on a success that consumed nothing:
      // repeating a zero-width match would never terminate.
      if (!state.success || next === index) break
      index = next
    }
    state.success = true
    return index
  }
}
/**
 * Build a set of mutually referencing rules and return its entry matcher.
 *
 * @param {string} entry - Name of the rule to return.
 * @param {Function} build - Called with `ref`; must return an object mapping
 *   rule names to matchers. `ref(name)` yields a matcher that forwards to the
 *   rule called `name`, looked up when the matcher runs, so rules may refer
 *   to rules defined later.
 * @returns {Function} The normalized matcher of the `entry` rule.
 * @throws {Error} When a referenced rule is not defined, or when the entry
 *   rule is missing.
 */
function grammar(entry, build) {
  // Borrowed so that a rule or reference named "hasOwnProperty" cannot shadow it.
  var has = Object.prototype.hasOwnProperty
  var refs = {}
  var grm
  function ref(name) {
    if (!has.call(refs, name)) refs[name] = function(subject, index, state) {
      return grm[name](subject, index, state)
    }
    return refs[name]
  }
  grm = build(ref)
  for (var r in grm) if (has.call(grm, r)) {
    grm[r] = normalize(grm[r])
    // A rule nobody references, other than the entry point, is dead weight.
    if (r !== entry && !has.call(refs, r)) console.warn('orphan rule ' + JSON.stringify(r))
  }
  for (r in refs) if (has.call(refs, r) && !has.call(grm, r)) throw new Error("invalid reference " + JSON.stringify(r))
  if (!has.call(grm, entry)) throw new Error("missing entry point " + JSON.stringify(entry))
  return grm[entry]
}
/**
 * Build a matcher that runs `matcher` and, when it succeeds, hands the
 * matched span to `validate`.
 *
 * @param {*} matcher - Anything accepted by normalize().
 * @param {Function} validate - Called as `validate(subject, start, end, state)`
 *   only after a successful match; its return value becomes the resulting
 *   index, and it may clear `state.success` to reject the match.
 * @returns {Function} A matcher `(subject, index, state)`.
 */
function check(matcher, validate) {
  matcher = normalize(matcher)
  return function(subject, index, state) {
    var end = matcher(subject, index, state)
    if (state.success) end = validate(subject, index, end, state)
    return end
  }
}
// The parser itself
// lex
// NOTE(review): assumes c.sequence / c.either / c.greedy build RegExps from
// their arguments, treating strings as literal text -- confirm against
// compose-regexp.

// A line comment runs to the end of the line, or to the end of the input when
// the last line has no trailing newline.
var lineComment = c.sequence('//', /.*(?:\n|$)/)
var blockComment = c.sequence('/*', /[\s\S]*?/, '*/')
var space = c.either(/\s+/, lineComment, blockComment)
// `_` is optional whitespace and comments; `__` additionally skips semicolons.
var _ = c.greedy('*', space)
var __ = c.greedy('*', c.either(space, ';'))
// Every name exported by compose-regexp is accepted as a callable function.
var funcNames = c.either.apply(null, Object.keys(c))
var declKeywords = c.either('var', 'const', 'let')
var identifier = /[a-zA-Z_][\w]*/
// String bodies: a backslash must start an escape pair. Excluding it from the
// plain-character class keeps backtracking from reading `\'` as a lone
// backslash followed by the closing quote.
var string1 = c.sequence(
  "'",
  c.greedy('*',
    /\\[\S\s]|[^'\\\n]/
  ),
  "'"
)
var string2 = c.sequence(
  '"',
  c.greedy('*',
    /\\[\S\s]|[^"\\\n]/
  ),
  '"'
)
// A regular expression literal: `/`, a non-empty body, `/`, optional flags.
// The matched text is then validated by compiling its body with `new RegExp`.
var regexp = check(
  c.sequence(
    '/',
    c.frugal('+',
      // An escape is ONE backslash followed by any character; it is tried
      // first so that an escaped slash does not end the literal.
      c.either(/\\[\s\S]/, /./)
    ),
    // Flags are optional, as the validator's own /\/[a-z]*$/ assumes.
    '/', /[a-z]*/
  ),
  function(subject, start, end, state) {
    try {
      // Drop the opening slash, then the closing slash and flags, so that
      // only the pattern body is compiled.
      new RegExp(subject.slice(start + 1, end).replace(/\/[a-z]*$/, ''))
    } catch (e) {
      e.stack = e.message + "\n "// + findLineCol(subject, start)
      throw e
    }
    return end
  }
)
// Table of the names known to the program being parsed; own keys are the names.
var names
/**
 * Reset `names` to a fresh shallow copy of the compose-regexp exports, so
 * that those names count as already defined.
 */
function init() { names = Object.assign({}, c) }
/**
 * Validator for check(): record `subject.slice(start, end)` as a newly
 * declared name.
 *
 * @param {string} subject - The text being parsed.
 * @param {number} start - Index where the name starts.
 * @param {number} end - Index just past the name.
 * @param {Object} state - Parser state (not used here).
 * @returns {number} `end`.
 * @throws {Error} When the name is already present in `names`.
 */
function register(subject, start, end, state) {
  var name = subject.slice(start, end)
  // Borrow hasOwnProperty from Object.prototype: `names` is keyed by
  // user-chosen identifiers, and one of them may be "hasOwnProperty".
  if (Object.prototype.hasOwnProperty.call(names, name)) throw new Error("Attempt to redefine variable " + name)
  names[name] = true
  return end
}
/**
 * Validator for check(): accept `subject.slice(start, end)` only when it is
 * a name already present in `names`.
 *
 * @param {string} subject - The text being parsed.
 * @param {number} start - Index where the name starts.
 * @param {number} end - Index just past the name.
 * @param {Object} state - Parser state; `success` is cleared on rejection.
 * @returns {number} `end` when the name is known, otherwise `start`.
 */
function known(subject, start, end, state) {
  var name = subject.slice(start, end)
  // Borrow hasOwnProperty from Object.prototype: `names` is keyed by
  // user-chosen identifiers, and one of them may be "hasOwnProperty".
  if (!Object.prototype.hasOwnProperty.call(names, name)) {
    state.success = false
    return start
  }
  return end
}
// The grammar
// Matcher for a small JavaScript-like program: any number of declarations
// followed by one `return` statement, then the end of the input.
var pseudojs = grammar('block', function(ref) {
  return {
    // Entry point: declarations, the return statement, trailing
    // whitespace / semicolons, then end of input.
    block:
      sequence(zeroplus(ref('declaration')), ref('returnStatement'), __, /$/),
    // `var` / `const` / `let`, then one or more comma-separated assignments.
    declaration:
      sequence(__, declKeywords, space, ref('assignment'), zeroplus(sequence(_, ',', _, ref('assignment')))),
    // `name = expression`; check(identifier, register) records the name.
    assignment:
      sequence(_, check(identifier, register), _, '=', _, ref('expression')),
    returnStatement:
      sequence(__, 'return', _, ref('expression')),
    // A string, a regexp literal, a call to one of funcNames, or a name that
    // has already been registered.
    expression:
      sequence(_, either(
        string1, string2, regexp,
        sequence(funcNames, _, '(', maybe(ref('expressionList')), _, ')'),
        check(identifier, known)
      )),
    // One or more comma-separated expressions.
    expressionList:
      sequence(
        _, ref('expression'), zeroplus(sequence(_ , ',', _, ref('expression')))
      )
  }
})
// match attempt
// Prints the index returned by the grammar for the sample program, then the
// final parser state.
var state = {success:false}
init()
console.log(pseudojs('var a = /i/g; let b =/*foo*/"c", c = either(a, greedy("*", b)); return c', 0, state))
console.log(state)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment