Skip to content

Instantly share code, notes, and snippets.

@pygy
Created February 3, 2017 00:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pygy/4fa68245989585c22791de4b12a24bcc to your computer and use it in GitHub Desktop.
Save pygy/4fa68245989585c22791de4b12a24bcc to your computer and use it in GitHub Desktop.
var c = require("compose-regexp")
/**
 * Turn a matcher description into a matcher function.
 *
 * A matcher function is called as `(subject, index, state)`. It sets
 * `state.success` and returns the index right after the match, or the
 * `index` it was given when nothing matched.
 *
 * - Functions are taken to be matchers already and are returned as-is.
 * - Plain objects are rejected with a TypeError: `grammar()` needs an entry
 *   name and a builder function, so there is nothing to delegate to here.
 * - Everything else (strings, RegExps, ...) is passed to compose-regexp and
 *   wrapped in a matcher that only matches exactly at `index`.
 *
 * @param {*} matcher - function, RegExp or value accepted by compose-regexp
 * @returns {function(string, number, {success: boolean}): number}
 * @throws {TypeError} when `matcher` is a plain object
 */
function normalize(matcher) {
  if (typeof matcher === 'function') return matcher
  if (({}).toString.call(matcher) === '[object Object]') {
    throw new TypeError(
      'normalize: plain objects are not valid matchers; ' +
      'build the rule with grammar(entry, build) and pass the resulting function'
    )
  }
  // Always match with the `g` flag so that `lastIndex` is honoured, and keep
  // whatever other flags the source RegExp carried.
  var flags = 'g'
  if (matcher instanceof RegExp) flags += matcher.flags.replace('g', '')
  // The trailing `/()/` alternative matches the empty string at `lastIndex`,
  // which stops `exec` from scanning forward: either the real pattern matches
  // right at `index`, or the empty capture does. (Relies on `c.either`
  // producing an ordered alternation and `c.flags` applying the flags.)
  var anchored = c.flags(flags,
    c.either(
      matcher,
      /()/
    )
  )
  return function(subject, index, state) {
    anchored.lastIndex = index
    var res = anchored.exec(subject)
    // The last capture group is the sentinel `()`: it is '' only when the
    // real pattern did not match at `index`.
    if (res == null || res[res.length - 1] === '') {
      state.success = false
      return index
    }
    state.success = true
    return index + res[0].length
  }
}
/**
 * Build a matcher that runs every argument in order, each one starting where
 * the previous one stopped.
 *
 * The result succeeds only if all parts succeed and then returns the final
 * index; on the first failure it stops and returns the index it started at.
 * With no parts it always succeeds without consuming anything.
 *
 * @param {...*} parts - matcher descriptions, converted with `normalize`
 * @returns {function(string, number, {success: boolean}): number}
 */
function sequence(...parts) {
  const steps = parts.map(normalize)
  return function(subject, index, state) {
    let cursor = index
    state.success = true
    for (const step of steps) {
      cursor = step(subject, cursor, state)
      // A failed step aborts the whole sequence and rewinds to the start.
      if (!state.success) return index
    }
    return cursor
  }
}
/**
 * Build an ordered-choice matcher: the alternatives are tried in the order
 * given and the first one that succeeds wins.
 *
 * When every alternative fails (or none was given) `state.success` is false
 * and the starting index is returned.
 *
 * @param {...*} alternatives - matcher descriptions, converted with `normalize`
 * @returns {function(string, number, {success: boolean}): number}
 */
function either(...alternatives) {
  const options = alternatives.map(normalize)
  return function(subject, index, state) {
    let cursor = index
    state.success = false
    for (const option of options) {
      // Each alternative starts from whatever index the previous one returned.
      cursor = option(subject, cursor, state)
      if (state.success) return cursor
    }
    return index
  }
}
/**
 * Positive lookahead: report whether `matcher` would match at the current
 * position, without consuming any input.
 *
 * @param {*} matcher - matcher description, converted with `normalize`
 * @returns {function(string, number, {success: boolean}): number}
 */
function lookFor(matcher) {
  var probe = normalize(matcher)
  return function(subject, index, state) {
    // Only the effect on state.success is kept; the consumed length is dropped.
    probe(subject, index, state)
    return index
  }
}
/**
 * Negative lookahead: succeed exactly when `matcher` does NOT match at the
 * current position. No input is ever consumed.
 *
 * @param {*} matcher - matcher description, converted with `normalize`
 * @returns {function(string, number, {success: boolean}): number}
 */
function not(matcher) {
  var probe = normalize(matcher)
  return function(subject, index, state) {
    probe(subject, index, state)
    // Invert the outcome reported by the probed matcher.
    state.success = state.success ? false : true
    return index
  }
}
/**
 * Optional match (the `?` quantifier): try `matcher` and report success
 * whether or not it matched.
 *
 * The returned index is whatever `matcher` returned, so a failing matcher
 * that hands back its starting index consumes nothing.
 *
 * @param {*} matcher - matcher description, converted with `normalize`
 * @returns {function(string, number, {success: boolean}): number}
 */
function maybe(matcher) {
  var optional = normalize(matcher)
  return function(subject, index, state) {
    var next = optional(subject, index, state)
    state.success = true
    return next
  }
}
/**
 * The Kleene star: apply `matcher` repeatedly until it fails, then report
 * success with the index reached so far (possibly the starting index).
 *
 * Repetition also stops as soon as an iteration succeeds without advancing
 * the index. A zero-width success (a lookahead, `maybe(...)`, an empty
 * pattern) would otherwise be repeated forever.
 *
 * @param {*} matcher - matcher description, converted with `normalize`
 * @returns {function(string, number, {success: boolean}): number}
 */
function zeroplus(matcher) {
  matcher = normalize(matcher)
  return function(subject, index, state) {
    var previous
    do {
      previous = index
      index = matcher(subject, index, state)
      // `index > previous` guarantees progress and therefore termination.
    } while (state.success && index > previous)
    state.success = true
    return index
  }
}
/**
 * Build a set of mutually recursive rules and return the entry rule's matcher.
 *
 * `build` receives a `ref(name)` function and must return an object mapping
 * rule names to matcher descriptions. `ref(name)` returns a late-bound matcher
 * that looks the rule up when it is first run, so rules may reference each
 * other (or themselves) before they are defined.
 *
 * A rule that is neither the entry point nor referenced through `ref` during
 * `build` is reported with `console.warn`.
 *
 * @param {string} entry - name of the rule to return
 * @param {function(function(string): Function): Object} build - rule factory
 * @returns {function(string, number, {success: boolean}): number}
 * @throws {Error} when a referenced rule or the entry rule is not defined
 */
function grammar(entry, build) {
  // Rule names are arbitrary strings, so never go through the objects' own
  // `hasOwnProperty` / inherited properties (think of a rule named "toString").
  var hasOwn = Object.prototype.hasOwnProperty
  var refs = {}
  var grm
  function ref(name) {
    if (!hasOwn.call(refs, name)) refs[name] = function(subject, index, state) {
      return grm[name](subject, index, state)
    }
    return refs[name]
  }
  grm = build(ref)
  for (var r in grm) if (hasOwn.call(grm, r)) {
    grm[r] = normalize(grm[r])
    if (r !== entry && !hasOwn.call(refs, r)) console.warn('orphan rule ' + JSON.stringify(r))
  }
  for (r in refs) if (hasOwn.call(refs, r) && !hasOwn.call(grm, r)) throw new Error("invalid reference " + JSON.stringify(r))
  if (!hasOwn.call(grm, entry)) throw new Error("missing entry point " + JSON.stringify(entry))
  return grm[entry]
}
/**
 * Wrap `matcher` with a validation hook that runs only after a successful
 * match.
 *
 * `validate(subject, start, end, state)` receives the matched span and the
 * shared state. Whatever it returns becomes the resulting index, and it may
 * clear `state.success` to veto the match. When `matcher` fails, `validate`
 * is not called and the matcher's own return value is passed through.
 *
 * @param {*} matcher - matcher description, converted with `normalize`
 * @param {function(string, number, number, Object): number} validate
 * @returns {function(string, number, {success: boolean}): number}
 */
function check(matcher, validate) {
  var inner = normalize(matcher)
  return function(subject, index, state) {
    var end = inner(subject, index, state)
    if (!state.success) return end
    return validate(subject, index, end, state)
  }
}
// The parser itself
// lex
// Lexical building blocks, assembled into RegExps with compose-regexp (`c`).

// `//` comment running to the end of the line. The newline is consumed when
// present, but end of input is accepted too, so a comment on the very last
// line is still recognised.
var lineComment = c.sequence('//', /.*(?:\n|$)/)
// `/* ... */` comment; the frugal body stops at the first `*/`.
var blockComment = c.sequence('/*', /[\s\S]*?/, '*/')
// One run of whitespace or one comment.
var space = c.either(/\s+/, lineComment, blockComment)
// Optional whitespace/comments.
var _ = c.greedy('*', space)
// Optional whitespace/comments, also swallowing `;` statement separators.
var __ = c.greedy('*', c.either(space, ';'))
// Any name exported by compose-regexp, i.e. the callable helpers.
var funcNames = c.either.apply(null, Object.keys(c))
var declKeywords = c.either('var', 'const', 'let')
// ASCII identifiers only: a letter or `_`, then word characters.
var identifier = /[a-zA-Z_][\w]*/
// Single-quoted string: backslash escapes any character, raw newlines and
// unescaped quotes are not allowed inside.
var string1 = c.sequence(
  "'",
  c.greedy('*',
    /\\[\S\s]|[^'\n]/
  ),
  "'"
)
// Double-quoted string, same rules as above.
var string2 = c.sequence(
  '"',
  c.greedy('*',
    /\\[\S\s]|[^"\n]/
  ),
  '"'
)
// Regular expression literal: `/body/flags`.
//
// The body is one or more of: a backslash escape (so `\/` does not end the
// literal) or any other non-newline character, matched frugally up to the
// closing `/`. Flags are optional lowercase letters.
// NOTE(review): an unescaped `/` inside a character class (`/[/]/`) still ends
// the body early; the validator below will then reject the truncated source.
//
// After a successful lexical match the body is compiled with `new RegExp` so
// that syntactically invalid patterns are reported by throwing.
var regexp = check(
  c.sequence(
    '/',
    c.frugal('+',
      c.either(/\\[\s\S]/, /./)
    ),
    '/', /[a-z]*/
  ),
  function(subject, start, end, state) {
    try {
      // Drop the leading `/` and the trailing `/flags` to get the pattern source.
      new RegExp(subject.slice(start + 1, end).replace(/\/[a-z]*$/, ''))
    } catch (e) {
      // Keep only the message: the JS stack of the parser itself is of no use
      // to whoever wrote the invalid pattern.
      e.stack = e.message + "\n "
      throw e;
    }
    return end
  }
)
// Table of known identifiers, keyed by name. Filled by init() and register().
var names
/**
 * Reset the identifier table to a fresh shallow copy of the compose-regexp
 * exports, discarding every name recorded by earlier parses.
 */
function init() {
  var fresh = {}
  Object.assign(fresh, c)
  names = fresh
}
/**
 * Validation hook for `check`: record the identifier spanning
 * `subject[start, end)` as declared.
 *
 * @param {string} subject - text being parsed
 * @param {number} start - index of the identifier's first character
 * @param {number} end - index just past the identifier
 * @param {Object} state - shared match state (unused here)
 * @returns {number} `end`, accepting the match unchanged
 * @throws {Error} when the name is already present in `names`
 */
function register(subject, start, end, state) {
  var name = subject.slice(start, end)
  // Go through Object.prototype: a registered variable called "hasOwnProperty"
  // would otherwise shadow the method on `names` and break every later lookup.
  if (Object.prototype.hasOwnProperty.call(names, name)) throw new Error("Attempt to redefine variable " + name)
  names[name] = true
  return end
}
/**
 * Validation hook for `check`: accept the identifier spanning
 * `subject[start, end)` only if it is present in `names`.
 *
 * @param {string} subject - text being parsed
 * @param {number} start - index of the identifier's first character
 * @param {number} end - index just past the identifier
 * @param {{success: boolean}} state - cleared when the name is unknown
 * @returns {number} `end` for a known name, `start` otherwise
 */
function known(subject, start, end, state) {
  var name = subject.slice(start, end)
  // Go through Object.prototype: `names` may itself contain a key called
  // "hasOwnProperty", which would shadow the method.
  if (!Object.prototype.hasOwnProperty.call(names, name)) {
    state.success = false;
    return start
  }
  return end
}
// The grammar
// `pseudojs` is the matcher for the entry rule `block`. Rules refer to each
// other through `ref(name)`, which resolves the rule when the matcher runs,
// so they can be listed in any order and may be recursive.
var pseudojs = grammar('block', function(ref) {
return {
// Whole input: zero or more declarations, one return statement, optional
// trailing whitespace/comments/semicolons, then end of input.
block:
sequence(zeroplus(ref('declaration')), ref('returnStatement'), __, /$/),
// `var|const|let`, mandatory whitespace or comment, then one or more
// comma-separated assignments.
declaration:
sequence(__, declKeywords, space, ref('assignment'), zeroplus(sequence(_, ',', _, ref('assignment')))),
// `name = expression`; `register` records the name and throws if it is
// already present in the identifier table.
assignment:
sequence(_, check(identifier, register), _, '=', _, ref('expression')),
// `return expression`
returnStatement:
sequence(__, 'return', _, ref('expression')),
// Ordered choice, first success wins: string literal, regexp literal, a call
// to one of the names in `funcNames` with an optional argument list, or an
// identifier that `known` finds in the identifier table.
expression:
sequence(_, either(
string1, string2, regexp,
sequence(funcNames, _, '(', maybe(ref('expressionList')), _, ')'),
check(identifier, known)
)),
// One or more comma-separated expressions.
expressionList:
sequence(
_, ref('expression'), zeroplus(sequence(_ , ',', _, ref('expression')))
)
}
})
// match attempt
// Shared state object: every matcher writes its outcome to `state.success`.
var state = {success:false}
// Reset the identifier table before parsing.
init()
// Run the grammar from index 0 and log the index it returns, then log the
// final state so the success flag can be inspected.
console.log(pseudojs('var a = /i/g; let b =/*foo*/"c", c = either(a, greedy("*", b)); return c', 0, state))
console.log(state)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment