Skip to content

Instantly share code, notes, and snippets.

@liyu1981
Last active December 26, 2015 20:29
Show Gist options
  • Save liyu1981/7209495 to your computer and use it in GitHub Desktop.
Save liyu1981/7209495 to your computer and use it in GitHub Desktop.
Simple regex implementation use DFA (http://swtch.com/~rsc/regexp/regexp1.html).
/*
Simple regex implementation use DFA (http://swtch.com/~rsc/regexp/regexp1.html).
It support following simple rules
1. '.' match all characters
2. '*' match 0+ occurrences, e.g., a*
3. '?' match 0 or 1 occurrences, e.g., a?
*/
function parse(re) {
var rl = re.length;
var s = [], m = s;
for (var i = 0; i < rl; ) {
if (((i + 1 < rl) && (re[i + 1] === '*')) ||
((i + 1 < rl) && (re[i + 1] === '?'))) {
if (re[i + 1] === '*') {
var e = { v: re[i], next: s };
s.push(e);
} else if (re[i + 1] === '?') {
var nexts = [];
var e1 = { v: re[i], next: nexts };
var e2 = { v: null, next: nexts };
s.push(e1);
s.push(e2);
s = nexts;
}
i += 2;
} else {
var nexts = [];
var e = { v: re[i], next: nexts };
s.push(e);
s = nexts;
i += 1;
}
}
return m;
}
function print(m) {
var s = m;
while (true) {
if (s.length <= 0) {
console.log('END []');
break;
} else {
console.log('STATE: ');
var n = null;
for (var i=0; i<s.length; ++i) {
if (s[i].next !== s) {
n = s[i].next;
}
console.log('e: { v: ', s[i].v, ', next: ', (s[i].next === s) ? 'self' : 'next', '}');
}
s = n;
}
}
}
function walk(s, c) {
//console.log('now', c, 's is', s);
if (s.length <= 0) {
return null;
} else {
for (var i=0; i<s.length; i++) {
if (s[i].v === c || s[i].v === '.' || s[i].v === null) {
//console.log('matched', s[i].v, s[i].next);
return s[i].next;
}
}
return null;
}
}
function match(m, str) {
var s = m;
var i = 0;
for (; i<str.length; ++i) {
var r = walk(s, str[i]);
if (r === null) {
return false;
}
s = r;
//console.log('next s is', s);
}
if (i < str.length || s.length > 0) {
return false;
}
return true;
}
console.log('pattern is', 'a*bc?d');
var m = parse('a*bc?d');
print(m);
console.log('match abc', match(m, 'abc'));
console.log('match bcd', match(m, 'bcd'));
console.log('match aabd', match(m, 'bcd'));
console.log('match bcde', match(m, 'bcd'));
console.log('match c', match(m, 'c'));
console.log('match d', match(m, 'd'));
@liyu1981
Copy link
Author

expected output:

[liyu@hk153 ~/DevCamp]$ node reg.js 
pattern is a*bc?d
STATE: 
e: { v:  a , next:  self }
e: { v:  b , next:  next }
STATE: 
e: { v:  c , next:  next }
e: { v:  null , next:  next }
STATE: 
e: { v:  d , next:  next }
END []
match abc false
match bcd true
match aabd true
match bcde true
match c false
match d false

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment