Toy regular expression matcher in Lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "match" | |
test("Basic match", | |
function() | |
assert(("floob"):match("flo")) | |
assert(not ("floob"):match("nar")) | |
end) | |
test("Caret match", | |
function() | |
assert(("floob"):match("^flo")) | |
assert(not ("floob"):match("^ob")) | |
end) | |
test("Dollar match", | |
function() | |
assert(("floob"):match("ob$")) | |
assert(not ("floob"):match("fl$")) | |
end) | |
test("Star match", | |
function() | |
assert(("floob"):match("lo*b")) | |
assert(("flooooob"):match("floo*b")) | |
assert(("floob"):match("fla*")) | |
end) | |
test("Dot match", | |
function() | |
assert(("floob"):match("f...b")) | |
assert(("floob"):match("f.*b")) | |
assert(("floob"):match(".*b$")) | |
assert(not ("floobaaa"):match("fl.*c.*$")) | |
assert(("floobaaa"):match("fl.*b.*$")) | |
end) | |
test("Complicated matches", | |
function() | |
local s = " {Flerp = 5} " | |
local s2 = " {Flerp = 5}\n" | |
assert(s:match("^ *{ *.* *= *.*} *$")) | |
assert(s2:match("{.* = .*}")) | |
end) | |
test("Char classes", | |
function() | |
assert(("floob"):match("f[lo]*b")) | |
assert(not ("floob"):match("f[aeo]*b")) | |
assert(("10010110100"):match("^1[01]*0$")) | |
end) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function string.at(str, n) return str:sub(n, n) end | |
function string.match(str, pattern) | |
local matches, matchstar, matchhere, grabtoken | |
-- Returns whether a given char of input matches a single | |
-- pattern token: | |
matches = | |
function(ch, token) | |
if token.chars then | |
return token.chars:find(ch) | |
else | |
return token == "." or token == ch | |
end | |
end | |
grabtoken = | |
function(pattern) | |
local token, rest | |
if pattern:at(1) == "[" then -- A char class! | |
local close = pattern:find("]") | |
token = {chars = pattern:sub(2, close-1)} | |
rest = pattern:sub(close+1) | |
else -- Not a class | |
token = pattern:at(1) | |
rest = pattern:sub(2) | |
end | |
local modifier = rest:at(1) | |
if modifier == "*" then rest = rest:sub(2) end | |
return token, modifier, rest | |
end | |
-- Try to find a match starting at the beginning of the string | |
-- We'll call this for every character in the string until we get a match | |
matchhere = | |
function(str, pattern) | |
-- Everything matches an empty pattern | |
if pattern == "" then return true end | |
-- Try to match end-of-string | |
if pattern == "$" then return str == "" end | |
-- Get the character class we want to see at the start, the | |
-- character right afterwards (because it could be a star), | |
-- and the position of the portion of the regex after that. | |
-- We'll extend this later but right now this is just the first | |
-- two characters of the string and either 2 or 3, depending, | |
local first_token, modifier, rest = grabtoken(pattern) | |
if modifier == "*" then | |
-- This is a star match, we have a separate function for that: | |
return matchstar(str, first_token, rest) | |
elseif matches(str:at(1), first_token) then | |
-- Ordinary token, and it matches, move everything forward a char | |
return matchhere(str:sub(2), rest) | |
end | |
end | |
-- Called to deal with matching stars | |
matchstar = | |
function(str, token, rest) | |
while true do | |
-- Find a match for rest starting here | |
if matchhere(str, rest) then return true end | |
-- If this matches neither rest nor token, or | |
-- it's empty, then bail | |
if not matches(str:at(1), token) or str == "" then break end | |
str = str:sub(2) -- Next char | |
end | |
end | |
-- First, check for a caret, and just cut it off. If we're here then | |
-- we know we're at the start of the string | |
if pattern:at(1) == "^" then | |
return matchhere(str, pattern:sub(2)) | |
else | |
repeat | |
-- Try to find a match starting at every character | |
if matchhere(str, pattern) then return true end | |
str = str:sub(2) | |
until str == "" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment