Skip to content

Instantly share code, notes, and snippets.

@randrews
Created June 10, 2011 06:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save randrews/1018317 to your computer and use it in GitHub Desktop.
Save randrews/1018317 to your computer and use it in GitHub Desktop.
Toy regular expression matcher in Lua
require "match"
test("Basic match",
function()
assert(("floob"):match("flo"))
assert(not ("floob"):match("nar"))
end)
test("Caret match",
function()
assert(("floob"):match("^flo"))
assert(not ("floob"):match("^ob"))
end)
test("Dollar match",
function()
assert(("floob"):match("ob$"))
assert(not ("floob"):match("fl$"))
end)
test("Star match",
function()
assert(("floob"):match("lo*b"))
assert(("flooooob"):match("floo*b"))
assert(("floob"):match("fla*"))
end)
test("Dot match",
function()
assert(("floob"):match("f...b"))
assert(("floob"):match("f.*b"))
assert(("floob"):match(".*b$"))
assert(not ("floobaaa"):match("fl.*c.*$"))
assert(("floobaaa"):match("fl.*b.*$"))
end)
test("Complicated matches",
function()
local s = " {Flerp = 5} "
local s2 = " {Flerp = 5}\n"
assert(s:match("^ *{ *.* *= *.*} *$"))
assert(s2:match("{.* = .*}"))
end)
test("Char classes",
function()
assert(("floob"):match("f[lo]*b"))
assert(not ("floob"):match("f[aeo]*b"))
assert(("10010110100"):match("^1[01]*0$"))
end)
function string.at(str, n) return str:sub(n, n) end
function string.match(str, pattern)
local matches, matchstar, matchhere, grabtoken
-- Returns whether a given char of input matches a single
-- pattern token:
matches =
function(ch, token)
if token.chars then
return token.chars:find(ch)
else
return token == "." or token == ch
end
end
grabtoken =
function(pattern)
local token, rest
if pattern:at(1) == "[" then -- A char class!
local close = pattern:find("]")
token = {chars = pattern:sub(2, close-1)}
rest = pattern:sub(close+1)
else -- Not a class
token = pattern:at(1)
rest = pattern:sub(2)
end
local modifier = rest:at(1)
if modifier == "*" then rest = rest:sub(2) end
return token, modifier, rest
end
-- Try to find a match starting at the beginning of the string
-- We'll call this for every character in the string until we get a match
matchhere =
function(str, pattern)
-- Everything matches an empty pattern
if pattern == "" then return true end
-- Try to match end-of-string
if pattern == "$" then return str == "" end
-- Get the character class we want to see at the start, the
-- character right afterwards (because it could be a star),
-- and the position of the portion of the regex after that.
-- We'll extend this later but right now this is just the first
-- two characters of the string and either 2 or 3, depending,
local first_token, modifier, rest = grabtoken(pattern)
if modifier == "*" then
-- This is a star match, we have a separate function for that:
return matchstar(str, first_token, rest)
elseif matches(str:at(1), first_token) then
-- Ordinary token, and it matches, move everything forward a char
return matchhere(str:sub(2), rest)
end
end
-- Called to deal with matching stars
matchstar =
function(str, token, rest)
while true do
-- Find a match for rest starting here
if matchhere(str, rest) then return true end
-- If this matches neither rest nor token, or
-- it's empty, then bail
if not matches(str:at(1), token) or str == "" then break end
str = str:sub(2) -- Next char
end
end
-- First, check for a caret, and just cut it off. If we're here then
-- we know we're at the start of the string
if pattern:at(1) == "^" then
return matchhere(str, pattern:sub(2))
else
repeat
-- Try to find a match starting at every character
if matchhere(str, pattern) then return true end
str = str:sub(2)
until str == ""
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment