Created
April 14, 2014 17:45
-
-
Save luiseduardohd/10668729 to your computer and use it in GitHub Desktop.
LuaRegex A True Regular Expression for Lua http://apostata.web.fc2.com/luaregex/index-en.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- luaregex.lua ver.130911 | |
-- A true, python-like regular expression for Lua | |
-- | |
-- Usage: | |
-- local re = dofile("luaregex.lua") | |
-- local regex = re.compile("\\w+") | |
-- for match in regex:finditer("Hello, World!") do | |
-- print(match:group(0)) | |
-- end | |
-- | |
-- If you find bugs, report them to omawarisan.bokudesu _AT_ live.jp. | |
-- | |
-- The author releases this script in the public domain, | |
-- but he would appreciate your mercy if you remove or change the e-mail address above | |
-- when you publish some modified version of this script. | |
--[[ | |
or-exp: | |
pair-exp | |
or-exp "|" pair-exp | |
pair-exp: | |
repeat-exp_opt | |
pair-exp repeat-exp | |
repeat-exp: | |
primary-exp | |
repeat-exp repeater | |
repeat-exp repeater "?" | |
primary-exp: | |
"(?:" or-exp ")" | |
"(?P<" identifier ">" or-exp ")" | |
"(?P=" name ")" | |
"(?=" or-exp ")" | |
"(?!" or-exp ")" | |
"(?<=" or-exp ")" | |
"(?<!" or-exp ")" | |
"(?(" name ")" pair-exp "|" pair-exp ")" | |
"(?(" name ")" pair-exp ")" | |
"(" or-exp ")" | |
char-class | |
non-terminal | |
terminal-str | |
repeater: | |
"*" | |
"+" | |
"?" | |
"{" number_opt "," number_opt "}" | |
"{" number "}" | |
char-class: | |
"[^" user-char-class "]" | |
"[" user-char-class "]" | |
user-char-class: | |
user-char-range | |
user-char-class user-char-range | |
user-char-range: | |
user-char "-" user-char_opt | |
user-char | |
user-char: | |
class-escape-sequence | |
CHARACTER OTHER THAN | |
\, ] | |
class-escape-sequence: | |
term-escape-sequence | |
"\b" | |
terminal-str: | |
terminal | |
terminal-str terminal | |
terminal: | |
term-escape-sequence | |
CHARACTER OTHER THAN | |
^, $, \, |, [, ], {, }, (, ), *, +, ? | |
term-escape-sequence: | |
"\a" | |
"\f" | |
"\n" | |
"\r" | |
"\t" | |
"\v" | |
"\\" | |
"\" ascii-punctuation-char | |
"\x" hex-number | |
non-terminal: | |
"^" | |
"$" | |
"." | |
"\d" | |
"\D" | |
"\s" | |
"\S" | |
"\w" | |
"\W" | |
"\A" | |
"\b" | |
"\B" | |
"\Z" | |
"\" number | |
name: | |
identifier | |
number | |
number: | |
STRING THAT MATCHES REGEX /[0-9]+/ | |
identifier: | |
STRING THAT MATCHES REGEX /[A-Za-z_][A-Za-z_0-9]*/ | |
ascii-punctuation-char: | |
CHAR THAT MATCHES REGEX /[!-~]/ and also /[^A-Za-z0-9]/ | |
hex-number: | |
STRING THAT MATCHES REGEX /[0-9A-Fa-f]{1,2}/ | |
]] | |
local unpack = table.unpack or unpack | |
-- Root of the class hierarchy. It supplies a do-nothing constructor so that
-- every class has an __init__ method, even when it defines none itself.
local __base_class__ = {
    __init__ = function(self) end,
}
-- Creates a new class table. Fields missing from the new class are looked up
-- in `base`; when `base` is omitted the root class is used instead.
local function class(base)
    local parent = base or __base_class__
    return setmetatable({}, { __index = parent })
end
--- Instantiates a class.
-- cls: the class table; the object resolves missing fields through it
-- ...: arguments forwarded to the constructor, __init__
-- Returns the constructed object.
local function new(cls, ...)
    local object = setmetatable({}, { __index = cls })
    object:__init__(...)
    return object
end
-- Returns the class of an object, i.e. the table stored (raw) under
-- "__index" in the object's metatable.
local function classof(object)
    local meta = getmetatable(object)
    return rawget(meta, "__index")
end
----------------------------------------------------------------------------- | |
-- Nodes of expression tree | |
-- | |
-- expression's base | |
-- | |
local Expression = class()

-- Resets the state of the expression and binds it to a subject string.
--   matchee: the string to be matched
--   pos:     the start position; passing nil just resets the expression,
--            so that NextMatch(submatches, flags) reports no match
function Expression:SetMatchee(matchee, pos)
    self.matchee, self.pos = matchee, pos
    self:OnSetMatchee()
end

-- Enumerates the possible matches of this expression, one per call.
-- self:SetMatchee(matchee, pos) must have been called beforehand; otherwise
-- no match is reported.
--
-- Returns (isOK, nextPos):
--   - isOK is true:  nextPos is the position for the following expression
--   - isOK is falsy: no match is left
--
-- When the expression has a name, submatches[name] is set to the matched
-- range { startPos, nextPos } on success and cleared on failure.
-- See also the contract of OnNextMatch.
function Expression:NextMatch(submatches, flags)
    local startPos = self.pos
    local isOK, nextPos
    if startPos then
        isOK, nextPos = self:OnNextMatch(submatches, flags)
        if not isOK then
            -- exhausted: later calls must not reach OnNextMatch again
            self.pos = nil
        end
    end

    local name = self.name
    if name then
        submatches[name] = isOK and { startPos, nextPos } or nil
    end
    return isOK, nextPos
end

-- name: number or string under which the match is recorded in submatches
function Expression:SetName(name)
    self.name = name
end

-- Copies the core state (matchee, pos, name) into 'clone'.
-- Meant to be called by Clone() of derived classes.
function Expression:CloneCoreStateTo(clone)
    clone.matchee, clone.pos, clone.name = self.matchee, self.pos, self.name
end

-- Hook invoked by SetMatchee; override it if necessary.
function Expression:OnSetMatchee()
end
-- Define following functions in derived classes | |
-- function Expression:Clone() | |
-- Return a clone object of the self. | |
-- The state of the clone shall be the same as the self. | |
-- If the self has sub-objects, the sub-objects shall also be cloned. | |
-- | |
-- function Expression:IsFixedLength() | |
-- Checks if the expression's length is fixed. | |
-- This functions returns (isFixed, length) | |
-- | |
-- function Expression:OnNextMatch(submatches, flags) | |
-- When this function is called, | |
-- self.matchee and self.pos refer to the string to be matched. | |
-- | |
-- - If there are one or more matches for the self, then | |
-- this function shall return (true, NEXT_POSITION), | |
-- in the favored order, one by one, each time it is called. | |
-- - If there are no matches, or if there are no matches left, | |
-- then this function shall return false. | |
-- | |
-- It is guaranteed that this function is never called | |
-- after | |
-- - this function returns false, or | |
-- - this function sets self.pos = nil, | |
-- until the user calls self:SetMatchee again. | |
-- | |
-- A matched group named 'name' (string or number) | |
-- can be obtained by | |
-- pos, nextPos = unpack(submatches[name]) | |
-- str = self.matchee:sub(pos, nextPos-1) | |
-- | |
-- expression AB | |
-- | |
local ExpPair = class(Expression)

-- sub1, sub2: the expressions A and B, matched one after the other
function ExpPair:__init__(sub1, sub2)
    self.sub1 = sub1
    self.sub2 = sub2
end

-- A starts at our own start position; B stays reset until A yields a match.
function ExpPair:OnSetMatchee()
    self.sub1:SetMatchee(self.matchee, self.pos)
    self.sub2:SetMatchee(nil, nil)
end

-- Returns a deep copy carrying the same state.
function ExpPair:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.sub1:Clone(), self.sub2:Clone())
    self:CloneCoreStateTo(clone)
    return clone
end

-- Fixed-length only when both halves are; the length is then their sum.
-- Returns (true, length) or false.
function ExpPair:IsFixedLength()
    local fixed1, len1 = self.sub1:IsFixedLength()
    if not fixed1 then
        return false
    end
    local fixed2, len2 = self.sub2:IsFixedLength()
    if not fixed2 then
        return false
    end
    return true, len1 + len2
end

-- First exhausts the alternatives of B for the current match of A, then
-- advances A and restarts B right after it.
function ExpPair:OnNextMatch(submatches, flags)
    local isOK, nextPos = self.sub2:NextMatch(submatches, flags)
    if isOK then
        return isOK, nextPos
    end
    repeat
        isOK, nextPos = self.sub1:NextMatch(submatches, flags)
        if not isOK then
            return false
        end
        self.sub2:SetMatchee(self.matchee, nextPos)
        isOK, nextPos = self.sub2:NextMatch(submatches, flags)
    until isOK
    return isOK, nextPos
end
-- | |
-- expression A|B | |
-- | |
local ExpOr = class(Expression)

-- sub1, sub2: the alternatives A and B; A is preferred
function ExpOr:__init__(sub1, sub2)
    self.sub1 = sub1
    self.sub2 = sub2
end

-- Both alternatives start at our own start position.
function ExpOr:OnSetMatchee()
    self.sub1:SetMatchee(self.matchee, self.pos)
    self.sub2:SetMatchee(self.matchee, self.pos)
end

-- Returns a deep copy carrying the same state.
function ExpOr:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.sub1:Clone(), self.sub2:Clone())
    self:CloneCoreStateTo(clone)
    return clone
end

-- Fixed-length only when both alternatives are fixed and equally long.
-- Returns (true, length) or false.
function ExpOr:IsFixedLength()
    local fixed1, len1 = self.sub1:IsFixedLength()
    if fixed1 then
        local fixed2, len2 = self.sub2:IsFixedLength()
        if fixed2 and len1 == len2 then
            return true, len1
        end
    end
    return false
end

-- Yields every match of A first, then every match of B.
function ExpOr:OnNextMatch(submatches, flags)
    local isOK, nextPos = self.sub1:NextMatch(submatches, flags)
    if isOK then
        return isOK, nextPos
    end
    return self.sub2:NextMatch(submatches, flags)
end
-- | |
-- expression A{a,b}, which includes: | |
-- A* = A{,} | |
-- A+ = A{1,} | |
-- A? = A{,1} | |
-- A{n} = A{n,n} | |
-- a,b when omitted are assumed to be 0 and infinity, respectively | |
-- | |
local ExpRepeat = class(Expression)

-- sub: the repeated expression A
-- min: minimum number of iterations (defaults to 0)
-- max: maximum number of iterations (nil means no upper bound)
function ExpRepeat:__init__(sub, min, max)
    self.sub = sub
    self.min = min or 0
    self.max = max
end

-- The stack holds one entry per started iteration:
--   { matcher (a clone of sub), position where that iteration starts }
function ExpRepeat:OnSetMatchee()
    local clone = self.sub:Clone()
    clone:SetMatchee(self.matchee, self.pos)
    self.stack = { {clone, self.pos} }
end

-- Returns a deep copy, including a deep copy of the iteration stack.
function ExpRepeat:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.sub:Clone(), self.min, self.max)
    self:CloneCoreStateTo(clone)
    if self.stack then
        local cloneStack = {}
        for i, v in ipairs(self.stack) do
            local sub, pos = unpack(v)
            cloneStack[#cloneStack + 1] = {sub:Clone(), pos}
        end
        clone.stack = cloneStack
    end
    return clone
end

-- Fixed-length only for A{n} with a fixed-length A.
-- Returns (true, length) or false.
function ExpRepeat:IsFixedLength()
    if self.min == self.max then
        local b, len = self.sub:IsFixedLength()
        if b then
            -- The flag must come first: callers unpack (isFixed, length).
            -- Returning only the number left them with a nil length.
            return true, len * self.min
        end
    end
    return false
end

-- Greedy enumeration: iterations are pushed for as long as the
-- sub-expression matches (up to max); when the top one fails it is popped,
-- and the match ending where that iteration started is reported.
-- NOTE(review): with max == nil, a sub-expression that matches the empty
-- string appears to keep pushing frames indefinitely - confirm.
function ExpRepeat:OnNextMatch(submatches, flags)
    local stack = self.stack
    local max = self.max
    local isOK, nextPos
    while self.pos do
        local sub, pos
        while true do
            sub, pos = unpack(stack[#stack])
            isOK, nextPos = sub:NextMatch(submatches, flags)
            if isOK then
                if not max or #stack < max then
                    -- room for one more iteration: start it after this one
                    local clone = self.sub:Clone()
                    clone:SetMatchee(self.matchee, nextPos)
                    stack[#stack+1] = {clone, nextPos}
                else
                    break
                end
            else
                -- top iteration exhausted: fall back to where it started
                stack[#stack] = nil
                nextPos = pos
                break
            end
        end
        local iteration = #stack
        if iteration == 0 then
            -- nothing left to backtrack into
            self.pos = nil
        end
        if (self.min <= iteration)
            and (not self.max or iteration <= self.max)
        then
            return true, nextPos
        end
    end
    return false
end
-- | |
-- expression A{a,b}?, which includes: | |
-- A*? = A{,}? | |
-- A+? = A{1,}? | |
-- A?? = A{,1}? | |
-- a,b when omitted are assumed to be 0 and infinity, respectively | |
-- | |
local ExpVigorless = class(Expression)

-- sub: the repeated expression A
-- min: minimum number of iterations (defaults to 0)
-- max: maximum number of iterations (nil means no upper bound)
function ExpVigorless:__init__(sub, min, max)
    self.sub = sub
    self.min = min or 0
    self.max = max
end

-- Resets the enumeration state:
--   queue:    pending { start position, depth } pairs
--   curExp:   the matcher currently being enumerated
--   curDepth: the iteration count that a match of curExp completes
function ExpVigorless:OnSetMatchee()
    self.sub:SetMatchee(self.matchee, self.pos)
    self.queue = nil
    self.curExp = nil
    self.curDepth = 0
end

-- Returns a deep copy carrying the same state.
function ExpVigorless:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.sub:Clone(), self.min, self.max)
    self:CloneCoreStateTo(clone)
    if self.queue then
        -- queue entries are never mutated, so sharing the pairs is safe
        local cloneQ = {}
        for i, v in ipairs(self.queue) do
            cloneQ[#cloneQ + 1] = v
        end
        clone.queue = cloneQ
    end
    -- curExp is a stateful matcher: it must be cloned, not shared, or
    -- advancing the clone would also advance the original.
    clone.curExp = self.curExp and self.curExp:Clone()
    clone.curDepth = self.curDepth
    return clone
end

-- Fixed-length only for A{n}? with a fixed-length A.
-- Returns (true, length) or false.
function ExpVigorless:IsFixedLength()
    if self.min == self.max then
        local b, len = self.sub:IsFixedLength()
        if b then
            -- The flag must come first: callers unpack (isFixed, length).
            return true, len * self.min
        end
    end
    return false
end

-- Lazy enumeration: matches are produced breadth-first by iteration count,
-- so matches with fewer iterations come out first.
function ExpVigorless:OnNextMatch(submatches, flags)
    local min = self.min
    local max = self.max
    -- declared local; the original leaked these two as globals
    local isOK, nextPos
    if not self.queue then
        -- first call: schedule iteration #1, and offer the empty match
        self.queue = { {self.pos, 1} }
        if (min <= 0)
            and (not max or 0 <= max)
        then
            return true, self.pos
        end
    end
    local queue = self.queue
    while true do
        if self.curExp then
            isOK, nextPos = self.curExp:NextMatch(submatches, flags)
            if isOK then
                if not max or self.curDepth < max then
                    -- a further iteration may start where this one ended
                    queue[#queue+1] = { nextPos, self.curDepth + 1 }
                end
                if (min <= self.curDepth)
                    and (not max or self.curDepth <= max)
                then
                    return isOK, nextPos
                end
            else
                self.curExp = nil
            end
        elseif #queue > 0 then
            -- take the oldest pending start point (FIFO)
            nextPos, self.curDepth = unpack(table.remove(queue, 1))
            local clone = self.sub:Clone()
            clone:SetMatchee(self.matchee, nextPos)
            self.curExp = clone
        else
            return false
        end
    end
end
-- | |
-- expression (?=A), (?!A) | |
-- | |
local ExpLookAhead = class(Expression)

-- sub:         the expression A that is looked ahead for
-- affirmative: true for (?=A), false for (?!A)
function ExpLookAhead:__init__(sub, affirmative)
    self.sub = sub
    self.aff = affirmative
end

function ExpLookAhead:OnSetMatchee()
    self.sub:SetMatchee(self.matchee, self.pos)
end

-- Returns a deep copy carrying the same state.
function ExpLookAhead:Clone()
    -- self.aff must be handed to the constructor; it used to be dropped,
    -- which turned every cloned (?=A) into (?!A).
    -- 'local' keeps the clone out of the global table.
    local clone = new(classof(self), self.sub:Clone(), self.aff)
    self:CloneCoreStateTo(clone)
    return clone
end

-- A look-ahead consumes no characters.
function ExpLookAhead:IsFixedLength()
    return true, 0
end

-- Succeeds at most once, without advancing, when the outcome of the
-- sub-expression agrees with the polarity of the assertion.
function ExpLookAhead:OnNextMatch(submatches, flags)
    local isOK, nextPos = self.sub:NextMatch(submatches, flags)
    if (not self.aff) == (not isOK) then
        nextPos = self.pos
        self.pos = nil
        return true, nextPos
    end
    return false
end
-- | |
-- expression (?<=A), (?<!A) | |
-- | |
local ExpLookBack = class(Expression)

-- sub:         the expression A that is looked back for; it must have a
--              fixed length (asserted here)
-- affirmative: true for (?<=A), false for (?<!A)
function ExpLookBack:__init__(sub, affirmative)
    local isFixed, len = sub:IsFixedLength()
    assert(isFixed)
    self.sub = sub
    self.len = len
    self.aff = affirmative
end

-- Positions the sub-expression self.len characters before our position,
-- or resets it when that is impossible.
function ExpLookBack:OnSetMatchee()
    -- self.pos is nil when the expression is merely being reset (e.g. by
    -- ExpPair:OnSetMatchee); comparing a number with nil would raise an error.
    if self.pos and self.len < self.pos then
        self.sub:SetMatchee(self.matchee, self.pos - self.len)
    else
        self.sub:SetMatchee(nil, nil)
    end
end

-- Returns a deep copy carrying the same state.
function ExpLookBack:Clone()
    -- self.aff must be handed to the constructor; it used to be dropped,
    -- which turned every cloned (?<=A) into (?<!A).
    -- 'local' keeps the clone out of the global table.
    local clone = new(classof(self), self.sub:Clone(), self.aff)
    self:CloneCoreStateTo(clone)
    return clone
end

-- A look-behind consumes no characters.
function ExpLookBack:IsFixedLength()
    return true, 0
end

-- Succeeds at most once, without advancing, when the outcome of the
-- sub-expression agrees with the polarity of the assertion.
function ExpLookBack:OnNextMatch(submatches, flags)
    local isOK, nextPos = self.sub:NextMatch(submatches, flags)
    if (not self.aff) == (not isOK) then
        nextPos = self.pos
        self.pos = nil
        return true, nextPos
    end
    return false
end
-- | |
-- expression (?(NAME)A|B) | |
-- "|B" can be omitted | |
-- | |
local ExpConditional = class(Expression)

-- refname: name (string or number) of the group that is tested
-- sub1:    expression A, used when the group has matched
-- sub2:    expression B, used otherwise (may be nil)
function ExpConditional:__init__(refname, sub1, sub2)
    self.refname = refname
    self.sub1 = sub1
    self.sub2 = sub2
end

function ExpConditional:OnSetMatchee()
    self.sub1:SetMatchee(self.matchee, self.pos)
    if self.sub2 then
        self.sub2:SetMatchee(self.matchee, self.pos)
    end
end

-- Returns a deep copy carrying the same state.
function ExpConditional:Clone()
    local cloneSub1 = self.sub1:Clone()
    local cloneSub2
    if self.sub2 then
        cloneSub2 = self.sub2:Clone()
    end
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.refname, cloneSub1, cloneSub2)
    self:CloneCoreStateTo(clone)
    return clone
end

-- Fixed-length when both branches are fixed and equally long; a missing
-- B branch counts as length 0. Returns (true, length) or false.
function ExpConditional:IsFixedLength()
    local b, len1 = self.sub1:IsFixedLength()
    if b then
        if self.sub2 then
            local len2
            b, len2 = self.sub2:IsFixedLength()
            if b and len1 == len2 then
                return true, len1
            end
        elseif len1 == 0 then
            return true, 0
        end
    end
    return false
end

-- Delegates to A when the referenced group is present in submatches,
-- otherwise to B; without a B branch it matches the empty string once.
function ExpConditional:OnNextMatch(submatches, flags)
    if submatches[self.refname] then
        return self.sub1:NextMatch(submatches, flags)
    elseif self.sub2 then
        return self.sub2:NextMatch(submatches, flags)
    else
        local pos = self.pos
        self.pos = nil
        return true, pos
    end
end
-- | |
-- expression (?P=NAME) | |
-- | |
local ExpReference = class(Expression)

-- refname: name (string or number) of the referenced group
function ExpReference:__init__(refname)
    self.refname = refname
end

-- Returns a copy carrying the same state.
function ExpReference:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.refname)
    self:CloneCoreStateTo(clone)
    return clone
end

-- The length depends on what the referenced group matched.
function ExpReference:IsFixedLength()
    return false
end

-- Matches, at most once, the same text the referenced group matched.
-- When the group is absent from submatches, the empty string is matched.
function ExpReference:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    local refRange = submatches[self.refname]
    if refRange then
        local refBeg, refEnd = unpack(refRange)
        local len = refEnd - refBeg
        if self.matchee:sub(pos, pos + len - 1) == self.matchee:sub(refBeg, refEnd-1) then
            return true, pos + len
        else
            return false
        end
    else
        return true, pos
    end
end
-- | |
-- expression that matches just one char | |
-- | |
local ExpOneChar = class(Expression)

-- fnIsMatch: predicate, fnIsMatch(char:byte()) -> bool
function ExpOneChar:__init__(fnIsMatch)
    self.fnIsMatch = fnIsMatch
end

-- Returns a copy carrying the same state (the predicate is shared).
function ExpOneChar:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.fnIsMatch)
    self:CloneCoreStateTo(clone)
    return clone
end

function ExpOneChar:IsFixedLength()
    return true, 1
end

-- Matches, at most once, a single byte accepted by the predicate.
function ExpOneChar:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    -- past the end of the subject there is no byte to test
    if pos > #self.matchee then return false end
    if self.fnIsMatch(self.matchee:byte(pos)) then
        return true, pos + 1
    else
        return false
    end
end
-- | |
-- expression ^ | |
-- | |
local ExpLineBegin = class(Expression)

-- Returns a copy carrying the same state.
function ExpLineBegin:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self))
    self:CloneCoreStateTo(clone)
    return clone
end

-- An anchor consumes no characters.
function ExpLineBegin:IsFixedLength()
    return true, 0
end

-- Succeeds, at most once, at the start of the subject or right after '\n'.
function ExpLineBegin:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    -- ^ matches even a null string
    if pos == 1 then
        return true, pos
    end
    if self.matchee:sub(pos-1, pos-1) == '\n' then
        return true, pos
    end
    return false
end
-- | |
-- expression $ | |
-- | |
local ExpLineEnd = class(Expression)

-- Returns a copy carrying the same state.
function ExpLineEnd:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self))
    self:CloneCoreStateTo(clone)
    return clone
end

-- An anchor consumes no characters.
function ExpLineEnd:IsFixedLength()
    return true, 0
end

-- Succeeds, at most once, at the end of the subject or right before '\n'.
function ExpLineEnd:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    -- $ matches even a null string
    if pos == #self.matchee + 1 then
        return true, pos
    end
    if self.matchee:sub(pos, pos) == '\n' then
        return true, pos
    end
    return false
end
-- | |
-- expression \A | |
-- | |
local ExpBegin = class(Expression)

-- Returns a copy carrying the same state.
function ExpBegin:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self))
    self:CloneCoreStateTo(clone)
    return clone
end

-- An anchor consumes no characters.
function ExpBegin:IsFixedLength()
    return true, 0
end

-- Succeeds, at most once, only at the very start of the subject.
function ExpBegin:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    -- \A matches even a null string
    if pos == 1 then
        return true, pos
    end
    return false
end
-- | |
-- expression \Z | |
-- | |
local ExpEnd = class(Expression)

-- Returns a copy carrying the same state.
function ExpEnd:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self))
    self:CloneCoreStateTo(clone)
    return clone
end

-- An anchor consumes no characters.
function ExpEnd:IsFixedLength()
    return true, 0
end

-- Succeeds, at most once, only at the very end of the subject.
function ExpEnd:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    -- \Z matches even a null string
    if pos == #self.matchee + 1 then
        return true, pos
    end
    return false
end
-- | |
-- expression \b | |
-- | |
local ExpBorder = class(Expression)

-- Returns a copy carrying the same state.
function ExpBorder:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self))
    self:CloneCoreStateTo(clone)
    return clone
end

-- An assertion consumes no characters.
function ExpBorder:IsFixedLength()
    return true, 0
end

-- Succeeds, at most once, when exactly one of the bytes before and at the
-- current position is a word character.
function ExpBorder:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    if self:IsWordAt(pos-1) ~= self:IsWordAt(pos) then
        return true, pos
    end
    return false
end

-- Tells whether the byte at 'pos' is one of [0-9A-Za-z_].
-- Positions outside the subject count as non-word.
function ExpBorder:IsWordAt(pos)
    if pos <= 0 then return false end
    local value = self.matchee:byte(pos)
    if not value then return false end
    local zero, nine, A, Z, a, z, ubar = ("09AZaz_"):byte(1,7)
    return (zero <= value and value <= nine)
        or (A <= value and value <= Z)
        or (a <= value and value <= z)
        or value == ubar
end
-- | |
-- expression \B | |
-- | |
local ExpNegBorder = class(ExpBorder)

-- Succeeds, at most once, when the bytes before and at the current position
-- are both word characters or both non-word characters.
function ExpNegBorder:OnNextMatch(submatches, flags)
    local here = self.pos
    self.pos = nil
    local wordBefore = self:IsWordAt(here - 1)
    local wordAfter = self:IsWordAt(here)
    if wordBefore ~= wordAfter then
        return false
    end
    return true, here
end
-- | |
-- expression that matches a terminal string | |
-- | |
local ExpTerminals = class(Expression)

-- str: the literal string to match
function ExpTerminals:__init__(str)
    self.str = str
end

-- Returns a copy carrying the same state.
function ExpTerminals:Clone()
    -- 'local' is required: without it every Clone() wrote to a global 'clone'
    local clone = new(classof(self), self.str)
    self:CloneCoreStateTo(clone)
    return clone
end

function ExpTerminals:IsFixedLength()
    return true, #self.str
end

-- Matches, at most once, the literal string at the current position.
function ExpTerminals:OnNextMatch(submatches, flags)
    local pos = self.pos
    self.pos = nil
    local len = #self.str
    if self.matchee:sub(pos, pos + len - 1) == self.str then
        return true, pos + len
    else
        return false
    end
end
----------------------------------------------------------------------------- | |
-- Parser to compile regex-string to expression-tree | |
local Parser = class()

-- Compiles 'regexp' into an expression tree.
--   regexp: the regular expression string
--   flags:  stored as given
-- On success the tree is available through Parser:Expression(); when the
-- whole string could not be consumed, an error is reported via Parser:Error().
function Parser:__init__(regexp, flags)
    self.regexp = regexp
    self.flags = flags
    self.nextCapture = 1    -- number given to the next "(...)" group

    local expOr, nextPos = self:GetExpOr(1)
    if not expOr then
        return
    end

    if nextPos == #regexp + 1 then
        -- everything was consumed: discard errors left by abandoned attempts
        self.errMsg = nil
        self.errPos = nil
        self.exp = expOr
        return
    end

    -- parsing stopped early: keep a specific error if one was recorded
    if not self.errMsg then
        self.errMsg = "cannot compile"
        self.errPos = nextPos
    end
end
-- Returns (errMsg, errPos) as recorded by the parser; both are nil after a
-- successful compilation.
function Parser:Error()
    local message, position = self.errMsg, self.errPos
    return message, position
end
-- Returns the compiled expression tree (self.exp); it is only set when the
-- constructor consumed the whole regular expression.
function Parser:Expression()
    local compiled = self.exp
    return compiled
end
-- Parses an or-exp: pair-exp ( "|" pair-exp )*
-- Returns (expression, nextPos), or nil on failure.
function Parser:GetExpOr(pos)
    local expOr, nextPos = self:GetExpPair(pos)
    if not expOr then return nil end

    local regexp = self.regexp
    while regexp:sub(nextPos, nextPos) == '|' do
        local alternative
        alternative, nextPos = self:GetExpPair(nextPos + 1)
        if not alternative then return nil end
        -- left-associative: ((A|B)|C)
        expOr = new(ExpOr, expOr, alternative)
    end
    return expOr, nextPos
end
-- Parses a pair-exp: a possibly empty sequence of repeat-exps.
-- An empty sequence yields an expression that matches the empty string.
-- Returns (expression, nextPos).
function Parser:GetExpPair(pos)
    local expPair, nextPos = self:GetExpRepeat(pos)
    if not expPair then
        return new(ExpTerminals, ""), pos
    end

    repeat
        pos = nextPos
        local expRepeat
        expRepeat, nextPos = self:GetExpRepeat(pos)
        if expRepeat then
            -- left-associative: ((AB)C)
            expPair = new(ExpPair, expPair, expRepeat)
        end
    until not expRepeat
    return expPair, pos
end
-- Parses a repeat-exp: a primary-exp followed by any number of repeaters,
-- each optionally suffixed by "?" to make it lazy.
-- Returns (expression, nextPos), or nil when there is no primary-exp.
function Parser:GetExpRepeat(pos)
    local expRepeat, nextPos = self:GetExpPrimary(pos)
    if not expRepeat then return nil end

    local regexp = self.regexp
    pos = nextPos
    while true do
        local repeater
        repeater, nextPos = self:GetRepeater(pos)
        if not repeater then
            return expRepeat, pos
        end

        local isLazy = regexp:sub(nextPos, nextPos) == '?'
        if isLazy then
            nextPos = nextPos + 1
        end
        local clsExp = isLazy and ExpVigorless or ExpRepeat
        expRepeat = new(clsExp, expRepeat, repeater.min, repeater.max)
        pos = nextPos
    end
end
-- Parses a primary-exp: one of the "(...)" group forms, a character class,
-- a non-terminal or a terminal string.
-- Returns (expression, nextPos), or nil on failure.
function Parser:GetExpPrimary(pos)
    local regexp = self.regexp

    if regexp:sub(pos, pos) ~= '(' then
        -- not a group: try the remaining alternatives in order
        local subExp, nextPos = self:GetCharClass(pos)
        if not subExp then
            subExp, nextPos = self:GetNonTerminal(pos)
        end
        if not subExp then
            subExp, nextPos = self:GetTerminalStr(pos)
        end
        if not subExp then
            return nil
        end
        return subExp, nextPos
    end

    pos = pos + 1
    local subExp, nextPos
    if regexp:sub(pos, pos) ~= '?' then
        -- plain "(" or-exp ")": a numbered capture group
        subExp, nextPos = self:GetNamedGroup(pos)
    else
        pos = pos + 1
        local one = regexp:sub(pos, pos)
        local two = regexp:sub(pos, pos + 1)
        if one == ':' then
            subExp, nextPos = self:GetUnnamedGroup(pos + 1)
        elseif two == 'P<' then
            subExp, nextPos = self:GetUserNamedGroup(pos + 2)
        elseif two == 'P=' then
            subExp, nextPos = self:GetUserNamedRef(pos + 2)
        elseif one == '=' then
            subExp, nextPos = self:GetLookAhead(pos + 1)
        elseif one == '!' then
            subExp, nextPos = self:GetNegLookAhead(pos + 1)
        elseif two == '<=' then
            subExp, nextPos = self:GetLookBack(pos + 2)
        elseif two == '<!' then
            subExp, nextPos = self:GetNegLookBack(pos + 2)
        elseif one == '(' then
            subExp, nextPos = self:GetConditional(pos + 1)
        else
            self.errMsg = "invalid char"
            self.errPos = pos
            return nil
        end
    end
    if not subExp then return nil end

    -- every group form is closed by ")"
    if regexp:sub(nextPos, nextPos) ~= ')' then
        self.errMsg = ") expected"
        self.errPos = nextPos
        return nil
    end
    return subExp, nextPos + 1
end
-- Parses the body of "(?:" or-exp ")"; 'pos' is just past "(?:".
-- Returns (expression, nextPos) as delivered by GetExpOr.
function Parser:GetUnnamedGroup(pos)
    local expOr, nextPos = self:GetExpOr(pos)
    return expOr, nextPos
end
-- Parses the remainder of "(?P<" identifier ">" or-exp ")"; 'pos' is just
-- past "(?P<". The parsed expression is named after the identifier.
-- Returns (expression, nextPos), or nil on failure.
function Parser:GetUserNamedGroup(pos)
    local name, nextPos = self:GetIdentifier(pos)
    if not name then return nil end

    if self.regexp:sub(nextPos, nextPos) == '>' then
        local expOr
        expOr, nextPos = self:GetExpOr(nextPos + 1)
        if expOr then
            expOr:SetName(name)
        end
        return expOr, nextPos
    end

    self.errMsg = "> expected"
    self.errPos = nextPos
    return nil
end
-- Parses the name of "(?P=" name ")"; 'pos' is just past "(?P=".
-- Returns (back-reference expression, nextPos), or nil when no name is found.
function Parser:GetUserNamedRef(pos)
    local refname, nextPos = self:GetName(pos)
    if refname then
        return new(ExpReference, refname), nextPos
    end
    return nil
end
-- Parses the body of "(?=" or-exp ")"; 'pos' is just past "(?=".
-- Returns (affirmative look-ahead expression, nextPos); when the body fails
-- to parse, the results of GetExpOr are passed through.
function Parser:GetLookAhead(pos)
    local body, nextPos = self:GetExpOr(pos)
    if not body then
        return body, nextPos
    end
    return new(ExpLookAhead, body, true), nextPos
end
-- Parses the body of "(?!" or-exp ")"; 'pos' is just past "(?!".
-- Returns (negative look-ahead expression, nextPos); when the body fails
-- to parse, the results of GetExpOr are passed through.
function Parser:GetNegLookAhead(pos)
    local body, nextPos = self:GetExpOr(pos)
    if not body then
        return body, nextPos
    end
    return new(ExpLookAhead, body, false), nextPos
end
-- Parses the body of "(?<=" or-exp ")"; 'pos' is just past "(?<=".
-- The body must have a fixed length.
-- Returns (affirmative look-behind expression, nextPos), or nil on failure.
function Parser:GetLookBack(pos)
    local expOr, nextPos = self:GetExpOr(pos)
    if not expOr then return nil end

    local isFixed = expOr:IsFixedLength()
    if isFixed then
        return new(ExpLookBack, expOr, true), nextPos
    end

    self.errMsg = "length must be fixed"
    self.errPos = pos+1
    return nil
end
-- Parses the body of "(?<!" or-exp ")"; 'pos' is just past "(?<!".
-- The body must have a fixed length.
-- Returns (negative look-behind expression, nextPos), or nil on failure.
function Parser:GetNegLookBack(pos)
    local expOr, nextPos = self:GetExpOr(pos)
    if not expOr then return nil end

    local isFixed = expOr:IsFixedLength()
    if isFixed then
        return new(ExpLookBack, expOr, false), nextPos
    end

    self.errMsg = "length must be fixed"
    self.errPos = pos+1
    return nil
end
-- Parses the remainder of "(?(" name ")" pair-exp [ "|" pair-exp ] ")";
-- 'pos' is just past "(?(".
-- Returns (conditional expression, nextPos), or nil on failure.
function Parser:GetConditional(pos)
    local regexp = self.regexp

    local name, nextPos = self:GetName(pos)
    if not name then return nil end
    if regexp:sub(nextPos, nextPos) ~= ')' then
        self.errMsg = ") expected"
        self.errPos = nextPos
        return nil
    end

    -- branch used when the referenced group has matched
    local onMatch
    onMatch, nextPos = self:GetExpPair(nextPos + 1)
    if not onMatch then return nil end

    -- optional branch used when it has not
    local onMiss
    if regexp:sub(nextPos, nextPos) == '|' then
        onMiss, nextPos = self:GetExpPair(nextPos + 1)
        if not onMiss then return nil end
    end

    return new(ExpConditional, name, onMatch, onMiss), nextPos
end
-- Parses the body of a plain "(" or-exp ")" group and names it with the
-- next capture number. The number is reserved before the body is parsed and
-- given back when the body fails to parse.
-- Returns the (expression, nextPos) pair delivered by GetExpOr.
function Parser:GetNamedGroup(pos)
    local id = self.nextCapture
    self.nextCapture = id + 1

    local expOr, nextPos = self:GetExpOr(pos)
    if not expOr then
        -- restore 'nextCapture'
        self.nextCapture = id
    else
        expOr:SetName(id)
    end
    return expOr, nextPos
end
-- Parses a repeater: "*", "+", "?", "{n}", "{n,}", "{,m}" or "{n,m}".
-- Returns ({min = ..., max = ...}, nextPos), where an absent field means
-- "not given", or nil when there is no (valid) repeater at 'pos'.
function Parser:GetRepeater(pos)
    local regexp = self.regexp
    if pos > #regexp then return nil end

    -- records a parse error and produces the nil result
    local function fail(message, at)
        self.errMsg = message
        self.errPos = at
        return nil
    end

    local c = regexp:sub(pos, pos)
    if c == '*' then
        return {}, pos + 1
    elseif c == '+' then
        return { min = 1 }, pos + 1
    elseif c == '?' then
        return { max = 1 }, pos + 1
    elseif c ~= '{' then
        return nil
    end

    -- "{" number_opt ...
    pos = pos + 1
    local min, afterMin = self:GetNumber(pos)
    if min then
        pos = afterMin
    end

    c = regexp:sub(pos, pos)
    if c ~= ',' and c ~= '}' then
        return fail(", or } expected", pos)
    end
    if c == '}' and not min then
        return fail("iteration number expected", pos)
    end
    pos = pos + 1

    local max
    if c == '}' then
        -- "{n}" means exactly n
        max = min
    else
        -- "," number_opt "}"
        local afterMax
        max, afterMax = self:GetNumber(pos)
        if max then
            pos = afterMax
        end
        if regexp:sub(pos, pos) ~= '}' then
            return fail("} expected", pos)
        end
        pos = pos + 1
    end
    return { min = min, max = max }, pos
end
-- Parses a char-class: "[" user-char-class "]" or "[^" user-char-class "]".
-- Returns (one-char expression, nextPos), or nil on failure.
function Parser:GetCharClass(pos)
    local regexp = self.regexp
    if regexp:sub(pos, pos) ~= '[' then
        return nil
    end
    pos = pos + 1

    local negated = regexp:sub(pos, pos) == '^'
    if negated then
        pos = pos + 1
    end

    local fnIsMatch, nextPos = self:GetUserCharClass(pos)
    if not fnIsMatch then return nil end
    if regexp:sub(nextPos, nextPos) ~= ']' then
        self.errMsg = "] expected"
        self.errPos = nextPos
        return nil
    end

    local fn = fnIsMatch
    if negated then
        fn = function(c) return not fnIsMatch(c) end
    end
    return new(ExpOneChar, fn), nextPos + 1
end
-- Parses the inside of a character class: one or more user-char-ranges.
-- Returns (predicate, nextPos), where predicate(byte) tells whether any of
-- the ranges accepts the byte; nil when the class is empty.
function Parser:GetUserCharClass(pos)
    local first, nextPos = self:GetUserCharRange(pos)
    if not first then
        self.errMsg = "empty class not allowed"
        self.errPos = pos
        return nil
    end

    local predicates = { first }
    pos = nextPos
    while true do
        local range, afterRange = self:GetUserCharRange(pos)
        if not range then break end
        predicates[#predicates + 1] = range
        pos = afterRange
    end

    local function matchesAny(c)
        for i = 1, #predicates do
            if predicates[i](c) then return true end
        end
        return false
    end
    return matchesAny, pos
end
-- Parses a user-char-range: "x", "x-y", or "x-" (x followed by a literal
-- '-' when no character follows the dash).
-- Returns (predicate over byte values, nextPos), or nil when no character
-- can be read at 'pos'.
function Parser:GetUserCharRange(pos)
    local lo, afterLo = self:GetUserChar(pos)
    if not lo then return nil end

    if self.regexp:sub(afterLo, afterLo) ~= '-' then
        return function(c) return c == lo end, afterLo
    end

    local afterDash = afterLo + 1
    local hi, afterHi = self:GetUserChar(afterDash)
    if not hi then
        -- nothing usable after the dash: the dash itself is a member
        local dash = ('-'):byte()
        return function(c) return lo == c or c == dash end, afterDash
    end
    return function(c) return lo <= c and c <= hi end, afterHi
end
-- Parses a user-char: a class escape sequence, or any single character
-- other than '\' and ']'.
-- Returns (byte value, nextPos), or nil.
function Parser:GetUserChar(pos)
    local value, nextPos = self:GetClassEscSeq(pos)
    if value then
        return value, nextPos
    end

    local c = self.regexp:sub(pos, pos)
    if c == '' or c == '\\' or c == ']' then
        return nil
    end
    return c:byte(), pos + 1
end
-- Parses a class-escape-sequence: a term-escape-sequence, or "\b", which
-- stands for byte 0x08 inside a character class.
-- Returns (byte value, nextPos), or nil.
function Parser:GetClassEscSeq(pos)
    local value, nextPos = self:GetTermEscSeq(pos)
    if value then
        return value, nextPos
    end

    if self.regexp:sub(pos, pos + 1) ~= "\\b" then
        return nil
    end
    return 0x08, pos + 2
end
-- Parses a non-terminal: ^ $ . \d \D \s \S \w \W \A \b \B \Z or "\" number.
-- Returns (expression, nextPos), or nil. An unrecognised backslash sequence
-- additionally records the error "invalid escape sequence".
function Parser:GetNonTerminal(pos)
    local regexp = self.regexp
    local c = regexp:sub(pos, pos)

    if c == '^' then
        return new(ExpLineBegin), pos + 1
    elseif c == '$' then
        return new(ExpLineEnd), pos + 1
    elseif c == '.' then
        local nl = ('\n'):byte()
        return new(ExpOneChar, function(ch) return ch ~= nl end), pos + 1
    elseif c ~= '\\' then
        return nil
    end

    local zero, nine, A, Z, a, z, ubar = ("09AZaz_"):byte(1, 7)
    local ff, nl, cr, ht, vt, ws = ("\f\n\r\t\v "):byte(1, 6)

    local function isDigit(ch)
        return zero <= ch and ch <= nine
    end
    local function isSpace(ch)
        -- check it in the order of likeliness
        return ch == ws or ch == nl or ch == ht
            or ch == cr or ch == vt or ch == ff
    end
    local function isWord(ch)
        return (a <= ch and ch <= z)
            or (A <= ch and ch <= Z)
            or (zero <= ch and ch <= nine)
            or ch == ubar
    end
    -- builds the complement of a byte predicate
    local function negate(fn)
        return function(ch) return not fn(ch) end
    end

    local after = pos + 2
    c = regexp:sub(pos + 1, pos + 1)
    if c == 'd' then
        return new(ExpOneChar, isDigit), after
    elseif c == 'D' then
        return new(ExpOneChar, negate(isDigit)), after
    elseif c == 's' then
        return new(ExpOneChar, isSpace), after
    elseif c == 'S' then
        return new(ExpOneChar, negate(isSpace)), after
    elseif c == 'w' then
        return new(ExpOneChar, isWord), after
    elseif c == 'W' then
        return new(ExpOneChar, negate(isWord)), after
    elseif c == 'A' then
        return new(ExpBegin), after
    elseif c == 'b' then
        return new(ExpBorder), after
    elseif c == 'B' then
        return new(ExpNegBorder), after
    elseif c == 'Z' then
        return new(ExpEnd), after
    end

    -- "\" number: a back-reference to a numbered group
    local value, nextPos = self:GetNumber(pos + 1)
    if value then
        return new(ExpReference, value), nextPos
    end

    self.errMsg = "invalid escape sequence"
    self.errPos = pos
    return nil
end
-- Parses a maximal run of literal characters beginning at `pos`.
-- Parser:GetTerminal is called repeatedly until it reports no terminal;
-- the byte values collected so far are then packed into one string.
-- Returns: an ExpTerminals node built from that string and the position
-- just past the run, or nil when no terminal starts at `pos`.
function Parser:GetTerminalStr(pos)
    local byte, after = self:GetTerminal(pos)
    if not byte then return nil end
    local bytes = {}
    repeat
        bytes[#bytes+1] = byte
        pos = after
        byte, after = self:GetTerminal(pos)
    until not byte
    -- self.regexp is a string, so `.char` resolves to string.char
    local literal = self.regexp.char(unpack(bytes))
    return new(ExpTerminals, literal), pos
end
-- Set of byte values that Parser:GetTerminal refuses to treat as a literal
-- character; hitting one of them ends a literal run.
-- '.' is included so that a dot in the middle of a literal run (as in "a.c")
-- is left for the non-terminal parser, which turns it into the
-- any-character-but-newline matcher, instead of being absorbed as a
-- literal '.'. A literal dot is still available as "\.".
local g_nonTerminal_Parser_GetTerminal = {}
for ch in ("^$.\\|[]{}()*+?"):gmatch(".") do
    g_nonTerminal_Parser_GetTerminal[ch:byte()] = true
end
-- Reads one literal character at `pos`.
-- An escape sequence accepted by Parser:GetTermEscSeq takes precedence;
-- otherwise the raw byte at `pos` is used unless it is listed in
-- g_nonTerminal_Parser_GetTerminal.
-- Returns: the byte value and the position after it, or nil at end of
-- input or on a byte that cannot be a literal.
function Parser:GetTerminal(pos)
    local escaped, afterEscape = self:GetTermEscSeq(pos)
    if escaped then
        return escaped, afterEscape
    end
    local byte = self.regexp:byte(pos, pos)
    if byte and not g_nonTerminal_Parser_GetTerminal[byte] then
        return byte, pos + 1
    end
    return nil
end
-- Maps the byte following a backslash to the byte value it denotes.
-- Letter escapes map to control codes; every ASCII punctuation character
-- escapes to itself.
local g_entity_Parser_GetTermEscSeq = {
    [('a'):byte()] = 0x07,
    [('f'):byte()] = 0x0c,
    [('n'):byte()] = 0x0a,
    [('r'):byte()] = 0x0d,
    [('t'):byte()] = 0x09,
    [('v'):byte()] = 0x0b,
}
for ch in ("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"):gmatch(".") do
    local code = ch:byte()
    g_entity_Parser_GetTermEscSeq[code] = code
end
-- Parses a backslash escape that denotes a single literal byte.
-- Recognised forms are the entries of g_entity_Parser_GetTermEscSeq and
-- "\x" followed by one or two hexadecimal digits.
-- Returns: the byte value and the position after the escape, or nil.
-- When `pos` holds a backslash but the escape is not recognised,
-- self.errMsg / self.errPos are set before returning nil.
function Parser:GetTermEscSeq(pos)
    local regexp = self.regexp
    if regexp:sub(pos, pos) ~= '\\' then return nil end
    -- c is nil when the backslash is the last character of the pattern
    local c = regexp:byte(pos + 1)
    local value = g_entity_Parser_GetTermEscSeq[c]
    if value then
        return value, pos + 2
    end
    if c == ('x'):byte() then
        -- declared local so that no global `nextPos` is created
        local hex, nextPos = self:GetHexNumber(pos + 2, 2)
        if not hex then
            self.errMsg = "hexadecimal number expected"
            self.errPos = pos + 2
            return nil
        end
        return hex, nextPos
    end
    self.errMsg = "invalid escape sequence"
    self.errPos = pos
    return nil
end
-- Parses a group name at `pos`: an identifier if one is present,
-- otherwise a decimal number.
-- Returns: the name (string or number) and the position after it,
-- or nil when neither form matches.
function Parser:GetName(pos)
    local ident, afterIdent = self:GetIdentifier(pos)
    if ident then
        return ident, afterIdent
    end
    local number, afterNumber = self:GetNumber(pos)
    if number then
        return number, afterNumber
    end
    return nil
end
-- Parses an identifier ([A-Za-z_][A-Za-z0-9_]*) starting at `pos`.
-- Returns: the identifier text and the position just past it, or nil when
-- `pos` is past the end or does not start with a letter or underscore.
function Parser:GetIdentifier(pos)
    local regexp = self.regexp
    local zero, nine, A, Z, a, z, bar = ('09AZaz_'):byte(1,7)

    -- true for bytes allowed as the first identifier character
    local function isHead(c)
        return (A <= c and c <= Z) or (a <= c and c <= z) or c == bar
    end

    local c = regexp:byte(pos)
    if not c or not isHead(c) then
        return nil
    end
    local nextPos = pos + 1
    while true do
        c = regexp:byte(nextPos)
        if not c then break end
        if not (isHead(c) or (zero <= c and c <= nine)) then break end
        nextPos = nextPos + 1
    end
    -- Slice the identifier directly out of the pattern; this avoids
    -- collecting bytes in a table and the argument limit of
    -- string.char(unpack(...)) on very long names.
    return regexp:sub(pos, nextPos - 1), nextPos
end
-- Parses an unsigned decimal number starting at `pos`.
-- Returns: its numeric value and the position after the last digit,
-- or nil when there is no digit at `pos`.
function Parser:GetNumber(pos)
    local regexp = self.regexp
    local zero, nine = ('09'):byte(1,2)
    local value, cursor = 0, pos
    local digit = regexp:byte(cursor)
    while digit and zero <= digit and digit <= nine do
        value = 10*value + (digit - zero)
        cursor = cursor + 1
        digit = regexp:byte(cursor)
    end
    if cursor == pos then return nil end
    return value, cursor
end
-- Parses a hexadecimal number (digits 0-9, A-F, a-f) starting at `pos`.
-- maxDigits: optional cap on the number of digits consumed; when it is
-- nil, digits are consumed until a non-hex byte or end of input.
-- Returns: the numeric value and the position after the last digit used,
-- or nil when no digit was consumed.
function Parser:GetHexNumber(pos, maxDigits)
    local regexp = self.regexp
    local zero, nine, A, F, a, f = ('09AFaf'):byte(1,6)

    -- value of one hex digit byte, or nil for anything else (incl. nil)
    local function nibble(c)
        if not c then return nil end
        if zero <= c and c <= nine then return c - zero end
        if A <= c and c <= F then return c - A + 10 end
        if a <= c and c <= f then return c - a + 10 end
        return nil
    end

    local value, count = 0, 0
    while not maxDigits or count < maxDigits do
        local n = nibble(regexp:byte(pos + count))
        if not n then break end
        value = 16*value + n
        count = count + 1
    end
    if count == 0 then return nil end
    return value, pos + count
end
-------------------------------------------------------------------------- | |
-- Match class (represents submatches) | |
local Match = class()
-- Stores the matched subject string and the table of group spans.
-- submatches is indexed by group id/name; Match:group and Match:span read
-- each entry as a {begin, end} pair with `end` one past the last character.
function Match:__init__(matchee, submatches)
    self.matchee, self.submatches = matchee, submatches
end
-- function Match:expand(format) end | |
-- This is defined later (to use Regex) | |
-- Returns the text of the requested groups, one return value per argument.
-- /(a)(b)(c)/ matching "abc", then
--   group(0,1,2,3) returns "abc", "a", "b", "c"
--   group() is equivalent to group(0)
-- A group that has no recorded span yields nil in its own position, so
-- the values of later groups are never shifted or dropped.
function Match:group(...)
    local n = select('#', ...)
    local args = {...}
    -- No id, or a lone nil id, selects the whole match; the lone-nil case
    -- keeps the previous behaviour and mirrors Match:span.
    if n == 0 or (n == 1 and args[1] == nil) then
        args, n = { 0 }, 1
    end
    local matchee = self.matchee
    local submatches = self.submatches
    local groups = {}
    for i = 1, n do
        local span = submatches[args[i]]
        if span then
            local b, e = unpack(span)
            groups[i] = matchee:sub(b, e-1)
        end
    end
    -- Explicit bounds: `groups` may contain holes, for which the length
    -- operator (and therefore a plain unpack) is unreliable.
    return unpack(groups, 1, n)
end
-- Returns the index pair (begin, end) recorded for group `groupId`
-- (default 0, the whole match), or nil when that group has no span.
-- Note that `end` is one past the last matched character.
function Match:span(groupId)
    local span = self.submatches[groupId or 0]
    if not span then
        return nil
    end
    return unpack(span)
end
-------------------------------------------------------------------------- | |
-- Regex class | |
-- | |
local Regex = class()
Regex.__regex__ = true -- type marker
-- Compiles `regexp` by running it through Parser.
-- regexp: pattern source; flags: passed to the parser and stored unchanged.
-- Raises an error of the form "regex at <pos>: <msg>" when the parser
-- returns no expression.
function Regex:__init__(regexp, flags)
    local parser = new(Parser, regexp, flags)
    local exp = parser:Expression()
    if exp == nil then
        -- locals, so a failed compile does not leave globals `msg`/`pos`
        local msg, pos = parser:Error()
        error(("regex at %d: %s"):format(pos, msg))
    end
    self.exp = exp
    self.flags = flags
end
-- Tries to match the expression with the match starting exactly at `pos`.
-- pos: 1-based start index (default 1). A negative value counts from the
-- end of `str` (-1 is the last character); anything that still falls
-- before the first character is clamped to 1.
-- Returns: a Match whose group 0 spans {pos, end}, or nil on failure.
function Regex:match(str, pos)
    if not pos then
        pos = 1
    elseif pos < 0 then
        pos = #str + pos + 1
    end
    if pos < 1 then
        pos = 1
    end
    local submatches = {}
    self.exp:SetMatchee(str, pos)
    -- locals, so matching does not create globals `isOK`/`nextPos`
    local isOK, nextPos = self.exp:NextMatch(submatches, self.flags)
    if not isOK then return nil end
    submatches[0] = {pos, nextPos}
    return new(Match, str, submatches)
end
-- Scans `str` from `pos` onwards and returns the first Match found, or nil.
-- pos: 1-based start index (default 1). A negative value counts from the
-- end of `str` (-1 is the last character); anything that still falls
-- before the first character is clamped to 1.
function Regex:search(str, pos)
    if not pos then
        pos = 1
    elseif pos < 0 then
        pos = #str + pos + 1
    end
    if pos < 1 then
        pos = 1
    end
    -- #str + 1 is tried as well so that patterns able to match the empty
    -- string (e.g. "$" or "x*") are found at the very end of the subject.
    for p = pos, #str + 1 do
        local match = self:match(str, p)
        if match then return match end
    end
    return nil
end
-- Replaces matches of this regex in `str`.
-- repl:  either a template string handed to Match:expand, or a callable
--        (function or value with a __call metamethod) that receives the
--        Match; its result goes through tostring, and nil/false becomes "".
-- count: optional maximum number of replacements; a value <= 0 returns
--        `str` unchanged.
-- Returns: the resulting string and the number of replacements made.
function Regex:sub(repl, str, count)
    if count and count <= 0 then return str, 0 end

    local callable = (type(repl) == "function")
    if not callable then
        local meta = getmetatable(repl)
        if meta and meta.__call then
            callable = true
        end
    end

    local pieces = {}
    local replaced = 0
    local cursor = 1
    for match in self:finditer(str) do
        local matchBeg, matchEnd = match:span()
        -- text between the previous match and this one is kept verbatim
        pieces[#pieces+1] = str:sub(cursor, matchBeg-1)
        local replacement
        if callable then
            replacement = repl(match)
            if replacement then
                replacement = tostring(replacement)
            else
                replacement = ""
            end
        else
            replacement = match:expand(repl)
        end
        pieces[#pieces+1] = replacement
        cursor = matchEnd
        replaced = replaced + 1
        if count and count <= replaced then break end
    end
    -- tail after the last replaced match
    pieces[#pieces+1] = str:sub(cursor, -1)
    return table.concat(pieces), replaced
end
-- Collects every Match produced by finditer(str, pos) into an array.
-- Note that the elements are Match objects, not strings.
function Regex:findall(str, pos)
    local found, n = {}, 0
    for match in self:finditer(str, pos) do
        n = n + 1
        found[n] = match
    end
    return found
end
-- Returns the triple for a generic `for` loop that yields successive
-- matches in `str`, starting the first search at `pos` (default 1).
-- The initial control value is a stand-in for a Match: it carries only
-- the subject string and a span() that reports (nil, start), which is all
-- Regex.__finditer reads from it.
function Regex:finditer(str, pos)
    local start = pos or 1
    local seed = { matchee = str }
    function seed.span()
        return nil, start
    end
    return self.__finditer, self, seed
end
-- Iterator step for finditer: searches for the next match after `match`.
-- When the previous match was empty (begin == end) the search resumes one
-- position further on, so the same empty match is not returned again.
function Regex:__finditer(match)
    local prevBeg, resumeAt = match:span(0)
    if prevBeg == resumeAt then
        resumeAt = resumeAt + 1
    end
    return self:search(match.matchee, resumeAt)
end
-- additional method of Match | |
-- Regex recognising the escapes that Match:expand substitutes.
-- Captures: 1 = \number, 2 = \g<number>, 3 = \g<name>,
--           4 = hex digits of \x##, 5 = single-letter escape or backslash.
local g_regex_Match_expand =
    new(Regex, [[\\(?:(\d+)|g<(?:(\d+)|([A-Za-z_][A-Za-z0-9_]*))>|[xX]([0-9a-fA-F]{1,2})|([abfnrtv\\]))]])
do
    -- single-character escapes and the characters they stand for
    local escapes = {
        a = '\a', b = '\b', f = '\f', n = '\n',
        r = '\r', t = '\t', v = '\v', ['\\'] = '\\',
    }

    -- Expands a replacement template against this match.
    -- \number, \g<number> and \g<name> are replaced by the corresponding
    -- groups; \a, \b, \f, \n, \r, \t, \v, \\ and \x## by the characters
    -- they denote.
    function Match:expand(format)
        local function replace(match)
            local index = match:group(1) or match:group(2)
            if index then
                return self:group(tonumber(index, 10))
            end
            local name = match:group(3)
            if name then
                return self:group(name)
            end
            local hex = match:group(4)
            if hex then
                -- matchee is a string, so `.char` resolves to string.char
                return match.matchee.char(tonumber("0x" .. hex))
            end
            return escapes[match:group(5)]
        end
        -- parentheses drop the replacement count returned by sub
        return (g_regex_Match_expand:sub(replace, format))
    end
end
-------------------------------------------------------------------------- | |
-- Exported object | |
local re = {}

-- Compiles `regexp` into a new Regex object.
function re.compile(regexp, flags)
    return new(Regex, regexp, flags)
end

-- The functions below are module-level shortcuts: each resolves `regexp`
-- through re.__getRegex and forwards to the Regex method of the same name.

-- Match anchored at `pos`; see Regex:match.
function re.match(regexp, str, pos, flags)
    local regex = re.__getRegex(regexp, flags)
    return regex:match(str, pos)
end

-- First match at or after `pos`; see Regex:search.
function re.search(regexp, str, pos, flags)
    local regex = re.__getRegex(regexp, flags)
    return regex:search(str, pos)
end

-- Substitution; returns the new string and the replacement count.
function re.sub(regexp, repl, str, count, flags)
    local regex = re.__getRegex(regexp, flags)
    return regex:sub(repl, str, count)
end

-- Array of all matches; see Regex:findall.
function re.findall(regexp, str, pos, flags)
    local regex = re.__getRegex(regexp, flags)
    return regex:findall(str, pos)
end

-- Iterator over all matches, for use in a generic `for`.
function re.finditer(regexp, str, pos, flags)
    local regex = re.__getRegex(regexp, flags)
    return regex:finditer(str, pos)
end
-- Resolves `regexp` to a Regex object: a value carrying the __regex__
-- marker is returned as is, anything else is compiled via re.__compile.
function re.__getRegex(regexp, flags)
    if not regexp.__regex__ then
        return re.__compile(regexp, flags)
    end
    return regexp
end
re.cacheSize = 100 -- this is the size of regex cache
-- Pattern sources, most recently used first.
local g_sourceCache_re = {}
-- Pattern source -> compiled Regex.
local g_objectCache_re = {}

-- Compiles `regexp`, reusing a cached Regex when the same source was
-- compiled before. The cache holds at most re.cacheSize entries and
-- discards the least recently used one first; a cacheSize below 1
-- disables caching.
-- NOTE: the cache is keyed by the pattern source only, so `flags` are
-- ignored on a cache hit.
function re.__compile(regexp, flags)
    local sourceCache = g_sourceCache_re
    local objectCache = g_objectCache_re
    local obj = objectCache[regexp]
    if obj then
        -- move the hit to the front of the recency list
        for i = 2, #sourceCache do
            if sourceCache[i] == regexp then
                table.remove(sourceCache, i)
                table.insert(sourceCache, 1, regexp)
                break
            end
        end
        return obj
    end
    obj = re.compile(regexp, flags)
    local cacheSize = re.cacheSize
    local size = #sourceCache
    -- Make room for the new entry. The `size > 0` guard prevents a
    -- nil-key table write (a runtime error) when cacheSize is <= 0.
    while size > 0 and cacheSize <= size do
        local name = table.remove(sourceCache)
        objectCache[name] = nil
        size = size - 1
    end
    if cacheSize >= 1 then
        table.insert(sourceCache, 1, regexp)
        objectCache[regexp] = obj
    end
    return obj
end
-- export re
return re
--THE END