Created
October 6, 2018 01:37
-
-
Save rodel77/0d6ac15dfe57edcccfa18788c4e84953 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Lua pattern for one run of tag text: letters, digits, '#', '=', '_' and '-'.
-- The '-' is the last character of the set, so it is a literal hyphen rather
-- than a range operator. Note that '=' is part of the set, so a single run can
-- span a whole `name=value` pair.
local TAG_MATCH = "[%a%d#=_-]+"
--- Convert one raw tag string into a tag descriptor table.
--
-- Closing tags (text starting with "</") yield `{ tag = name, mode = 0 }`.
-- Opening tags yield `{ tag = name, mode = 1, attribute = value }`, where
-- `name` and `value` are the TAG_MATCH runs on either side of an '='.
-- An opening tag that carries no `name=value` pair (e.g. "<b>") keeps its
-- name: the first TAG_MATCH run found in the tag becomes `tag`, and
-- `attribute` is nil.
--
-- @param tag string  raw tag text, e.g. "<color=#FF0000>" or "</color>"
-- @return table  descriptor with fields `tag`, `mode` and, for opening tags
--   that have one, `attribute`; `tag` is nil only if the text contains no
--   TAG_MATCH run at all
function tagize(tag)
  if tag:sub(1, 2) == "</" then
    return {
      tag = tag:match(TAG_MATCH),
      mode = 0,
    }
  end

  local name, attribute = tag:match("(" .. TAG_MATCH .. ")=(" .. TAG_MATCH .. ")")
  if name == nil then
    -- No name=value pair present: fall back to the bare run so that tags
    -- such as "<b>" do not end up with a nil name.
    name = tag:match(TAG_MATCH)
  end

  return {
    tag = name,
    mode = 1,
    attribute = attribute,
  }
end
--- Split a marked-up string into plain-text pieces and tag descriptors.
--
-- A tag is '<', any number of '/', one TAG_MATCH run, then '>'. Text between
-- tags is kept verbatim (empty pieces are dropped), and each tag is converted
-- with `tagize`.
--
-- @param str string  the text to tokenize
-- @return string|table  `str` itself when it contains no tag; otherwise an
--   array, in source order, of plain strings and the values returned by
--   `tagize`
function tokenize(str)
  -- Loop-invariant, so build it once instead of on every iteration.
  local tag_pattern = "<[/]*" .. TAG_MATCH .. ">"
  local result = {}
  -- Index of the first character not yet consumed. Scanning with a cursor
  -- avoids copying the remainder of the string after every tag.
  local cursor = 1

  while true do
    local tag_start, tag_end = str:find(tag_pattern, cursor)
    if tag_start == nil then
      break
    end

    -- Plain text between the previous tag (or the start) and this tag.
    if tag_start > cursor then
      result[#result + 1] = str:sub(cursor, tag_start - 1)
    end

    result[#result + 1] = tagize(str:sub(tag_start, tag_end))
    cursor = tag_end + 1
  end

  -- Nothing was collected: hand the input back as-is.
  if #result == 0 then
    return str
  end

  -- Trailing plain text after the last tag, if any.
  if cursor <= #str then
    result[#result + 1] = str:sub(cursor)
  end
  return result
end
-- Example: tokenizing a piece of MarkOrc markup.
tokens = tokenize("This text is <color=#FF0000>red</color> and rad lovely")
-- `tokens` is now:
--[[
{
  "This text is ",
  { attribute = "#FF0000", mode = 1, tag = "color" },
  "red",
  { mode = 0, tag = "color" },
  " and rad lovely"
}
]]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment