Last active
August 31, 2015 01:24
-
-
Save HyroVitalyProtago/db12d385fe541832d31c to your computer and use it in GitHub Desktop.
Tokenizer for Codea
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
just for the name of the gist project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Tokenizer demo: lexes a small markup snippet with the Tokenizer module.
function setup()
    required.path 'Tokenizer'
    local Tokenizer = required.import 'Tokenizer'

    -- Single-state grammar: whitespace is skipped; \commands, braces and
    -- bare words become tokens.
    local grammar = {
        token = {
            {"\t", Tokenizer.pass},
            {"\n", Tokenizer.pass},
            {" ", Tokenizer.pass},
            {"\\%a[%a%-%d]*", Tokenizer.arg("T_COMMAND")},
            {"{", Tokenizer.cons("T_LACCOL")},
            {"}", Tokenizer.cons("T_RACCOL")},
            {"[^%s{}]+", Tokenizer.arg("T_WORD")}
        }
    }

    local next_token = Tokenizer.tokenize(grammar, [[
\container-fluid {
\row {
\h1 { Title \small{with a little text!} }
\p { A little paragraph. }
}
}]])

    -- Drain the token stream into a list; the iterator returns nil at EOF.
    local tokens = {}
    for token in next_token do
        tokens[#tokens + 1] = token
    end

    table.print(tokens)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "id":"db12d385fe541832d31c",
  "description":"Tokenizer for Codea",
  "author":"HyroVitalyProtago",
  "codea":"2.3.1",
  "name":"Tokenizer",
  "dependencies":[],
  "version":"1.0.1"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Tokenizer module loader.
-- NOTE(review): this returns the factory function itself (no trailing call);
-- presumably the Codea "required" library invokes it on import — confirm.
return (function()
    local Tokenizer = {}

    --- Rule action: discard the matched text (used for whitespace).
    function Tokenizer.pass(arg) return nil end

    --- Rule action factory: always emit the constant tag `token`,
    -- ignoring the matched text.
    function Tokenizer.cons(token) return (function(arg) return token end) end

    --- Rule action factory: emit a {tag, matched-text} pair.
    function Tokenizer.arg(token) return (function(arg) return {token, arg} end) end

    --- Build a pull-style token iterator over `input`.
    -- `rules` maps rule-set names to ordered lists of {pattern, action}
    -- pairs; scanning starts from whichever entry next(rules) yields (the
    -- intended use is a single rule set). An action is either a function —
    -- called with the lexeme; returning nil skips it, anything else is the
    -- token — or a rule-set name (string) to switch the active rule set.
    -- The returned function yields the next token per call, returns nil at
    -- end of input, and raises an error when no pattern matches at the
    -- current position.
    function Tokenizer.tokenize(rules, input)
        local iterator = function(str)
            local ruleName, rule = next(rules)
            local i = 1
            local function f()
                if i > str:len() then return end
                for _, v in ipairs(rule) do
                    local pattern, action = unpack(v)
                    -- Anchored match: only accept a hit starting exactly at i.
                    local x, y = str:find(pattern, i)
                    if x == i then
                        i = y + 1
                        if type(action) == "function" then
                            local token = action(str:sub(x, y))
                            -- nil token means "skip"; tail-recurse for the next lexeme.
                            return (not token) and f() or token
                        else
                            -- String action: switch to the named rule set.
                            rule = rules[action]
                            return f()
                        end
                    end
                end
                error("No rules found for : (" .. i .. " => " .. str:sub(i, math.min(i + 3, str:len())) .. ")")
            end
            return f
        end
        return iterator(input) -- next_token
    end

    return Tokenizer
end)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.