Created
June 29, 2011 14:51
-
-
Save Yonaba/1053986 to your computer and use it in GitHub Desktop.
Burrows-Wheeler Transform
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Burrows-Wheeler Transform | |
-- Algorithm used for data compressing implemented through Lua | |
function encode(word) | |
local t={word} | |
local pos = 1 | |
local cat = string.len(word) | |
for i=cat-1,1,-1 do | |
local tmp = string.sub(word,1,i) | |
local comp = string.sub(word,i+1,cat) | |
local str = comp..tmp | |
table.insert(t,str) | |
end | |
table.sort(t) | |
local retStr = "" | |
for k in ipairs(t) do | |
if t[k]==word then pos = k end | |
retStr = retStr..string.sub(t[k],string.len(t[k]),string.len(t[k])) | |
end | |
return pos..retStr | |
end | |
function decode(encword) | |
local size = string.len(encword) | |
local offset = tonumber(string.sub(encword,1,1)) | |
local encword = string.sub(encword,2,size) | |
local t={} | |
for n=0,string.len(encword),1 do | |
for i=1,string.len(encword),1 do | |
if t[i] then t[i]=string.sub(encword,i,i)..t[i] | |
else t[i]="" | |
end | |
end | |
table.sort(t) | |
end | |
return (t[offset]) | |
end | |
--Sample Test | |
local str = "Roland" | |
for w in string.gfind(str, "%w+") do | |
local enc = encode(w) | |
local dec = decode(enc) | |
print("\nOriginal String: "..w) | |
print("B-W String: "..enc) | |
print("Decoded String: "..dec) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There is a minor bug here!
local offset = tonumber(string.sub(encword,1,1)) local encword = string.sub(encword,2,size)
The above lines assume that the offset is a single digit value.
If the offset has more that 1 digit, it breaks it.