Last active
December 31, 2022 23:33
-
-
Save howmanysmall/de9088b98179c38cbb59554e83daa1cc to your computer and use it in GitHub Desktop.
Fastest (?) Lzw on Luau
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local Memoize = require(script.Memoize) | |
local StringRep = require(script.StringRep) | |
--[=[ | |
A utility library for compressing strings with LZW. This was made as fast | |
as possible by using fun tricks like *lots* of memoization. | |
@class Lzw | |
]=] | |
local Lzw = {} | |
local CharacterDictionary = {} | |
local DictionaryLength = 0 | |
for Index = 32, 127 do | |
if Index ~= 34 and Index ~= 92 then | |
local Character = string.char(Index) | |
CharacterDictionary[Character] = DictionaryLength | |
CharacterDictionary[DictionaryLength] = Character | |
DictionaryLength += 1 | |
end | |
end | |
local EscapeMap = {} | |
for Index = 1, 34 do | |
Index = ({34, 92, 127})[Index - 31] or Index | |
local Character = string.char(Index) | |
local Escaped = string.char(Index + 31) | |
EscapeMap[Character] = Escaped | |
EscapeMap[Escaped] = Character | |
end | |
local function EscapeFunction(Character: string) | |
return "\127" .. EscapeMap[Character] | |
end | |
local function UnescapeFunction(Character: string) | |
return EscapeMap[Character] | |
end | |
local function Escape(String: string) | |
return (string.gsub(String, "[%c\"\\]", EscapeFunction)) | |
end | |
local function Unescape(String: string) | |
return (string.gsub(String, "\127(.)", UnescapeFunction)) | |
end | |
local ToBase93 = Memoize(function(Number: number) | |
local Value = "" | |
repeat | |
local Remainder = Number%93 | |
Value = CharacterDictionary[Remainder] .. Value | |
Number = (Number - Remainder)/93 | |
until Number == 0 | |
return Value | |
end) | |
local Pow93 = Memoize(function(Value: number) | |
return 93^Value | |
end) | |
local ToBase10 = Memoize(function(Value: string) | |
local Number = 0 | |
for Index = 1, #Value do | |
Number += Pow93[Index - 1]*CharacterDictionary[string.sub(Value, -Index, -Index)] | |
end | |
return Number | |
end) | |
local function ListKey(Key: string, Dictionary, Width, Spans, Span, Sequence) | |
local Value = ToBase93[Dictionary[Key]] | |
local ValueLength = #Value | |
local NewWidth = Width | |
local NewSpan = Span | |
if ValueLength > Width then | |
NewWidth, NewSpan, Spans[Width] = ValueLength, 0, Span | |
end | |
table.insert(Sequence, StringRep(" ", NewWidth - ValueLength) .. Value) | |
NewSpan += 1 | |
return NewWidth, NewSpan | |
end | |
--[=[ | |
Compresses the given text using Lzw. | |
@param Text string -- The text you are compressing. | |
@return string | |
]=] | |
function Lzw.Compress(Text: string) | |
local Dictionary = table.clone(CharacterDictionary) | |
local Key = "" | |
local Sequence = {} | |
local Size = #Dictionary | |
local Width = 1 | |
local Spans = {} | |
local Span = 0 | |
Text = Escape(Text) | |
for Index = 1, #Text do | |
local Character = string.sub(Text, Index, Index) | |
local New = Key .. Character | |
if Dictionary[New] then | |
Key = New | |
else | |
Width, Span = ListKey(Key, Dictionary, Width, Spans, Span, Sequence) | |
Key = Character | |
Size += 1 | |
Dictionary[New], Dictionary[Size] = Size, New | |
end | |
end | |
Width, Span = ListKey(Key, Dictionary, Width, Spans, Span, Sequence) | |
Spans[Width] = Span | |
return table.concat(Spans, ",") .. "|" .. table.concat(Sequence) | |
end | |
--[=[ | |
Decompresses the given text using Lzw. | |
@param Text string -- The text you are decompressing. | |
@return string | |
]=] | |
function Lzw.Decompress(Text: string) | |
local Dictionary = table.clone(CharacterDictionary) | |
local Sequence = {} | |
local Spans, Content = string.match(Text, "(.-)|(.*)") | |
local Groups = {} | |
local Start = 1 | |
local Length = 0 | |
for Span in string.gmatch(Spans, "%d+") do | |
Length += 1 | |
Groups[Length] = string.sub(Content, Start, Start + Span*Length - 1) | |
Start += Span*Length | |
end | |
local Previous | |
for Width, Group in ipairs(Groups) do | |
for Value in string.gmatch(Group, StringRep(".", Width)) do | |
local Entry = Dictionary[ToBase10[Value]] | |
if Previous then | |
if Entry then | |
table.insert(Sequence, Entry) | |
table.insert(Dictionary, Previous .. string.sub(Entry, 1, 1)) | |
else | |
Entry = Previous .. string.sub(Previous, 1, 1) | |
end | |
else | |
Sequence[1] = Entry | |
end | |
Previous = Entry | |
end | |
end | |
return Unescape(table.concat(Sequence)) | |
end | |
table.freeze(Lzw) | |
return Lzw |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export type MemoizeFunction<T, U> = (Index: T) -> U | |
local function Memoize<T, U>(Function: MemoizeFunction<T, U>) | |
local Metatable = {} | |
function Metatable:__index(Index) | |
local Value = Function(Index) | |
self[Index] = Value | |
return Value | |
end | |
function Metatable:__call(Index) | |
return self[Index] | |
end | |
return (setmetatable({}, Metatable) :: any) :: MemoizeFunction<T, U> & {[T]: U} | |
end | |
return Memoize |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local RepeatCache = {} | |
local function StringRep(ToRepeat: string, Amount: number): string | |
local StringCache = RepeatCache[ToRepeat] | |
if StringCache == nil then | |
StringCache = {} | |
RepeatCache[ToRepeat] = StringCache | |
end | |
local RepeatString = StringCache[Amount] | |
if RepeatString == nil then | |
RepeatString = string.rep(ToRepeat, Amount) | |
StringCache[Amount] = RepeatString | |
end | |
return RepeatString | |
end | |
return StringRep |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment