Created
February 1, 2021 04:09
-
-
Save rgchris/28637475aea70881276a6a2da3d219fe to your computer and use it in GitHub Desktop.
LZW (De)Compression for R3C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Rebol [ | |
Title: "LZW (De)Compression Routine" | |
File: %lzw.reb | |
Author: "Christopher Ross-Gill" | |
Date: 31-Jan-2021 | |
Version: 0.1.0 | |
Type: module | |
Name: rgchris.lzw | |
Exports: [lzw] | |
Needs: [%bitwise.reb] ; https://gist.github.com/rgchris/aeaf8b7467906844d4275e8b9af09397 | |
Notes: [ | |
"Made with strong consideration of the LZW functions by Marco Antoniazzi @luce80" | |
http://www.rebol.org/view-script.r?script=compression.r | |
"Overview of LZW with handy pseudocode" | |
https://www.geeksforgeeks.org/lzw-lempel-ziv-welch-compression-technique/ | |
"To Do: GIF can use tables smaller that 255 bytes for images with smaller palettes" | |
"To Do: Ensure the decoder can be hooked, e.g. map values to a colour palette in place" | |
] | |
] | |
lzw: make object! [ | |
table: make object! [ | |
size: 257 | |
table: collect [ | |
; skip code 0 to emulate 0-based table offsets | |
repeat i size [ | |
keep i | |
] | |
] | |
reset: does [ | |
size: 257 | |
clear skip table size | |
] | |
value-of: func [ | |
code [integer!] | |
][ | |
pick table code | |
] | |
code-of: func [ | |
value [binary!] | |
][ | |
case [ | |
tail? next value [ | |
value/1 | |
] | |
value: find table value [ | |
index-of value | |
] | |
] | |
] | |
append: func [ | |
run [binary!] | |
][ | |
insert tail table run | |
size: me + 1 | |
] | |
] | |
encoder: make object! [ | |
encoding: _ | |
width: _ | |
offset: _ | |
open: does [ | |
encoding: copy #{} | |
width: 9 | |
offset: 1 | |
table/reset | |
emit 256 | |
] | |
clear: does [ | |
emit 256 | |
width: 9 | |
table/reset | |
] | |
close: does [ | |
emit 257 | |
width: _ | |
offset: _ | |
head encoding also [encoding: _] | |
] | |
append: func [ | |
run [binary!] | |
][ | |
table/append run | |
switch table/size [ | |
0511 [width: 10] | |
1023 [width: 11] | |
2047 [width: 12] | |
4095 [clear] | |
] | |
] | |
emit: func [ | |
value [integer!] | |
<local> shifted result | |
][ | |
; shift 'value left to 'offset | |
shifted: signed32/shift value left 33 - width - offset | |
; overwrite dest binary with shifted number | |
result: change encoding encoding or+ signed32/encode shifted | |
; update offset and encoding position | |
offset: offset + width | |
while [offset > 8] [ | |
offset: offset - 8 | |
encoding: next encoding | |
] | |
head result | |
] | |
] | |
compress: func [ | |
"LZW compresses a string series and returns it." | |
value [binary! text!] "Data to compress" | |
<local> run extended-run | |
][ | |
if text? value [ | |
value: to binary! value | |
] | |
encoder/open | |
run: copy #{} | |
for-each byte value [ | |
extended-run: join copy run byte | |
either any [ | |
empty? run | |
find table/table extended-run | |
][ | |
run: extended-run | |
][ | |
encoder/emit table/code-of run | |
encoder/append extended-run | |
run: join copy #{} byte | |
] | |
] | |
encoder/emit table/code-of run | |
encoder/close | |
] | |
compress-test-stream: func [ | |
"Used to build test streams" | |
stream [block!] | |
][ | |
assert [did parse stream [some integer! end]] | |
encoder/open | |
encoder/emit 256 | |
for-each value stream [ | |
encoder/emit value | |
] | |
encoder/emit 257 | |
encoder/close | |
] | |
decoder: make object! [ | |
width: _ | |
offset: _ | |
source: _ | |
value: _ | |
open: func [ | |
encoding [binary!] | |
][ | |
table/reset | |
source: :encoding | |
width: 9 | |
offset: 1 | |
value: make binary! length-of encoding | |
] | |
clear: does [ | |
table/reset | |
width: 9 | |
] | |
close: does [ | |
width: _ | |
offset: _ | |
source: _ | |
head value also [value: _] | |
] | |
emit: func [ | |
run [integer! binary! block!] | |
][ | |
join value: tail value either all [ | |
integer? run | |
run > 257 | |
][ | |
table/value-of run | |
][ | |
run | |
] | |
copy value | |
] | |
append: func [ | |
run [binary! block!] | |
][ | |
table/append join copy #{} run | |
switch table/size [ | |
0510 [width: 10] | |
1022 [width: 11] | |
2046 [width: 12] | |
] | |
] | |
masks: make map! [ | |
9 [ | |
-8388608 2143289344 1071644672 535822336 | |
267911168 133955584 66977792 33488896 | |
] | |
10 [ | |
-4194304 2145386496 1072693248 536346624 | |
268173312 134086656 67043328 33521664 | |
] | |
11 [ | |
-2097152 2146435072 1073217536 536608768 | |
268304384 134152192 67076096 33538048 | |
] | |
12 [ | |
-1048576 2146959360 1073479680 536739840 | |
268369920 134184960 67092480 33546240 | |
] | |
] | |
add-mask: func [ | |
"Adds a mask for a given width" ; not used, for possible future use | |
width [integer!] | |
][ | |
assert [width < 32] | |
if not find masks width [ | |
put mask width collect [ | |
repeat offset 8 [ | |
; #80000000 | |
keep signed32/shift (signed32/shift -2147483648 right width - 1) logical offset - 1 | |
] | |
] | |
] | |
] | |
consume: func [ | |
<local> code | |
][ | |
if not empty? source [ ; otherwise infinite loop | |
; extract bits using mask and then shift them to get the right number | |
code: masks/:width/:offset and+ signed32/decode copy/part source 4 | |
code: signed32/shift code logical 33 - width - offset | |
offset: offset + width | |
while [offset > 8] [ | |
offset: offset - 8 | |
source: next source | |
] | |
code | |
] | |
] | |
] | |
decompress: func [ | |
"Decompress LZW compressed binary" | |
encoding [binary!] "Data to decompress" | |
<local> old code | |
][ | |
assert [parse encoding [#{80}]] | |
decoder/open encoding | |
while [ | |
code: decoder/consume | |
][ | |
old: case [ | |
code = 256 [ | |
decoder/clear | |
_ | |
] | |
code = 257 [ | |
break | |
] | |
code < 256 [ | |
code: decoder/emit code | |
if try :old [ | |
decoder/append [old code] | |
] | |
code | |
] | |
blank? try :old [ | |
fail "Invalid LZW Code Stream (need a leading 8-bit code point)" | |
] | |
code <= table/size [ | |
code: decoder/emit code | |
decoder/append [old first code] | |
code | |
] | |
<else> [ | |
code: decoder/emit [old first old] | |
decoder/append code | |
code | |
] | |
] | |
] | |
decoder/close | |
] | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment