Skip to content

Instantly share code, notes, and snippets.

@rgchris
Created February 1, 2021 04:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rgchris/28637475aea70881276a6a2da3d219fe to your computer and use it in GitHub Desktop.
Save rgchris/28637475aea70881276a6a2da3d219fe to your computer and use it in GitHub Desktop.
LZW (De)Compression for R3C
Rebol [
Title: "LZW (De)Compression Routine"
File: %lzw.reb
Author: "Christopher Ross-Gill"
Date: 31-Jan-2021
Version: 0.1.0
Type: module
Name: rgchris.lzw
Exports: [lzw]
Needs: [%bitwise.reb] ; https://gist.github.com/rgchris/aeaf8b7467906844d4275e8b9af09397
Notes: [
"Made with strong consideration of the LZW functions by Marco Antoniazzi @luce80"
http://www.rebol.org/view-script.r?script=compression.r
"Overview of LZW with handy pseudocode"
https://www.geeksforgeeks.org/lzw-lempel-ziv-welch-compression-technique/
"To Do: GIF can use tables smaller that 255 bytes for images with smaller palettes"
"To Do: Ensure the decoder can be hooked, e.g. map values to a colour palette in place"
]
]
lzw: make object! [
table: make object! [
size: 257
table: collect [
; skip code 0 to emulate 0-based table offsets
repeat i size [
keep i
]
]
reset: does [
size: 257
clear skip table size
]
value-of: func [
code [integer!]
][
pick table code
]
code-of: func [
value [binary!]
][
case [
tail? next value [
value/1
]
value: find table value [
index-of value
]
]
]
append: func [
run [binary!]
][
insert tail table run
size: me + 1
]
]
encoder: make object! [
encoding: _
width: _
offset: _
open: does [
encoding: copy #{}
width: 9
offset: 1
table/reset
emit 256
]
clear: does [
emit 256
width: 9
table/reset
]
close: does [
emit 257
width: _
offset: _
head encoding also [encoding: _]
]
append: func [
run [binary!]
][
table/append run
switch table/size [
0511 [width: 10]
1023 [width: 11]
2047 [width: 12]
4095 [clear]
]
]
emit: func [
value [integer!]
<local> shifted result
][
; shift 'value left to 'offset
shifted: signed32/shift value left 33 - width - offset
; overwrite dest binary with shifted number
result: change encoding encoding or+ signed32/encode shifted
; update offset and encoding position
offset: offset + width
while [offset > 8] [
offset: offset - 8
encoding: next encoding
]
head result
]
]
compress: func [
"LZW compresses a string series and returns it."
value [binary! text!] "Data to compress"
<local> run extended-run
][
if text? value [
value: to binary! value
]
encoder/open
run: copy #{}
for-each byte value [
extended-run: join copy run byte
either any [
empty? run
find table/table extended-run
][
run: extended-run
][
encoder/emit table/code-of run
encoder/append extended-run
run: join copy #{} byte
]
]
encoder/emit table/code-of run
encoder/close
]
compress-test-stream: func [
"Used to build test streams"
stream [block!]
][
assert [did parse stream [some integer! end]]
encoder/open
encoder/emit 256
for-each value stream [
encoder/emit value
]
encoder/emit 257
encoder/close
]
decoder: make object! [
width: _
offset: _
source: _
value: _
open: func [
encoding [binary!]
][
table/reset
source: :encoding
width: 9
offset: 1
value: make binary! length-of encoding
]
clear: does [
table/reset
width: 9
]
close: does [
width: _
offset: _
source: _
head value also [value: _]
]
emit: func [
run [integer! binary! block!]
][
join value: tail value either all [
integer? run
run > 257
][
table/value-of run
][
run
]
copy value
]
append: func [
run [binary! block!]
][
table/append join copy #{} run
switch table/size [
0510 [width: 10]
1022 [width: 11]
2046 [width: 12]
]
]
masks: make map! [
9 [
-8388608 2143289344 1071644672 535822336
267911168 133955584 66977792 33488896
]
10 [
-4194304 2145386496 1072693248 536346624
268173312 134086656 67043328 33521664
]
11 [
-2097152 2146435072 1073217536 536608768
268304384 134152192 67076096 33538048
]
12 [
-1048576 2146959360 1073479680 536739840
268369920 134184960 67092480 33546240
]
]
add-mask: func [
"Adds a mask for a given width" ; not used, for possible future use
width [integer!]
][
assert [width < 32]
if not find masks width [
put mask width collect [
repeat offset 8 [
; #80000000
keep signed32/shift (signed32/shift -2147483648 right width - 1) logical offset - 1
]
]
]
]
consume: func [
<local> code
][
if not empty? source [ ; otherwise infinite loop
; extract bits using mask and then shift them to get the right number
code: masks/:width/:offset and+ signed32/decode copy/part source 4
code: signed32/shift code logical 33 - width - offset
offset: offset + width
while [offset > 8] [
offset: offset - 8
source: next source
]
code
]
]
]
decompress: func [
"Decompress LZW compressed binary"
encoding [binary!] "Data to decompress"
<local> old code
][
assert [parse encoding [#{80}]]
decoder/open encoding
while [
code: decoder/consume
][
old: case [
code = 256 [
decoder/clear
_
]
code = 257 [
break
]
code < 256 [
code: decoder/emit code
if try :old [
decoder/append [old code]
]
code
]
blank? try :old [
fail "Invalid LZW Code Stream (need a leading 8-bit code point)"
]
code <= table/size [
code: decoder/emit code
decoder/append [old first code]
code
]
<else> [
code: decoder/emit [old first old]
decoder/append code
code
]
]
]
decoder/close
]
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment