Created
January 14, 2020 22:55
-
-
Save huderlem/3066695dc94132f3973478c97d7b68b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const ( | |
copyByte = iota | |
copyBackReference | |
copyPairs | |
) | |
type command struct { | |
action int | |
length int | |
distance int | |
val byte | |
vals []byte | |
} | |
type control struct { | |
size int | |
val byte | |
needsFlush bool | |
} | |
func (c *control) addBit(bit bool, out, buff []byte) ([]byte, []byte) { | |
return c.addBits([]bool{bit}, out, buff) | |
} | |
func (c *control) addBits(bits []bool, out, buff []byte) ([]byte, []byte) { | |
if len(bits) == 0 { | |
return out, buff | |
} | |
if c.needsFlush { | |
out = append(out, buff...) | |
buff = nil | |
c.needsFlush = false | |
} | |
for _, b := range bits { | |
if b { | |
c.val |= 1 << (7 - c.size) | |
} | |
c.size++ | |
if c.size == 8 { | |
c.needsFlush = true | |
out = append(out, c.val) | |
c.size = 0 | |
c.val = 0 | |
} | |
} | |
if c.size > 0 && c.needsFlush { | |
out = append(out, buff...) | |
buff = nil | |
c.needsFlush = false | |
} | |
return out, buff | |
} | |
func (c *control) handleCopyByte(val byte, out, buff []byte) ([]byte, []byte) { | |
out, buff = c.addBit(false, out, buff) | |
buff = append(buff, val) | |
return out, buff | |
} | |
var lengthCodes = map[int][]bool{ | |
0x2: []bool{true, false}, | |
0x3: []bool{true, true, false}, | |
0x4: []bool{false, false, false}, | |
0x5: []bool{false, true, false}, | |
0x6: []bool{false, false, true, false}, | |
0x7: []bool{false, false, true, true}, | |
0x8: []bool{false, true, true, false}, | |
} | |
var arbitraryLengthCode = []bool{true, true, true} | |
var copyPairsCode = []bool{false, true, true, true} | |
var longDistanceCodes = map[int][]bool{ | |
0x0: []bool{false}, | |
0x1: []bool{true, true, false}, | |
0x2: []bool{true, false, false, false}, | |
0x3: []bool{true, false, false, true}, | |
0x4: []bool{true, false, true, false, true}, | |
0x5: []bool{true, false, true, true, true}, | |
0x6: []bool{true, true, true, false, true}, | |
0x7: []bool{true, true, true, true, true}, | |
0x8: []bool{true, false, true, false, false, false}, | |
0x9: []bool{true, false, true, false, false, true}, | |
0xA: []bool{true, false, true, true, false, false}, | |
0xB: []bool{true, false, true, true, false, true}, | |
0xC: []bool{true, true, true, false, false, false}, | |
0xD: []bool{true, true, true, false, false, true}, | |
0xE: []bool{true, true, true, true, false, false}, | |
0xF: []bool{true, true, true, true, false, true}, | |
} | |
func (c *control) handleCopyBackReference(length, distance int, out, buff []byte) ([]byte, []byte) { | |
if length < 2 { | |
panic("length cannot be less than 2 when handling a back reference") | |
} | |
if distance > 0xFF && length == 2 { | |
panic("distance must be less than 256 if length is 2") | |
} | |
out, buff = c.addBit(true, out, buff) | |
if length <= 8 { | |
out, buff = c.addBits(lengthCodes[length], out, buff) | |
} else { | |
out, buff = c.addBits(arbitraryLengthCode, out, buff) | |
buff = append(buff, byte(length-8)) | |
} | |
if length != 2 { | |
hi := distance >> 8 | |
if hi > 0xF { | |
panic("high byte of distance must be less than 16") | |
} | |
out, buff = c.addBits(longDistanceCodes[hi], out, buff) | |
} | |
buff = append(buff, byte(distance&0xFF)) | |
return out, buff | |
} | |
func (c *control) handleCopyPairs(vals []byte, out, buff []byte) ([]byte, []byte) { | |
if len(vals) < 12 { | |
panic("length of pairs cannot be less than 12") | |
} | |
if len(vals)%4 != 0 { | |
panic("length of pairs must be evenly divisible by 4") | |
} | |
out, buff = c.addBit(true, out, buff) | |
out, buff = c.addBits(copyPairsCode, out, buff) | |
l := len(vals)/4 - 3 | |
numPairsBits := []bool{} | |
for i := 0; i < 4; i++ { | |
b := (l & (1 << (3 - i))) != 0 | |
numPairsBits = append(numPairsBits, b) | |
} | |
out, buff = c.addBits(numPairsBits, out, buff) | |
buff = append(buff, vals...) | |
return out, buff | |
} | |
func (c *control) done(out, buff []byte) []byte { | |
for i := 0; i < 4; i++ { | |
out, buff = c.addBit(true, out, buff) | |
} | |
out, buff = c.addBit(false, out, buff) | |
if c.size > 0 { | |
out = append(out, c.val) | |
} | |
out = append(out, buff...) | |
out = append(out, 0) | |
return out | |
} | |
const maxSingleCopies = 11 | |
const lookaheadSize = 0x100 | |
const windowSize = 0x1000 | |
func compress(data []byte) []byte { | |
unoptimizedCommands := []command{} | |
for i := 0; i < len(data); { | |
length, distance := findPrefix(data, i) | |
if length == 2 && i < len(data)-1 { | |
l2, _ := findPrefix(data, i+1) | |
if l2 > length { | |
length = -1 | |
} | |
} | |
if length > 0 { | |
unoptimizedCommands = append(unoptimizedCommands, command{ | |
action: copyBackReference, | |
length: length, | |
distance: distance - 1, | |
}) | |
i += length | |
} else { | |
unoptimizedCommands = append(unoptimizedCommands, command{ | |
action: copyByte, | |
val: data[i], | |
}) | |
i++ | |
} | |
} | |
// Condense long strings of copyByte commands into copyPairs commands. | |
commands := []command{} | |
for i := 0; i < len(unoptimizedCommands); { | |
count := 0 | |
for j := i; j < len(unoptimizedCommands); j++ { | |
if unoptimizedCommands[j].action == copyByte { | |
count++ | |
} else { | |
break | |
} | |
} | |
if count > 72 { | |
count = 72 | |
} | |
if count >= 12 { | |
skip := count % 4 | |
count -= skip | |
for j := 0; j < skip; j++ { | |
commands = append(commands, unoptimizedCommands[i]) | |
i++ | |
} | |
vals := []byte{} | |
for j := 0; j < count/4; j++ { | |
vals = append(vals, | |
unoptimizedCommands[i].val, | |
unoptimizedCommands[i+1].val, | |
unoptimizedCommands[i+2].val, | |
unoptimizedCommands[i+3].val, | |
) | |
i += 4 | |
} | |
commands = append(commands, command{ | |
action: copyPairs, | |
vals: vals, | |
}) | |
} else { | |
commands = append(commands, unoptimizedCommands[i]) | |
i++ | |
} | |
} | |
ctrl := &control{size: 2, val: 0} | |
var out []byte | |
var buff []byte | |
for _, c := range commands { | |
switch c.action { | |
case copyByte: | |
out, buff = ctrl.handleCopyByte(c.val, out, buff) | |
case copyBackReference: | |
out, buff = ctrl.handleCopyBackReference(c.length, c.distance, out, buff) | |
case copyPairs: | |
out, buff = ctrl.handleCopyPairs(c.vals, out, buff) | |
} | |
} | |
out = ctrl.done(out, buff) | |
return out | |
} | |
func byteSlicesEqual(a, b []byte) bool { | |
if len(a) != len(b) { | |
return false | |
} | |
for i, v := range a { | |
if v != b[i] { | |
return false | |
} | |
} | |
return true | |
} | |
func findPrefix(data []byte, pos int) (int, int) { | |
prefixEndPos := pos + lookaheadSize | |
if prefixEndPos > len(data) { | |
prefixEndPos = len(data) | |
} | |
matchLength := -1 | |
matchDistance := -1 | |
for j := pos + 2; j < prefixEndPos+1; j++ { | |
matched := false | |
end := pos - windowSize | |
if end < 0 { | |
end = 0 | |
} | |
prefix := data[pos:j] | |
for i := pos - 1; i >= end; i-- { | |
for k := pos; k > i; k-- { | |
var candidatePrefix []byte | |
if k == pos { | |
fullRepetitions := len(prefix) / (pos - i) | |
lastRepetitionLen := len(prefix) % (pos - i) | |
candidatePrefix = make([]byte, pos-i) | |
copy(candidatePrefix, data[i:pos]) | |
for n := 0; n < fullRepetitions-1; n++ { | |
candidatePrefix = append(candidatePrefix, data[i:pos]...) | |
} | |
candidatePrefix = append(candidatePrefix, data[i:i+lastRepetitionLen]...) | |
} else { | |
candidatePrefix = data[i:k] | |
} | |
if byteSlicesEqual(prefix, candidatePrefix) { | |
matched = true | |
if len(prefix) > matchLength { | |
if len(prefix) != 2 || pos-i < 256 { | |
matchLength = len(prefix) | |
matchDistance = pos - i | |
} | |
} | |
} | |
} | |
} | |
if !matched { | |
break | |
} | |
} | |
return matchLength, matchDistance | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment