const ( | |
copyByte = iota | |
copyBackReference | |
copyPairs | |
) | |
type command struct { | |
action int | |
length int | |
distance int | |
val byte | |
vals []byte | |
} | |
type control struct { | |
size int | |
val byte | |
needsFlush bool | |
} | |
func (c *control) addBit(bit bool, out, buff []byte) ([]byte, []byte) { | |
return c.addBits([]bool{bit}, out, buff) | |
} | |
func (c *control) addBits(bits []bool, out, buff []byte) ([]byte, []byte) { | |
if len(bits) == 0 { | |
return out, buff | |
} | |
if c.needsFlush { | |
out = append(out, buff...) | |
buff = nil | |
c.needsFlush = false | |
} | |
for _, b := range bits { | |
if b { | |
c.val |= 1 << (7 - c.size) | |
} | |
c.size++ | |
if c.size == 8 { | |
c.needsFlush = true | |
out = append(out, c.val) | |
c.size = 0 | |
c.val = 0 | |
} | |
} | |
if c.size > 0 && c.needsFlush { | |
out = append(out, buff...) | |
buff = nil | |
c.needsFlush = false | |
} | |
return out, buff | |
} | |
func (c *control) handleCopyByte(val byte, out, buff []byte) ([]byte, []byte) { | |
out, buff = c.addBit(false, out, buff) | |
buff = append(buff, val) | |
return out, buff | |
} | |
var lengthCodes = map[int][]bool{ | |
0x2: []bool{true, false}, | |
0x3: []bool{true, true, false}, | |
0x4: []bool{false, false, false}, | |
0x5: []bool{false, true, false}, | |
0x6: []bool{false, false, true, false}, | |
0x7: []bool{false, false, true, true}, | |
0x8: []bool{false, true, true, false}, | |
} | |
var arbitraryLengthCode = []bool{true, true, true} | |
var copyPairsCode = []bool{false, true, true, true} | |
var longDistanceCodes = map[int][]bool{ | |
0x0: []bool{false}, | |
0x1: []bool{true, true, false}, | |
0x2: []bool{true, false, false, false}, | |
0x3: []bool{true, false, false, true}, | |
0x4: []bool{true, false, true, false, true}, | |
0x5: []bool{true, false, true, true, true}, | |
0x6: []bool{true, true, true, false, true}, | |
0x7: []bool{true, true, true, true, true}, | |
0x8: []bool{true, false, true, false, false, false}, | |
0x9: []bool{true, false, true, false, false, true}, | |
0xA: []bool{true, false, true, true, false, false}, | |
0xB: []bool{true, false, true, true, false, true}, | |
0xC: []bool{true, true, true, false, false, false}, | |
0xD: []bool{true, true, true, false, false, true}, | |
0xE: []bool{true, true, true, true, false, false}, | |
0xF: []bool{true, true, true, true, false, true}, | |
} | |
func (c *control) handleCopyBackReference(length, distance int, out, buff []byte) ([]byte, []byte) { | |
if length < 2 { | |
panic("length cannot be less than 2 when handling a back reference") | |
} | |
if distance > 0xFF && length == 2 { | |
panic("distance must be less than 256 if length is 2") | |
} | |
out, buff = c.addBit(true, out, buff) | |
if length <= 8 { | |
out, buff = c.addBits(lengthCodes[length], out, buff) | |
} else { | |
out, buff = c.addBits(arbitraryLengthCode, out, buff) | |
buff = append(buff, byte(length-8)) | |
} | |
if length != 2 { | |
hi := distance >> 8 | |
if hi > 0xF { | |
panic("high byte of distance must be less than 16") | |
} | |
out, buff = c.addBits(longDistanceCodes[hi], out, buff) | |
} | |
buff = append(buff, byte(distance&0xFF)) | |
return out, buff | |
} | |
func (c *control) handleCopyPairs(vals []byte, out, buff []byte) ([]byte, []byte) { | |
if len(vals) < 12 { | |
panic("length of pairs cannot be less than 12") | |
} | |
if len(vals)%4 != 0 { | |
panic("length of pairs must be evenly divisible by 4") | |
} | |
out, buff = c.addBit(true, out, buff) | |
out, buff = c.addBits(copyPairsCode, out, buff) | |
l := len(vals)/4 - 3 | |
numPairsBits := []bool{} | |
for i := 0; i < 4; i++ { | |
b := (l & (1 << (3 - i))) != 0 | |
numPairsBits = append(numPairsBits, b) | |
} | |
out, buff = c.addBits(numPairsBits, out, buff) | |
buff = append(buff, vals...) | |
return out, buff | |
} | |
func (c *control) done(out, buff []byte) []byte { | |
for i := 0; i < 4; i++ { | |
out, buff = c.addBit(true, out, buff) | |
} | |
out, buff = c.addBit(false, out, buff) | |
if c.size > 0 { | |
out = append(out, c.val) | |
} | |
out = append(out, buff...) | |
out = append(out, 0) | |
return out | |
} | |
const maxSingleCopies = 11 | |
const lookaheadSize = 0x100 | |
const windowSize = 0x1000 | |
func compress(data []byte) []byte { | |
unoptimizedCommands := []command{} | |
for i := 0; i < len(data); { | |
length, distance := findPrefix(data, i) | |
if length == 2 && i < len(data)-1 { | |
l2, _ := findPrefix(data, i+1) | |
if l2 > length { | |
length = -1 | |
} | |
} | |
if length > 0 { | |
unoptimizedCommands = append(unoptimizedCommands, command{ | |
action: copyBackReference, | |
length: length, | |
distance: distance - 1, | |
}) | |
i += length | |
} else { | |
unoptimizedCommands = append(unoptimizedCommands, command{ | |
action: copyByte, | |
val: data[i], | |
}) | |
i++ | |
} | |
} | |
// Condense long strings of copyByte commands into copyPairs commands. | |
commands := []command{} | |
for i := 0; i < len(unoptimizedCommands); { | |
count := 0 | |
for j := i; j < len(unoptimizedCommands); j++ { | |
if unoptimizedCommands[j].action == copyByte { | |
count++ | |
} else { | |
break | |
} | |
} | |
if count > 72 { | |
count = 72 | |
} | |
if count >= 12 { | |
skip := count % 4 | |
count -= skip | |
for j := 0; j < skip; j++ { | |
commands = append(commands, unoptimizedCommands[i]) | |
i++ | |
} | |
vals := []byte{} | |
for j := 0; j < count/4; j++ { | |
vals = append(vals, | |
unoptimizedCommands[i].val, | |
unoptimizedCommands[i+1].val, | |
unoptimizedCommands[i+2].val, | |
unoptimizedCommands[i+3].val, | |
) | |
i += 4 | |
} | |
commands = append(commands, command{ | |
action: copyPairs, | |
vals: vals, | |
}) | |
} else { | |
commands = append(commands, unoptimizedCommands[i]) | |
i++ | |
} | |
} | |
ctrl := &control{size: 2, val: 0} | |
var out []byte | |
var buff []byte | |
for _, c := range commands { | |
switch c.action { | |
case copyByte: | |
out, buff = ctrl.handleCopyByte(c.val, out, buff) | |
case copyBackReference: | |
out, buff = ctrl.handleCopyBackReference(c.length, c.distance, out, buff) | |
case copyPairs: | |
out, buff = ctrl.handleCopyPairs(c.vals, out, buff) | |
} | |
} | |
out = ctrl.done(out, buff) | |
return out | |
} | |
func byteSlicesEqual(a, b []byte) bool { | |
if len(a) != len(b) { | |
return false | |
} | |
for i, v := range a { | |
if v != b[i] { | |
return false | |
} | |
} | |
return true | |
} | |
func findPrefix(data []byte, pos int) (int, int) { | |
prefixEndPos := pos + lookaheadSize | |
if prefixEndPos > len(data) { | |
prefixEndPos = len(data) | |
} | |
matchLength := -1 | |
matchDistance := -1 | |
for j := pos + 2; j < prefixEndPos+1; j++ { | |
matched := false | |
end := pos - windowSize | |
if end < 0 { | |
end = 0 | |
} | |
prefix := data[pos:j] | |
for i := pos - 1; i >= end; i-- { | |
for k := pos; k > i; k-- { | |
var candidatePrefix []byte | |
if k == pos { | |
fullRepetitions := len(prefix) / (pos - i) | |
lastRepetitionLen := len(prefix) % (pos - i) | |
candidatePrefix = make([]byte, pos-i) | |
copy(candidatePrefix, data[i:pos]) | |
for n := 0; n < fullRepetitions-1; n++ { | |
candidatePrefix = append(candidatePrefix, data[i:pos]...) | |
} | |
candidatePrefix = append(candidatePrefix, data[i:i+lastRepetitionLen]...) | |
} else { | |
candidatePrefix = data[i:k] | |
} | |
if byteSlicesEqual(prefix, candidatePrefix) { | |
matched = true | |
if len(prefix) > matchLength { | |
if len(prefix) != 2 || pos-i < 256 { | |
matchLength = len(prefix) | |
matchDistance = pos - i | |
} | |
} | |
} | |
} | |
} | |
if !matched { | |
break | |
} | |
} | |
return matchLength, matchDistance | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment