Skip to content

Instantly share code, notes, and snippets.

@huderlem
Created January 14, 2020 22:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huderlem/3066695dc94132f3973478c97d7b68b1 to your computer and use it in GitHub Desktop.
Save huderlem/3066695dc94132f3973478c97d7b68b1 to your computer and use it in GitHub Desktop.
const (
copyByte = iota
copyBackReference
copyPairs
)
type command struct {
action int
length int
distance int
val byte
vals []byte
}
type control struct {
size int
val byte
needsFlush bool
}
func (c *control) addBit(bit bool, out, buff []byte) ([]byte, []byte) {
return c.addBits([]bool{bit}, out, buff)
}
func (c *control) addBits(bits []bool, out, buff []byte) ([]byte, []byte) {
if len(bits) == 0 {
return out, buff
}
if c.needsFlush {
out = append(out, buff...)
buff = nil
c.needsFlush = false
}
for _, b := range bits {
if b {
c.val |= 1 << (7 - c.size)
}
c.size++
if c.size == 8 {
c.needsFlush = true
out = append(out, c.val)
c.size = 0
c.val = 0
}
}
if c.size > 0 && c.needsFlush {
out = append(out, buff...)
buff = nil
c.needsFlush = false
}
return out, buff
}
func (c *control) handleCopyByte(val byte, out, buff []byte) ([]byte, []byte) {
out, buff = c.addBit(false, out, buff)
buff = append(buff, val)
return out, buff
}
var lengthCodes = map[int][]bool{
0x2: []bool{true, false},
0x3: []bool{true, true, false},
0x4: []bool{false, false, false},
0x5: []bool{false, true, false},
0x6: []bool{false, false, true, false},
0x7: []bool{false, false, true, true},
0x8: []bool{false, true, true, false},
}
var arbitraryLengthCode = []bool{true, true, true}
var copyPairsCode = []bool{false, true, true, true}
var longDistanceCodes = map[int][]bool{
0x0: []bool{false},
0x1: []bool{true, true, false},
0x2: []bool{true, false, false, false},
0x3: []bool{true, false, false, true},
0x4: []bool{true, false, true, false, true},
0x5: []bool{true, false, true, true, true},
0x6: []bool{true, true, true, false, true},
0x7: []bool{true, true, true, true, true},
0x8: []bool{true, false, true, false, false, false},
0x9: []bool{true, false, true, false, false, true},
0xA: []bool{true, false, true, true, false, false},
0xB: []bool{true, false, true, true, false, true},
0xC: []bool{true, true, true, false, false, false},
0xD: []bool{true, true, true, false, false, true},
0xE: []bool{true, true, true, true, false, false},
0xF: []bool{true, true, true, true, false, true},
}
func (c *control) handleCopyBackReference(length, distance int, out, buff []byte) ([]byte, []byte) {
if length < 2 {
panic("length cannot be less than 2 when handling a back reference")
}
if distance > 0xFF && length == 2 {
panic("distance must be less than 256 if length is 2")
}
out, buff = c.addBit(true, out, buff)
if length <= 8 {
out, buff = c.addBits(lengthCodes[length], out, buff)
} else {
out, buff = c.addBits(arbitraryLengthCode, out, buff)
buff = append(buff, byte(length-8))
}
if length != 2 {
hi := distance >> 8
if hi > 0xF {
panic("high byte of distance must be less than 16")
}
out, buff = c.addBits(longDistanceCodes[hi], out, buff)
}
buff = append(buff, byte(distance&0xFF))
return out, buff
}
func (c *control) handleCopyPairs(vals []byte, out, buff []byte) ([]byte, []byte) {
if len(vals) < 12 {
panic("length of pairs cannot be less than 12")
}
if len(vals)%4 != 0 {
panic("length of pairs must be evenly divisible by 4")
}
out, buff = c.addBit(true, out, buff)
out, buff = c.addBits(copyPairsCode, out, buff)
l := len(vals)/4 - 3
numPairsBits := []bool{}
for i := 0; i < 4; i++ {
b := (l & (1 << (3 - i))) != 0
numPairsBits = append(numPairsBits, b)
}
out, buff = c.addBits(numPairsBits, out, buff)
buff = append(buff, vals...)
return out, buff
}
func (c *control) done(out, buff []byte) []byte {
for i := 0; i < 4; i++ {
out, buff = c.addBit(true, out, buff)
}
out, buff = c.addBit(false, out, buff)
if c.size > 0 {
out = append(out, c.val)
}
out = append(out, buff...)
out = append(out, 0)
return out
}
const maxSingleCopies = 11
const lookaheadSize = 0x100
const windowSize = 0x1000
func compress(data []byte) []byte {
unoptimizedCommands := []command{}
for i := 0; i < len(data); {
length, distance := findPrefix(data, i)
if length == 2 && i < len(data)-1 {
l2, _ := findPrefix(data, i+1)
if l2 > length {
length = -1
}
}
if length > 0 {
unoptimizedCommands = append(unoptimizedCommands, command{
action: copyBackReference,
length: length,
distance: distance - 1,
})
i += length
} else {
unoptimizedCommands = append(unoptimizedCommands, command{
action: copyByte,
val: data[i],
})
i++
}
}
// Condense long strings of copyByte commands into copyPairs commands.
commands := []command{}
for i := 0; i < len(unoptimizedCommands); {
count := 0
for j := i; j < len(unoptimizedCommands); j++ {
if unoptimizedCommands[j].action == copyByte {
count++
} else {
break
}
}
if count > 72 {
count = 72
}
if count >= 12 {
skip := count % 4
count -= skip
for j := 0; j < skip; j++ {
commands = append(commands, unoptimizedCommands[i])
i++
}
vals := []byte{}
for j := 0; j < count/4; j++ {
vals = append(vals,
unoptimizedCommands[i].val,
unoptimizedCommands[i+1].val,
unoptimizedCommands[i+2].val,
unoptimizedCommands[i+3].val,
)
i += 4
}
commands = append(commands, command{
action: copyPairs,
vals: vals,
})
} else {
commands = append(commands, unoptimizedCommands[i])
i++
}
}
ctrl := &control{size: 2, val: 0}
var out []byte
var buff []byte
for _, c := range commands {
switch c.action {
case copyByte:
out, buff = ctrl.handleCopyByte(c.val, out, buff)
case copyBackReference:
out, buff = ctrl.handleCopyBackReference(c.length, c.distance, out, buff)
case copyPairs:
out, buff = ctrl.handleCopyPairs(c.vals, out, buff)
}
}
out = ctrl.done(out, buff)
return out
}
func byteSlicesEqual(a, b []byte) bool {
if len(a) != len(b) {
return false
}
for i, v := range a {
if v != b[i] {
return false
}
}
return true
}
func findPrefix(data []byte, pos int) (int, int) {
prefixEndPos := pos + lookaheadSize
if prefixEndPos > len(data) {
prefixEndPos = len(data)
}
matchLength := -1
matchDistance := -1
for j := pos + 2; j < prefixEndPos+1; j++ {
matched := false
end := pos - windowSize
if end < 0 {
end = 0
}
prefix := data[pos:j]
for i := pos - 1; i >= end; i-- {
for k := pos; k > i; k-- {
var candidatePrefix []byte
if k == pos {
fullRepetitions := len(prefix) / (pos - i)
lastRepetitionLen := len(prefix) % (pos - i)
candidatePrefix = make([]byte, pos-i)
copy(candidatePrefix, data[i:pos])
for n := 0; n < fullRepetitions-1; n++ {
candidatePrefix = append(candidatePrefix, data[i:pos]...)
}
candidatePrefix = append(candidatePrefix, data[i:i+lastRepetitionLen]...)
} else {
candidatePrefix = data[i:k]
}
if byteSlicesEqual(prefix, candidatePrefix) {
matched = true
if len(prefix) > matchLength {
if len(prefix) != 2 || pos-i < 256 {
matchLength = len(prefix)
matchDistance = pos - i
}
}
}
}
}
if !matched {
break
}
}
return matchLength, matchDistance
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment