Skip to content

Instantly share code, notes, and snippets.

@josharian
Created May 16, 2017 19:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save josharian/e0bc6e238d4914a44289b44bc4ae3640 to your computer and use it in GitHub Desktop.
Save josharian/e0bc6e238d4914a44289b44bc4ae3640 to your computer and use it in GitHub Desktop.
extract for go issue 16122
/*
Copyright 2011-2013 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package p
import (
"bytes"
"errors"
"fmt"
"math/rand"
"testing"
"time"
)
// func main() {
// fmt.Printf("TestZRLT\n")
// TestCorrectness()
// TestSpeed()
// }
func TestCorrectness(t *testing.T) {
// fmt.Printf("Correctness test\n")
for ii := 0; ii < 20; ii++ {
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
// fmt.Printf("\nTest %v\n", ii)
var arr []int
if ii == 0 {
arr2 := [64]int{30, 0, 4, 0, 30, 17, 240, 0, 0, 0, 12, 2, 23, 17, 254, 0, 254, 24, 0, 24, 20, 0, 17, 0, 254, 0, 254, 32, 0, 0, 13, 32, 255, 31, 15, 25, 0, 29, 8, 24, 20, 0, 0, 0, 245, 253, 0, 32, 19, 0, 243, 0, 0, 0, 13, 244, 24, 4, 0, 7, 25, 2, 0, 15}
arr = arr2[0:len(arr2)]
} else {
arr = make([]int, 64)
for i := range arr {
val := rnd.Intn(100) - 16
if val >= 33 {
val = 0
}
arr[i] = val
}
}
size := len(arr)
input := make([]byte, size)
output := make([]byte, size*3/2)
reverse := make([]byte, size)
for i := range output {
output[i] = 0x55
}
for i := range arr {
if i == len(arr)/2 {
input[i] = 255
} else {
input[i] = byte(arr[i])
}
}
ZRLT, err := NewZRLT()
if err != nil {
t.Errorf("\nError: %v\n", err)
continue
}
// fmt.Printf("\nOriginal: ")
// for i := range input {
// if i%100 == 0 {
// fmt.Printf("\n")
// }
// fmt.Printf("%v ", input[i])
// }
// fmt.Printf("\nCoded: ")
_, dstIdx, err2 := ZRLT.Forward(input, output)
if err2 != nil {
t.Errorf("\nEncoding error: %v\n", err2)
continue
}
// for i := uint(0); i < dstIdx; i++ {
// if i%100 == 0 {
// fmt.Printf("\n")
// }
// fmt.Printf("%v ", output[i])
// }
// fmt.Printf(" (Compression ratio: %v%%)", dstIdx*100/srcIdx)
ZRLT, err2 = NewZRLT()
if err2 != nil {
t.Errorf("\nError: %v\n", err2)
continue
}
_, _, err2 = ZRLT.Inverse(output[0:dstIdx], reverse)
if err2 != nil {
t.Errorf("\nDecoding error: %v\n", err2)
continue
}
// fmt.Printf("\nDecoded: ")
ok := true
for i := range input {
// if i%100 == 0 {
// fmt.Printf("\n")
// }
// fmt.Printf("%v ", reverse[i])
if reverse[i] != input[i] {
ok = false
}
}
if !ok {
t.Fatalf("\nDifferent\n")
}
}
}
func BenchmarkSpeed(b *testing.B) {
for _, direction := range [...]string{"encode", "decode"} {
b.Run(direction, func(b *testing.B) {
b.StopTimer()
for jj := 0; jj < 3; jj++ {
input := make([]byte, 50000)
output := make([]byte, len(input)*2)
reverse := make([]byte, len(input))
// Generate random data with runs
n := 0
var compressed uint
var err error
for n < len(input) {
val := byte(rand.Intn(7))
input[n] = val
n++
run := rand.Intn(128)
run -= 100
for run > 0 && n < len(input) {
input[n] = val
n++
run--
}
}
if direction == "encode" {
b.StartTimer()
}
for ii := 0; ii < b.N; ii++ {
zrlt, _ := NewZRLT()
if _, compressed, err = zrlt.Forward(input, output); err != nil {
b.Fatalf("Encoding error: %v\n", err)
}
}
if direction == "encode" {
b.StopTimer()
}
if direction == "decode" {
b.StartTimer()
}
for ii := 0; ii < b.N; ii++ {
zrlt, _ := NewZRLT()
if _, _, err = zrlt.Inverse(output[0:compressed], reverse); err != nil {
b.Fatalf("Decoding error: %v\n", err)
}
}
if direction == "decode" {
b.StopTimer()
}
idx := -1
// Sanity check
for i := range input {
if input[i] != reverse[i] {
idx = i
break
}
}
if idx >= 0 {
b.Fatalf("Failure at index %v (%v <-> %v)\n", idx, input[idx], reverse[idx])
}
}
})
}
}
// Zero Length Encoding is a simple encoding algorithm by Wheeler
// closely related to Run Length Encoding. The main difference is
// that only runs of 0 values are processed. Also, the length is
// encoded in a different way (each digit in a different byte)
// This algorithm is well adapted to process post BWT/MTFT data
const (
ZRLT_MAX_RUN = int(1<<31) - 1
)
type ZRLT struct {
}
func NewZRLT() (*ZRLT, error) {
this := new(ZRLT)
return this, nil
}
func (this *ZRLT) Forward(src, dst []byte) (uint, uint, error) {
if src == nil {
return uint(0), uint(0), errors.New("Invalid null source buffer")
}
if dst == nil {
return uint(0), uint(0), errors.New("Invalid null destination buffer")
}
if SameByteSlices(src, dst, false) {
return 0, 0, errors.New("Input and output buffers cannot be equal")
}
if n := this.MaxEncodedLen(len(src)); len(dst) < n {
return 0, 0, fmt.Errorf("Output buffer is too small - size: %d, required %d", len(dst), n)
}
srcEnd, dstEnd := uint(len(src)), uint(len(dst))
dstEnd2 := dstEnd - 2
runLength := 1
srcIdx, dstIdx := uint(0), uint(0)
var err error
for srcIdx < srcEnd && dstIdx < dstEnd {
val := src[srcIdx]
if val == 0 {
runLength++
srcIdx++
if srcIdx < srcEnd && runLength < ZRLT_MAX_RUN {
continue
}
}
if runLength > 1 {
// Encode length
log2 := uint(1)
for runLength>>log2 > 1 {
log2++
}
if dstIdx >= dstEnd-log2 {
break
}
// Write every bit as a byte except the most significant one
for log2 > 0 {
log2--
dst[dstIdx] = byte((runLength >> log2) & 1)
dstIdx++
}
runLength = 1
continue
}
if val >= 0xFE {
if dstIdx >= dstEnd2 {
break
}
dst[dstIdx] = 0xFF
dstIdx++
dst[dstIdx] = val - 0xFE
dstIdx++
} else {
dst[dstIdx] = val + 1
dstIdx++
}
srcIdx++
}
if srcIdx != srcEnd || runLength != 1 {
err = errors.New("Output buffer is too small")
}
return srcIdx, dstIdx, err
}
func (this *ZRLT) Inverse(src, dst []byte) (uint, uint, error) {
if src == nil {
return uint(0), uint(0), errors.New("Invalid null source buffer")
}
if dst == nil {
return uint(0), uint(0), errors.New("Invalid null destination buffer")
}
if SameByteSlices(src, dst, false) {
return 0, 0, errors.New("Input and output buffers cannot be equal")
}
srcEnd, dstEnd := len(src), len(dst)
runLength := 1
srcIdx, dstIdx := 0, 0
var err error
for srcIdx < srcEnd && dstIdx < dstEnd {
if runLength > 1 {
runLength--
dst[dstIdx] = 0
dstIdx++
continue
}
val := src[srcIdx]
if val <= 1 {
// Generate the run length bit by bit (but force MSB)
runLength = 1
for val&1 == val {
runLength = (runLength << 1) | int(val)
srcIdx++
if srcIdx >= srcEnd {
break
}
val = src[srcIdx]
}
continue
}
// Regular data processing
if val == 0xFF {
srcIdx++
if srcIdx >= srcEnd {
break
}
dst[dstIdx] = 0xFE + src[srcIdx]
} else {
dst[dstIdx] = val - 1
}
dstIdx++
srcIdx++
}
// If runLength is not 1, add trailing 0s
end := dstIdx + runLength - 1
if end > dstEnd {
err = errors.New("Output buffer is too small")
} else {
for dstIdx < end {
dst[dstIdx] = 0
dstIdx++
}
if srcIdx < srcEnd {
err = errors.New("Output buffer is too small")
}
}
return uint(srcIdx), uint(dstIdx), err
}
// Required encoding output buffer size unknown
func (this ZRLT) MaxEncodedLen(srcLen int) int {
return srcLen
}
func SameByteSlices(slice1, slice2 []byte, deepCheck bool) bool {
if slice2 == nil {
return slice1 == nil
}
if slice1 == nil {
return false
}
if &slice1 == &slice2 {
return true
}
if len(slice1) != len(slice2) {
return false
}
if deepCheck == true {
return bytes.Equal(slice1, slice2)
} else {
return false
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment