Created
May 16, 2017 19:15
-
-
Save josharian/e0bc6e238d4914a44289b44bc4ae3640 to your computer and use it in GitHub Desktop.
extract for go issue 16122
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Copyright 2011-2013 Frederic Langlet | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
you may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. | |
*/ | |
package p | |
import ( | |
"bytes" | |
"errors" | |
"fmt" | |
"math/rand" | |
"testing" | |
"time" | |
) | |
// func main() { | |
// fmt.Printf("TestZRLT\n") | |
// TestCorrectness() | |
// TestSpeed() | |
// } | |
func TestCorrectness(t *testing.T) { | |
// fmt.Printf("Correctness test\n") | |
for ii := 0; ii < 20; ii++ { | |
rnd := rand.New(rand.NewSource(time.Now().UnixNano())) | |
// fmt.Printf("\nTest %v\n", ii) | |
var arr []int | |
if ii == 0 { | |
arr2 := [64]int{30, 0, 4, 0, 30, 17, 240, 0, 0, 0, 12, 2, 23, 17, 254, 0, 254, 24, 0, 24, 20, 0, 17, 0, 254, 0, 254, 32, 0, 0, 13, 32, 255, 31, 15, 25, 0, 29, 8, 24, 20, 0, 0, 0, 245, 253, 0, 32, 19, 0, 243, 0, 0, 0, 13, 244, 24, 4, 0, 7, 25, 2, 0, 15} | |
arr = arr2[0:len(arr2)] | |
} else { | |
arr = make([]int, 64) | |
for i := range arr { | |
val := rnd.Intn(100) - 16 | |
if val >= 33 { | |
val = 0 | |
} | |
arr[i] = val | |
} | |
} | |
size := len(arr) | |
input := make([]byte, size) | |
output := make([]byte, size*3/2) | |
reverse := make([]byte, size) | |
for i := range output { | |
output[i] = 0x55 | |
} | |
for i := range arr { | |
if i == len(arr)/2 { | |
input[i] = 255 | |
} else { | |
input[i] = byte(arr[i]) | |
} | |
} | |
ZRLT, err := NewZRLT() | |
if err != nil { | |
t.Errorf("\nError: %v\n", err) | |
continue | |
} | |
// fmt.Printf("\nOriginal: ") | |
// for i := range input { | |
// if i%100 == 0 { | |
// fmt.Printf("\n") | |
// } | |
// fmt.Printf("%v ", input[i]) | |
// } | |
// fmt.Printf("\nCoded: ") | |
_, dstIdx, err2 := ZRLT.Forward(input, output) | |
if err2 != nil { | |
t.Errorf("\nEncoding error: %v\n", err2) | |
continue | |
} | |
// for i := uint(0); i < dstIdx; i++ { | |
// if i%100 == 0 { | |
// fmt.Printf("\n") | |
// } | |
// fmt.Printf("%v ", output[i]) | |
// } | |
// fmt.Printf(" (Compression ratio: %v%%)", dstIdx*100/srcIdx) | |
ZRLT, err2 = NewZRLT() | |
if err2 != nil { | |
t.Errorf("\nError: %v\n", err2) | |
continue | |
} | |
_, _, err2 = ZRLT.Inverse(output[0:dstIdx], reverse) | |
if err2 != nil { | |
t.Errorf("\nDecoding error: %v\n", err2) | |
continue | |
} | |
// fmt.Printf("\nDecoded: ") | |
ok := true | |
for i := range input { | |
// if i%100 == 0 { | |
// fmt.Printf("\n") | |
// } | |
// fmt.Printf("%v ", reverse[i]) | |
if reverse[i] != input[i] { | |
ok = false | |
} | |
} | |
if !ok { | |
t.Fatalf("\nDifferent\n") | |
} | |
} | |
} | |
func BenchmarkSpeed(b *testing.B) { | |
for _, direction := range [...]string{"encode", "decode"} { | |
b.Run(direction, func(b *testing.B) { | |
b.StopTimer() | |
for jj := 0; jj < 3; jj++ { | |
input := make([]byte, 50000) | |
output := make([]byte, len(input)*2) | |
reverse := make([]byte, len(input)) | |
// Generate random data with runs | |
n := 0 | |
var compressed uint | |
var err error | |
for n < len(input) { | |
val := byte(rand.Intn(7)) | |
input[n] = val | |
n++ | |
run := rand.Intn(128) | |
run -= 100 | |
for run > 0 && n < len(input) { | |
input[n] = val | |
n++ | |
run-- | |
} | |
} | |
if direction == "encode" { | |
b.StartTimer() | |
} | |
for ii := 0; ii < b.N; ii++ { | |
zrlt, _ := NewZRLT() | |
if _, compressed, err = zrlt.Forward(input, output); err != nil { | |
b.Fatalf("Encoding error: %v\n", err) | |
} | |
} | |
if direction == "encode" { | |
b.StopTimer() | |
} | |
if direction == "decode" { | |
b.StartTimer() | |
} | |
for ii := 0; ii < b.N; ii++ { | |
zrlt, _ := NewZRLT() | |
if _, _, err = zrlt.Inverse(output[0:compressed], reverse); err != nil { | |
b.Fatalf("Decoding error: %v\n", err) | |
} | |
} | |
if direction == "decode" { | |
b.StopTimer() | |
} | |
idx := -1 | |
// Sanity check | |
for i := range input { | |
if input[i] != reverse[i] { | |
idx = i | |
break | |
} | |
} | |
if idx >= 0 { | |
b.Fatalf("Failure at index %v (%v <-> %v)\n", idx, input[idx], reverse[idx]) | |
} | |
} | |
}) | |
} | |
} | |
// Zero Length Encoding is a simple encoding algorithm by Wheeler | |
// closely related to Run Length Encoding. The main difference is | |
// that only runs of 0 values are processed. Also, the length is | |
// encoded in a different way (each digit in a different byte) | |
// This algorithm is well adapted to process post BWT/MTFT data | |
const ( | |
ZRLT_MAX_RUN = int(1<<31) - 1 | |
) | |
type ZRLT struct { | |
} | |
func NewZRLT() (*ZRLT, error) { | |
this := new(ZRLT) | |
return this, nil | |
} | |
func (this *ZRLT) Forward(src, dst []byte) (uint, uint, error) { | |
if src == nil { | |
return uint(0), uint(0), errors.New("Invalid null source buffer") | |
} | |
if dst == nil { | |
return uint(0), uint(0), errors.New("Invalid null destination buffer") | |
} | |
if SameByteSlices(src, dst, false) { | |
return 0, 0, errors.New("Input and output buffers cannot be equal") | |
} | |
if n := this.MaxEncodedLen(len(src)); len(dst) < n { | |
return 0, 0, fmt.Errorf("Output buffer is too small - size: %d, required %d", len(dst), n) | |
} | |
srcEnd, dstEnd := uint(len(src)), uint(len(dst)) | |
dstEnd2 := dstEnd - 2 | |
runLength := 1 | |
srcIdx, dstIdx := uint(0), uint(0) | |
var err error | |
for srcIdx < srcEnd && dstIdx < dstEnd { | |
val := src[srcIdx] | |
if val == 0 { | |
runLength++ | |
srcIdx++ | |
if srcIdx < srcEnd && runLength < ZRLT_MAX_RUN { | |
continue | |
} | |
} | |
if runLength > 1 { | |
// Encode length | |
log2 := uint(1) | |
for runLength>>log2 > 1 { | |
log2++ | |
} | |
if dstIdx >= dstEnd-log2 { | |
break | |
} | |
// Write every bit as a byte except the most significant one | |
for log2 > 0 { | |
log2-- | |
dst[dstIdx] = byte((runLength >> log2) & 1) | |
dstIdx++ | |
} | |
runLength = 1 | |
continue | |
} | |
if val >= 0xFE { | |
if dstIdx >= dstEnd2 { | |
break | |
} | |
dst[dstIdx] = 0xFF | |
dstIdx++ | |
dst[dstIdx] = val - 0xFE | |
dstIdx++ | |
} else { | |
dst[dstIdx] = val + 1 | |
dstIdx++ | |
} | |
srcIdx++ | |
} | |
if srcIdx != srcEnd || runLength != 1 { | |
err = errors.New("Output buffer is too small") | |
} | |
return srcIdx, dstIdx, err | |
} | |
func (this *ZRLT) Inverse(src, dst []byte) (uint, uint, error) { | |
if src == nil { | |
return uint(0), uint(0), errors.New("Invalid null source buffer") | |
} | |
if dst == nil { | |
return uint(0), uint(0), errors.New("Invalid null destination buffer") | |
} | |
if SameByteSlices(src, dst, false) { | |
return 0, 0, errors.New("Input and output buffers cannot be equal") | |
} | |
srcEnd, dstEnd := len(src), len(dst) | |
runLength := 1 | |
srcIdx, dstIdx := 0, 0 | |
var err error | |
for srcIdx < srcEnd && dstIdx < dstEnd { | |
if runLength > 1 { | |
runLength-- | |
dst[dstIdx] = 0 | |
dstIdx++ | |
continue | |
} | |
val := src[srcIdx] | |
if val <= 1 { | |
// Generate the run length bit by bit (but force MSB) | |
runLength = 1 | |
for val&1 == val { | |
runLength = (runLength << 1) | int(val) | |
srcIdx++ | |
if srcIdx >= srcEnd { | |
break | |
} | |
val = src[srcIdx] | |
} | |
continue | |
} | |
// Regular data processing | |
if val == 0xFF { | |
srcIdx++ | |
if srcIdx >= srcEnd { | |
break | |
} | |
dst[dstIdx] = 0xFE + src[srcIdx] | |
} else { | |
dst[dstIdx] = val - 1 | |
} | |
dstIdx++ | |
srcIdx++ | |
} | |
// If runLength is not 1, add trailing 0s | |
end := dstIdx + runLength - 1 | |
if end > dstEnd { | |
err = errors.New("Output buffer is too small") | |
} else { | |
for dstIdx < end { | |
dst[dstIdx] = 0 | |
dstIdx++ | |
} | |
if srcIdx < srcEnd { | |
err = errors.New("Output buffer is too small") | |
} | |
} | |
return uint(srcIdx), uint(dstIdx), err | |
} | |
// Required encoding output buffer size unknown | |
func (this ZRLT) MaxEncodedLen(srcLen int) int { | |
return srcLen | |
} | |
func SameByteSlices(slice1, slice2 []byte, deepCheck bool) bool { | |
if slice2 == nil { | |
return slice1 == nil | |
} | |
if slice1 == nil { | |
return false | |
} | |
if &slice1 == &slice2 { | |
return true | |
} | |
if len(slice1) != len(slice2) { | |
return false | |
} | |
if deepCheck == true { | |
return bytes.Equal(slice1, slice2) | |
} else { | |
return false | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment