josharian/issue16122_test.go

## issue16122_test.go
/*
Copyright 2011-2013 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may obtain a copy of the License at

                http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package p

import (
	"bytes"
	"errors"
	"fmt"
	"math/rand"
	"testing"
	"time"
)

// func main() {
// 	fmt.Printf("TestZRLT\n")
// 	TestCorrectness()
// 	TestSpeed()
// }

func TestCorrectness(t *testing.T) {
	// fmt.Printf("Correctness test\n")

	for ii := 0; ii < 20; ii++ {
		rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
		// fmt.Printf("\nTest %v\n", ii)
		var arr []int

		if ii == 0 {
			arr2 := [64]int{30, 0, 4, 0, 30, 17, 240, 0, 0, 0, 12, 2, 23, 17, 254, 0, 254, 24, 0, 24, 20, 0, 17, 0, 254, 0, 254, 32, 0, 0, 13, 32, 255, 31, 15, 25, 0, 29, 8, 24, 20, 0, 0, 0, 245, 253, 0, 32, 19, 0, 243, 0, 0, 0, 13, 244, 24, 4, 0, 7, 25, 2, 0, 15}
			arr = arr2[0:len(arr2)]
		} else {
			arr = make([]int, 64)

			for i := range arr {
				val := rnd.Intn(100) - 16

				if val >= 33 {
					val = 0
				}

				arr[i] = val
			}
		}

		size := len(arr)
		input := make([]byte, size)
		output := make([]byte, size*3/2)
		reverse := make([]byte, size)

		for i := range output {
			output[i] = 0x55
		}

		for i := range arr {
			if i == len(arr)/2 {
				input[i] = 255
			} else {
				input[i] = byte(arr[i])
			}
		}

		ZRLT, err := NewZRLT()

		if err != nil {
			t.Errorf("\nError: %v\n", err)
			continue
		}

		// fmt.Printf("\nOriginal: ")

		// for i := range input {
		// 	if i%100 == 0 {
		// 		fmt.Printf("\n")
		// 	}

		// 	fmt.Printf("%v ", input[i])
		// }

		// fmt.Printf("\nCoded: ")
		_, dstIdx, err2 := ZRLT.Forward(input, output)

		if err2 != nil {
			t.Errorf("\nEncoding error: %v\n", err2)
			continue
		}

		// for i := uint(0); i < dstIdx; i++ {
		// 	if i%100 == 0 {
		// 		fmt.Printf("\n")
		// 	}

		// 	fmt.Printf("%v ", output[i])
		// }

		// fmt.Printf(" (Compression ratio: %v%%)", dstIdx*100/srcIdx)
		ZRLT, err2 = NewZRLT()

		if err2 != nil {
			t.Errorf("\nError: %v\n", err2)
			continue
		}

		_, _, err2 = ZRLT.Inverse(output[0:dstIdx], reverse)

		if err2 != nil {
			t.Errorf("\nDecoding error: %v\n", err2)
			continue
		}

		// fmt.Printf("\nDecoded: ")
		ok := true

		for i := range input {
			// if i%100 == 0 {
			// 	fmt.Printf("\n")
			// }

			// fmt.Printf("%v ", reverse[i])

			if reverse[i] != input[i] {
				ok = false
			}
		}

		if !ok {
			t.Fatalf("\nDifferent\n")
		}
	}
}

func BenchmarkSpeed(b *testing.B) {
	for _, direction := range [...]string{"encode", "decode"} {
		b.Run(direction, func(b *testing.B) {
			b.StopTimer()
			for jj := 0; jj < 3; jj++ {
				input := make([]byte, 50000)
				output := make([]byte, len(input)*2)
				reverse := make([]byte, len(input))

				// Generate random data with runs
				n := 0
				var compressed uint
				var err error

				for n < len(input) {
					val := byte(rand.Intn(7))
					input[n] = val
					n++
					run := rand.Intn(128)
					run -= 100

					for run > 0 && n < len(input) {
						input[n] = val
						n++
						run--
					}
				}

				if direction == "encode" {
					b.StartTimer()
				}
				for ii := 0; ii < b.N; ii++ {
					zrlt, _ := NewZRLT()
					if _, compressed, err = zrlt.Forward(input, output); err != nil {
						b.Fatalf("Encoding error: %v\n", err)
					}
				}
				if direction == "encode" {
					b.StopTimer()
				}

				if direction == "decode" {
					b.StartTimer()
				}
				for ii := 0; ii < b.N; ii++ {
					zrlt, _ := NewZRLT()
					if _, _, err = zrlt.Inverse(output[0:compressed], reverse); err != nil {
						b.Fatalf("Decoding error: %v\n", err)
					}
				}
				if direction == "decode" {
					b.StopTimer()
				}

				idx := -1

				// Sanity check
				for i := range input {
					if input[i] != reverse[i] {
						idx = i
						break
					}
				}

				if idx >= 0 {
					b.Fatalf("Failure at index %v (%v <-> %v)\n", idx, input[idx], reverse[idx])
				}
			}
		})
	}
}

// Zero Length Encoding is a simple encoding algorithm by Wheeler
// closely related to Run Length Encoding. The main difference is
// that only runs of 0 values are processed. Also, the length is
// encoded in a different way (each digit in a different byte)
// This algorithm is well adapted to process post BWT/MTFT data

const (
	ZRLT_MAX_RUN = int(1<<31) - 1
)

type ZRLT struct {
}

func NewZRLT() (*ZRLT, error) {
	this := new(ZRLT)
	return this, nil
}

func (this *ZRLT) Forward(src, dst []byte) (uint, uint, error) {
	if src == nil {
		return uint(0), uint(0), errors.New("Invalid null source buffer")
	}

	if dst == nil {
		return uint(0), uint(0), errors.New("Invalid null destination buffer")
	}

	if SameByteSlices(src, dst, false) {
		return 0, 0, errors.New("Input and output buffers cannot be equal")
	}

	if n := this.MaxEncodedLen(len(src)); len(dst) < n {
		return 0, 0, fmt.Errorf("Output buffer is too small - size: %d, required %d", len(dst), n)
	}

	srcEnd, dstEnd := uint(len(src)), uint(len(dst))
	dstEnd2 := dstEnd - 2
	runLength := 1
	srcIdx, dstIdx := uint(0), uint(0)
	var err error

	for srcIdx < srcEnd && dstIdx < dstEnd {
		val := src[srcIdx]

		if val == 0 {
			runLength++
			srcIdx++

			if srcIdx < srcEnd && runLength < ZRLT_MAX_RUN {
				continue
			}
		}

		if runLength > 1 {
			// Encode length
			log2 := uint(1)

			for runLength>>log2 > 1 {
				log2++
			}

			if dstIdx >= dstEnd-log2 {
				break
			}

			// Write every bit as a byte except the most significant one
			for log2 > 0 {
				log2--
				dst[dstIdx] = byte((runLength >> log2) & 1)
				dstIdx++
			}

			runLength = 1
			continue
		}

		if val >= 0xFE {
			if dstIdx >= dstEnd2 {
				break
			}

			dst[dstIdx] = 0xFF
			dstIdx++
			dst[dstIdx] = val - 0xFE
			dstIdx++
		} else {
			dst[dstIdx] = val + 1
			dstIdx++
		}

		srcIdx++
	}

	if srcIdx != srcEnd || runLength != 1 {
		err = errors.New("Output buffer is too small")
	}

	return srcIdx, dstIdx, err
}

func (this *ZRLT) Inverse(src, dst []byte) (uint, uint, error) {
	if src == nil {
		return uint(0), uint(0), errors.New("Invalid null source buffer")
	}

	if dst == nil {
		return uint(0), uint(0), errors.New("Invalid null destination buffer")
	}

	if SameByteSlices(src, dst, false) {
		return 0, 0, errors.New("Input and output buffers cannot be equal")
	}

	srcEnd, dstEnd := len(src), len(dst)
	runLength := 1
	srcIdx, dstIdx := 0, 0
	var err error

	for srcIdx < srcEnd && dstIdx < dstEnd {
		if runLength > 1 {
			runLength--
			dst[dstIdx] = 0
			dstIdx++
			continue
		}

		val := src[srcIdx]

		if val <= 1 {
			// Generate the run length bit by bit (but force MSB)
			runLength = 1

			for val&1 == val {
				runLength = (runLength << 1) | int(val)
				srcIdx++

				if srcIdx >= srcEnd {
					break
				}

				val = src[srcIdx]
			}

			continue
		}

		// Regular data processing
		if val == 0xFF {
			srcIdx++

			if srcIdx >= srcEnd {
				break
			}

			dst[dstIdx] = 0xFE + src[srcIdx]
		} else {
			dst[dstIdx] = val - 1
		}

		dstIdx++
		srcIdx++
	}

	// If runLength is not 1, add trailing 0s
	end := dstIdx + runLength - 1

	if end > dstEnd {
		err = errors.New("Output buffer is too small")
	} else {
		for dstIdx < end {
			dst[dstIdx] = 0
			dstIdx++
		}

		if srcIdx < srcEnd {
			err = errors.New("Output buffer is too small")
		}
	}

	return uint(srcIdx), uint(dstIdx), err
}

// Required encoding output buffer size unknown
func (this ZRLT) MaxEncodedLen(srcLen int) int {
	return srcLen
}

func SameByteSlices(slice1, slice2 []byte, deepCheck bool) bool {
	if slice2 == nil {
		return slice1 == nil
	}

	if slice1 == nil {
		return false
	}

	if &slice1 == &slice2 {
		return true
	}

	if len(slice1) != len(slice2) {
		return false
	}

	if deepCheck == true {
		return bytes.Equal(slice1, slice2)
	} else {
		return false
	}
}
	/*
	Copyright 2011-2013 Frederic Langlet
	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	you may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	*/

	package p

	import (
	"bytes"
	"errors"
	"fmt"
	"math/rand"
	"testing"
	"time"
	)

	// func main() {
	// fmt.Printf("TestZRLT\n")
	// TestCorrectness()
	// TestSpeed()
	// }

	func TestCorrectness(t *testing.T) {
	// fmt.Printf("Correctness test\n")

	for ii := 0; ii < 20; ii++ {
	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
	// fmt.Printf("\nTest %v\n", ii)
	var arr []int

	if ii == 0 {
	arr2 := [64]int{30, 0, 4, 0, 30, 17, 240, 0, 0, 0, 12, 2, 23, 17, 254, 0, 254, 24, 0, 24, 20, 0, 17, 0, 254, 0, 254, 32, 0, 0, 13, 32, 255, 31, 15, 25, 0, 29, 8, 24, 20, 0, 0, 0, 245, 253, 0, 32, 19, 0, 243, 0, 0, 0, 13, 244, 24, 4, 0, 7, 25, 2, 0, 15}
	arr = arr2[0:len(arr2)]
	} else {
	arr = make([]int, 64)

	for i := range arr {
	val := rnd.Intn(100) - 16

	if val >= 33 {
	val = 0
	}

	arr[i] = val
	}
	}

	size := len(arr)
	input := make([]byte, size)
	output := make([]byte, size*3/2)
	reverse := make([]byte, size)

	for i := range output {
	output[i] = 0x55
	}

	for i := range arr {
	if i == len(arr)/2 {
	input[i] = 255
	} else {
	input[i] = byte(arr[i])
	}
	}

	ZRLT, err := NewZRLT()

	if err != nil {
	t.Errorf("\nError: %v\n", err)
	continue
	}

	// fmt.Printf("\nOriginal: ")

	// for i := range input {
	// if i%100 == 0 {
	// fmt.Printf("\n")
	// }

	// fmt.Printf("%v ", input[i])
	// }

	// fmt.Printf("\nCoded: ")
	_, dstIdx, err2 := ZRLT.Forward(input, output)

	if err2 != nil {
	t.Errorf("\nEncoding error: %v\n", err2)
	continue
	}

	// for i := uint(0); i < dstIdx; i++ {
	// if i%100 == 0 {
	// fmt.Printf("\n")
	// }

	// fmt.Printf("%v ", output[i])
	// }

	// fmt.Printf(" (Compression ratio: %v%%)", dstIdx*100/srcIdx)
	ZRLT, err2 = NewZRLT()

	if err2 != nil {
	t.Errorf("\nError: %v\n", err2)
	continue
	}

	_, _, err2 = ZRLT.Inverse(output[0:dstIdx], reverse)

	if err2 != nil {
	t.Errorf("\nDecoding error: %v\n", err2)
	continue
	}

	// fmt.Printf("\nDecoded: ")
	ok := true

	for i := range input {
	// if i%100 == 0 {
	// fmt.Printf("\n")
	// }

	// fmt.Printf("%v ", reverse[i])

	if reverse[i] != input[i] {
	ok = false
	}
	}

	if !ok {
	t.Fatalf("\nDifferent\n")
	}
	}
	}

	func BenchmarkSpeed(b *testing.B) {
	for _, direction := range [...]string{"encode", "decode"} {
	b.Run(direction, func(b *testing.B) {
	b.StopTimer()
	for jj := 0; jj < 3; jj++ {
	input := make([]byte, 50000)
	output := make([]byte, len(input)*2)
	reverse := make([]byte, len(input))

	// Generate random data with runs
	n := 0
	var compressed uint
	var err error

	for n < len(input) {
	val := byte(rand.Intn(7))
	input[n] = val
	n++
	run := rand.Intn(128)
	run -= 100

	for run > 0 && n < len(input) {
	input[n] = val
	n++
	run--
	}
	}

	if direction == "encode" {
	b.StartTimer()
	}
	for ii := 0; ii < b.N; ii++ {
	zrlt, _ := NewZRLT()
	if _, compressed, err = zrlt.Forward(input, output); err != nil {
	b.Fatalf("Encoding error: %v\n", err)
	}
	}
	if direction == "encode" {
	b.StopTimer()
	}

	if direction == "decode" {
	b.StartTimer()
	}
	for ii := 0; ii < b.N; ii++ {
	zrlt, _ := NewZRLT()
	if _, _, err = zrlt.Inverse(output[0:compressed], reverse); err != nil {
	b.Fatalf("Decoding error: %v\n", err)
	}
	}
	if direction == "decode" {
	b.StopTimer()
	}

	idx := -1

	// Sanity check
	for i := range input {
	if input[i] != reverse[i] {
	idx = i
	break
	}
	}

	if idx >= 0 {
	b.Fatalf("Failure at index %v (%v <-> %v)\n", idx, input[idx], reverse[idx])
	}
	}
	})
	}
	}

	// Zero Length Encoding is a simple encoding algorithm by Wheeler
	// closely related to Run Length Encoding. The main difference is
	// that only runs of 0 values are processed. Also, the length is
	// encoded in a different way (each digit in a different byte)
	// This algorithm is well adapted to process post BWT/MTFT data

	const (
	ZRLT_MAX_RUN = int(1<<31) - 1
	)

	type ZRLT struct {
	}

	func NewZRLT() (*ZRLT, error) {
	this := new(ZRLT)
	return this, nil
	}

	func (this *ZRLT) Forward(src, dst []byte) (uint, uint, error) {
	if src == nil {
	return uint(0), uint(0), errors.New("Invalid null source buffer")
	}

	if dst == nil {
	return uint(0), uint(0), errors.New("Invalid null destination buffer")
	}

	if SameByteSlices(src, dst, false) {
	return 0, 0, errors.New("Input and output buffers cannot be equal")
	}

	if n := this.MaxEncodedLen(len(src)); len(dst) < n {
	return 0, 0, fmt.Errorf("Output buffer is too small - size: %d, required %d", len(dst), n)
	}

	srcEnd, dstEnd := uint(len(src)), uint(len(dst))
	dstEnd2 := dstEnd - 2
	runLength := 1
	srcIdx, dstIdx := uint(0), uint(0)
	var err error

	for srcIdx < srcEnd && dstIdx < dstEnd {
	val := src[srcIdx]

	if val == 0 {
	runLength++
	srcIdx++

	if srcIdx < srcEnd && runLength < ZRLT_MAX_RUN {
	continue
	}
	}

	if runLength > 1 {
	// Encode length
	log2 := uint(1)

	for runLength>>log2 > 1 {
	log2++
	}

	if dstIdx >= dstEnd-log2 {
	break
	}

	// Write every bit as a byte except the most significant one
	for log2 > 0 {
	log2--
	dst[dstIdx] = byte((runLength >> log2) & 1)
	dstIdx++
	}

	runLength = 1
	continue
	}

	if val >= 0xFE {
	if dstIdx >= dstEnd2 {
	break
	}

	dst[dstIdx] = 0xFF
	dstIdx++
	dst[dstIdx] = val - 0xFE
	dstIdx++
	} else {
	dst[dstIdx] = val + 1
	dstIdx++
	}

	srcIdx++
	}

	if srcIdx != srcEnd \|\| runLength != 1 {
	err = errors.New("Output buffer is too small")
	}

	return srcIdx, dstIdx, err
	}

	func (this *ZRLT) Inverse(src, dst []byte) (uint, uint, error) {
	if src == nil {
	return uint(0), uint(0), errors.New("Invalid null source buffer")
	}

	if dst == nil {
	return uint(0), uint(0), errors.New("Invalid null destination buffer")
	}

	if SameByteSlices(src, dst, false) {
	return 0, 0, errors.New("Input and output buffers cannot be equal")
	}

	srcEnd, dstEnd := len(src), len(dst)
	runLength := 1
	srcIdx, dstIdx := 0, 0
	var err error

	for srcIdx < srcEnd && dstIdx < dstEnd {
	if runLength > 1 {
	runLength--
	dst[dstIdx] = 0
	dstIdx++
	continue
	}

	val := src[srcIdx]

	if val <= 1 {
	// Generate the run length bit by bit (but force MSB)
	runLength = 1

	for val&1 == val {
	runLength = (runLength << 1) \| int(val)
	srcIdx++

	if srcIdx >= srcEnd {
	break
	}

	val = src[srcIdx]
	}

	continue
	}

	// Regular data processing
	if val == 0xFF {
	srcIdx++

	if srcIdx >= srcEnd {
	break
	}

	dst[dstIdx] = 0xFE + src[srcIdx]
	} else {
	dst[dstIdx] = val - 1
	}

	dstIdx++
	srcIdx++
	}

	// If runLength is not 1, add trailing 0s
	end := dstIdx + runLength - 1

	if end > dstEnd {
	err = errors.New("Output buffer is too small")
	} else {
	for dstIdx < end {
	dst[dstIdx] = 0
	dstIdx++
	}

	if srcIdx < srcEnd {
	err = errors.New("Output buffer is too small")
	}
	}

	return uint(srcIdx), uint(dstIdx), err
	}

	// Required encoding output buffer size unknown
	func (this ZRLT) MaxEncodedLen(srcLen int) int {
	return srcLen
	}

	func SameByteSlices(slice1, slice2 []byte, deepCheck bool) bool {
	if slice2 == nil {
	return slice1 == nil
	}

	if slice1 == nil {
	return false
	}

	if &slice1 == &slice2 {
	return true
	}

	if len(slice1) != len(slice2) {
	return false
	}

	if deepCheck == true {
	return bytes.Equal(slice1, slice2)
	} else {
	return false
	}
	}