tylerstillwater/elvis.go

## elvis.go
// All material is licensed under the Apache License Version 2.0, January 2004
// http://www.apache.org/licenses/LICENSE-2.0

// Sample program that takes a stream of bytes and looks for the bytes
// “elvis” and when they are found, replace them with “Elvis”. The code
// cannot assume that there are any line feeds or other delimiters in the
// stream and the code must assume that the stream is of any arbitrary length.
// The solution cannot meaningfully buffer to the end of the stream and
// then process the replacement.
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
)

// data represents a table of input and expected output.
var data = []struct {
	input  []byte
	output []byte
}{
	{[]byte("abc"), []byte("abc")},
	{[]byte("elvis"), []byte("Elvis")},
	{[]byte("aElvis"), []byte("aElvis")},
	{[]byte("abcelvis"), []byte("abcElvis")},
	{[]byte("eelvis"), []byte("eElvis")},
	{[]byte("aelvis"), []byte("aElvis")},
	{[]byte("aabeeeelvis"), []byte("aabeeeElvis")},
	{[]byte("e l v i s"), []byte("e l v i s")},
	{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")},
	{[]byte(" elvi s"), []byte(" elvi s")},
	{[]byte("elvielvis"), []byte("elviElvis")},
	{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")},
	{[]byte("elvielviselvis"), []byte("elviElvisElvis")},
}

// Declare what needs to be found and its replacement.
var find = []byte("elvis")
var repl = []byte("Elvis")

// Calculate the number of bytes we need to locate.
var size = len(find)

func main() {
	var output bytes.Buffer

	fmt.Println("=======================================\nRunning Algorithm One")
	for _, d := range data {
		output.Reset()
		algorithmOne(d.input, &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Two")
	for _, d := range data {
		output.Reset()
		algorithmTwo(d.input, &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Three")
	for _, d := range data {
		output.Reset()
		algorithmThree(bytes.NewReader(d.input), &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Four")
	for _, d := range data {
		output.Reset()
		algorithmFour(bytes.NewReader(d.input), &output)
		matched := bytes.Compare(d.output, output.Bytes())
		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}
}

// algorithmOne is one way to solve the problem.
func algorithmOne(data []byte, output *bytes.Buffer) {

	// Use a bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Declare the buffer we need to process the stream.
	buf := make([]byte, size)
	end := size - 1

	// Read in an initial number of bytes we need to get started.
	if n, err := io.ReadFull(input, buf[:end]); err != nil {
		output.Write(buf[:n])
		return
	}

	for {

		// Read in one byte from the input stream.
		b, err := input.ReadByte()
		if err != nil {

			// Flush the reset of the bytes we have.
			output.Write(buf[:end])
			break
		}

		// Add this byte to the end of the buffer.
		buf[end] = b

		// If we have a match, replace the bytes.
		if bytes.Compare(buf, find) == 0 {
			copy(buf, repl)
		}

		// Write the front byte since it has been compared.
		output.WriteByte(buf[0])

		// Slice that front byte out.
		copy(buf, buf[1:])
	}
}

// algorithmTwo is a second way to solve the problem.
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmTwo(data []byte, output *bytes.Buffer) {

	// Use the bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Create an index variable to match bytes.
	idx := 0

	for {

		// Read a single byte from our input.
		b, err := input.ReadByte()
		if err != nil {
			break
		}

		// Does this byte match the byte at this offset?
		if b == find[idx] {

			// It matches so increment the index position.
			idx++

			// If every byte has been matched, write
			// out the replacement.
			if idx == size {
				output.Write(repl)
				idx = 0
			}

			continue
		}

		// Did we have any sort of match on any given byte?
		if idx != 0 {

			// Write what we've matched up to this point.
			output.Write(find[:idx])

			// Unread the unmatched byte so it can be processed again.
			input.UnreadByte()

			// Reset the offset to start matching from the beginning.
			idx = 0

			continue
		}

		// There was no previous match. Write byte and reset.
		output.WriteByte(b)
		idx = 0
	}
}

// algorithmThree is a second way to solve the problem. This approach takes an
// io.Reader to represent an infinite stream. This allows for the algorithm to
// accept input from just about anywhere, thanks to the beauty of Go
// interfaces.
//
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmThree(data io.Reader, output *bytes.Buffer) {

	// Use bufio.NewReaderSize to provide a stream from which we can read and
	// unread single bytes.
	input := bufio.NewReaderSize(data, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

		// Read a single byte from our input.
		b, err := input.ReadByte()
		if err != nil {
			break
		}

		// Does this byte match the byte at this offset?
		if b == find[idx] {

			// It matches so increment the index position.
			idx++

			// If every byte has been matched, write
			// out the replacement.
			if idx == size {
				output.Write(repl)
				idx = 0
			}

			continue
		}

		// Did we have any sort of match on any given byte?
		if idx != 0 {

			// Write what we've matched up to this point.
			output.Write(find[:idx])

			// Unread the unmatched byte so it can be processed again.
			input.UnreadByte()

			// Reset the offset to start matching from the beginning.
			idx = 0

			continue
		}

		// There was no previous match. Write byte and reset.
		output.WriteByte(b)
		idx = 0
	}
}

// algorithmFour is a fourth way to solve the problem. This approach takes an
// io.Reader to represent an infinite stream. This allows for the algorithm to
// accept input from just about anywhere, thanks to the beauty of Go
// interfaces.
//
// Additionally, it has been optimized for minimal allocations by reading
// directly from the stream onto the stack. This results in 1 allocation of 1
// byte at the time of writing.
//
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmFour(data io.Reader, output *bytes.Buffer) {

	// Create a byte slice of length 1 into which our byte will be read.
	b := make([]byte, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

		// Are we re-using the byte from a previous call?
		if b[0] == 0 {
			// Read a single byte from our input.
			n, err := data.Read(b)
			if n == 0 || err != nil {
				break
			}
		}

		// Does this byte match the byte at this offset?
		if b[0] == find[idx] {

			// It matches so increment the index position.
			idx++

			// If every byte has been matched, write
			// out the replacement.
			if idx == size {
				output.Write(repl)
				idx = 0
			}

			// Reset the reader byte to 0 so another byte will be read.
			b[0] = 0
			continue
		}

		// Did we have any sort of match on any given byte?
		if idx != 0 {

			// Write what we've matched up to this point.
			output.Write(find[:idx])

			// NOTE: we are NOT resetting the reader byte to 0 here because we need
			// to re-use it on the next call. This is equivalent to the UnreadByte()
			// call in the other functions.

			// Reset the offset to start matching from the beginning.
			idx = 0

			continue
		}

		// There was no previous match. Write byte and reset.
		output.WriteByte(b[0])
		// Reset the reader byte to 0 so another byte will be read.
		b[0] = 0
	}
}

## elvis_test.go
// All material is licensed under the Apache License Version 2.0, January 2004
// http://www.apache.org/licenses/LICENSE-2.0

// go test -run none -bench . -benchtime 3s -benchmem

// Tests to see how each algorithm compare.
package main

import (
	"bytes"
	"testing"
)

// assembleInputStream appends all the input slices together to allow for
// consistent testing across all benchmarks
func assembleInputStream() []byte {
	var out []byte
	for _, d := range data {
		out = append(out, d.input...)
	}
	return out
}

// Capture the time it takes to execute algorithm one.
func BenchmarkAlgorithmOne(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
		output.Reset()
		algorithmOne(in, &output)
	}
}

// Capture the time it takes to execute algorithm two.
func BenchmarkAlgorithmTwo(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
		output.Reset()
		algorithmTwo(in, &output)
	}
}

// Capture the time it takes to execute algorithm three.
func BenchmarkAlgorithmThree(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
		output.Reset()
		// Seek our reader to the beginning of the byte array
		in.Seek(0, 0)
		algorithmThree(in, &output)
	}
}

// Capture the time it takes to execute algorithm four.
func BenchmarkAlgorithmFour(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
		output.Reset()
		// Seek our reader to the beginning of the byte array
		in.Seek(0, 0)
		algorithmFour(in, &output)
	}
}

## elvis_test_results.txt
BenchmarkAlgorithmOne-8        	 2000000       	      2803 ns/op       	      53 B/op  	       2 allocs/op
BenchmarkAlgorithmTwo-8        	 5000000       	      1042 ns/op       	       0 B/op  	       0 allocs/op
BenchmarkAlgorithmThree-8      	 3000000       	      1583 ns/op       	      16 B/op  	       1 allocs/op
BenchmarkAlgorithmFour-8       	 2000000       	      2030 ns/op       	       1 B/op  	       1 allocs/op
	// All material is licensed under the Apache License Version 2.0, January 2004
	// http://www.apache.org/licenses/LICENSE-2.0

	// Sample program that takes a stream of bytes and looks for the bytes
	// “elvis” and when they are found, replace them with “Elvis”. The code
	// cannot assume that there are any line feeds or other delimiters in the
	// stream and the code must assume that the stream is of any arbitrary length.
	// The solution cannot meaningfully buffer to the end of the stream and
	// then process the replacement.
	package main

	import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	)

	// data represents a table of input and expected output.
	var data = []struct {
	input []byte
	output []byte
	}{
	{[]byte("abc"), []byte("abc")},
	{[]byte("elvis"), []byte("Elvis")},
	{[]byte("aElvis"), []byte("aElvis")},
	{[]byte("abcelvis"), []byte("abcElvis")},
	{[]byte("eelvis"), []byte("eElvis")},
	{[]byte("aelvis"), []byte("aElvis")},
	{[]byte("aabeeeelvis"), []byte("aabeeeElvis")},
	{[]byte("e l v i s"), []byte("e l v i s")},
	{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")},
	{[]byte(" elvi s"), []byte(" elvi s")},
	{[]byte("elvielvis"), []byte("elviElvis")},
	{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")},
	{[]byte("elvielviselvis"), []byte("elviElvisElvis")},
	}

	// Declare what needs to be found and its replacement.
	var find = []byte("elvis")
	var repl = []byte("Elvis")

	// Calculate the number of bytes we need to locate.
	var size = len(find)

	func main() {
	var output bytes.Buffer

	fmt.Println("=======================================\nRunning Algorithm One")
	for _, d := range data {
	output.Reset()
	algorithmOne(d.input, &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Two")
	for _, d := range data {
	output.Reset()
	algorithmTwo(d.input, &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Three")
	for _, d := range data {
	output.Reset()
	algorithmThree(bytes.NewReader(d.input), &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Four")
	for _, d := range data {
	output.Reset()
	algorithmFour(bytes.NewReader(d.input), &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}
	}

	// algorithmOne is one way to solve the problem.
	func algorithmOne(data []byte, output *bytes.Buffer) {

	// Use a bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Declare the buffer we need to process the stream.
	buf := make([]byte, size)
	end := size - 1

	// Read in an initial number of bytes we need to get started.
	if n, err := io.ReadFull(input, buf[:end]); err != nil {
	output.Write(buf[:n])
	return
	}

	for {

	// Read in one byte from the input stream.
	b, err := input.ReadByte()
	if err != nil {

	// Flush the reset of the bytes we have.
	output.Write(buf[:end])
	break
	}

	// Add this byte to the end of the buffer.
	buf[end] = b

	// If we have a match, replace the bytes.
	if bytes.Compare(buf, find) == 0 {
	copy(buf, repl)
	}

	// Write the front byte since it has been compared.
	output.WriteByte(buf[0])

	// Slice that front byte out.
	copy(buf, buf[1:])
	}
	}

	// algorithmTwo is a second way to solve the problem.
	// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
	func algorithmTwo(data []byte, output *bytes.Buffer) {

	// Use the bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Create an index variable to match bytes.
	idx := 0

	for {

	// Read a single byte from our input.
	b, err := input.ReadByte()
	if err != nil {
	break
	}

	// Does this byte match the byte at this offset?
	if b == find[idx] {

	// It matches so increment the index position.
	idx++

	// If every byte has been matched, write
	// out the replacement.
	if idx == size {
	output.Write(repl)
	idx = 0
	}

	continue
	}

	// Did we have any sort of match on any given byte?
	if idx != 0 {

	// Write what we've matched up to this point.
	output.Write(find[:idx])

	// Unread the unmatched byte so it can be processed again.
	input.UnreadByte()

	// Reset the offset to start matching from the beginning.
	idx = 0

	continue
	}

	// There was no previous match. Write byte and reset.
	output.WriteByte(b)
	idx = 0
	}
	}

	// algorithmThree is a second way to solve the problem. This approach takes an
	// io.Reader to represent an infinite stream. This allows for the algorithm to
	// accept input from just about anywhere, thanks to the beauty of Go
	// interfaces.
	//
	// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
	func algorithmThree(data io.Reader, output *bytes.Buffer) {

	// Use bufio.NewReaderSize to provide a stream from which we can read and
	// unread single bytes.
	input := bufio.NewReaderSize(data, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

	// Read a single byte from our input.
	b, err := input.ReadByte()
	if err != nil {
	break
	}

	// Does this byte match the byte at this offset?
	if b == find[idx] {

	// It matches so increment the index position.
	idx++

	// If every byte has been matched, write
	// out the replacement.
	if idx == size {
	output.Write(repl)
	idx = 0
	}

	continue
	}

	// Did we have any sort of match on any given byte?
	if idx != 0 {

	// Write what we've matched up to this point.
	output.Write(find[:idx])

	// Unread the unmatched byte so it can be processed again.
	input.UnreadByte()

	// Reset the offset to start matching from the beginning.
	idx = 0

	continue
	}

	// There was no previous match. Write byte and reset.
	output.WriteByte(b)
	idx = 0
	}
	}

	// algorithmFour is a fourth way to solve the problem. This approach takes an
	// io.Reader to represent an infinite stream. This allows for the algorithm to
	// accept input from just about anywhere, thanks to the beauty of Go
	// interfaces.
	//
	// Additionally, it has been optimized for minimal allocations by reading
	// directly from the stream onto the stack. This results in 1 allocation of 1
	// byte at the time of writing.
	//
	// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
	func algorithmFour(data io.Reader, output *bytes.Buffer) {

	// Create a byte slice of length 1 into which our byte will be read.
	b := make([]byte, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

	// Are we re-using the byte from a previous call?
	if b[0] == 0 {
	// Read a single byte from our input.
	n, err := data.Read(b)
	if n == 0 \|\| err != nil {
	break
	}
	}

	// Does this byte match the byte at this offset?
	if b[0] == find[idx] {

	// It matches so increment the index position.
	idx++

	// If every byte has been matched, write
	// out the replacement.
	if idx == size {
	output.Write(repl)
	idx = 0
	}

	// Reset the reader byte to 0 so another byte will be read.
	b[0] = 0
	continue
	}

	// Did we have any sort of match on any given byte?
	if idx != 0 {

	// Write what we've matched up to this point.
	output.Write(find[:idx])

	// NOTE: we are NOT resetting the reader byte to 0 here because we need
	// to re-use it on the next call. This is equivalent to the UnreadByte()
	// call in the other functions.

	// Reset the offset to start matching from the beginning.
	idx = 0

	continue
	}

	// There was no previous match. Write byte and reset.
	output.WriteByte(b[0])
	// Reset the reader byte to 0 so another byte will be read.
	b[0] = 0
	}
	}
	// All material is licensed under the Apache License Version 2.0, January 2004
	// http://www.apache.org/licenses/LICENSE-2.0

	// go test -run none -bench . -benchtime 3s -benchmem

	// Tests to see how each algorithm compare.
	package main

	import (
	"bytes"
	"testing"
	)

	// assembleInputStream appends all the input slices together to allow for
	// consistent testing across all benchmarks
	func assembleInputStream() []byte {
	var out []byte
	for _, d := range data {
	out = append(out, d.input...)
	}
	return out
	}

	// Capture the time it takes to execute algorithm one.
	func BenchmarkAlgorithmOne(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
	output.Reset()
	algorithmOne(in, &output)
	}
	}

	// Capture the time it takes to execute algorithm two.
	func BenchmarkAlgorithmTwo(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
	output.Reset()
	algorithmTwo(in, &output)
	}
	}

	// Capture the time it takes to execute algorithm three.
	func BenchmarkAlgorithmThree(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
	output.Reset()
	// Seek our reader to the beginning of the byte array
	in.Seek(0, 0)
	algorithmThree(in, &output)
	}
	}

	// Capture the time it takes to execute algorithm four.
	func BenchmarkAlgorithmFour(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
	output.Reset()
	// Seek our reader to the beginning of the byte array
	in.Seek(0, 0)
	algorithmFour(in, &output)
	}
	}
	BenchmarkAlgorithmOne-8 2000000 2803 ns/op 53 B/op 2 allocs/op
	BenchmarkAlgorithmTwo-8 5000000 1042 ns/op 0 B/op 0 allocs/op
	BenchmarkAlgorithmThree-8 3000000 1583 ns/op 16 B/op 1 allocs/op
	BenchmarkAlgorithmFour-8 2000000 2030 ns/op 1 B/op 1 allocs/op