Skip to content

Instantly share code, notes, and snippets.

@tylerstillwater
Last active August 5, 2016 16:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tylerstillwater/a37ac5444afe4492b2a75a2cd0be3238 to your computer and use it in GitHub Desktop.
Save tylerstillwater/a37ac5444afe4492b2a75a2cd0be3238 to your computer and use it in GitHub Desktop.
// All material is licensed under the Apache License Version 2.0, January 2004
// http://www.apache.org/licenses/LICENSE-2.0
// Sample program that takes a stream of bytes and looks for the bytes
// “elvis” and when they are found, replace them with “Elvis”. The code
// cannot assume that there are any line feeds or other delimiters in the
// stream and the code must assume that the stream is of any arbitrary length.
// The solution cannot meaningfully buffer to the end of the stream and
// then process the replacement.
package main
import (
"bufio"
"bytes"
"fmt"
"io"
)
// data represents a table of input and expected output.
var data = []struct {
input []byte
output []byte
}{
{[]byte("abc"), []byte("abc")},
{[]byte("elvis"), []byte("Elvis")},
{[]byte("aElvis"), []byte("aElvis")},
{[]byte("abcelvis"), []byte("abcElvis")},
{[]byte("eelvis"), []byte("eElvis")},
{[]byte("aelvis"), []byte("aElvis")},
{[]byte("aabeeeelvis"), []byte("aabeeeElvis")},
{[]byte("e l v i s"), []byte("e l v i s")},
{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")},
{[]byte(" elvi s"), []byte(" elvi s")},
{[]byte("elvielvis"), []byte("elviElvis")},
{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")},
{[]byte("elvielviselvis"), []byte("elviElvisElvis")},
}
// Declare what needs to be found and its replacement.
var find = []byte("elvis")
var repl = []byte("Elvis")
// Calculate the number of bytes we need to locate.
var size = len(find)
func main() {
var output bytes.Buffer
fmt.Println("=======================================\nRunning Algorithm One")
for _, d := range data {
output.Reset()
algorithmOne(d.input, &output)
matched := bytes.Compare(d.output, output.Bytes())
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
}
fmt.Println("=======================================\nRunning Algorithm Two")
for _, d := range data {
output.Reset()
algorithmTwo(d.input, &output)
matched := bytes.Compare(d.output, output.Bytes())
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
}
fmt.Println("=======================================\nRunning Algorithm Three")
for _, d := range data {
output.Reset()
algorithmThree(bytes.NewReader(d.input), &output)
matched := bytes.Compare(d.output, output.Bytes())
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
}
fmt.Println("=======================================\nRunning Algorithm Four")
for _, d := range data {
output.Reset()
algorithmFour(bytes.NewReader(d.input), &output)
matched := bytes.Compare(d.output, output.Bytes())
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
}
}
// algorithmOne is one way to solve the problem.
func algorithmOne(data []byte, output *bytes.Buffer) {
// Use a bytes Reader to provide a stream to process.
input := bytes.NewReader(data)
// Declare the buffer we need to process the stream.
buf := make([]byte, size)
end := size - 1
// Read in an initial number of bytes we need to get started.
if n, err := io.ReadFull(input, buf[:end]); err != nil {
output.Write(buf[:n])
return
}
for {
// Read in one byte from the input stream.
b, err := input.ReadByte()
if err != nil {
// Flush the reset of the bytes we have.
output.Write(buf[:end])
break
}
// Add this byte to the end of the buffer.
buf[end] = b
// If we have a match, replace the bytes.
if bytes.Compare(buf, find) == 0 {
copy(buf, repl)
}
// Write the front byte since it has been compared.
output.WriteByte(buf[0])
// Slice that front byte out.
copy(buf, buf[1:])
}
}
// algorithmTwo is a second way to solve the problem.
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmTwo(data []byte, output *bytes.Buffer) {
// Use the bytes Reader to provide a stream to process.
input := bytes.NewReader(data)
// Create an index variable to match bytes.
idx := 0
for {
// Read a single byte from our input.
b, err := input.ReadByte()
if err != nil {
break
}
// Does this byte match the byte at this offset?
if b == find[idx] {
// It matches so increment the index position.
idx++
// If every byte has been matched, write
// out the replacement.
if idx == size {
output.Write(repl)
idx = 0
}
continue
}
// Did we have any sort of match on any given byte?
if idx != 0 {
// Write what we've matched up to this point.
output.Write(find[:idx])
// Unread the unmatched byte so it can be processed again.
input.UnreadByte()
// Reset the offset to start matching from the beginning.
idx = 0
continue
}
// There was no previous match. Write byte and reset.
output.WriteByte(b)
idx = 0
}
}
// algorithmThree is a second way to solve the problem. This approach takes an
// io.Reader to represent an infinite stream. This allows for the algorithm to
// accept input from just about anywhere, thanks to the beauty of Go
// interfaces.
//
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmThree(data io.Reader, output *bytes.Buffer) {
// Use bufio.NewReaderSize to provide a stream from which we can read and
// unread single bytes.
input := bufio.NewReaderSize(data, 1)
// Create an index variable to match bytes.
idx := 0
for {
// Read a single byte from our input.
b, err := input.ReadByte()
if err != nil {
break
}
// Does this byte match the byte at this offset?
if b == find[idx] {
// It matches so increment the index position.
idx++
// If every byte has been matched, write
// out the replacement.
if idx == size {
output.Write(repl)
idx = 0
}
continue
}
// Did we have any sort of match on any given byte?
if idx != 0 {
// Write what we've matched up to this point.
output.Write(find[:idx])
// Unread the unmatched byte so it can be processed again.
input.UnreadByte()
// Reset the offset to start matching from the beginning.
idx = 0
continue
}
// There was no previous match. Write byte and reset.
output.WriteByte(b)
idx = 0
}
}
// algorithmFour is a fourth way to solve the problem. This approach takes an
// io.Reader to represent an infinite stream. This allows for the algorithm to
// accept input from just about anywhere, thanks to the beauty of Go
// interfaces.
//
// Additionally, it has been optimized for minimal allocations by reading
// directly from the stream onto the stack. This results in 1 allocation of 1
// byte at the time of writing.
//
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
func algorithmFour(data io.Reader, output *bytes.Buffer) {
// Create a byte slice of length 1 into which our byte will be read.
b := make([]byte, 1)
// Create an index variable to match bytes.
idx := 0
for {
// Are we re-using the byte from a previous call?
if b[0] == 0 {
// Read a single byte from our input.
n, err := data.Read(b)
if n == 0 || err != nil {
break
}
}
// Does this byte match the byte at this offset?
if b[0] == find[idx] {
// It matches so increment the index position.
idx++
// If every byte has been matched, write
// out the replacement.
if idx == size {
output.Write(repl)
idx = 0
}
// Reset the reader byte to 0 so another byte will be read.
b[0] = 0
continue
}
// Did we have any sort of match on any given byte?
if idx != 0 {
// Write what we've matched up to this point.
output.Write(find[:idx])
// NOTE: we are NOT resetting the reader byte to 0 here because we need
// to re-use it on the next call. This is equivalent to the UnreadByte()
// call in the other functions.
// Reset the offset to start matching from the beginning.
idx = 0
continue
}
// There was no previous match. Write byte and reset.
output.WriteByte(b[0])
// Reset the reader byte to 0 so another byte will be read.
b[0] = 0
}
}
// All material is licensed under the Apache License Version 2.0, January 2004
// http://www.apache.org/licenses/LICENSE-2.0
// go test -run none -bench . -benchtime 3s -benchmem
// Tests to see how each algorithm compare.
package main
import (
"bytes"
"testing"
)
// assembleInputStream appends all the input slices together to allow for
// consistent testing across all benchmarks
func assembleInputStream() []byte {
var out []byte
for _, d := range data {
out = append(out, d.input...)
}
return out
}
// Capture the time it takes to execute algorithm one.
func BenchmarkAlgorithmOne(b *testing.B) {
var output bytes.Buffer
in := assembleInputStream()
for i := 0; i < b.N; i++ {
output.Reset()
algorithmOne(in, &output)
}
}
// Capture the time it takes to execute algorithm two.
func BenchmarkAlgorithmTwo(b *testing.B) {
var output bytes.Buffer
in := assembleInputStream()
for i := 0; i < b.N; i++ {
output.Reset()
algorithmTwo(in, &output)
}
}
// Capture the time it takes to execute algorithm three.
func BenchmarkAlgorithmThree(b *testing.B) {
var output bytes.Buffer
in := bytes.NewReader(assembleInputStream())
for i := 0; i < b.N; i++ {
output.Reset()
// Seek our reader to the beginning of the byte array
in.Seek(0, 0)
algorithmThree(in, &output)
}
}
// Capture the time it takes to execute algorithm four.
func BenchmarkAlgorithmFour(b *testing.B) {
var output bytes.Buffer
in := bytes.NewReader(assembleInputStream())
for i := 0; i < b.N; i++ {
output.Reset()
// Seek our reader to the beginning of the byte array
in.Seek(0, 0)
algorithmFour(in, &output)
}
}
BenchmarkAlgorithmOne-8 2000000 2803 ns/op 53 B/op 2 allocs/op
BenchmarkAlgorithmTwo-8 5000000 1042 ns/op 0 B/op 0 allocs/op
BenchmarkAlgorithmThree-8 3000000 1583 ns/op 16 B/op 1 allocs/op
BenchmarkAlgorithmFour-8 2000000 2030 ns/op 1 B/op 1 allocs/op
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment