earthboundkid/-Go iterator experiment.md

## -Go iterator experiment.md

      
    Raw
  

              -Go iterator experiment.md
            
          
    Go iterator proposal experiment

The second problem in Project Euler is

By considering the terms in the Fibonacci sequence whose values do not exceed four million, find the sum of the even-valued terms.

Nine years ago, I wrote a blog post that showed how to solve this in Python using iterators and in Go using channels. The Go code started simple, but ended up a bit complicated because it also had stop channels to allow early exit. See euler.py for the Python code, and channel.go for the Go code using channels.
Go issue #61405 proposes to add a new function-based iterator protocol to Go. To test it, I rewrote the Go code using push function iterators. See functional.go. The code is notably simpler and shorter than the channel based code.
Worth noting is that a version of the code that just uses a plain for loop and if statements is significantly faster and shorter than the versions using push functions. The cost is that the code for generating Fibonacci numbers is a little obscure. A middle ground is to use a push function for fibs, but filter and sum inside a normal loop. See nonfunctional.go.
Benchmark results

Take these benchmarks with a grain of salt as the numbers will change if the proposal is finalized and optimization begins in earnest.
>>> timeit.timeit('functional()', globals=globals())
5.410251271969173
timeit.timeit('nonfunctional()', globals=globals())
>>> timeit.timeit('nonfunctional()', globals=globals())
2.24513187300181
For context, Python takes about 5 microseconds functionally and 2.5 microseconds imperatively, a 2x overhead for functional looping.
$ gotip test -v -benchmem -bench . .
=== RUN   TestEuler
--- PASS: TestEuler (0.00s)
goos: darwin
goarch: amd64
pkg: x/euler
cpu: Intel(R) Core(TM) i7-1060NG7 CPU @ 1.20GHz
BenchmarkFunctional
BenchmarkFunctional-8            3613938               327.7 ns/op           136 B/op          8 allocs/op
BenchmarkNonfunctional
BenchmarkNonfunctional-8        38324710                30.15 ns/op            0 B/op          0 allocs/op
BenchmarkHybrid
BenchmarkHybrid-8               17409680                69.89 ns/op            0 B/op          0 allocs/op
BenchmarkChannels
BenchmarkChannels-8                32686             38886 ns/op             490 B/op          7 allocs/op
PASS
ok      x/euler 5.796s

$ GOMAXPROCS=1 gotip test -v -benchmem -bench . .
=== RUN   TestEuler
--- PASS: TestEuler (0.00s)
goos: darwin
goarch: amd64
pkg: x/euler
cpu: Intel(R) Core(TM) i7-1060NG7 CPU @ 1.20GHz
BenchmarkFunctional
BenchmarkFunctional      3516156               348.3 ns/op           136 B/op          8 allocs/op
BenchmarkNonfunctional
BenchmarkNonfunctional  36625514                30.08 ns/op            0 B/op          0 allocs/op
BenchmarkHybrid
BenchmarkHybrid         17324220                68.38 ns/op            0 B/op          0 allocs/op
BenchmarkChannels
BenchmarkChannels          55174             21107 ns/op             488 B/op          7 allocs/op
PASS
ok      x/euler 5.494s

Go takes around 30 nanoseconds imperatively and 300 nanoseconds functionally, approximately a 10x overhead. A hybrid version of that code that generates the fib sequence using a push function, but filters and sums it using normal Go if statements does much better with only a 2x overhead (70 nanoseconds). A channel version takes an absurd 40 microseconds, which is slower than Python's functional version (5 microseconds). The speed of the channel version increases by setting GOMAXPROCS to 1, making it only 20 microseconds.
Observations and impressions


Channel based code is very rare in production because it is too verbose and too slow.
The signature for push functions is quite long. func(yield func(int64) bool) bool is bad enough. Something like func TakeUntil(limit int64, it Iter[int64]) Iter[int64] would be totally unreadable if Iter[int64] were expanded.
It's not clear what to return for the final bool. Does true indicate exhaustion or non-cancellation?
I think I could tolerate a 2x performance hit to use a push function in production, but I'm not sure I could tolerate the performance hit that comes from stacking them.
Should I be using stop functions somewhere in here? I think you only need them if you convert a push function into a pull function via coro.Pull, but it's not totally clear.
It's a little odd that you can write for someBool but have to write for range someInt. I understand why, but it's easy to forget the range.


## channel.go
package euler

func sendOrFail(n int64, out chan<- int64, quit <-chan struct{}) bool {
	select {
	case out <- n:
		return true
	case <-quit:
		return false
	}
}

func FibGenCh(quit <-chan struct{}) <-chan int64 {
	out := make(chan int64)
	go func() {
		defer close(out)
		var last, current int64 = 1, 2

		if ok := sendOrFail(last, out, quit); !ok {
			return
		}
		for {
			if ok := sendOrFail(current, out, quit); !ok {
				return
			}
			last, current = current, last+current
		}
	}()
	return out
}

func EvensCh(in <-chan int64, quit <-chan struct{}) <-chan int64 {
	out := make(chan int64)
	go func() {
		defer close(out)
		for n := range in {
			if n%2 == 0 {
				if ok := sendOrFail(n, out, quit); !ok {
					return
				}
			}
		}
	}()
	return out
}

func TakeUntilCh(max int64, in <-chan int64, quit <-chan struct{}) <-chan int64 {
	out := make(chan int64)
	go func() {
		defer close(out)
		for n := range in {
			if n > max {
				return
			}
			if ok := sendOrFail(n, out, quit); !ok {
				return
			}
		}
	}()
	return out
}

func Channel() int64 {
	var total int64

	quit := make(chan struct{})
	defer close(quit)
	for n := range EvensCh(TakeUntilCh(4000000, FibGenCh(quit), quit), quit) {
		total += n
	}
	return total
}

## euler.py
import timeit

def nonfunctional():
    total = 0
    a, b = 1, 1
    while a < 4_000_000:
        if not a%2:
            total += a
        a, b = b, a+b
    return total

def fibgen():
    last, current = 1, 2
    yield last
    while True:
        yield current
        last, current = current, last + current

def take_until(max, gen):
   for n in gen:
      if n > max:
          return
      yield n

def evens(gen):
   for n in gen:
      if n%2 == 0:
          yield n

def functional():
    sum(evens(take_until(4000000, fibgen())))

timeit.timeit('functional()', globals=globals())
timeit.timeit('nonfunctional()', globals=globals())

## euler_test.go
package euler_test

import (
	"testing"
	"x/euler"
)

const Answer = 4_613_732

func TestEuler(t *testing.T) {
	n := euler.Functional()
	if n != Answer {
		t.Fatal(n)
	}
	n = euler.Nonfunctional()
	if n != Answer {
		t.Fatal(n)
	}
	n = euler.Hybrid()
	if n != Answer {
		t.Fatal(n)
	}
	n = euler.Channel()
	if n != Answer {
		t.Fatal(n)
	}
}

var sink int64

func BenchmarkFunctional(b *testing.B) {
	for range b.N {
		n := euler.Functional()
		if n != Answer {
			b.Fatal(n)
		}
		sink = n
	}
}

func BenchmarkNonfunctional(b *testing.B) {
	for range b.N {
		n := euler.Nonfunctional()
		if n != Answer {
			b.Fatal(n)
		}
		sink = n
	}
}

func BenchmarkHybrid(b *testing.B) {
	for range b.N {
		n := euler.Hybrid()
		if n != Answer {
			b.Fatal(n)
		}
		sink = n
	}
}
func BenchmarkChannels(b *testing.B) {
	for range b.N {
		n := euler.Channel()
		if n != Answer {
			b.Fatal(n)
		}
		sink = n
	}
}

## functional.go
package euler

type Iter[T any] func(yield func(T) bool) bool

func Fibs(yield func(int64) bool) bool {
	a, b := int64(1), int64(1)
	for {
		if !yield(a) {
			return false
		}
		a, b = b, a+b
	}
}

func Evens(it Iter[int64]) Iter[int64] {
	return func(yield func(int64) bool) bool {
		for n := range it {
			if n%2 == 0 {
				if !yield(n) {
					return false
				}
			}
		}
		return true
	}
}

func TakeUntil(limit int64, it Iter[int64]) Iter[int64] {
	return func(yield func(int64) bool) bool {
		for n := range it {
			if n >= limit {
				return true
			}
			if !yield(n) {
				return false
			}
		}
		return true
	}
}

func Sum(it Iter[int64]) int64 {
	total := int64(0)
	for n := range it {
		total += n
	}
	return total
}

func Functional() int64 {
	return Sum(Evens(TakeUntil(4_000_000, Fibs)))
}

## go.mod
module x/euler

go 1.20

## nonfunctional.go
package euler

func Nonfunctional() int64 {
	total := int64(0)
	a, b := int64(1), int64(1)
	for a < 4_000_000 {
		if a%2 == 0 {
			total += a
		}
		a, b = b, a+b
	}
	return total
}

func Hybrid() int64 {
	total := int64(0)
	for n := range Fibs {
		if n > 4_000_000 {
			return total
		}
		if n%2 == 0 {
			total += n
		}
	}
	panic("unreachable")
}
	package euler

	func sendOrFail(n int64, out chan<- int64, quit <-chan struct{}) bool {
	select {
	case out <- n:
	return true
	case <-quit:
	return false
	}
	}

	func FibGenCh(quit <-chan struct{}) <-chan int64 {
	out := make(chan int64)
	go func() {
	defer close(out)
	var last, current int64 = 1, 2

	if ok := sendOrFail(last, out, quit); !ok {
	return
	}
	for {
	if ok := sendOrFail(current, out, quit); !ok {
	return
	}
	last, current = current, last+current
	}
	}()
	return out
	}

	func EvensCh(in <-chan int64, quit <-chan struct{}) <-chan int64 {
	out := make(chan int64)
	go func() {
	defer close(out)
	for n := range in {
	if n%2 == 0 {
	if ok := sendOrFail(n, out, quit); !ok {
	return
	}
	}
	}
	}()
	return out
	}

	func TakeUntilCh(max int64, in <-chan int64, quit <-chan struct{}) <-chan int64 {
	out := make(chan int64)
	go func() {
	defer close(out)
	for n := range in {
	if n > max {
	return
	}
	if ok := sendOrFail(n, out, quit); !ok {
	return
	}
	}
	}()
	return out
	}

	func Channel() int64 {
	var total int64

	quit := make(chan struct{})
	defer close(quit)
	for n := range EvensCh(TakeUntilCh(4000000, FibGenCh(quit), quit), quit) {
	total += n
	}
	return total
	}
	import timeit

	def nonfunctional():
	total = 0
	a, b = 1, 1
	while a < 4_000_000:
	if not a%2:
	total += a
	a, b = b, a+b
	return total

	def fibgen():
	last, current = 1, 2
	yield last
	while True:
	yield current
	last, current = current, last + current

	def take_until(max, gen):
	for n in gen:
	if n > max:
	return
	yield n

	def evens(gen):
	for n in gen:
	if n%2 == 0:
	yield n

	def functional():
	sum(evens(take_until(4000000, fibgen())))

	timeit.timeit('functional()', globals=globals())
	timeit.timeit('nonfunctional()', globals=globals())
	package euler_test

	import (
	"testing"
	"x/euler"
	)

	const Answer = 4_613_732

	func TestEuler(t *testing.T) {
	n := euler.Functional()
	if n != Answer {
	t.Fatal(n)
	}
	n = euler.Nonfunctional()
	if n != Answer {
	t.Fatal(n)
	}
	n = euler.Hybrid()
	if n != Answer {
	t.Fatal(n)
	}
	n = euler.Channel()
	if n != Answer {
	t.Fatal(n)
	}
	}

	var sink int64

	func BenchmarkFunctional(b *testing.B) {
	for range b.N {
	n := euler.Functional()
	if n != Answer {
	b.Fatal(n)
	}
	sink = n
	}
	}

	func BenchmarkNonfunctional(b *testing.B) {
	for range b.N {
	n := euler.Nonfunctional()
	if n != Answer {
	b.Fatal(n)
	}
	sink = n
	}
	}

	func BenchmarkHybrid(b *testing.B) {
	for range b.N {
	n := euler.Hybrid()
	if n != Answer {
	b.Fatal(n)
	}
	sink = n
	}
	}
	func BenchmarkChannels(b *testing.B) {
	for range b.N {
	n := euler.Channel()
	if n != Answer {
	b.Fatal(n)
	}
	sink = n
	}
	}
	package euler

	type Iter[T any] func(yield func(T) bool) bool

	func Fibs(yield func(int64) bool) bool {
	a, b := int64(1), int64(1)
	for {
	if !yield(a) {
	return false
	}
	a, b = b, a+b
	}
	}

	func Evens(it Iter[int64]) Iter[int64] {
	return func(yield func(int64) bool) bool {
	for n := range it {
	if n%2 == 0 {
	if !yield(n) {
	return false
	}
	}
	}
	return true
	}
	}

	func TakeUntil(limit int64, it Iter[int64]) Iter[int64] {
	return func(yield func(int64) bool) bool {
	for n := range it {
	if n >= limit {
	return true
	}
	if !yield(n) {
	return false
	}
	}
	return true
	}
	}

	func Sum(it Iter[int64]) int64 {
	total := int64(0)
	for n := range it {
	total += n
	}
	return total
	}

	func Functional() int64 {
	return Sum(Evens(TakeUntil(4_000_000, Fibs)))
	}
	package euler

	func Nonfunctional() int64 {
	total := int64(0)
	a, b := int64(1), int64(1)
	for a < 4_000_000 {
	if a%2 == 0 {
	total += a
	}
	a, b = b, a+b
	}
	return total
	}

	func Hybrid() int64 {
	total := int64(0)
	for n := range Fibs {
	if n > 4_000_000 {
	return total
	}
	if n%2 == 0 {
	total += n
	}
	}
	panic("unreachable")
	}