A demonstration of how threads and the GVL affect throughput in Ruby
require 'parallel'

# This is a demonstration of how CPU-bound and IO-bound work interact with the GVL in Ruby and how it affects the throughput of work.

# Here's some CPU-bound work.
def sieve_of_eratosthenes(limit)
  sieve = Array.new(limit + 1, true)
  sieve[0] = sieve[1] = false
  (2..Math.sqrt(limit)).each do |i|
    next unless sieve[i]
    (i * i..limit).step(i) do |j|
      sieve[j] = false
    end
  end
  primes = []
  (2..limit).each do |i|
    primes << i if sieve[i]
  end
  primes
end

def cpu_intensive_task(iterations, limit)
  iterations.times do
    sieve_of_eratosthenes(limit)
  end
end
# Here's our demo.
def demo(thread_count, type, verbose = false)
  puts '*' * 100
  puts "DEMO: #{thread_count} thread(s) doing a #{type} workload"
  puts "[#{Time.now.utc}] DEMO START"
  start = Time.now.utc
  # We'll do the work 24 times, spread across thread_count threads...
  Parallel.each(Array.new(24) { |i| [i] }, in_threads: thread_count) do |_chunk|
    puts "[#{Time.now.utc}] [PID:#{Process.pid}][Thread:#{Thread.current.object_id}] Starting..." if verbose
    yield
    puts "[#{Time.now.utc}] [PID:#{Process.pid}][Thread:#{Thread.current.object_id}] Finishing..." if verbose
  end
  puts "[#{Time.now.utc}] DEMO END"
  time_elapsed = Time.now.utc - start
  puts "TIME ELAPSED: #{time_elapsed}"
end
# Each Ruby process has one Global VM Lock (GVL).
# Each Ruby process has many threads.
# Threads must obtain the GVL before they can run any Ruby code.
# Threads release the GVL while waiting on I/O.
#
# In a single Sidekiq process, we use the concurrency setting to change the number of worker threads.
# These worker threads are all vying for the process' GVL.
# In this setup, each worker thread can also create Parallel threads of its own.
# Those threads compete with every other thread in the process (Sidekiq worker threads, plus Parallel threads created by other Sidekiq threads).
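#
# As a back-of-the-envelope sketch (the numbers here are hypothetical, not taken from any real config):
sidekiq_concurrency = 10     # hypothetical Sidekiq `concurrency` setting
parallel_threads_per_job = 3 # hypothetical `in_threads:` value used inside each job
# With those numbers, up to 10 * 3 = 30 threads could be contending for this process' single GVL.
puts "Hypothetical threads contending for one GVL: #{sidekiq_concurrency * parallel_threads_per_job}"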
# Let's run the demo with 3 threads and with 1 thread.
demo(3, 'CPU') { cpu_intensive_task(1, 1_000_000) }
demo(1, 'CPU') { cpu_intensive_task(1, 1_000_000) }
# What do you notice? Does throughput change?
# Let's try with some "I/O" (in our case, sleeping). I tried to tune this so that the work is roughly
# 50/50 I/O and CPU; your results may vary.
#
def task_with_io(iterations, limit)
  iterations.times do
    sleep(0.05) # Tune this so the 1-thread mixed demo takes ~2x as long as the 1-thread CPU demo (i.e. sleep time ≈ CPU time).
    sieve_of_eratosthenes(limit)
  end
end
# Now we'll do the same 3 vs 1 thread demo, but with a 50/50 mixed IO/CPU load.
demo(3, 'mixed') { task_with_io(1, 1_000_000) }
demo(1, 'mixed') { task_with_io(1, 1_000_000) }
# In this case, the parallel processing is a massive win. We get _almost_ 2x the throughput of the single
# thread case. But what happens when we add EVEN MORE parallelism?
demo(10, 'mixed') { task_with_io(1, 1_000_000) }
demo(25, 'mixed') { task_with_io(1, 1_000_000) }
# No further gains in throughput! This is because of Amdahl's Law: the maximum gain in throughput
# via additional parallelism is limited to 1/(1-p), where p is the proportion of the task that can be
# done in parallel. In our case, p is roughly 0.5, so we will see at most a 2x gain in throughput with
# an infinite number of threads, and that's exactly what we see!
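#
# A quick worked example (a sketch of Amdahl's Law, not measured data): with n threads, the
# predicted speedup is 1 / ((1 - p) + p / n), which approaches 1 / (1 - p) as n grows.
def amdahl_speedup(p, n)
  1.0 / ((1.0 - p) + (p / n))
end

[1, 3, 10, 25].each do |n|
  puts "Predicted speedup with p=0.5 and #{n} threads: #{amdahl_speedup(0.5, n).round(2)}"
end
# => roughly 1.0, 1.5, 1.82, and 1.92 -- creeping toward the 2x ceiling, never past it.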
#
# Threads are not without cost. Each thread adds:
#   Memory use
#   Database connection use
#   Potentially higher time to execute a single job (GVL wait time increases)
#
# So, when deciding how many threads we should run in a Ruby process for background job processing:
#   1. The TOTAL thread count between Parallel and Sidekiq is what matters.
#   2. Each additional thread adds less throughput than the one before it.
#   3. Thread "costs" (memory, connections) stay roughly constant per thread.
#   4. We should add threads until the gains in throughput no longer outweigh the costs (a rough sweep is sketched below).
#   5. Gains in throughput depend on Amdahl's Law and our workload's p value.
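#
# If you want to find that point empirically for this workload, one rough approach (an illustrative
# sweep, not a prescription) is to rerun the mixed demo across a few thread counts and watch where
# the elapsed time stops improving relative to the thread count:
[1, 2, 3, 5, 10].each do |n|
  demo(n, 'mixed') { task_with_io(1, 1_000_000) }
end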