Patrikios/multithreading.jl

## multithreading.jl
# Multithreaded programming in Julia

#=
    Julia supports multiple types of parallelism:
    - SIMD (handled by the compiler) -> for instance, the short static vectors of the package StaticArrays.jl tend to encourage SIMD operations, which is a further reason why the package is so efficient
    - Threads with shared memory (this document covers this type)
    - Distributed (multi-node, not sharing memory)
    - GPUs
=#

#=
    Getting started with Threads
    - use `julia -t 4` to start julia with 4 Threads
    - In VSCode use the settings' julia.numThreads variable
=#

# Number of threads this Julia session is running with (example output below).
Threads.nthreads()
# 4
# ID of the thread that executes this call (example output below).
Threads.threadid()
# 1

#=
    Julia uses a task-based model: it has a fixed number of threads
    and schedules defined pieces of work (Tasks) onto them.
    The Julia team likes this model because, for them, it is 'composable'.
=#

using Images, Statistics
using Base.Threads: @spawn, @threads, threadid

# Run `threadid()` inside a spawned task; `x` is the Task handle.
x = @spawn threadid()
# Wait for the task and return the thread ID it reported.
fetch(x)

# Plain sequential loop: every iteration runs in the current task.
for i = 1:10
    println( "Hello from thread ", threadid() )
end

# Same loop under `@threads`: the iterations are divided among the available threads.
@threads for i in 1:10
    println( "Hello from thread ", threadid() )
end

# Fire-and-forget: the task is started, but nothing here waits for it.
@spawn println( "Hello from thread ", threadid() )

# `@sync` blocks until all four tasks spawned inside it have finished.
@sync begin
    @spawn println( "Hello from thread ", threadid() )
    @spawn println( "Hello from thread ", threadid() )
    @spawn println( "Hello from thread ", threadid() )
    @spawn println( "Hello from thread ", threadid() )
end

# Compute the mean of 10^9 standard-normal samples in a background task.
# NOTE(review): `randn(1_000_000_000)` materializes the whole vector first (about 8 GB if
# the elements are Float64) — confirm the machine has enough memory before running.
t1 = @spawn mean(randn(1_000_000_000))
# Block until the background computation finishes and return its result.
fetch(t1)

##--- prove there are multiple threads ---------------------------------------------------

# Global flag that the spawned task polls.
x = 1
# The task spins until `x` stops being 1, then prints "done!".
# NOTE(review): `x` is read in the task and reassigned below with no synchronization;
# presumably acceptable for an interactive demo, but confirm against the data-race rules
# in the Julia multi-threading manual before reusing this pattern.
@spawn begin
    while x == 1
    end
    println("done!")
end

1 + 1 # you can run this while the previous code is still running: the loop runs in the background and does not block the main session
x = 0 # stops the background loop, as x is no longer equal to 1

#--- memory is shared, hence the spawned task has access to the main thread's memory ---

using DataFrames
using Statistics

# Build a large DataFrame in a background task; `t` is the Task handle.
t = @spawn begin
    n = 100000000
    d1 = DataFrame(x = 1:n)
    temp = d1.x |> sum # sum of column `x`
    d1.y = repeat([temp], n) # new column `y`: that sum repeated `n` times
    d1 # the DataFrame is the task's result
end
# Issued right after spawning, without waiting for the task above.
print("Hello world!")
# Wait for the task and bind the DataFrame it produced.
df = fetch(t)

#-----------------------------------------------------------------------------------------

#=
    Use @threads for loops with uniform iterations
    use @spawn for unbalanced and nested parallelism
=#

## example for unbalanced parallelism

"""
    escapetime(z; maxiter = 80)

Starting from `w = z`, repeatedly check whether `abs(w) > 2` and otherwise replace
`w` with `w^2 + z`. Return the number of updates performed before the check first
succeeds, or `maxiter` if it never succeeds within `maxiter` checks.
"""
function escapetime(z; maxiter = 80)
    seed = z
    for iter in 1:maxiter
        # `iter - 1` updates have been applied when the bound is first exceeded.
        abs(z) > 2 && return iter - 1
        z = z^2 + seed
    end
    return maxiter
end

"""
    mandel(; width = 80, height = 20, maxiter = 80)

Evaluate `escapetime` on a `height × width` grid of complex numbers.

Column `x` takes its real part from `range(-2.0, 0.5, length = width)` and row `y`
takes its imaginary part from `range(-1.0, 1.0, length = height)`; entry `[y, x]`
of the result holds the escape time of that grid point.

# Keywords
- `width`: number of samples along the real axis (columns of the result).
- `height`: number of samples along the imaginary axis (rows of the result).
- `maxiter`: iteration cap forwarded to `escapetime`.

# Returns
The `height × width` integer matrix of escape times.
"""
function mandel(; width = 80, height = 20, maxiter = 80)
    out = zeros(Int, height, width)
    # Named `res`/`ims` so the Base functions `real` and `imag` are not shadowed.
    res = range(-2.0, 0.5, length = width)
    ims = range(-1.0, 1.0, length = height)
    # Columns in the outer loop: Julia arrays are stored column-major.
    for x in 1:width
        for y in 1:height
            z = res[x] + ims[y] * im
            # `out` is height × width, so the row index `y` comes first. Indexing it
            # as `out[x, y]` runs out of bounds whenever width > height.
            out[y, x] = escapetime(z, maxiter = maxiter)
        end
    end
    return out
end

# Evaluate the escape-time grid for an 80-wide, 20-high grid.
mandel(width = 80, height = 20, maxiter = 80)

# Author's note: for some reason the code above does not want to work, even though it is a
# one-to-one copy from https://www.youtube.com/watch?v=FzhipiZO4Jk
# NOTE(review): `mandel` throws a BoundsError whenever `out`, which is allocated as
# height × width, is indexed as `out[x, y]`; indexing it as `out[y, x]` resolves this.

using ThreadsX # parallel versions of common algorithms such as sum, sort, max, min, etc.

# Approximate π via the Basel series, sqrt(6 * Σ 1/n^2): first with the serial `sum`,
# then with `ThreadsX.sum`, timing each.
@time sqrt(6 * sum(1/n^2 for n in 1:1_000_000_000))
@time sqrt(6 * ThreadsX.sum(1/n^2 for n in 1:1_000_000_000))

#=
I/O
    - I/O is integrated with the task system
    - file, socket, and pipe I/O is thread-safe and can overlap
      compute tasks with I/O latency
    - can speed up file I/O a bit using 'open(filename, lock = false)' when you do not need multi-threaded access
=#

#=
Caveats
    - data races are possible but fairly easy to avoid with these higher-level constructs
    - Base Julia data structures are usually not thread-safe by default
    - for more advanced cases:
        - channels (inter-task communication)
        - locks
        - atomic operations
=#

# race condition
# `x[1] += 1` is an unsynchronized read-modify-write on shared memory, so concurrent
# iterations can overwrite each other's updates and the final count may be below 1000.
x = [0]
@threads for i = 1:1000
    x[1] += 1
end
x

# VS

# no race condition; however, the atomic is an expensive operation
# `Threads.atomic_add!` performs each increment atomically, so no update is lost.
x = Threads.Atomic{Int}(0)
@threads for i = 1:1000
    Threads.atomic_add!(x, 1)
end
# Read the current value held by the atomic.
x[]


# async is an older keyword, not really necessary anymore; one can use @spawn instead

# `$(g(x))` interpolates the value of `g(x)` into the task when it is created.
# NOTE(review): `f` and `g` are not defined anywhere in the visible file — presumably
# placeholders from a documentation example; confirm before running this line.
@async f($(g(x)), 1)
	# Multithreaded programming in Julia

	#=
	Julia supports multiple types of parallelism:
	- SIMD (handled by the compiler) -> for instance, the short static vectors of the package StaticArrays.jl tend to encourage SIMD operations, which is a further reason why the package is so efficient
	- Threads with shared memory (this document covers this type)
	- Distributed (multi-node, not sharing memory)
	- GPUs
	=#

	#=
	Getting started with Threads
	- use `julia -t 4` to start julia with 4 Threads
	- In VSCode use the settings' julia.numThreads variable
	=#

	# Number of threads this Julia session is running with (example output below).
	Threads.nthreads()
	# 4
	# ID of the thread that executes this call (example output below).
	Threads.threadid()
	# 1

	#=
	Julia uses a task-based model: it has a fixed number of threads
	and schedules defined pieces of work (Tasks) onto them.
	The Julia team likes this model because, for them, it is 'composable'.
	=#

	using Images, Statistics
	using Base.Threads: @spawn, @threads, threadid

	# Run `threadid()` inside a spawned task; `x` is the Task handle.
	x = @spawn threadid()
	# Wait for the task and return the thread ID it reported.
	fetch(x)

	# Plain sequential loop: every iteration runs in the current task.
	for i = 1:10
	println( "Hello from thread ", threadid() )
	end

	# Same loop under `@threads`: the iterations are divided among the available threads.
	@threads for i in 1:10
	println( "Hello from thread ", threadid() )
	end

	# Fire-and-forget: the task is started, but nothing here waits for it.
	@spawn println( "Hello from thread ", threadid() )

	# `@sync` blocks until all four tasks spawned inside it have finished.
	@sync begin
	@spawn println( "Hello from thread ", threadid() )
	@spawn println( "Hello from thread ", threadid() )
	@spawn println( "Hello from thread ", threadid() )
	@spawn println( "Hello from thread ", threadid() )
	end

	# Compute the mean of 10^9 standard-normal samples in a background task.
	# NOTE(review): `randn(1_000_000_000)` materializes the whole vector first (about 8 GB if
	# the elements are Float64) — confirm the machine has enough memory before running.
	t1 = @spawn mean(randn(1_000_000_000))
	# Block until the background computation finishes and return its result.
	fetch(t1)

	##--- prove there are multiple threads ---------------------------------------------------

	# Global flag that the spawned task polls.
	x = 1
	# The task spins until `x` stops being 1, then prints "done!".
	# NOTE(review): `x` is read in the task and reassigned below with no synchronization;
	# presumably acceptable for an interactive demo, but confirm against the data-race rules
	# in the Julia multi-threading manual before reusing this pattern.
	@spawn begin
	while x == 1
	end
	println("done!")
	end

	1 + 1 # you can run this while the previous code is still running: the loop runs in the background and does not block the main session
	x = 0 # stops the background loop, as x is no longer equal to 1

	#--- memory is shared, hence the spawned task has access to the main thread's memory ---

	using DataFrames
	using Statistics

	# Build a large DataFrame in a background task; `t` is the Task handle.
	t = @spawn begin
	    n = 100000000
	    d1 = DataFrame(x = 1:n)
	    # Pipe the column into `sum`. This must be the plain `|>` operator: a
	    # Markdown-escaped `\|>` does not parse as Julia.
	    temp = d1.x |> sum
	    # New column `y`: that sum repeated `n` times.
	    d1.y = repeat([temp], n)
	    d1
	end
	# Issued right after spawning, without waiting for the task above.
	print("Hello world!")
	# Wait for the task and bind the DataFrame it produced.
	df = fetch(t)

	#-----------------------------------------------------------------------------------------

	#=
	Use @threads for loops with uniform iterations
	use @spawn for unbalanced and nested parallelism
	=#

	## example for unbalanced parallelism

	"""
	    escapetime(z; maxiter = 80)

	Starting from `w = z`, repeatedly check whether `abs(w) > 2` and otherwise replace
	`w` with `w^2 + z`. Return the number of updates performed before the check first
	succeeds, or `maxiter` if it never succeeds within `maxiter` checks.
	"""
	function escapetime(z; maxiter = 80)
	    seed = z
	    for iter in 1:maxiter
	        # `iter - 1` updates have been applied when the bound is first exceeded.
	        abs(z) > 2 && return iter - 1
	        z = z^2 + seed
	    end
	    return maxiter
	end

	"""
	    mandel(; width = 80, height = 20, maxiter = 80)

	Evaluate `escapetime` on a `height × width` grid of complex numbers.

	Column `x` takes its real part from `range(-2.0, 0.5, length = width)` and row `y`
	takes its imaginary part from `range(-1.0, 1.0, length = height)`; entry `[y, x]`
	of the result holds the escape time of that grid point.

	# Keywords
	- `width`: number of samples along the real axis (columns of the result).
	- `height`: number of samples along the imaginary axis (rows of the result).
	- `maxiter`: iteration cap forwarded to `escapetime`.

	# Returns
	The `height × width` integer matrix of escape times.
	"""
	function mandel(; width = 80, height = 20, maxiter = 80)
	    out = zeros(Int, height, width)
	    # Named `res`/`ims` so the Base functions `real` and `imag` are not shadowed.
	    res = range(-2.0, 0.5, length = width)
	    ims = range(-1.0, 1.0, length = height)
	    # Columns in the outer loop: Julia arrays are stored column-major.
	    for x in 1:width
	        for y in 1:height
	            z = res[x] + ims[y] * im
	            # `out` is height × width, so the row index `y` comes first. Indexing it
	            # as `out[x, y]` runs out of bounds whenever width > height.
	            out[y, x] = escapetime(z, maxiter = maxiter)
	        end
	    end
	    return out
	end

	# Evaluate the escape-time grid for an 80-wide, 20-high grid.
	mandel(width = 80, height = 20, maxiter = 80)

	# Author's note: for some reason the code above does not want to work, even though it is a
	# one-to-one copy from https://www.youtube.com/watch?v=FzhipiZO4Jk
	# NOTE(review): `mandel` throws a BoundsError whenever `out`, which is allocated as
	# height × width, is indexed as `out[x, y]`; indexing it as `out[y, x]` resolves this.

	using ThreadsX # parallel versions of common algorithms such as sum, sort, max, min, etc.

	# Approximate π via the Basel series, sqrt(6 * Σ 1/n^2): first with the serial `sum`,
	# then with `ThreadsX.sum`, timing each.
	@time sqrt(6 * sum(1/n^2 for n in 1:1_000_000_000))
	@time sqrt(6 * ThreadsX.sum(1/n^2 for n in 1:1_000_000_000))

	#=
	I/O
	- I/O is integrated with the task system
	- file, socket, and pipe I/O is thread-safe and can overlap
	  compute tasks with I/O latency
	- can speed up file I/O a bit using 'open(filename, lock = false)' when you do not need multi-threaded access
	=#

	#=
	Caveats
	- data races are possible but fairly easy to avoid with these higher-level constructs
	- Base Julia data structures are usually not thread-safe by default
	- for more advanced cases:
	    - channels (inter-task communication)
	    - locks
	    - atomic operations
	=#

	# race condition
	# `x[1] += 1` is an unsynchronized read-modify-write on shared memory, so concurrent
	# iterations can overwrite each other's updates and the final count may be below 1000.
	x = [0]
	@threads for i = 1:1000
	x[1] += 1
	end
	x

	# VS

	# no race condition; however, the atomic is an expensive operation
	# `Threads.atomic_add!` performs each increment atomically, so no update is lost.
	x = Threads.Atomic{Int}(0)
	@threads for i = 1:1000
	Threads.atomic_add!(x, 1)
	end
	# Read the current value held by the atomic.
	x[]


	# async is an older keyword, not really necessary anymore; one can use @spawn instead

	# `$(g(x))` interpolates the value of `g(x)` into the task when it is created.
	# NOTE(review): `f` and `g` are not defined anywhere in the visible file — presumably
	# placeholders from a documentation example; confirm before running this line.
	@async f($(g(x)), 1)