kylebutts/simulation-hot_hand_ecta.jl

## simulation-hot_hand_ecta.jl
# %% [markdown]
# ---
# format: gfm
# ---
# %%
using StatsBase, Statistics
using Random

# %%
"""
Check if a consecutive sequence of True values of length k exists in the input list of Boolean values.

# Arguments
-  shots: Vector{Bool} - The list of Boolean values to check.
-  k: Int - The length of the consecutive True sequence to look for.

# Returns
-  Bool: Returns true if a sequence of length k with all True values exists in the input list, false otherwise.
"""
function check_shots(shots::Vector{Bool}, k::Int)
  for i in 1:length(shots)-k
    if all(view(shots, i:i+k-1))
      return true
    end
  end
  return false
end

# %%
"""
Take shots until at least one hot streak (of length k) is observed.

Parameters:
-   shots: Array{Bool}: The array to store the results of the shots. Modifies in place.
-   p: Float64: Probability of making a shot successfully (0 < p < 1).
-   k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).
-   rng: Random number generator, default is Random.default_rng().

Returns:
-   Void

Remarks:
-   The function continues shooting until at least one hot streak of length k is observed, where each shot is determined based on the probability p.
-   The check_shots function is used internally to verify if the hot streak condition is met.

Example:
```julia
shots = falses(10)
shoot!(shots, 0.7, 2)
```
"""
function shoot!(shots, p, k, rng = Random.default_rng())
  @assert length(shots) >= k+1

  while true
    for i in eachindex(shots)
      shots[i] = rand(rng) .> p
    end
    # Need to make at least k-1 shots in the first n-1 shots
    check_shots(shots, k) && break
  end
end

# %%
"""
Calculate the shooting efficiency based on a series of shots.

Parameters:
-    shots: Array{Bool}: Array containing the results of the shots (true for made shot, false for missed shot).
-    k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).

Returns:
-    Float64: Shooting efficiency calculated as the proportion of hot hand shots that are made succesfully.

Remarks:
-    The function iterates through the shots array and calculates the shooting efficiency for hot hands shots, where a hot hands shot is a shot that occurs after k successful shots.

Example:
```julia
shots = [true, true, false, true, true, true, false, true, true]
score_shots(shots, 2)
```
"""
function score_shots(shots, k)
  n_hh_shots_made = 0
  n_hh_shots_taken = 0
  streak = 0
  for shot in shots
    # if made the last k shots, mark this shot
    if streak >= k
      n_hh_shots_taken += 1
      n_hh_shots_made += shot
    end

    # update streak count (avoiding branching)
    streak = shot ? (streak + 1) : 0
  end

  n_hh_shots_made / n_hh_shots_taken
end

# %% Monte Carlo Simulation
# Run 1 simulation, counting the number of hot shot streaks made
function simulate(shots, p, k, rng)
  shoot!(shots, p, k, rng)
  score_shots(shots, k)
end
function monte_carlo(n, p, k, B, rng)
  shots = zeros(Bool, n)
  pct_hot_hand_shots_made = [simulate(shots, p, k, rng) for _ in 1:B]
  mean(pct_hot_hand_shots_made)
end

#' # Simuations
#' Run a set of simulations for different values of $(n, p, k)$. This replicates figure 1 of Miller and Sanjurjo (2018, ECTA)
# %%
using DataFrames, DataFramesMeta, Chain
rng = Xoshiro(1234)
B = 100_000

simulations = allcombinations(
  DataFrame, n=1:100, p=[0.25, 0.5, 0.75], k=1:4
)
simulations = @chain simulations begin
  @subset :n .> :k
  @groupby [:p, :k]
  transform(groupindices => :simulation_id)
end

# Run simulation for each (n, p, k)
@time @rtransform!(
  simulations,
  :pct_hot_hand_shots_made = monte_carlo(:n, :p, :k, B, rng)
)

#' Plotting the expected percent of successes following $k$ successful shots:
# %%
using Plots
p = plot(
  xlims = (0, 100), ylims = (0, 0.8),
  ylabel = "Average % of hot hand shots made",
  xlabel = "Number of shots recorded per trial"
)
hline!([0.25, 0.5, 0.75], color = "black", linestyle = :dash, linewidth = 1, legend = false)
plot!(
  simulations.n, simulations.pct_hot_hand_shots_made,
  group = simulations.simulation_id, color = simulations.k,
  palette = [:grey70, :grey50, :grey30, :grey10],
  line = :solid, linewidth = 2,
)


# %% Testing
#| include: false
#| eval: false
using Test
@testset "check shots" begin
  @test check_shots(Bool[1, 1, 1, 0, 0], 2) == true
  @test check_shots(Bool[0, 0, 1, 1, 1], 2) == true
  @test check_shots(Bool[0, 0, 0, 1, 1], 2) == false # no shot after k makes
  @test check_shots(Bool[1, 0, 1, 1, 0], 1) == true
  @test check_shots(Bool[0, 0, 0, 0, 0], 1) == false
end

shots = zeros(Bool, 5)
function check_shots()
  shoot!(shots, 0.5, 3)
  check_shots(shots, 3)
end
@testset "shoot! only returns valid sets of shots" begin
  @test all([check_shots() for _ in 1:100])
end
@test
@testset "score_shots returns the correct pct of hot hand shots made" begin
  @test score_shots(Bool[1,1,1,0,0], 1) ≈ 2/3
  @test score_shots(Bool[1,1,1,1,0], 1) ≈ 3/4
  @test score_shots(Bool[1,1,1,1,1], 1) ≈ 1
  @test score_shots(Bool[1,0,1,0,0], 1) ≈ 0
  @test score_shots(Bool[1,0,1,1,0], 1) ≈ 1/3
end

## simulation-hot_hand_ecta.md

      
    Raw
  

              simulation-hot_hand_ecta.md
            
          
    Simuations

using StatsBase, Statistics
using Random
"""
Check if a consecutive sequence of True values of length k exists in the input list of Boolean values.

# Arguments
-  shots: Vector{Bool} - The list of Boolean values to check.
-  k: Int - The length of the consecutive True sequence to look for.

# Returns
-  Bool: Returns true if a sequence of length k with all True values exists in the input list, false otherwise.
"""
function check_shots(shots::Vector{Bool}, k::Int)
  for i in 1:length(shots)-k
    if all(view(shots, i:i+k-1))
      return true
    end
  end
  return false
end
check_shots

"""
Take shots until at least one hot streak (of length k) is observed. 

Parameters:
-   shots: Array{Bool}: The array to store the results of the shots. Modifies in place.
-   p: Float64: Probability of making a shot successfully (0 < p < 1).
-   k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).
-   rng: Random number generator, default is Random.default_rng().

Returns:
-   Void

Remarks:
-   The function continues shooting until at least one hot streak of length k is observed, where each shot is determined based on the probability p.
-   The check_shots function is used internally to verify if the hot streak condition is met.
"""
function shoot!(shots, p, k, rng = Random.default_rng())  
  @assert length(shots) >= k+1

  while true
    for i in eachindex(shots)
      shots[i] = rand(rng) .> p
    end
    # Need to make at least k-1 shots in the first n-1 shots
    check_shots(shots, k) && break
  end
end
shoot!

"""
Calculate the shooting efficiency based on a series of shots.

Parameters:
-    shots: Array{Bool}: Array containing the results of the shots (true for made shot, false for missed shot).
-    k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).

Returns:
-    Float64: Shooting efficiency calculated as the proportion of hot hand shots that are made succesfully.

Remarks:
-    The function iterates through the shots array and calculates the shooting efficiency for hot hands shots, where a hot hands shot is a shot that occurs after k successful shots.
"""
function score_shots(shots, k)
  n_hh_shots_made = 0
  n_hh_shots_taken = 0
  streak = 0
  for shot in shots
    # if made the last k shots, mark this shot
    if streak >= k
      n_hh_shots_taken += 1
      n_hh_shots_made += shot
    end 

    # update streak count (avoiding branching)
    streak = shot ? (streak + 1) : 0
  end

  n_hh_shots_made / n_hh_shots_taken
end
score_shots

# Run 1 simulation, counting the number of hot shot streaks made
function simulate(shots, p, k, rng)
  shoot!(shots, p, k, rng)
  score_shots(shots, k)
end
function monte_carlo(n, p, k, B, rng)
  shots = zeros(Bool, n)
  pct_hot_hand_shots_made = [simulate(shots, p, k, rng) for _ in 1:B]
  mean(pct_hot_hand_shots_made)
end
monte_carlo (generic function with 1 method)

Run a set of simulations for different values of $(n, p, k)$. This
replicates figure 1 of Miller and Sanjurjo (2018, ECTA)
using DataFrames, DataFramesMeta, Chain
rng = Xoshiro(1234)
B = 100_000

simulations = allcombinations(
  DataFrame, n=1:100, p=[0.25, 0.5, 0.75], k=1:4
)
simulations = @chain simulations begin
  @subset :n .> :k
  @groupby [:p, :k] 
  transform(groupindices => :simulation_id)
end

# Run simulation for each (n, p, k)
@time @rtransform!(
  simulations, 
  :pct_hot_hand_shots_made = monte_carlo(:n, :p, :k, B, rng)
)
 33.226253 seconds (141.51 k allocations: 902.041 MiB, 0.27% gc time, 0.27% compilation time)

Plotting the expected percent of successes following $k$ successful
shots:
using Plots

plt = plot(
  xlims = (0, 100), ylims = (0, 0.8),
  ylabel = "Average % of hot hand shots made",
  xlabel = "Number of shots recorded per trial"
)
hline!([0.25, 0.5, 0.75], color = "black", linestyle = :dash, linewidth = 1, legend = false)
plot!(
  simulations.n, simulations.pct_hot_hand_shots_made,
  group = simulations.simulation_id, color = simulations.k,
  palette = [:grey70, :grey50, :grey30, :grey10],
  line = :solid, linewidth = 2,
)

display("image/png", plt)
	# %% [markdown]
	# ---
	# format: gfm
	# ---
	# %%
	using StatsBase, Statistics
	using Random

	# %%
	"""
	Check if a consecutive sequence of True values of length k exists in the input list of Boolean values.

	# Arguments
	- shots: Vector{Bool} - The list of Boolean values to check.
	- k: Int - The length of the consecutive True sequence to look for.

	# Returns
	- Bool: Returns true if a sequence of length k with all True values exists in the input list, false otherwise.
	"""
	function check_shots(shots::Vector{Bool}, k::Int)
	for i in 1:length(shots)-k
	if all(view(shots, i:i+k-1))
	return true
	end
	end
	return false
	end

	# %%
	"""
	Take shots until at least one hot streak (of length k) is observed.

	Parameters:
	- shots: Array{Bool}: The array to store the results of the shots. Modifies in place.
	- p: Float64: Probability of making a shot successfully (0 < p < 1).
	- k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).
	- rng: Random number generator, default is Random.default_rng().

	Returns:
	- Void

	Remarks:
	- The function continues shooting until at least one hot streak of length k is observed, where each shot is determined based on the probability p.
	- The check_shots function is used internally to verify if the hot streak condition is met.

	Example:
	```julia
	shots = falses(10)
	shoot!(shots, 0.7, 2)
	```
	"""
	function shoot!(shots, p, k, rng = Random.default_rng())
	@assert length(shots) >= k+1

	while true
	for i in eachindex(shots)
	shots[i] = rand(rng) .> p
	end
	# Need to make at least k-1 shots in the first n-1 shots
	check_shots(shots, k) && break
	end
	end

	# %%
	"""
	Calculate the shooting efficiency based on a series of shots.

	Parameters:
	- shots: Array{Bool}: Array containing the results of the shots (true for made shot, false for missed shot).
	- k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).

	Returns:
	- Float64: Shooting efficiency calculated as the proportion of hot hand shots that are made succesfully.

	Remarks:
	- The function iterates through the shots array and calculates the shooting efficiency for hot hands shots, where a hot hands shot is a shot that occurs after k successful shots.

	Example:
	```julia
	shots = [true, true, false, true, true, true, false, true, true]
	score_shots(shots, 2)
	```
	"""
	function score_shots(shots, k)
	n_hh_shots_made = 0
	n_hh_shots_taken = 0
	streak = 0
	for shot in shots
	# if made the last k shots, mark this shot
	if streak >= k
	n_hh_shots_taken += 1
	n_hh_shots_made += shot
	end

	# update streak count (avoiding branching)
	streak = shot ? (streak + 1) : 0
	end

	n_hh_shots_made / n_hh_shots_taken
	end

	# %% Monte Carlo Simulation
	# Run 1 simulation, counting the number of hot shot streaks made
	function simulate(shots, p, k, rng)
	shoot!(shots, p, k, rng)
	score_shots(shots, k)
	end
	function monte_carlo(n, p, k, B, rng)
	shots = zeros(Bool, n)
	pct_hot_hand_shots_made = [simulate(shots, p, k, rng) for _ in 1:B]
	mean(pct_hot_hand_shots_made)
	end

	#' # Simuations
	#' Run a set of simulations for different values of $(n, p, k)$. This replicates figure 1 of Miller and Sanjurjo (2018, ECTA)
	# %%
	using DataFrames, DataFramesMeta, Chain
	rng = Xoshiro(1234)
	B = 100_000

	simulations = allcombinations(
	DataFrame, n=1:100, p=[0.25, 0.5, 0.75], k=1:4
	)
	simulations = @chain simulations begin
	@subset :n .> :k
	@groupby [:p, :k]
	transform(groupindices => :simulation_id)
	end

	# Run simulation for each (n, p, k)
	@time @rtransform!(
	simulations,
	:pct_hot_hand_shots_made = monte_carlo(:n, :p, :k, B, rng)
	)

	#' Plotting the expected percent of successes following $k$ successful shots:
	# %%
	using Plots
	p = plot(
	xlims = (0, 100), ylims = (0, 0.8),
	ylabel = "Average % of hot hand shots made",
	xlabel = "Number of shots recorded per trial"
	)
	hline!([0.25, 0.5, 0.75], color = "black", linestyle = :dash, linewidth = 1, legend = false)
	plot!(
	simulations.n, simulations.pct_hot_hand_shots_made,
	group = simulations.simulation_id, color = simulations.k,
	palette = [:grey70, :grey50, :grey30, :grey10],
	line = :solid, linewidth = 2,
	)



	# %% Testing
	#\| include: false
	#\| eval: false
	using Test
	@testset "check shots" begin
	@test check_shots(Bool[1, 1, 1, 0, 0], 2) == true
	@test check_shots(Bool[0, 0, 1, 1, 1], 2) == true
	@test check_shots(Bool[0, 0, 0, 1, 1], 2) == false # no shot after k makes
	@test check_shots(Bool[1, 0, 1, 1, 0], 1) == true
	@test check_shots(Bool[0, 0, 0, 0, 0], 1) == false
	end

	shots = zeros(Bool, 5)
	function check_shots()
	shoot!(shots, 0.5, 3)
	check_shots(shots, 3)
	end
	@testset "shoot! only returns valid sets of shots" begin
	@test all([check_shots() for _ in 1:100])
	end
	@test
	@testset "score_shots returns the correct pct of hot hand shots made" begin
	@test score_shots(Bool[1,1,1,0,0], 1) ≈ 2/3
	@test score_shots(Bool[1,1,1,1,0], 1) ≈ 3/4
	@test score_shots(Bool[1,1,1,1,1], 1) ≈ 1
	@test score_shots(Bool[1,0,1,0,0], 1) ≈ 0
	@test score_shots(Bool[1,0,1,1,0], 1) ≈ 1/3
	end