Skip to content

Instantly share code, notes, and snippets.

@kylebutts
Created March 19, 2024 18:01
Show Gist options
  • Save kylebutts/803bbdeebee702646949fea2d5fa7c8d to your computer and use it in GitHub Desktop.
Save kylebutts/803bbdeebee702646949fea2d5fa7c8d to your computer and use it in GitHub Desktop.
Simulation of "Surpised by the Hot Hand Fallacy" Econometrica
# %% [markdown]
# ---
# format: gfm
# ---
# %%
using StatsBase, Statistics
using Random
# %%
"""
Check if a consecutive sequence of True values of length k exists in the input list of Boolean values.
# Arguments
- shots: Vector{Bool} - The list of Boolean values to check.
- k: Int - The length of the consecutive True sequence to look for.
# Returns
- Bool: Returns true if a sequence of length k with all True values exists in the input list, false otherwise.
"""
function check_shots(shots::Vector{Bool}, k::Int)
for i in 1:length(shots)-k
if all(view(shots, i:i+k-1))
return true
end
end
return false
end
# %%
"""
Take shots until at least one hot streak (of length k) is observed.
Parameters:
- shots: Array{Bool}: The array to store the results of the shots. Modifies in place.
- p: Float64: Probability of making a shot successfully (0 < p < 1).
- k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).
- rng: Random number generator, default is Random.default_rng().
Returns:
- Void
Remarks:
- The function continues shooting until at least one hot streak of length k is observed, where each shot is determined based on the probability p.
- The check_shots function is used internally to verify if the hot streak condition is met.
Example:
```julia
shots = falses(10)
shoot!(shots, 0.7, 2)
```
"""
function shoot!(shots, p, k, rng = Random.default_rng())
@assert length(shots) >= k+1
while true
for i in eachindex(shots)
shots[i] = rand(rng) .> p
end
# Need to make at least k-1 shots in the first n-1 shots
check_shots(shots, k) && break
end
end
# %%
"""
Calculate the shooting efficiency based on a series of shots.
Parameters:
- shots: Array{Bool}: Array containing the results of the shots (true for made shot, false for missed shot).
- k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).
Returns:
- Float64: Shooting efficiency calculated as the proportion of hot hand shots that are made succesfully.
Remarks:
- The function iterates through the shots array and calculates the shooting efficiency for hot hands shots, where a hot hands shot is a shot that occurs after k successful shots.
Example:
```julia
shots = [true, true, false, true, true, true, false, true, true]
score_shots(shots, 2)
```
"""
function score_shots(shots, k)
n_hh_shots_made = 0
n_hh_shots_taken = 0
streak = 0
for shot in shots
# if made the last k shots, mark this shot
if streak >= k
n_hh_shots_taken += 1
n_hh_shots_made += shot
end
# update streak count (avoiding branching)
streak = shot ? (streak + 1) : 0
end
n_hh_shots_made / n_hh_shots_taken
end
# %% Monte Carlo Simulation
# Run 1 simulation, counting the number of hot shot streaks made
function simulate(shots, p, k, rng)
shoot!(shots, p, k, rng)
score_shots(shots, k)
end
function monte_carlo(n, p, k, B, rng)
shots = zeros(Bool, n)
pct_hot_hand_shots_made = [simulate(shots, p, k, rng) for _ in 1:B]
mean(pct_hot_hand_shots_made)
end
#' # Simuations
#' Run a set of simulations for different values of $(n, p, k)$. This replicates figure 1 of Miller and Sanjurjo (2018, ECTA)
# %%
using DataFrames, DataFramesMeta, Chain
rng = Xoshiro(1234)
B = 100_000
simulations = allcombinations(
DataFrame, n=1:100, p=[0.25, 0.5, 0.75], k=1:4
)
simulations = @chain simulations begin
@subset :n .> :k
@groupby [:p, :k]
transform(groupindices => :simulation_id)
end
# Run simulation for each (n, p, k)
@time @rtransform!(
simulations,
:pct_hot_hand_shots_made = monte_carlo(:n, :p, :k, B, rng)
)
#' Plotting the expected percent of successes following $k$ successful shots:
# %%
using Plots
p = plot(
xlims = (0, 100), ylims = (0, 0.8),
ylabel = "Average % of hot hand shots made",
xlabel = "Number of shots recorded per trial"
)
hline!([0.25, 0.5, 0.75], color = "black", linestyle = :dash, linewidth = 1, legend = false)
plot!(
simulations.n, simulations.pct_hot_hand_shots_made,
group = simulations.simulation_id, color = simulations.k,
palette = [:grey70, :grey50, :grey30, :grey10],
line = :solid, linewidth = 2,
)
# %% Testing
#| include: false
#| eval: false
using Test
@testset "check shots" begin
@test check_shots(Bool[1, 1, 1, 0, 0], 2) == true
@test check_shots(Bool[0, 0, 1, 1, 1], 2) == true
@test check_shots(Bool[0, 0, 0, 1, 1], 2) == false # no shot after k makes
@test check_shots(Bool[1, 0, 1, 1, 0], 1) == true
@test check_shots(Bool[0, 0, 0, 0, 0], 1) == false
end
shots = zeros(Bool, 5)
function check_shots()
shoot!(shots, 0.5, 3)
check_shots(shots, 3)
end
@testset "shoot! only returns valid sets of shots" begin
@test all([check_shots() for _ in 1:100])
end
@test
@testset "score_shots returns the correct pct of hot hand shots made" begin
@test score_shots(Bool[1,1,1,0,0], 1) ≈ 2/3
@test score_shots(Bool[1,1,1,1,0], 1) ≈ 3/4
@test score_shots(Bool[1,1,1,1,1], 1) ≈ 1
@test score_shots(Bool[1,0,1,0,0], 1) ≈ 0
@test score_shots(Bool[1,0,1,1,0], 1) ≈ 1/3
end

Simuations

using StatsBase, Statistics
using Random
"""
Check if a consecutive sequence of True values of length k exists in the input list of Boolean values.

# Arguments
-  shots: Vector{Bool} - The list of Boolean values to check.
-  k: Int - The length of the consecutive True sequence to look for.

# Returns
-  Bool: Returns true if a sequence of length k with all True values exists in the input list, false otherwise.
"""
function check_shots(shots::Vector{Bool}, k::Int)
  for i in 1:length(shots)-k
    if all(view(shots, i:i+k-1))
      return true
    end
  end
  return false
end
check_shots
"""
Take shots until at least one hot streak (of length k) is observed. 

Parameters:
-   shots: Array{Bool}: The array to store the results of the shots. Modifies in place.
-   p: Float64: Probability of making a shot successfully (0 < p < 1).
-   k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).
-   rng: Random number generator, default is Random.default_rng().

Returns:
-   Void

Remarks:
-   The function continues shooting until at least one hot streak of length k is observed, where each shot is determined based on the probability p.
-   The check_shots function is used internally to verify if the hot streak condition is met.
"""
function shoot!(shots, p, k, rng = Random.default_rng())  
  @assert length(shots) >= k+1

  while true
    for i in eachindex(shots)
      shots[i] = rand(rng) .> p
    end
    # Need to make at least k-1 shots in the first n-1 shots
    check_shots(shots, k) && break
  end
end
shoot!
"""
Calculate the shooting efficiency based on a series of shots.

Parameters:
-    shots: Array{Bool}: Array containing the results of the shots (true for made shot, false for missed shot).
-    k: Int64: Positive integer representing the length of the hot streak (after k successes, check whether or not the next shot goes in).

Returns:
-    Float64: Shooting efficiency calculated as the proportion of hot hand shots that are made succesfully.

Remarks:
-    The function iterates through the shots array and calculates the shooting efficiency for hot hands shots, where a hot hands shot is a shot that occurs after k successful shots.
"""
function score_shots(shots, k)
  n_hh_shots_made = 0
  n_hh_shots_taken = 0
  streak = 0
  for shot in shots
    # if made the last k shots, mark this shot
    if streak >= k
      n_hh_shots_taken += 1
      n_hh_shots_made += shot
    end 

    # update streak count (avoiding branching)
    streak = shot ? (streak + 1) : 0
  end

  n_hh_shots_made / n_hh_shots_taken
end
score_shots
# Run 1 simulation, counting the number of hot shot streaks made
function simulate(shots, p, k, rng)
  shoot!(shots, p, k, rng)
  score_shots(shots, k)
end
function monte_carlo(n, p, k, B, rng)
  shots = zeros(Bool, n)
  pct_hot_hand_shots_made = [simulate(shots, p, k, rng) for _ in 1:B]
  mean(pct_hot_hand_shots_made)
end
monte_carlo (generic function with 1 method)

Run a set of simulations for different values of $(n, p, k)$. This replicates figure 1 of Miller and Sanjurjo (2018, ECTA)

using DataFrames, DataFramesMeta, Chain
rng = Xoshiro(1234)
B = 100_000

simulations = allcombinations(
  DataFrame, n=1:100, p=[0.25, 0.5, 0.75], k=1:4
)
simulations = @chain simulations begin
  @subset :n .> :k
  @groupby [:p, :k] 
  transform(groupindices => :simulation_id)
end

# Run simulation for each (n, p, k)
@time @rtransform!(
  simulations, 
  :pct_hot_hand_shots_made = monte_carlo(:n, :p, :k, B, rng)
)
 33.226253 seconds (141.51 k allocations: 902.041 MiB, 0.27% gc time, 0.27% compilation time)

Plotting the expected percent of successes following $k$ successful shots:

using Plots

plt = plot(
  xlims = (0, 100), ylims = (0, 0.8),
  ylabel = "Average % of hot hand shots made",
  xlabel = "Number of shots recorded per trial"
)
hline!([0.25, 0.5, 0.75], color = "black", linestyle = :dash, linewidth = 1, legend = false)
plot!(
  simulations.n, simulations.pct_hot_hand_shots_made,
  group = simulations.simulation_id, color = simulations.k,
  palette = [:grey70, :grey50, :grey30, :grey10],
  line = :solid, linewidth = 2,
)

display("image/png", plt)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment