dantonnoriega/deep_loops_in_stan.R

## deep_loops_in_stan.R
library(tidyverse)
library(rstan)

## HOW THE SIMULATION LOOP BELOW WORKS -- ignore the shocks for now --------------
# quick example
I = 3 # individuals
M = 5 # "months"
S = 7 # sims
mat <- matrix(NA, I*M, S) # M rows (months) will be filled in at a time for each individual i across all S columns (simulations); records for individual i will populate rows ((i - 1)*M + 1):(i*M)

# store starting values
initial_values <- data.frame(customer_id = 1:I,
                             initial_value = (1:I) / 10)

row = 0 # row counter required to properly fill in matrix
for(i in 1:I) {
  for(m in 1:M) {
    row = row + 1 # advance a row each time we advance a month or individual
    for(s in 1:S) {
      if(m == 1) {
        # values stored produce values: [m].[i][s]
        # that is, ones = month, tenths = individual, hundreths = simulation
        mat[row, s] <- m + initial_values$initial_value[i] + s/100 # fill in with month value
      } else mat[row, s] <- mat[row - 1, s] + 1
    }
  }
}
# > mat
# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# [1,] 1.11 1.12 1.13 1.14 1.15 1.16 1.17
# [2,] 2.11 2.12 2.13 2.14 2.15 2.16 2.17
# [3,] 3.11 3.12 3.13 3.14 3.15 3.16 3.17
# [4,] 4.11 4.12 4.13 4.14 4.15 4.16 4.17
# [5,] 5.11 5.12 5.13 5.14 5.15 5.16 5.17
# [6,] 1.21 1.22 1.23 1.24 1.25 1.26 1.27
# [7,] 2.21 2.22 2.23 2.24 2.25 2.26 2.27
# [8,] 3.21 3.22 3.23 3.24 3.25 3.26 3.27
# [9,] 4.21 4.22 4.23 4.24 4.25 4.26 4.27
# [10,] 5.21 5.22 5.23 5.24 5.25 5.26 5.27
# [11,] 1.31 1.32 1.33 1.34 1.35 1.36 1.37
# [12,] 2.31 2.32 2.33 2.34 2.35 2.36 2.37
# [13,] 3.31 3.32 3.33 3.34 3.35 3.36 3.37
# [14,] 4.31 4.32 4.33 4.34 4.35 4.36 4.37
# [15,] 5.31 5.32 5.33 5.34 5.35 5.36 5.37

# we can collect each simulation as a matrix Y where each elements Y(i,j) = Y(i,m)
lapply(1:S, function(s) matrix(mat[, s], nrow = I, byrow = T)) # by row

# or, we can create a dataframe of panel data
indx <- c(sapply(1:I, function(i) rep(i, M)))
mons <- rep(1:M, I)
data.frame(customer_id = indx, month = mons, sims = mat)

#>    customer_id month sims.1 sims.2 sims.3 sims.4 sims.5 sims.6 sims.7
#> 1            1     1   1.11   1.12   1.13   1.14   1.15   1.16   1.17
#> 2            1     2   2.11   2.12   2.13   2.14   2.15   2.16   2.17
#> 3            1     3   3.11   3.12   3.13   3.14   3.15   3.16   3.17
#> 4            1     4   4.11   4.12   4.13   4.14   4.15   4.16   4.17
#> 5            1     5   5.11   5.12   5.13   5.14   5.15   5.16   5.17
#> 6            2     1   1.21   1.22   1.23   1.24   1.25   1.26   1.27
#> 7            2     2   2.21   2.22   2.23   2.24   2.25   2.26   2.27
#> 8            2     3   3.21   3.22   3.23   3.24   3.25   3.26   3.27
#> 9            2     4   4.21   4.22   4.23   4.24   4.25   4.26   4.27
#> 10           2     5   5.21   5.22   5.23   5.24   5.25   5.26   5.27
#> 11           3     1   1.31   1.32   1.33   1.34   1.35   1.36   1.37
#> 12           3     2   2.31   2.32   2.33   2.34   2.35   2.36   2.37
#> 13           3     3   3.31   3.32   3.33   3.34   3.35   3.36   3.37
#> 14           3     4   4.31   4.32   4.33   4.34   4.35   4.36   4.37
#> 15           3     5   5.31   5.32   5.33   5.34   5.35   5.36   5.37

## END -------------

# Some fake data

Individuals <- 1000
Sims <- 1000
Months <- 12

initial_values <- data.frame(customer_id = 1:Individuals,
                             initial_value = rnorm(Individuals))
parameter_draws <- matrix(c(rnorm(Sims, 0.5, 0.1),
                            rnorm(Sims, 0.3, .1)), 2, Sims, byrow = T)
common_shocks <- matrix(rnorm(Months*Sims, 0, .1), Months, Sims)


# OK -- so the model is y_{it} = alpha[t] + beta * y_{it-1} + epsilon_{it}
# where a[t] ~ normal(a, 0.1) is a common shock to all series.
# and epsilon is n(0,1)

# A function to produce sims in R

sims_in_r <-
  function (
    Individuals,
    Sims,
    Months,
    initial_values,
    parameter_draws,
    common_shocks
  ) {
    out <- matrix(NA, Individuals * Months, Sims)
    count <- 0
    for(i in 1:Individuals) {
      for(m in 1:Months) {
        count <- count + 1
        for(s in 1:Sims) {
          if(m==1) {
            out[count,s] <- rnorm(1, parameter_draws[1,s] +
                                    common_shocks[m,s] +
                                    parameter_draws[2,s] * initial_values[i,2],
                                  1)
          } else {
            out[count,s] <- rnorm(1, parameter_draws[1,s] +
                                    common_shocks[m,s] +
                                    parameter_draws[2,s] * out[count -1,s],
                                  1)
          }
        }
      }
    }
    return(out)
  }

# The same function in Stan -- note that it's almost the same
# difference is that stan function returns a matrix
stan_code <- "
functions {
  matrix sims_in_stan_rng(
    int Individuals,
    int Sims,
    int Months,
    matrix initial_values,
    matrix parameter_draws,
    matrix common_shocks
  ) {
  matrix[Individuals*Months, Sims] out;
    int count = 0;
    for(i in 1:Individuals) {
      for(m in 1:Months) {
        count = count + 1;
        for(s in 1:Sims) {
          if(m==1) {
            out[count,s] = normal_rng(parameter_draws[1,s] +
                                      common_shocks[m,s] +
                                      parameter_draws[2,s] * initial_values[i,2],
                                      1);
          } else {
            out[count,s] = normal_rng(parameter_draws[1,s] +
                                     common_shocks[m,s] +
                                     parameter_draws[2,s] * out[count -1,s],
                                     1);
          }
        }
      }
    }
    return(out);
  }
}
data{}
model{}
"

# write up the stan code
PATH <- "~/Desktop/demo.stan"
fileConn <- file(PATH)
writeLines(stan_code, fileConn)
close(fileConn)

# Let's
expose_stan_functions(PATH)

time_r <- system.time({
  test_r <- sims_in_r(
    Individuals,
    Sims,
    Months,
    initial_values,
    parameter_draws,
    common_shocks
  )
})

time_stan <- system.time({
  test_stan <- sims_in_stan_rng(
    Individuals,
    Sims,
    Months,
    as.matrix(initial_values),
    parameter_draws,
    common_shocks
  )
})

time_r/time_stan
	library(tidyverse)
	library(rstan)

	## HOW THE SIMULATION LOOP BELOW WORKS -- ignore the shocks for now --------------
	# quick example
	I = 3 # individuals
	M = 5 # "months"
	S = 7 # sims
	mat <- matrix(NA, IM, S) # M rows (months) will be filled in at a time for each individual i across all S columns (simulations); records for individual i will populate rows ((i - 1)M + 1):(i*M)

	# store starting values
	initial_values <- data.frame(customer_id = 1:I,
	initial_value = (1:I) / 10)

	row = 0 # row counter required to properly fill in matrix
	for(i in 1:I) {
	for(m in 1:M) {
	row = row + 1 # advance a row each time we advance a month or individual
	for(s in 1:S) {
	if(m == 1) {
	# values stored produce values: [m].[i][s]
	# that is, ones = month, tenths = individual, hundreths = simulation
	mat[row, s] <- m + initial_values$initial_value[i] + s/100 # fill in with month value
	} else mat[row, s] <- mat[row - 1, s] + 1
	}
	}
	}
	# > mat
	# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
	# [1,] 1.11 1.12 1.13 1.14 1.15 1.16 1.17
	# [2,] 2.11 2.12 2.13 2.14 2.15 2.16 2.17
	# [3,] 3.11 3.12 3.13 3.14 3.15 3.16 3.17
	# [4,] 4.11 4.12 4.13 4.14 4.15 4.16 4.17
	# [5,] 5.11 5.12 5.13 5.14 5.15 5.16 5.17
	# [6,] 1.21 1.22 1.23 1.24 1.25 1.26 1.27
	# [7,] 2.21 2.22 2.23 2.24 2.25 2.26 2.27
	# [8,] 3.21 3.22 3.23 3.24 3.25 3.26 3.27
	# [9,] 4.21 4.22 4.23 4.24 4.25 4.26 4.27
	# [10,] 5.21 5.22 5.23 5.24 5.25 5.26 5.27
	# [11,] 1.31 1.32 1.33 1.34 1.35 1.36 1.37
	# [12,] 2.31 2.32 2.33 2.34 2.35 2.36 2.37
	# [13,] 3.31 3.32 3.33 3.34 3.35 3.36 3.37
	# [14,] 4.31 4.32 4.33 4.34 4.35 4.36 4.37
	# [15,] 5.31 5.32 5.33 5.34 5.35 5.36 5.37

	# we can collect each simulation as a matrix Y where each elements Y(i,j) = Y(i,m)
	lapply(1:S, function(s) matrix(mat[, s], nrow = I, byrow = T)) # by row

	# or, we can create a dataframe of panel data
	indx <- c(sapply(1:I, function(i) rep(i, M)))
	mons <- rep(1:M, I)
	data.frame(customer_id = indx, month = mons, sims = mat)

	#> customer_id month sims.1 sims.2 sims.3 sims.4 sims.5 sims.6 sims.7
	#> 1 1 1 1.11 1.12 1.13 1.14 1.15 1.16 1.17
	#> 2 1 2 2.11 2.12 2.13 2.14 2.15 2.16 2.17
	#> 3 1 3 3.11 3.12 3.13 3.14 3.15 3.16 3.17
	#> 4 1 4 4.11 4.12 4.13 4.14 4.15 4.16 4.17
	#> 5 1 5 5.11 5.12 5.13 5.14 5.15 5.16 5.17
	#> 6 2 1 1.21 1.22 1.23 1.24 1.25 1.26 1.27
	#> 7 2 2 2.21 2.22 2.23 2.24 2.25 2.26 2.27
	#> 8 2 3 3.21 3.22 3.23 3.24 3.25 3.26 3.27
	#> 9 2 4 4.21 4.22 4.23 4.24 4.25 4.26 4.27
	#> 10 2 5 5.21 5.22 5.23 5.24 5.25 5.26 5.27
	#> 11 3 1 1.31 1.32 1.33 1.34 1.35 1.36 1.37
	#> 12 3 2 2.31 2.32 2.33 2.34 2.35 2.36 2.37
	#> 13 3 3 3.31 3.32 3.33 3.34 3.35 3.36 3.37
	#> 14 3 4 4.31 4.32 4.33 4.34 4.35 4.36 4.37
	#> 15 3 5 5.31 5.32 5.33 5.34 5.35 5.36 5.37

	## END -------------

	# Some fake data

	Individuals <- 1000
	Sims <- 1000
	Months <- 12

	initial_values <- data.frame(customer_id = 1:Individuals,
	initial_value = rnorm(Individuals))
	parameter_draws <- matrix(c(rnorm(Sims, 0.5, 0.1),
	rnorm(Sims, 0.3, .1)), 2, Sims, byrow = T)
	common_shocks <- matrix(rnorm(Months*Sims, 0, .1), Months, Sims)


	# OK -- so the model is y_{it} = alpha[t] + beta * y_{it-1} + epsilon_{it}
	# where a[t] ~ normal(a, 0.1) is a common shock to all series.
	# and epsilon is n(0,1)

	# A function to produce sims in R

	sims_in_r <-
	function (
	Individuals,
	Sims,
	Months,
	initial_values,
	parameter_draws,
	common_shocks
	) {
	out <- matrix(NA, Individuals * Months, Sims)
	count <- 0
	for(i in 1:Individuals) {
	for(m in 1:Months) {
	count <- count + 1
	for(s in 1:Sims) {
	if(m==1) {
	out[count,s] <- rnorm(1, parameter_draws[1,s] +
	common_shocks[m,s] +
	parameter_draws[2,s] * initial_values[i,2],
	1)
	} else {
	out[count,s] <- rnorm(1, parameter_draws[1,s] +
	common_shocks[m,s] +
	parameter_draws[2,s] * out[count -1,s],
	1)
	}
	}
	}
	}
	return(out)
	}

	# The same function in Stan -- note that it's almost the same
	# difference is that stan function returns a matrix
	stan_code <- "
	functions {
	matrix sims_in_stan_rng(
	int Individuals,
	int Sims,
	int Months,
	matrix initial_values,
	matrix parameter_draws,
	matrix common_shocks
	) {
	matrix[Individuals*Months, Sims] out;
	int count = 0;
	for(i in 1:Individuals) {
	for(m in 1:Months) {
	count = count + 1;
	for(s in 1:Sims) {
	if(m==1) {
	out[count,s] = normal_rng(parameter_draws[1,s] +
	common_shocks[m,s] +
	parameter_draws[2,s] * initial_values[i,2],
	1);
	} else {
	out[count,s] = normal_rng(parameter_draws[1,s] +
	common_shocks[m,s] +
	parameter_draws[2,s] * out[count -1,s],
	1);
	}
	}
	}
	}
	return(out);
	}
	}
	data{}
	model{}
	"

	# write up the stan code
	PATH <- "~/Desktop/demo.stan"
	fileConn <- file(PATH)
	writeLines(stan_code, fileConn)
	close(fileConn)

	# Let's
	expose_stan_functions(PATH)

	time_r <- system.time({
	test_r <- sims_in_r(
	Individuals,
	Sims,
	Months,
	initial_values,
	parameter_draws,
	common_shocks
	)
	})

	time_stan <- system.time({
	test_stan <- sims_in_stan_rng(
	Individuals,
	Sims,
	Months,
	as.matrix(initial_values),
	parameter_draws,
	common_shocks
	)
	})

	time_r/time_stan