# diamonaj/Quiz_3_answers.R

## Quiz_3_answers.R
# Quiz 3 ANSWERS
#######

# Read the article:
#  https://www.menshealth.com/trending-news/a30894231/amazon-interview-sock-puzzle/


# 1. Write a function that will simulate the act of pulling 2 socks out of the drawer
#    exactly as described in the Men's Health article. (i.e., selection without replacement)

#    I would like the output of your function to be two numbers indicating the colors of the two socks:
#    a 1 if the sock is black, and a 0 if the sock is white.

#    For example, if a person were to execute your function and it were to randomly select
#    two black socks, then your function should output: 1 1

# There are many different ways to write this function correctly.
# One such way appears below:

# Simulate pulling 2 socks, without replacement, out of a drawer that holds
# 12 white socks and 12 black socks.
#
# Takes no arguments.
# Returns a numeric vector of length 2 giving the colors drawn:
# 1 for a black sock, 0 for a white sock (e.g., 1,0 or 0,1 or 1,1 or 0,0).
pulling_sock_function <- function()
{
  # the drawer: twelve 0s (white socks) followed by twelve 1s (black socks)
  drawer <- rep(c(0, 1), each = 12)

  # draw 2 socks at random; replace = FALSE means the same sock
  # cannot be pulled twice
  sample(x = drawer, size = 2, replace = FALSE)
}


# 2. Run your function 4 times, showing the results for each run.

# Four independent draws. The draws are random, so the outputs recorded in the
# comments below are just one example run; yours will differ.
pulling_sock_function()
#[1] 0 1

pulling_sock_function()
#[1] 1 1

pulling_sock_function()
#[1] 0 0

pulling_sock_function()
#[1] 1 0


# 3. Create a 'for loop' that runs your function 100 times, producing output each time.
#    Can you figure out how to store the results each time? You could fill up 2 vectors, or a data frame.
#    If you can do that, then after your 100 loops, you can check to see how often you got 0 0 or 1 1,
#    and you could see how closely your simulated results approximated the exact probability
#    given in the answer in the article.

# create an empty storage matrix
# Run 100 trials, keeping each drawn pair as one row of a 100 x 2 matrix
# (column 1 = first sock, column 2 = second sock). The matrix starts out
# filled with NA and is overwritten one row per trial.
storage_matrix <- matrix(NA, nrow = 100, ncol = 2)

for (i in seq_len(100))
{
  selected_socks <- pulling_sock_function()

  # store both socks of this trial in row i
  storage_matrix[i, ] <- selected_socks[1:2]
}

## How many times did we get matching socks (out of 100 trials)?
## Well, it turns out that there is a built-in function that sums the rows of data frames and matrices.
## This function is called rowSums(). (If it wasn't built-in, you could create it yourself.)
## Let's identify instances when the 2 elements in each row summed to 0 or 2
## Because these are the instances of matching socks. Mismatched socks sum to 1 (0 + 1 or 1 + 0).

# Flag the trials that produced a matching pair: a row total of 0 means two
# white socks, a row total of 2 means two black socks. The row totals are
# computed once and reused for both comparisons.
pair_totals <- rowSums(storage_matrix)
is_matching_pair <- pair_totals == 0 | pair_totals == 2
is_matching_pair
#[1] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE
#[19] FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE
#[37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE
#[55] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE
#[73]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
#[91] FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE


## In how many of the 100 trials was this summing condition TRUE?
sum(is_matching_pair)
#[1] 39
# (your answer will probably be different due to random chance variation)
# therefore, the simulated probability in my case was 39% for a matching pair of socks.
# this is a poor approximation to the true probability because we only attempted 100 trials.
# for better accuracy, we should run more trials (like 10,000, as I do in the example below).


# 4. Make your function more sophisticated by allowing the user to specify two numbers:
#    The number of black socks, and the number of white socks.

# we'll set the numbers of each color sock to 12 by default...
# Simulate pulling 2 socks, without replacement, out of a drawer whose
# makeup the caller chooses.
#
# Arguments:
#   number_of_black_socks: how many black socks (coded 1) are in the drawer.
#   number_of_white_socks: how many white socks (coded 0) are in the drawer.
#
# Returns a numeric vector of length 2 giving the colors drawn
# (e.g., 1,0 or 0,1 or 1,1 or 0,0).
# Stops with an error if the drawer holds fewer than 2 socks in total.
pulling_sock_function2 <- function(number_of_black_socks = 12,
                                   number_of_white_socks = 12)
{
  # two socks cannot be drawn from a drawer holding fewer than two
  if (number_of_black_socks + number_of_white_socks < 2)
  {
    stop("the drawer must contain at least 2 socks in total", call. = FALSE)
  }

  # the black socks
  black_socks <- rep(1, number_of_black_socks) # e.g., 1,1,1,...1

  # the white socks
  white_socks <- rep(0, number_of_white_socks) # e.g., 0,0,0,...0

  # pick 2 randomly, without replacement
  pick_two <- sample(x = c(white_socks, black_socks),
                     size = 2,
                     replace = FALSE)

  # return the pair explicitly: a function that ends on an assignment returns
  # its value invisibly, so a bare call at the console would print nothing
  return(pick_two)
}

# 5. Perform simulations with different numbers of socks (e.g., start with 1000 loops of 2 white and 2 black socks,
#    and then try 1000 loops of 3 white and 3 black socks, and then try 1000 loops of 4 white and 4 black, etc.),
#    to demonstrate what happens as the amounts of socks increases while holding the levels of white and black socks equal.
#    Create a scatterplot showing how the simulated probability changes as numbers of socks change.
#    Label the plot and the axes.

# in the example below, I use 10,000 trials for each set of socks, starting with 2 black and 2 white
# Drawer of 2 black + 2 white socks (4 total): draw a pair 10,000 times,
# storing each pair as one row.
storage_matrix_4_socks <- matrix(NA, nrow = 10000, ncol = 2)
for (i in seq_len(10000))
{
  selected_socks <- pulling_sock_function2(number_of_white_socks = 2,
                                           number_of_black_socks = 2)

  storage_matrix_4_socks[i, ] <- selected_socks[1:2]
}

# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
prob_4_socks <- local({
  pair_totals <- rowSums(storage_matrix_4_socks)
  sum(pair_totals == 0 | pair_totals == 2) / 10000
})

###

# Drawer of 3 black + 3 white socks (6 total): draw a pair 10,000 times,
# storing each pair as one row.
storage_matrix_6_socks <- matrix(NA, nrow = 10000, ncol = 2)
for (i in seq_len(10000))
{
  selected_socks <- pulling_sock_function2(number_of_white_socks = 3,
                                           number_of_black_socks = 3)

  storage_matrix_6_socks[i, ] <- selected_socks[1:2]
}

# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
prob_6_socks <- local({
  pair_totals <- rowSums(storage_matrix_6_socks)
  sum(pair_totals == 0 | pair_totals == 2) / 10000
})

##

# Drawer of 4 black + 4 white socks (8 total): draw a pair 10,000 times,
# storing each pair as one row.
storage_matrix_8_socks <- matrix(NA, nrow = 10000, ncol = 2)
for (i in seq_len(10000))
{
  selected_socks <- pulling_sock_function2(number_of_white_socks = 4,
                                           number_of_black_socks = 4)

  storage_matrix_8_socks[i, ] <- selected_socks[1:2]
}

# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
prob_8_socks <- local({
  pair_totals <- rowSums(storage_matrix_8_socks)
  sum(pair_totals == 0 | pair_totals == 2) / 10000
})

###

# Drawer of 5 black + 5 white socks (10 total): draw a pair 10,000 times,
# storing each pair as one row.
storage_matrix_10_socks <- matrix(NA, nrow = 10000, ncol = 2)
for (i in seq_len(10000))
{
  selected_socks <- pulling_sock_function2(number_of_white_socks = 5,
                                           number_of_black_socks = 5)

  storage_matrix_10_socks[i, ] <- selected_socks[1:2]
}

# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
prob_10_socks <- local({
  pair_totals <- rowSums(storage_matrix_10_socks)
  sum(pair_totals == 0 | pair_totals == 2) / 10000
})

##

# Drawer of 6 black + 6 white socks (12 total): draw a pair 10,000 times,
# storing each pair as one row.
storage_matrix_12_socks <- matrix(NA, nrow = 10000, ncol = 2)
for (i in seq_len(10000))
{
  selected_socks <- pulling_sock_function2(number_of_white_socks = 6,
                                           number_of_black_socks = 6)

  storage_matrix_12_socks[i, ] <- selected_socks[1:2]
}

# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
prob_12_socks <- local({
  pair_totals <- rowSums(storage_matrix_12_socks)
  sum(pair_totals == 0 | pair_totals == 2) / 10000
})

##

# Drawer of 7 black + 7 white socks (14 total): draw a pair 10,000 times,
# storing each pair as one row.
storage_matrix_14_socks <- matrix(data = NA, nrow = 10000, ncol = 2)
for(i in 1:10000)
{
  # 7 of each color gives the 14 socks this case is meant to simulate
  # (6 of each would just repeat the 12-sock case)
  selected_socks <- pulling_sock_function2(number_of_black_socks = 7,
                                           number_of_white_socks = 7)

  storage_matrix_14_socks[i,1] <- selected_socks[1]
  storage_matrix_14_socks[i,2] <- selected_socks[2]
}

# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
prob_14_socks <-
  sum(rowSums(storage_matrix_14_socks) == 0 | rowSums(storage_matrix_14_socks) == 2) / 10000

######### Now that I've simulated drawing 2 socks for cases of 4 to 14 total socks,
#### let's make a fancy figure showing the relationship
#### between numbers of socks and probability of matching socks

# x-axis values: total socks in each simulated drawer (4, 6, ..., 14)
numbers_of_socks <- seq(from = 4, to = 14, by = 2)

# y-axis values: the simulated matching-pair probability for each drawer
# size, in the same order as numbers_of_socks
simulated_probabilities <- c(prob_4_socks, prob_6_socks, prob_8_socks,
                             prob_10_socks, prob_12_socks, prob_14_socks)

# scatterplot of drawer size against simulated matching probability,
# drawn with large red diamonds
plot(numbers_of_socks,
     simulated_probabilities,
     main = "Relationship Btw Total Number of Socks \nand Probability Of Randomly Picking a Matching Pair",
     xlab = "Total Socks",
     ylab = "Simulated Probabilities That Socks Match",
     pch = 18, cex = 2, col = "red")
	# Quiz 3 ANSWERS
	#######

	# Read the article:
	# https://www.menshealth.com/trending-news/a30894231/amazon-interview-sock-puzzle/


	# 1. Write a function that will simulate the act of pulling 2 socks out of the drawer
	# exactly as described in the Men's Health article. (i.e., selection without replacement)

	# I would like the output of your function to be two numbers indicating the colors of the two socks:
	# a 1 if the sock is black, and a 0 if the sock is white.

	# For example, if a person were to execute your function and it were to randomly select
	# two black socks, then your function should output: 1 1

	# There are many different ways to write this function correctly.
	# One such way appears below:

	# Simulate pulling 2 socks, without replacement, out of a drawer that holds
	# 12 white socks and 12 black socks.
	#
	# Takes no arguments.
	# Returns a numeric vector of length 2 giving the colors drawn:
	# 1 for a black sock, 0 for a white sock (e.g., 1,0 or 0,1 or 1,1 or 0,0).
	pulling_sock_function <- function()
	{
	  # drawer contents: 12 white socks (0s) first, then 12 black socks (1s)
	  all_socks <- c(numeric(12), rep(1, 12))

	  # two random socks; replace = FALSE so one sock is never drawn twice
	  drawn_pair <- sample(x = all_socks, size = 2, replace = FALSE)
	  drawn_pair
	}


	# 2. Run your function 4 times, showing the results for each run.

	# Four independent draws. The draws are random, so the outputs recorded in the
	# comments below are just one example run; yours will differ.
	pulling_sock_function()
	#[1] 0 1

	pulling_sock_function()
	#[1] 1 1

	pulling_sock_function()
	#[1] 0 0

	pulling_sock_function()
	#[1] 1 0


	# 3. Create a 'for loop' that runs your function 100 times, producing output each time.
	# Can you figure out how to store the results each time? You could fill up 2 vectors, or a data frame.
	# If you can do that, then after your 100 loops, you can check to see how often you got 0 0 or 1 1,
	# and you could see how closely your simulated results approximated the exact probability
	# given in the answer in the article.

	# create an empty storage matrix
	# Run 100 trials, keeping each drawn pair as one row of a 100 x 2 matrix
	# (column 1 = first sock, column 2 = second sock). The matrix starts out
	# filled with NA and is overwritten one row per trial.
	storage_matrix <- matrix(NA, nrow = 100, ncol = 2)

	for (i in seq_len(100))
	{
	  selected_socks <- pulling_sock_function()

	  # store both socks of this trial in row i
	  storage_matrix[i, ] <- selected_socks[1:2]
	}

	## How many times did we get matching socks (out of 100 trials)?
	## Well, it turns out that there is a built-in function that sums the rows of data frames and matrices.
	## This function is called rowSums(). (If it wasn't built-in, you could create it yourself.)
	## Let's identify instances when the 2 elements in each row summed to 0 or 2
	## Because these are the instances of matching socks. Mismatched socks sum to 1 (0 + 1 or 1 + 0).

	# Flag the trials that produced a matching pair: a row total of 0 means two
	# white socks, a row total of 2 means two black socks. The row totals are
	# computed once and reused for both comparisons.
	pair_totals <- rowSums(storage_matrix)
	is_matching_pair <- pair_totals == 0 | pair_totals == 2
	is_matching_pair
	#[1] FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE
	#[19] FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE
	#[37] FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE
	#[55] FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
	#[73] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
	#[91] FALSE TRUE TRUE FALSE TRUE FALSE FALSE FALSE FALSE TRUE


	## In how many of the 100 trials was this summing condition TRUE?
	sum(is_matching_pair)
	#[1] 39
	# (your answer will probably be different due to random chance variation)
	# therefore, the simulated probability in my case was 39% for a matching pair of socks.
	# this is a poor approximation to the true probability because we only attempted 100 trials.
	# for better accuracy, we should run more trials (like 10,000, as I do in the example below).



	# 4. Make your function more sophisticated by allowing the user to specify two numbers:
	# The number of black socks, and the number of white socks.

	# we'll set the numbers of each color sock to 12 by default...
	# Simulate pulling 2 socks, without replacement, out of a drawer whose
	# makeup the caller chooses.
	#
	# Arguments:
	#   number_of_black_socks: how many black socks (coded 1) are in the drawer.
	#   number_of_white_socks: how many white socks (coded 0) are in the drawer.
	#
	# Returns a numeric vector of length 2 giving the colors drawn
	# (e.g., 1,0 or 0,1 or 1,1 or 0,0).
	# Stops with an error if the drawer holds fewer than 2 socks in total.
	pulling_sock_function2 <- function(number_of_black_socks = 12,
	                                   number_of_white_socks = 12)
	{
	  # two socks cannot be drawn from a drawer holding fewer than two
	  if (number_of_black_socks + number_of_white_socks < 2)
	  {
	    stop("the drawer must contain at least 2 socks in total", call. = FALSE)
	  }

	  # the black socks
	  black_socks <- rep(1, number_of_black_socks) # e.g., 1,1,1,...1

	  # the white socks
	  white_socks <- rep(0, number_of_white_socks) # e.g., 0,0,0,...0

	  # pick 2 randomly, without replacement
	  pick_two <- sample(x = c(white_socks, black_socks),
	                     size = 2,
	                     replace = FALSE)

	  # return the pair explicitly: a function that ends on an assignment returns
	  # its value invisibly, so a bare call at the console would print nothing
	  return(pick_two)
	}

	# 5. Perform simulations with different numbers of socks (e.g., start with 1000 loops of 2 white and 2 black socks,
	# and then try 1000 loops of 3 white and 3 black socks, and then try 1000 loops of 4 white and 4 black, etc.),
	# to demonstrate what happens as the amounts of socks increases while holding the levels of white and black socks equal.
	# Create a scatterplot showing how the simulated probability changes as numbers of socks change.
	# Label the plot and the axes.

	# in the example below, I use 10,000 trials for each set of socks, starting with 2 black and 2 white
	# Drawer of 2 black + 2 white socks (4 total): draw a pair 10,000 times,
	# storing each pair as one row.
	storage_matrix_4_socks <- matrix(data = NA, nrow = 10000, ncol = 2)
	for(i in 1:10000)
	{
	  selected_socks <- pulling_sock_function2(number_of_black_socks = 2,
	                                           number_of_white_socks = 2)

	  storage_matrix_4_socks[i,1] <- selected_socks[1]
	  storage_matrix_4_socks[i,2] <- selected_socks[2]
	}

	# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
	prob_4_socks <-
	  sum(rowSums(storage_matrix_4_socks) == 0 | rowSums(storage_matrix_4_socks) == 2) / 10000

	###

	# Drawer of 3 black + 3 white socks (6 total): draw a pair 10,000 times,
	# storing each pair as one row.
	storage_matrix_6_socks <- matrix(data = NA, nrow = 10000, ncol = 2)
	for(i in 1:10000)
	{
	  selected_socks <- pulling_sock_function2(number_of_black_socks = 3,
	                                           number_of_white_socks = 3)

	  storage_matrix_6_socks[i,1] <- selected_socks[1]
	  storage_matrix_6_socks[i,2] <- selected_socks[2]
	}

	# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
	prob_6_socks <-
	  sum(rowSums(storage_matrix_6_socks) == 0 | rowSums(storage_matrix_6_socks) == 2) / 10000

	##

	# Drawer of 4 black + 4 white socks (8 total): draw a pair 10,000 times,
	# storing each pair as one row.
	storage_matrix_8_socks <- matrix(data = NA, nrow = 10000, ncol = 2)
	for(i in 1:10000)
	{
	  selected_socks <- pulling_sock_function2(number_of_black_socks = 4,
	                                           number_of_white_socks = 4)

	  storage_matrix_8_socks[i,1] <- selected_socks[1]
	  storage_matrix_8_socks[i,2] <- selected_socks[2]
	}

	# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
	prob_8_socks <-
	  sum(rowSums(storage_matrix_8_socks) == 0 | rowSums(storage_matrix_8_socks) == 2) / 10000

	###

	# Drawer of 5 black + 5 white socks (10 total): draw a pair 10,000 times,
	# storing each pair as one row.
	storage_matrix_10_socks <- matrix(data = NA, nrow = 10000, ncol = 2)
	for(i in 1:10000)
	{
	  selected_socks <- pulling_sock_function2(number_of_black_socks = 5,
	                                           number_of_white_socks = 5)

	  storage_matrix_10_socks[i,1] <- selected_socks[1]
	  storage_matrix_10_socks[i,2] <- selected_socks[2]
	}

	# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
	prob_10_socks <-
	  sum(rowSums(storage_matrix_10_socks) == 0 | rowSums(storage_matrix_10_socks) == 2) / 10000

	##

	# Drawer of 6 black + 6 white socks (12 total): draw a pair 10,000 times,
	# storing each pair as one row.
	storage_matrix_12_socks <- matrix(data = NA, nrow = 10000, ncol = 2)
	for(i in 1:10000)
	{
	  selected_socks <- pulling_sock_function2(number_of_black_socks = 6,
	                                           number_of_white_socks = 6)

	  storage_matrix_12_socks[i,1] <- selected_socks[1]
	  storage_matrix_12_socks[i,2] <- selected_socks[2]
	}

	# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
	prob_12_socks <-
	  sum(rowSums(storage_matrix_12_socks) == 0 | rowSums(storage_matrix_12_socks) == 2) / 10000

	##

	# Drawer of 7 black + 7 white socks (14 total): draw a pair 10,000 times,
	# storing each pair as one row.
	storage_matrix_14_socks <- matrix(data = NA, nrow = 10000, ncol = 2)
	for(i in 1:10000)
	{
	  # 7 of each color gives the 14 socks this case is meant to simulate
	  # (6 of each would just repeat the 12-sock case)
	  selected_socks <- pulling_sock_function2(number_of_black_socks = 7,
	                                           number_of_white_socks = 7)

	  storage_matrix_14_socks[i,1] <- selected_socks[1]
	  storage_matrix_14_socks[i,2] <- selected_socks[2]
	}

	# share of trials whose pair matched (row total 0 = white pair, 2 = black pair)
	prob_14_socks <-
	  sum(rowSums(storage_matrix_14_socks) == 0 | rowSums(storage_matrix_14_socks) == 2) / 10000

	######### Now that I've simulated drawing 2 socks for cases of 4 to 14 total socks,
	#### let's make a fancy figure showing the relationship
	#### between numbers of socks and probability of matching socks

	# x-axis values: total socks per simulated drawer, i.e. twice the
	# per-color counts 2 through 7 (4, 6, 8, 10, 12, 14)
	numbers_of_socks <- 2 * (2:7)

	# y-axis values: the simulated matching-pair probability for each drawer
	# size, in the same order as numbers_of_socks
	simulated_probabilities <- c(
	  prob_4_socks, prob_6_socks, prob_8_socks,
	  prob_10_socks, prob_12_socks, prob_14_socks
	)

	# scatterplot of drawer size against simulated matching probability,
	# drawn with large red diamonds
	plot(numbers_of_socks, simulated_probabilities,
	     col = "red", pch = 18, cex = 2,
	     main = "Relationship Btw Total Number of Socks \nand Probability Of Randomly Picking a Matching Pair",
	     xlab = "Total Socks",
	     ylab = "Simulated Probabilities That Socks Match")