FrankRuns/is-behavior-change-happening.Rmd

## is-behavior-change-happening.Rmd
---
title: "Is-Behavior-Change-Happening"
author: "frank-corrigan"
date: "3/9/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

# libraries
library(tidyverse)
library(rethinking)
```

## Single Behavior - Super Optimistic Starting Point

As a leader, I want to be confident that certain behaviors are being demonstrated across my organizations. I can combine my starting intuition with observational data to understand the uncertainty around the proportion of time these certain behaviors are being practiced.

```{r context-slide-too-optimistic}

# create a grid of estimates for proportion of time behaviors are being used
p_grid <- seq(from=0, to=1, length.out = 1000)

# create a prior for each estimate in the grid above
# in this scenario, the leader is too optimistic saying it's happening
# at least 85% of the time.
prior <- c(rep(0, 850), rep(1, 100), rep(0, 50))

# create a likelihood for each estimate based on observed data
# in this case the observed data is that the leader saw the behavior demonstrated
# at 9/10 meetings.
likelihood <- dbinom(9, size=10, prob=p_grid)

# create posterior. this is a combination of your intuition and
# observed data points.
unstd.posterior <- likelihood * prior

# normalize the posterior so it's a probability
posterior <- unstd.posterior / sum(unstd.posterior)

# sample the posterior so we can WHAT????
samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

print(paste("The behavior is most likely occuring between,",
            round(HPDI(samples)[1],4)*100, "and",
            round(HPDI(samples)[2],4)*100, "percent of the time."))
print(paste("And our best guesss is,",
            round(median(samples),4)*100,
            "percent of the time."))

```

### Visualizing the Optimistic Possibilities

```{r context-slide-too-optimistic-plot}

the_dens <- density(samples)

the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
  mutate(variable = case_when(
    x <= 0.9 ~ "Lower",
    x >= 0.9 ~ "Higher",
    TRUE ~ NA_character_
  ))

ggplot(the_data, aes(x,y)) + geom_line() +
  geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
  geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
  scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
  geom_vline(xintercept = 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 20),
        plot.title = element_text(face = "bold"),
        plot.subtitle = element_text(size = 12),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x="Proportion of Time Behaviors are Happening",
       y="Density of Possibilities",
       title="Super Optimistic Starting Assumption",
       subtitle="Using a super optimistic starting assumption, you can call it a victory...")

```
## Single Behavior - Cautious, Realistic Starting Point

```{r context-slide-cautious-realistic}

# create a grid of estimates for proportion of time behaviors are being used
p_grid <- seq(from=0, to=1, length.out = 1000)

# create a prior for each estimate in the grid above
# in this scenario, the leader is too optimistic saying it's happening
# at least 50% of the time.
prior <- c(rep(0, 500), rep(1, 450), rep(0, 50))
# prior <- c(rep(0, 675), rep(1, 275), rep(0, 50))

# create a likelihood for each estimate based on observed data
# in this case the observed data is that the leader saw the behavior demonstrated
# at 9/10 meetings.
likelihood <- dbinom(9, size=10, prob=p_grid)

# create posterior. this is a combination of your intuition and
# observed data points.
unstd.posterior <- likelihood * prior

# normalize the posterior so it's a probability
posterior <- unstd.posterior / sum(unstd.posterior)

# sample the posterior so we can WHAT????
samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

print(paste("The bahavior is most likely occuring between,",
            round(HPDI(samples)[1],4)*100, "and",
            round(HPDI(samples)[2],4)*100, "percent of the time."))
print(paste("And our best guesss is,",
            round(median(samples),4)*100,
            "percent of the time."))

```
### Visualizing the Cautious Possibilities

```{r context-slide-cautious-realistic-plot}

the_dens <- density(samples)

the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
  mutate(variable = case_when(
    x <= 0.9 ~ "Lower",
    x >= 0.9 ~ "Higher",
    TRUE ~ NA_character_
  ))

ggplot(the_data, aes(x,y)) + geom_line() +
  geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
  geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
  scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
  geom_vline(xintercept = 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 20),
        plot.title = element_text(face = "bold"),
        plot.subtitle = element_text(size = 12),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x="Proportion of Time Behaviors are Happening",
       y="Density of Possibilities",
       title="Cautious & Realistic Starting Assumption",
       subtitle="Using a more cautious & skeptical starting assumption, it's time to listen to your team...")

```

### Variations

```{r variations}

updating <- function(prior, obs_positive, obs) {

  # inputs
  # prior = number between 0-950 representing how confident you
  # are the behaviors are happening at higher rates
  # obs_count = occurrences you observed behavior out of obs
  # obs = number of total observations

  p_grid <- seq(from=0, to=1, length.out = 1000)
  prior <- c(rep(0, prior), rep(1, 1000-prior-50), rep(0, 50))
  likelihood <- dbinom(obs_positive, size=obs, prob=p_grid)
  unstd.posterior <- likelihood * prior
  posterior <- unstd.posterior / sum(unstd.posterior)
  samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

  return(median(samples))

}

# vecs to play with parameters
priors_vec <- c(500, 675, 850)
obs_pos_vec <- c(8, 9, 10)
obs_vec <- c(10, 100, 1000)

# as your starting assumption is more confident,
# your updated confidence grows higher
for (prior in priors_vec) {
  print(updating(prior, 9, 10))
}

# as number of positive observations increases,
# the less uncertainty remains
for (obs_pos in obs_pos_vec) {
  print(updating(500, obs_pos, 10))
}

# as the total number of observations increases,
# and the rate of positives remains steady,
# the certainty converges toward the observed data
for (obs in obs_vec) {
  print(updating(500, obs*0.9, obs))
}

```
	---
	title: "Is-Behavior-Change-Happening"
	author: "frank-corrigan"
	date: "3/9/2022"
	output: html_document
	---

	```{r setup, include=FALSE}
	knitr::opts_chunk$set(echo = TRUE)

	# libraries
	library(tidyverse)
	library(rethinking)
	```

	## Single Behavior - Super Optimistic Starting Point

	As a leader, I want to be confident that certain behaviors are being demonstrated across my organizations. I can combine my starting intuition with observational data to understand the uncertainty around the proportion of time these certain behaviors are being practiced.

	```{r context-slide-too-optimistic}

	# create a grid of estimates for proportion of time behaviors are being used
	p_grid <- seq(from=0, to=1, length.out = 1000)

	# create a prior for each estimate in the grid above
	# in this scenario, the leader is too optimistic saying it's happening
	# at least 85% of the time.
	prior <- c(rep(0, 850), rep(1, 100), rep(0, 50))

	# create a likelihood for each estimate based on observed data
	# in this case the observed data is that the leader saw the behavior demonstrated
	# at 9/10 meetings.
	likelihood <- dbinom(9, size=10, prob=p_grid)

	# create posterior. this is a combination of your intuition and
	# observed data points.
	unstd.posterior <- likelihood * prior

	# normalize the posterior so it's a probability
	posterior <- unstd.posterior / sum(unstd.posterior)

	# sample the posterior so we can WHAT????
	samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

	print(paste("The behavior is most likely occuring between,",
	round(HPDI(samples)[1],4)*100, "and",
	round(HPDI(samples)[2],4)*100, "percent of the time."))
	print(paste("And our best guesss is,",
	round(median(samples),4)*100,
	"percent of the time."))

	```

	### Visualizing the Optimistic Possibilities

	```{r context-slide-too-optimistic-plot}

	the_dens <- density(samples)

	the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
	mutate(variable = case_when(
	x <= 0.9 ~ "Lower",
	x >= 0.9 ~ "Higher",
	TRUE ~ NA_character_
	))

	ggplot(the_data, aes(x,y)) + geom_line() +
	geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
	geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
	scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
	geom_vline(xintercept = 0.9) +
	theme_minimal() +
	theme(text = element_text(size = 20),
	plot.title = element_text(face = "bold"),
	plot.subtitle = element_text(size = 12),
	axis.text.y = element_blank(),
	axis.ticks.y = element_blank()) +
	labs(x="Proportion of Time Behaviors are Happening",
	y="Density of Possibilities",
	title="Super Optimistic Starting Assumption",
	subtitle="Using a super optimistic starting assumption, you can call it a victory...")

	```
	## Single Behavior - Cautious, Realistic Starting Point

	```{r context-slide-cautious-realistic}

	# create a grid of estimates for proportion of time behaviors are being used
	p_grid <- seq(from=0, to=1, length.out = 1000)

	# create a prior for each estimate in the grid above
	# in this scenario, the leader is too optimistic saying it's happening
	# at least 50% of the time.
	prior <- c(rep(0, 500), rep(1, 450), rep(0, 50))
	# prior <- c(rep(0, 675), rep(1, 275), rep(0, 50))

	# create a likelihood for each estimate based on observed data
	# in this case the observed data is that the leader saw the behavior demonstrated
	# at 9/10 meetings.
	likelihood <- dbinom(9, size=10, prob=p_grid)

	# create posterior. this is a combination of your intuition and
	# observed data points.
	unstd.posterior <- likelihood * prior

	# normalize the posterior so it's a probability
	posterior <- unstd.posterior / sum(unstd.posterior)

	# sample the posterior so we can WHAT????
	samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

	print(paste("The bahavior is most likely occuring between,",
	round(HPDI(samples)[1],4)*100, "and",
	round(HPDI(samples)[2],4)*100, "percent of the time."))
	print(paste("And our best guesss is,",
	round(median(samples),4)*100,
	"percent of the time."))

	```
	### Visualizing the Cautious Possibilities

	```{r context-slide-cautious-realistic-plot}

	the_dens <- density(samples)

	the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
	mutate(variable = case_when(
	x <= 0.9 ~ "Lower",
	x >= 0.9 ~ "Higher",
	TRUE ~ NA_character_
	))

	ggplot(the_data, aes(x,y)) + geom_line() +
	geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
	geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
	scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
	geom_vline(xintercept = 0.9) +
	theme_minimal() +
	theme(text = element_text(size = 20),
	plot.title = element_text(face = "bold"),
	plot.subtitle = element_text(size = 12),
	axis.text.y = element_blank(),
	axis.ticks.y = element_blank()) +
	labs(x="Proportion of Time Behaviors are Happening",
	y="Density of Possibilities",
	title="Cautious & Realistic Starting Assumption",
	subtitle="Using a more cautious & skeptical starting assumption, it's time to listen to your team...")

	```

	### Variations

	```{r variations}

	updating <- function(prior, obs_positive, obs) {

	# inputs
	# prior = number between 0-950 representing how confident you
	# are the behaviors are happening at higher rates
	# obs_count = occurrences you observed behavior out of obs
	# obs = number of total observations

	p_grid <- seq(from=0, to=1, length.out = 1000)
	prior <- c(rep(0, prior), rep(1, 1000-prior-50), rep(0, 50))
	likelihood <- dbinom(obs_positive, size=obs, prob=p_grid)
	unstd.posterior <- likelihood * prior
	posterior <- unstd.posterior / sum(unstd.posterior)
	samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

	return(median(samples))

	}

	# vecs to play with parameters
	priors_vec <- c(500, 675, 850)
	obs_pos_vec <- c(8, 9, 10)
	obs_vec <- c(10, 100, 1000)

	# as your starting assumption is more confident,
	# your updated confidence grows higher
	for (prior in priors_vec) {
	print(updating(prior, 9, 10))
	}

	# as number of positive observations increases,
	# the less uncertainty remains
	for (obs_pos in obs_pos_vec) {
	print(updating(500, obs_pos, 10))
	}

	# as the total number of observations increases,
	# and the rate of positives remains steady,
	# the certainty converges toward the observed data
	for (obs in obs_vec) {
	print(updating(500, obs*0.9, obs))
	}

	```