FrankRuns/max-heart-rate.R

## max-heart-rate.R

# purpose: helper script to determine my max heart rate

# load libraries
library(dplyr)
library(rethinking)

#### Read and Filter Data ----

# grab data
d <- read.csv("Activities.csv")

# convert string pace to numeric seconds pace
get_pace_seconds <- function(x){
  temp <- strsplit(x, ":")[[1]]
  return((as.numeric(temp[1]) * 60) +
           (as.numeric(temp[2])))
}
d$avg_pace_seconds <- unlist(lapply(d$Avg.Pace, function(x) get_pace_seconds(x)))

# filter
# method 1: hardest efforts
# d <- d %>% filter(pace_seconds <= 330 | Distance > 20)
# method 2: over the lower bound
# d <- d %>% filter(Max.HR > 182)
# method 3: top 5%
d <- d %>% filter(Max.HR < 200) %>%
  filter(Activity.Type == "Running") %>%
  arrange(desc(Max.HR)) %>%
  head(42)

#### Priors ----

# priors for mean of max hr
curve(dnorm(x, 190, 2),
      from = 182,
      to = 194)

# prior for standard deviation of max hr
curve(dunif(x, 2, 4), from = -2, to = 10)

# now "prior predictive simulation"
sample_mu <- rnorm(1e4, 188, 1)
sample_sigma <- runif(1e4, 1, 5)
prior_p <- rnorm(1e4, sample_mu, sample_sigma)
dens(prior_p, xlab="Max HR Values")
abline(v = 182, col="blue", lty=2)
abline(v = 194, col="blue", lty=2)

# how much lands "inside" of our limits?
cum_dist <- ecdf(prior_p)
1-((cum_dist(182)) + (1-cum_dist(194)))

#### Posterior (brute force) ----

# brute force calculation of posterior (from Rethinking)
mu.list <- seq(from=188, to=194, length.out = 200)
sigma.list <- seq(from=2, to=5, length.out = 200)
post <- expand.grid(mu=mu.list, sigma=sigma.list)
post$LL <- sapply(1:nrow(post), function(i) sum(
  dnorm(d$Max.HR, post$mu[i], post$sigma[i], log=TRUE)
))
post$prod <- post$LL + dnorm(post$mu, 188, 1, TRUE) +
  dunif(post$sigma, 1, 5, TRUE)
post$prob <- exp(post$prod - max(post$prod))
# on line above, remember, if not log, all probabilities quickly go to zero

# plot contour and heatmap
contour_xyz(post$mu, post$sigma, post$prob)
image_xyz(post$mu, post$sigma, post$prob)

# sample the posterior distribution
sample.rows <- sample(1:nrow(post), size=1e4, replace=TRUE, prob=post$prob)
sample.mu <- post$mu[sample.rows]
sample.sigma <- post$sigma[sample.rows]

# plot samples
plot(sample.mu, sample.sigma, cex=0.5, pch=16, col=col.alpha(rangi2, 0.1))
dens(sample.mu)
dens(sample.sigma)

# possibilities intervals for mean and standard deviation
PI(sample.mu)
PI(sample.sigma)

#### Posterior (quap) ----

flist <- alist(
  Max.HR ~ dnorm( mu, sigma),
  mu ~ dnorm(188, 1),
  sigma ~ dunif(1, 5)
)
m4.1 <- quap(flist, data=d)
precis(m4.1)

cov2cor(vcov(m4.1))
# might be an issue here!
# why would mean and sd be negatively correlated?
# is it the number of data points are each band?
# if so, does this invalidate the results?

# extract samples from the posterior
post <- extract.samples(m4.1, n=1e4)
head(post)
precis(post)

# sample the samples
test <- rnorm(10000, post$mu, post$sigma)
mean(test)
PI(test)

# how much lands "inside" of our limits?
cum_dist <- ecdf(test)
1-((cum_dist(182)) + (1-cum_dist(194)))

# plot posterior distribution
plot(density(test), xlab="Max HR Values")
abline(v = 182, col="blue", lty=2)
abline(v = 194, col="blue", lty=2)

	# purpose: helper script to determine my max heart rate

	# load libraries
	library(dplyr)
	library(rethinking)

	#### Read and Filter Data ----

	# grab data
	d <- read.csv("Activities.csv")

	# convert string pace to numeric seconds pace
	get_pace_seconds <- function(x){
	temp <- strsplit(x, ":")[[1]]
	return((as.numeric(temp[1]) * 60) +
	(as.numeric(temp[2])))
	}
	d$avg_pace_seconds <- unlist(lapply(d$Avg.Pace, function(x) get_pace_seconds(x)))

	# filter
	# method 1: hardest efforts
	# d <- d %>% filter(pace_seconds <= 330 \| Distance > 20)
	# method 2: over the lower bound
	# d <- d %>% filter(Max.HR > 182)
	# method 3: top 5%
	d <- d %>% filter(Max.HR < 200) %>%
	filter(Activity.Type == "Running") %>%
	arrange(desc(Max.HR)) %>%
	head(42)

	#### Priors ----

	# priors for mean of max hr
	curve(dnorm(x, 190, 2),
	from = 182,
	to = 194)

	# prior for standard deviation of max hr
	curve(dunif(x, 2, 4), from = -2, to = 10)

	# now "prior predictive simulation"
	sample_mu <- rnorm(1e4, 188, 1)
	sample_sigma <- runif(1e4, 1, 5)
	prior_p <- rnorm(1e4, sample_mu, sample_sigma)
	dens(prior_p, xlab="Max HR Values")
	abline(v = 182, col="blue", lty=2)
	abline(v = 194, col="blue", lty=2)

	# how much lands "inside" of our limits?
	cum_dist <- ecdf(prior_p)
	1-((cum_dist(182)) + (1-cum_dist(194)))

	#### Posterior (brute force) ----

	# brute force calculation of posterior (from Rethinking)
	mu.list <- seq(from=188, to=194, length.out = 200)
	sigma.list <- seq(from=2, to=5, length.out = 200)
	post <- expand.grid(mu=mu.list, sigma=sigma.list)
	post$LL <- sapply(1:nrow(post), function(i) sum(
	dnorm(d$Max.HR, post$mu[i], post$sigma[i], log=TRUE)
	))
	post$prod <- post$LL + dnorm(post$mu, 188, 1, TRUE) +
	dunif(post$sigma, 1, 5, TRUE)
	post$prob <- exp(post$prod - max(post$prod))
	# on line above, remember, if not log, all probabilities quickly go to zero

	# plot contour and heatmap
	contour_xyz(post$mu, post$sigma, post$prob)
	image_xyz(post$mu, post$sigma, post$prob)

	# sample the posterior distribution
	sample.rows <- sample(1:nrow(post), size=1e4, replace=TRUE, prob=post$prob)
	sample.mu <- post$mu[sample.rows]
	sample.sigma <- post$sigma[sample.rows]

	# plot samples
	plot(sample.mu, sample.sigma, cex=0.5, pch=16, col=col.alpha(rangi2, 0.1))
	dens(sample.mu)
	dens(sample.sigma)

	# possibilities intervals for mean and standard deviation
	PI(sample.mu)
	PI(sample.sigma)

	#### Posterior (quap) ----

	flist <- alist(
	Max.HR ~ dnorm( mu, sigma),
	mu ~ dnorm(188, 1),
	sigma ~ dunif(1, 5)
	)
	m4.1 <- quap(flist, data=d)
	precis(m4.1)

	cov2cor(vcov(m4.1))
	# might be an issue here!
	# why would mean and sd be negatively correlated?
	# is it the number of data points are each band?
	# if so, does this invalidate the results?

	# extract samples from the posterior
	post <- extract.samples(m4.1, n=1e4)
	head(post)
	precis(post)

	# sample the samples
	test <- rnorm(10000, post$mu, post$sigma)
	mean(test)
	PI(test)

	# how much lands "inside" of our limits?
	cum_dist <- ecdf(test)
	1-((cum_dist(182)) + (1-cum_dist(194)))

	# plot posterior distribution
	plot(density(test), xlab="Max HR Values")
	abline(v = 182, col="blue", lty=2)
	abline(v = 194, col="blue", lty=2)