This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# purpose: helper script to determine my max heart rate | |
# load libraries | |
library(dplyr) | |
library(rethinking) | |
#### Read and Filter Data ---- | |
# grab data | |
d <- read.csv("Activities.csv") | |
# convert string pace to numeric seconds pace | |
get_pace_seconds <- function(x){ | |
temp <- strsplit(x, ":")[[1]] | |
return((as.numeric(temp[1]) * 60) + | |
(as.numeric(temp[2]))) | |
} | |
d$avg_pace_seconds <- unlist(lapply(d$Avg.Pace, function(x) get_pace_seconds(x))) | |
# filter | |
# method 1: hardest efforts | |
# d <- d %>% filter(pace_seconds <= 330 | Distance > 20) | |
# method 2: over the lower bound | |
# d <- d %>% filter(Max.HR > 182) | |
# method 3: top 5% | |
d <- d %>% filter(Max.HR < 200) %>% | |
filter(Activity.Type == "Running") %>% | |
arrange(desc(Max.HR)) %>% | |
head(42) | |
#### Priors ---- | |
# priors for mean of max hr | |
curve(dnorm(x, 190, 2), | |
from = 182, | |
to = 194) | |
# prior for standard deviation of max hr | |
curve(dunif(x, 2, 4), from = -2, to = 10) | |
# now "prior predictive simulation" | |
sample_mu <- rnorm(1e4, 188, 1) | |
sample_sigma <- runif(1e4, 1, 5) | |
prior_p <- rnorm(1e4, sample_mu, sample_sigma) | |
dens(prior_p, xlab="Max HR Values") | |
abline(v = 182, col="blue", lty=2) | |
abline(v = 194, col="blue", lty=2) | |
# how much lands "inside" of our limits? | |
cum_dist <- ecdf(prior_p) | |
1-((cum_dist(182)) + (1-cum_dist(194))) | |
#### Posterior (brute force) ---- | |
# brute force calculation of posterior (from Rethinking) | |
mu.list <- seq(from=188, to=194, length.out = 200) | |
sigma.list <- seq(from=2, to=5, length.out = 200) | |
post <- expand.grid(mu=mu.list, sigma=sigma.list) | |
post$LL <- sapply(1:nrow(post), function(i) sum( | |
dnorm(d$Max.HR, post$mu[i], post$sigma[i], log=TRUE) | |
)) | |
post$prod <- post$LL + dnorm(post$mu, 188, 1, TRUE) + | |
dunif(post$sigma, 1, 5, TRUE) | |
post$prob <- exp(post$prod - max(post$prod)) | |
# on line above, remember, if not log, all probabilities quickly go to zero | |
# plot contour and heatmap | |
contour_xyz(post$mu, post$sigma, post$prob) | |
image_xyz(post$mu, post$sigma, post$prob) | |
# sample the posterior distribution | |
sample.rows <- sample(1:nrow(post), size=1e4, replace=TRUE, prob=post$prob) | |
sample.mu <- post$mu[sample.rows] | |
sample.sigma <- post$sigma[sample.rows] | |
# plot samples | |
plot(sample.mu, sample.sigma, cex=0.5, pch=16, col=col.alpha(rangi2, 0.1)) | |
dens(sample.mu) | |
dens(sample.sigma) | |
# possibilities intervals for mean and standard deviation | |
PI(sample.mu) | |
PI(sample.sigma) | |
#### Posterior (quap) ---- | |
flist <- alist( | |
Max.HR ~ dnorm( mu, sigma), | |
mu ~ dnorm(188, 1), | |
sigma ~ dunif(1, 5) | |
) | |
m4.1 <- quap(flist, data=d) | |
precis(m4.1) | |
cov2cor(vcov(m4.1)) | |
# might be an issue here! | |
# why would mean and sd be negatively correlated? | |
# is it the number of data points are each band? | |
# if so, does this invalidate the results? | |
# extract samples from the posterior | |
post <- extract.samples(m4.1, n=1e4) | |
head(post) | |
precis(post) | |
# sample the samples | |
test <- rnorm(10000, post$mu, post$sigma) | |
mean(test) | |
PI(test) | |
# how much lands "inside" of our limits? | |
cum_dist <- ecdf(test) | |
1-((cum_dist(182)) + (1-cum_dist(194))) | |
# plot posterior distribution | |
plot(density(test), xlab="Max HR Values") | |
abline(v = 182, col="blue", lty=2) | |
abline(v = 194, col="blue", lty=2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment