Skip to content

Instantly share code, notes, and snippets.

@dggoldst
Created July 6, 2016 22:46
Show Gist options
  • Save dggoldst/cd858f66dbd55ec0fc271a21e07ae975 to your computer and use it in GitHub Desktop.
Save dggoldst/cd858f66dbd55ec0fc271a21e07ae975 to your computer and use it in GitHub Desktop.
library(rvest)
library(tidyr)
library(splines)
library(stringr)
library(ggplot2)
library(dplyr)
setwd("C:/Dropbox/Projects/20160705_Calories_Per_Meal")
men = read_html("http://health.gov/dietaryguidelines/2015/guidelines/appendix-2/") %>%
html_node(xpath = '//*[@id="table-a2-1"]/div/div[1]/table') %>%
html_table()
women = read_html("http://health.gov/dietaryguidelines/2015/guidelines/appendix-2/") %>%
html_node(xpath = '//*[@id="table-a2-1"]/div/div[2]/table') %>%
html_table()
df = rbind(men, women) %>%
mutate(sex = c(rep("male", 30), rep("female", 30)))
names(df) = c("prevage", "sedentary", "moderate", "active", "sex")
df = df %>%
gather(activity, calories, -prevage, -sex) %>%
mutate(calories = as.numeric(gsub(',', '', calories)),
prevage = ifelse(str_detect(prevage, "76"), "76-79", prevage))
expand_years <- function(df) {
if (str_detect(df$prevage, "-")) {
a <- as.numeric(str_split_fixed(df$prevage, '-', 2))
data.frame(df, age = as.numeric(seq(a[1], a[2])))
} else {
data.frame(df, age = as.numeric(df$prevage))
}
}
df = df %>%
rowwise() %>%
do(expand_years(.)) %>%
ungroup() %>%
mutate(age = as.numeric(age))
write.csv(df, file = "calories_by_age_sex_activity.csv", row.names = FALSE)
mark_bilinear = function(calories) {
firstmaxloc = which.max(calories)
lastmaxloc = max(which(calories == max(calories)))
midmax = round((firstmaxloc + lastmaxloc) / 2, 0)
c(rep("B", midmax), rep("A", length(calories) - midmax)) #B is before A is after
}
mark_maxzone = function(calories) {
firstmaxloc = which.max(calories)
lastmaxloc = max(which(calories == max(calories)))
c(
rep("B", firstmaxloc - 1), #before
rep("O", lastmaxloc - firstmaxloc + 1), #on
rep("A", length(calories) - lastmaxloc) #after
)
}
#Try fitting to unique vals
sdf = df %>%
group_by(prevage, sex, activity) %>%
filter(row_number() == 1) %>%
group_by(sex, activity) %>%
mutate(maxzone = mark_maxzone(calories),
bilinear = mark_bilinear(calories)) %>%
ungroup()
#small df
sdf = sdf %>%
mutate(
fm1 = predict(lm(
data = sdf, calories ~ sex * activity * maxzone * age
)),
fm2 = predict(lm(
data = sdf, calories ~ sex * activity * bilinear * age
)),
fm3 = predict(lm(
data = sdf, calories ~ sex * activity * bilinear * poly(age, 2)
)),
fm1ad = abs(fm1 - calories),
fm2ad = abs(fm2 - calories),
fm3ad = abs(fm3 - calories)
)
summary(sdf)
#tri partate
p = ggplot(sdf, aes(
x = age,
y = calories,
group = activity,
color = activity
)) +
geom_point(size = .5) +
geom_line(aes(y = fm1)) +
theme_bw() +
theme(legend.position = "bottom") +
scale_y_continuous(breaks = seq(1000, 3500, by = 250)) +
facet_grid(sex ~ .)
p
ggsave(p,
file = "three.part.linear.png",
width = 4,
height = 8)
#bilinear
p = ggplot(sdf, aes(
x = age,
y = calories,
group = activity,
color = activity
)) +
geom_point(size = .5) +
geom_line(aes(y = fm2)) +
theme_bw() +
theme(legend.position = "bottom") +
scale_y_continuous(breaks = seq(1000, 3500, by = 250)) +
facet_grid(sex ~ .)
p
ggsave(p,
file = "two.part.linear.png",
width = 4,
height = 8)
#poly2 tri
p = ggplot(sdf, aes(
x = age,
y = calories,
group = activity,
color = activity
)) +
geom_point(size = .5) +
geom_line(aes(y = fm3)) +
theme_bw() +
theme(legend.position = "bottom") +
scale_y_continuous(breaks = seq(1000, 3500, by = 250)) +
facet_grid(sex ~ .)
p
ggsave(p,
file = "two.part.poly.png",
width = 4,
height = 8)
#raw
p = ggplot(sdf, aes(
x = age,
y = calories,
group = activity,
color = activity
)) +
geom_point(size = .5) +
geom_line() +
theme_bw() +
theme(legend.position = "bottom") +
scale_y_continuous(breaks = seq(1000, 3500, by = 250)) +
facet_grid(sex ~ .)
p
ggsave(p,
file = "raw.png",
width = 4,
height = 8)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment