# Sam Clifford (samclifford)

## NHANESre.R
library(NHANES)
data(NHANES)
library(tidyverse)
library(extrafont)
library(lme4)
library(RColorBrewer)

# Re-level two factors so that "Heterosexual" and "High School" are passed to
# fct_relevel() as the levels to move to the front (presumably so they act as
# the baseline categories in later models -- confirm against the analysis).
NHANES <- NHANES %>%
  mutate(
    SexOrientation = fct_relevel(SexOrientation, "Heterosexual"),
    Education = fct_relevel(Education, "High School")
  )

## binary_preds.R
library(tidyverse)
# Simulate binary observations and predictions on a day-of-week x week grid
# and start a tile plot of them.
# NOTE: this snippet is truncated -- the ggplot specification continues past
# the trailing `+` on the last line.
expand.grid(day = 1:7, week = 1:13) %>%
    # linear predictor: sinusoid in `day` with period 7 and amplitude 3
    mutate(mu = 3*sin(day*2*pi/7)) %>%
    # map the linear predictor to a probability with the inverse logit
    mutate(p = boot::inv.logit(mu)) %>%
    # two independent Bernoulli draws per row sharing the same probability;
    # `p =` partially matches rbinom()'s `prob` argument
    mutate(obs  = rbinom(n(), size = 1, p = p),
           pred = rbinom(n(), size = 1, p = p)) %>%
    # stack obs/pred into long format: `key` names the source, `value` the draw
    gather(key, value, obs, pred) %>%
    # factor so that the 0/1 outcome is treated as a discrete fill
    mutate(value = factor(value)) %>%
    ggplot(data =., aes(x = day, y = key)) +
    geom_tile(aes(fill = value)) +

## trim_percent.R
#' Format proportions as percentages without redundant trailing zeros
#'
#' Formats `x` with `scales::percent()` and then removes zeros that trail the
#' decimal point, together with a decimal point left dangling before the
#' percent sign (e.g. "10.50%" -> "10.5%", "10.00%" -> "10%").
#'
#' Only zeros after a "." are removed. The previous pattern `"0+\\%"` also
#' stripped integer zeros, turning "10%" into "1%" and "100%" into "1%".
#'
#' @param x Numeric vector of proportions, passed to `scales::percent()`.
#' @param ... Further arguments forwarded to `scales::percent()`.
#' @return Character vector the same length as `x`.
#'
#' Assumes the default "." decimal mark and "%" suffix; output produced with
#' other marks or suffixes is returned untrimmed.
trim_percent <- function(x, ...){
    # Namespaced calls replace the former require() calls, so the function no
    # longer attaches packages to the caller's search path.
    formatted <- scales::percent(x, ...)

    # Drop zeros that follow the decimal point and sit directly before "%".
    # The lazy digit group keeps significant decimals such as the 5 in ".50".
    no_trailing_zeros <- stringr::str_replace(string = formatted,
                                              pattern = "(\\.[0-9]*?)0+\\%",
                                              replacement = "\\1\\%")

    # Remove a decimal point left with no digits after it ("10.%" -> "10%").
    stringr::str_replace(string = no_trailing_zeros,
                         pattern = "\\.\\%",
                         replacement = "\\%")
}

## inla_prior_poisson.R
# Install INLA only when it is not already available. The R-INLA download
# repository is appended to the session's default repositories.
# requireNamespace() tests availability without attaching the package and
# without require()'s warning; attaching is left to library(INLA).
if (!requireNamespace("INLA", quietly = TRUE)){
    install.packages("INLA",
                     repos = c(getOption("repos"),
                               INLA = "https://inla.r-inla-download.org/R/testing"),
                     # spelled out: `dep` relied on partial argument matching
                     dependencies = TRUE)
}

library(INLA)
library(tidyverse)
library(broom)

## tidy_nfl.R
library(tidyverse)
library(hrbrthemes)

# download https://github.com/rfordatascience/tidytuesday/blob/master/data/tidy_tuesday_week2.xlsx
# read_xlsx() is exported by readxl, which library(tidyverse) does not attach,
# so it is called through its namespace.
football <- readxl::read_xlsx("data/tidy_tuesday_week2.xlsx")

# get the top 16 paid players in each position for each year
# NOTE: this snippet is truncated after the gather() step (the chain ends on a
# dangling `%>%`), so the top-16 selection itself is not visible here.
to_plot <- football %>%
  # `Team` is simply the row number of the input, used as a row identifier
  mutate(Team = 1:nrow(.)) %>%
  # long format: every column except `year` and `Team` becomes a
  # position/salary pair
  gather(position, salary, -c(year, Team)) %>%

## tuition.R
library(tidyverse)

# download from https://github.com/rfordatascience/tidytuesday/blob/master/data/us_avg_tuition.xlsx

# Read the tuition sheet and reshape it to one row per State and academic year.
# read_xlsx() is exported by readxl, which library(tidyverse) does not attach,
# so it is called through its namespace.
dat <- readxl::read_xlsx("us_avg_tuition.xlsx") %>%
  # every column except State holds one year's tuition
  gather(Year, Tuition, -State) %>%
  # year labels are split at the hyphen into a start and an end part
  separate(col = Year, into =  c("Start", "End"), sep = "-") %>%
  mutate_at(.vars = c("Start", "End"), .funs = parse_number) %>%
  # the end part is offset by 2000 to give a full year
  # (assumes it is a two-digit year in the 2000s -- confirm against the data)
  mutate(End = End + 2000)

## string_triangle_problem.R
# Simulate two break points on the unit interval for each of n_sims trials.
# NOTE: this snippet is truncated -- the closing brace of the else-branch and
# anything that uses sorted_break_locations are not present.
n_sims <- 1e5

# Selects how the two break points are generated; the branches below
# distinguish "stick_breaking" from everything else.
breaking_convention <- "all_at_once" # or stick_breaking

if (breaking_convention == "stick_breaking"){
  # first break: uniform on (0, 1), one per simulation
  break_locations <- matrix(runif(n = n_sims), ncol=1)
  # second break: uniform between the first break and 1, so each row is
  # already in increasing order
  sorted_break_locations <- cbind(break_locations, matrix(runif(n = n_sims, min = break_locations, max=1), ncol=1))
} else {
  # two independent uniform breaks per simulation (n_sims rows, 2 columns)
  break_locations <- matrix(runif(n = 2*n_sims), ncol=2)
  # sort within each row; apply() returns the sorted rows as columns, hence t()
  sorted_break_locations <- t(apply(break_locations, 1, sort))

## spider
library(tidyverse)
library(magrittr)
library(purrr)
library(forcats)

# All combinations of the 13 element symbols with ids 1 to 7 (91 rows).
base <- expand.grid(
  chem = c("Th", "Ta", "Nb", "La", "Ce", "P", "Zr",
           "Hf", "Sm", "Ti", "Y", "Yb", "Lu"),
  id = seq_len(7)
)


grouping <- data.frame(id=1:7,

## geom_wheat
library(tidyverse)

# Stack 100 random normal draws into bins and start a plot of them.
# NOTE: this snippet is truncated -- the ggplot specification continues past
# the trailing `+` on the last line.
tibble(x = rnorm(n=100)) %>%
  # sort so that the within-bin index below follows increasing x
  arrange(x) %>%
  # bin x at "pretty" break points (about 20 intervals requested)
  mutate(bin = base::cut(x, breaks=pretty(x,n=20))) %>%
  group_by(bin) %>%
  # running index 1..n within each bin, used as the vertical position
  mutate(count = 1:n()) %>%
  ggplot(data=., aes(x=x, y=count)) +
  # one path per bin joining that bin's points
  geom_path(aes(group=bin)) +
  geom_point(size=0.5) +

## geom_qq_conf.R
# code copied and amended from http://stackoverflow.com/questions/4357031/qqnorm-and-qqline-in-ggplot2/

library(ggplot2)

gg_qq_conf <- function(x, distribution = "norm",
                         ...,
                         line.estimate = NULL,
                         conf = 0.95,
                         labels = names(x)){
  q.function <- eval(parse(text = paste0("q", distribution)))
	library(NHANES)
	data(NHANES)
	library(tidyverse)
	library(extrafont)
	library(lme4)
	library(RColorBrewer)

	NHANES <- mutate(NHANES,
	SexOrientation = fct_relevel(SexOrientation, "Heterosexual"),
	Education = fct_relevel(Education, "High School"))
	library(tidyverse)
	expand.grid(day = 1:7, week = 1:13) %>%
	mutate(mu = 3sin(day2*pi/7)) %>%
	mutate(p = boot::inv.logit(mu)) %>%
	mutate(obs = rbinom(n(), size = 1, p = p),
	pred = rbinom(n(), size = 1, p = p)) %>%
	gather(key, value, obs, pred) %>%
	mutate(value = factor(value)) %>%
	ggplot(data =., aes(x = day, y = key)) +
	geom_tile(aes(fill = value)) +
	trim_percent <- function(x, ...){
	require(magrittr)
	require(scales)
	require(stringr)
	scales::percent(x, ...) %>%
	stringr::str_replace(string = ., pattern = "0+\\%", replacement = "\\%") %>%
	stringr::str_replace(string = ., pattern = "\\.\\%", replacement = "\\%")
	}
	if (!require(INLA)){
	install.packages("INLA",
	repos=c(getOption("repos"),
	INLA="https://inla.r-inla-download.org/R/testing"),
	dep=TRUE)
	}

	library(INLA)
	library(tidyverse)
	library(broom)
	library(tidyverse)
	library(hrbrthemes)

	# download https://github.com/rfordatascience/tidytuesday/blob/master/data/tidy_tuesday_week2.xlsx
	football <- read_xlsx("data/tidy_tuesday_week2.xlsx")

	# get the top 16 paid players in each position for each year
	to_plot <- football %>%
	mutate(Team = 1:nrow(.)) %>%
	gather(position, salary, -c(year, Team)) %>%
	library(tidyverse)

	# download from https://github.com/rfordatascience/tidytuesday/blob/master/data/us_avg_tuition.xlsx

	dat <- read_xlsx("us_avg_tuition.xlsx") %>%
	gather(Year, Tuition, -State) %>%
	separate(col = Year, into = c("Start", "End"), sep = "-") %>%
	mutate_at(.vars = c("Start", "End"), .funs = parse_number) %>%
	mutate(End = End + 2000)
	n_sims <- 1e5

	breaking_convention <- "all_at_once" # or stick_breaking

	if (breaking_convention == "stick_breaking"){
	break_locations <- matrix(runif(n = n_sims), ncol=1)
	sorted_break_locations <- cbind(break_locations, matrix(runif(n = n_sims, min = break_locations, max=1), ncol=1))
	} else {
	break_locations <- matrix(runif(n = 2*n_sims), ncol=2)
	sorted_break_locations <- t(apply(break_locations, 1, sort))
	library(tidyverse)
	library(magrittr)
	library(purrr)
	library(forcats)

	base <- expand.grid(chem = c("Th", "Ta", "Nb", "La", "Ce", "P", "Zr", "Hf", "Sm", "Ti", "Y", "Yb", "Lu"),
	id=1:7)


	grouping <- data.frame(id=1:7,
	library(tidyverse)

	tibble(x = rnorm(n=100)) %>%
	arrange(x) %>%
	mutate(bin = base::cut(x, breaks=pretty(x,n=20))) %>%
	group_by(bin) %>%
	mutate(count = 1:n()) %>%
	ggplot(data=., aes(x=x, y=count)) +
	geom_path(aes(group=bin)) +
	geom_point(size=0.5) +
	# code copied and amended from http://stackoverflow.com/questions/4357031/qqnorm-and-qqline-in-ggplot2/

	library(ggplot2)

	gg_qq_conf <- function(x, distribution = "norm",
	...,
	line.estimate = NULL,
	conf = 0.95,
	labels = names(x)){
	q.function <- eval(parse(text = paste0("q", distribution)))