Clay Ford clayford

## count_pairs.R
# Cross-classification of income by job satisfaction:
# Table 2.8, Agresti (2002) p. 57.
# Counts are entered row by row, one row per income level.
M <- as.table(matrix(
  c(1, 3, 10, 6,
    2, 3, 10, 7,
    1, 6, 14, 12,
    0, 1, 9, 11),
  nrow = 4,
  byrow = TRUE,
  dimnames = list(
    `income` = c("<15", "15-25", "25-40", ">40"),
    `job satisfaction` = c("VD", "LD", "MS", "VS")
  )
))

# count concordant pairs
# n is the number of rows (income levels) in the table
n <- nrow(M)

## pocock_fig_2.R
# replicate Fig 2 of
# Pocock, S.J., Assmann, S.E., Enos, L.E. and Kasten, L.E. (2002),
# Subgroup analysis, covariate adjustment and baseline comparisons in clinical trial reporting: current practice and problems.
# Statist. Med., 21: 2917-2930. https://doi.org/10.1002/sim.1296

# Grid of inputs for the figure: Z_x runs from -2 to 2 in steps of 0.1 and is
# crossed with six values of rho, giving one row per (Z_x, rho) combination.
Z_x <- seq(-2, 2, 0.1)
rho <- c(0, 0.1, 0.3, 0.5, 0.7, 0.9)
d <- expand.grid(Z_x = Z_x, rho = rho)
# Upper-tail standard normal quantile for probability 0.025 (about 1.96).
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned, which would silently flip the tail.
Z_a <- qnorm(0.025, lower.tail = FALSE)
d$size <- pnorm((Z_a - d$Z_x*d$rho)/sqrt(1 - d$rho^2),

## simple_slopes_data.R
# simulate data similar to that used in Aiken & West
# Aiken, L. S. and West, S.G. (1991). Multiple Regression: Testing and Interpreting Interactions. Newbury Park, Calif: Sage Publications.

# The authors do not provide data but give summary stats of data in Table 2.1 (p. 11)
# The only parameter I had to guess at was the residual standard error. I assumed N(0,5)

library(MASS)
# Covariance = correlation * sd_1 * sd_2.
# NOTE(review): presumably 0.42 is the correlation and 2.20 and 0.95 are the
# two standard deviations from Table 2.1 -- confirm against the book.
covar <- 0.42*2.20*0.95 # convert correlation to covariance
# fix the RNG seed so the random draws that follow are reproducible
set.seed(1)
xz <- mvrnorm(n = 400, mu = c(5, 10),

## stepwise_selection_simulation.R
# generate population with collinearity
# Six predictors of length n; x2 is built from x1 and x4 from x3, each with
# a small amount of normal noise added.
n <- 10000

# first pair: x2 is twice x1 plus noise
x1 <- runif(n, min = -3, max = 3)
x2 <- 2 * x1 + rnorm(n, mean = 0, sd = 0.3)
plot(x1, x2) # collinearity!

# second pair: x4 is -4 times x3 plus noise
x3 <- rnorm(n, mean = 10, sd = 2)
x4 <- -4 * x3 + rnorm(n, mean = 0, sd = 0.5)
plot(x3, x4) # collinearity!

# two more predictors, not derived from any of the others
x5 <- rexp(n)
x6 <- runif(n)

## observed_power_simulation.R
# https://www.sciencedirect.com/science/article/pii/S0022480420305023?via%3Dihub

# illustrates that tests with small P-values always have high observed power (on the left side, where the P-value is close to 0 and observed power close to 1).

# Draw two normal samples of size 30 (means 2 and 3, sd 2), compare them with
# a t-test, then plug the observed difference in sample means into a power
# calculation to get the "observed power".
x1 <- rnorm(n = 30, mean = 2, sd = 2)
x2 <- rnorm(n = 30, mean = 3, sd = 2)
tout <- t.test(x = x1, y = x2)
# difference between the two group means estimated by the test
obs_eff <- diff(tout$estimate)
pout <- power.t.test(
  n = 30,
  delta = abs(obs_eff),
  sd = 2,
  sig.level = 0.05
)
pout[["power"]]

## load_planar_dataset.R
# R version of load_planar_dataset() on https://datascience-enthusiast.com/DL/Planar-data-classification-with-one-hidden-layer.html

# Could use python code to export result of load_planar_dataset() as CSV:
# import pandas as pd
# df1 = pd.DataFrame(np.transpose(X), columns = ['X1','X2'])
# df2 = pd.DataFrame(np.transpose(Y), columns = ['Y'])
# df = pd.concat([df1, df2], axis = 1)
# df.to_csv('planar_flower.csv', index = False)

# Or create data in R as a data frame

## simulate_proportional_odds_regression.R
# Clay Ford
# 2020-06-22

# Simulate data from a proportional odds model with proportional odds assumption
# satisfied.

# 300 observations and a grouping variable (example: democratic/republican)
# grp is a 0/1 indicator sampled with replacement; the seed makes the draw
# reproducible.
n <- 300
set.seed(1)
grp <- sample(c(0L, 1L), size = n, replace = TRUE)

## pivot_longer_example.R
library(tidyverse)
# Small wide-format example: one row per person, with three score columns and
# three survey columns. Written row-wise so each person reads left to right.
d1 <- tribble(
  ~name,   ~score_1, ~score_2, ~score_3, ~survey_1, ~survey_2, ~survey_3,
  "Clay",  88,       77,       55,       4,         3,         2,
  "Laura", 99,       88,       66,       5,         3,         5
)
d1
# A tibble: 2 x 7

## Moving_R_programs_to_Rivanna.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                clayford
                / Moving_R_programs_to_Rivanna.md
            
            
              Last active
              October 18, 2019 15:40
            
              
                Notes from Moving R programs to Rivanna workshop, 10/17/19
              
          
    Moving R programs to Rivanna notes

Workshop date: 10/17/2019
Accessing Rivanna

Home directory: 50 Mb storage

Scratch: 10 TB (90 day limit)

Can purchase storage; requires a PTAO

  
## rlm_with_bootstrap.R


library(car)
library(MASS)

# generate data with slight non-constant variance
# Two factors of length 1200: x1 has levels A/B/C in runs of 400 and x2 has
# levels 1/2 in runs of 600. The seed is fixed before any random draws.
x1 <- gl(3, 400, labels = LETTERS[1:3])
x2 <- gl(2, 600, labels = as.character(1:2))
set.seed(1)
y <- 1 + 1.2*(x1 == "B") + 1.3*(x1 == "C") -0.5*(x2 == "2") +
	# Cross-classification of income by job satisfaction:
	# Table 2.8, Agresti (2002) p. 57.
	# Counts are entered row by row, one row per income level.
	M <- as.table(matrix(
	  c(1, 3, 10, 6,
	    2, 3, 10, 7,
	    1, 6, 14, 12,
	    0, 1, 9, 11),
	  nrow = 4,
	  byrow = TRUE,
	  dimnames = list(
	    `income` = c("<15", "15-25", "25-40", ">40"),
	    `job satisfaction` = c("VD", "LD", "MS", "VS")
	  )
	))

	# count concordant pairs
	# n is the number of rows (income levels) in the table
	n <- nrow(M)
	# replicate Fig 2 of
	# Pocock, S.J., Assmann, S.E., Enos, L.E. and Kasten, L.E. (2002),
	# Subgroup analysis, covariate adjustment and baseline comparisons in clinical trial reporting: current practice and problems.
	# Statist. Med., 21: 2917-2930. https://doi.org/10.1002/sim.1296

	# Grid of inputs for the figure: Z_x runs from -2 to 2 in steps of 0.1 and
	# is crossed with six values of rho, one row per (Z_x, rho) combination.
	Z_x <- seq(-2, 2, 0.1)
	rho <- c(0, 0.1, 0.3, 0.5, 0.7, 0.9)
	d <- expand.grid(Z_x = Z_x, rho = rho)
	# Upper-tail standard normal quantile for probability 0.025 (about 1.96).
	# FALSE is spelled out because the shorthand F is an ordinary variable that
	# can be reassigned, which would silently flip the tail.
	Z_a <- qnorm(0.025, lower.tail = FALSE)
	d$size <- pnorm((Z_a - d$Z_x*d$rho)/sqrt(1 - d$rho^2),
	# simulate data similar to that used in Aiken & West
	# Aiken, L. S. and West, S.G. (1991). Multiple Regression: Testing and Interpreting Interactions. Newbury Park, Calif: Sage Publications.

	# The authors do not provide data but give summary stats of data in Table 2.1 (p. 11)
	# The only parameter I had to guess at was the residual standard error. I assumed N(0,5)

	library(MASS)
	# Covariance = correlation * sd_1 * sd_2. The multiplication operators are
	# required: "0.422.200.95" is not a valid numeric literal and stops the
	# script from parsing.
	# NOTE(review): presumably 0.42 is the correlation and 2.20 and 0.95 are
	# the two standard deviations from Table 2.1 -- confirm against the book.
	covar <- 0.42 * 2.20 * 0.95 # convert correlation to covariance
	# fix the RNG seed so the random draws that follow are reproducible
	set.seed(1)
	xz <- mvrnorm(n = 400, mu = c(5, 10),
	# generate population with collinearity
	# Six predictors of length n; x2 is built from x1 and x4 from x3, each
	# with a small amount of normal noise added.
	n <- 10000

	# first pair: x2 is twice x1 plus noise
	x1 <- runif(n, min = -3, max = 3)
	x2 <- 2 * x1 + rnorm(n, mean = 0, sd = 0.3)
	plot(x1, x2) # collinearity!

	# second pair: x4 is -4 times x3 plus noise
	x3 <- rnorm(n, mean = 10, sd = 2)
	x4 <- -4 * x3 + rnorm(n, mean = 0, sd = 0.5)
	plot(x3, x4) # collinearity!

	# two more predictors, not derived from any of the others
	x5 <- rexp(n)
	x6 <- runif(n)
	# https://www.sciencedirect.com/science/article/pii/S0022480420305023?via%3Dihub

	# illustrates that tests with small P-values always have high observed power (on the left side, where the P-value is close to 0 and observed power close to 1).

	# Draw two normal samples of size 30 (means 2 and 3, sd 2), compare them
	# with a t-test, then plug the observed difference in sample means into a
	# power calculation to get the "observed power".
	x1 <- rnorm(n = 30, mean = 2, sd = 2)
	x2 <- rnorm(n = 30, mean = 3, sd = 2)
	tout <- t.test(x = x1, y = x2)
	# difference between the two group means estimated by the test
	obs_eff <- diff(tout$estimate)
	pout <- power.t.test(
	  n = 30,
	  delta = abs(obs_eff),
	  sd = 2,
	  sig.level = 0.05
	)
	pout[["power"]]
	# R version of load_planar_dataset() on https://datascience-enthusiast.com/DL/Planar-data-classification-with-one-hidden-layer.html

	# Could use python code to export result of load_planar_dataset() as CSV:
	# import pandas as pd
	# df1 = pd.DataFrame(np.transpose(X), columns = ['X1','X2'])
	# df2 = pd.DataFrame(np.transpose(Y), columns = ['Y'])
	# df = pd.concat([df1, df2], axis = 1)
	# df.to_csv('planar_flower.csv', index = False)

	# Or create data in R as a data frame
	# Clay Ford
	# 2020-06-22

	# Simulate data from a proportional odds model with proportional odds assumption
	# satisfied.

	# 300 observations and a grouping variable (example: democratic/republican)
	# grp is a 0/1 indicator sampled with replacement; the seed makes the draw
	# reproducible.
	n <- 300
	set.seed(1)
	grp <- sample(c(0L, 1L), size = n, replace = TRUE)
	library(tidyverse)
	# Small wide-format example: one row per person, with three score columns
	# and three survey columns. Written row-wise so each person reads left to
	# right.
	d1 <- tribble(
	  ~name,   ~score_1, ~score_2, ~score_3, ~survey_1, ~survey_2, ~survey_3,
	  "Clay",  88,       77,       55,       4,         3,         2,
	  "Laura", 99,       88,       66,       5,         3,         5
	)
	d1
	# A tibble: 2 x 7


	library(car)
	library(MASS)

	# generate data with slight non-constant variance
	# Two factors of length 1200: x1 has levels A/B/C in runs of 400 and x2
	# has levels 1/2 in runs of 600. The seed is fixed before any random draws.
	x1 <- gl(3, 400, labels = LETTERS[1:3])
	x2 <- gl(2, 600, labels = as.character(1:2))
	set.seed(1)
	y <- 1 + 1.2(x1 == "B") + 1.3(x1 == "C") -0.5*(x2 == "2") +