variani/admixed-mapping.R

## admixed-mapping.R
### Step 1: load libraries
library(devtools)
load_all("~/git/hemostat/copdgene")
load_all("~/git/variani/matlm")

library(gaston)

### Step 2: load the COPDGene data
# phenotypes
phen <- copdgene::copdgene_phen(progress = FALSE)
ids_phen <- phen$sid

# local ancestry segments
acs <- copdgene::copdgene_acs()

# ARM & EARM matrices used further in the linear mixed model (as random effects)
arm <- copdgene::copdgene_arm(ids = ids_phen)

#earm <- copdgene::copdgene_arm(ids = ids_phen, exposure = "SmokCigNow")
val_exposure <- phen[, "SmokCigNow"]
D_exposure <- sapply(val_exposure , function(x) as.numeric(x == val_exposure))
earm <- D * arm # element-wise multiplication of matrices

### Step 3: fit linear mixed model (LMM)
form <- FEV1pp_utah ~ Age_Enroll + gender + Duration_Smoking + log_CigPerDaySmokAvg + SmokCigarNow + SmokCigNow + SmokCigNow0_15 + acs_global2 + acs_global2*SmokCigNow
# note: the interaction term with global ancestry `acs_global2*SmokCigNow` is included in the model

y <- model.extract(model.frame(form, phen), "response")
X <- model.matrix(form, phen)

# random effects of medical center
Zc <- model.matrix(~ ccenter - 1, phen)
Kc <- tcrossprod(Zc)

# random effects of heterogeneity
D1 <- (phen$CigPerDaySmokNow0_gr3 == "0") %>% as.numeric %>% diag

K <- list(Kc, D1, arm, earm)
mod <- gaston::lmm.aireml(y, X, K = K)

### Step 4: compute V = var(y) matrix estimated by LMM
vmat <- c(K, list(diag(nrow(phen))))
vsigma <- with(mod, c(tau, sigma2))

varcov <- lapply(seq(1, length(vmat)), function(i) vsigma[i] * vmat[[i]])
varcov <- Reduce("+", varcov)
rownames(varcov) <- ids_phen
colnames(varcov) <- ids_phen

### Step 5: run association scan using GLS
decomp <- matlm::decompose_varcov(varcov, method = "evd", output = "all")
transform <- decomp$transform

assoc <- matlm::matlm(form, phen, pred = acs, int = "SmokCigNow", transform = transform, ids = ids_phen)
# for each predictor (say `x`) stored in columns of `pred`
# - added three terms in the model formula `form`: main effect `x`, main effect `int` and interaction `x*int`
# - fitted model with the updated formula and transformation (that corresponds to GLS)
# - performed test for the interaction `x*int` term

### tep 6: save results
saveRDS(assoc, file = "assoc_int_SmokCigNow_FEV1pp_utah.rds")
	### Step 1: load libraries
	library(devtools)
	load_all("~/git/hemostat/copdgene")
	load_all("~/git/variani/matlm")

	library(gaston)

	### Step 2: load the COPDGene data
	# phenotypes
	phen <- copdgene::copdgene_phen(progress = FALSE)
	ids_phen <- phen$sid

	# local ancestry segments
	acs <- copdgene::copdgene_acs()

	# ARM & EARM matrices used further in the linear mixed model (as random effects)
	arm <- copdgene::copdgene_arm(ids = ids_phen)

	#earm <- copdgene::copdgene_arm(ids = ids_phen, exposure = "SmokCigNow")
	val_exposure <- phen[, "SmokCigNow"]
	D_exposure <- sapply(val_exposure , function(x) as.numeric(x == val_exposure))
	earm <- D * arm # element-wise multiplication of matrices

	### Step 3: fit linear mixed model (LMM)
	form <- FEV1pp_utah ~ Age_Enroll + gender + Duration_Smoking + log_CigPerDaySmokAvg + SmokCigarNow + SmokCigNow + SmokCigNow0_15 + acs_global2 + acs_global2*SmokCigNow
	# note: the interaction term with global ancestry `acs_global2*SmokCigNow` is included in the model

	y <- model.extract(model.frame(form, phen), "response")
	X <- model.matrix(form, phen)

	# random effects of medical center
	Zc <- model.matrix(~ ccenter - 1, phen)
	Kc <- tcrossprod(Zc)

	# random effects of heterogeneity
	D1 <- (phen$CigPerDaySmokNow0_gr3 == "0") %>% as.numeric %>% diag

	K <- list(Kc, D1, arm, earm)
	mod <- gaston::lmm.aireml(y, X, K = K)

	### Step 4: compute V = var(y) matrix estimated by LMM
	vmat <- c(K, list(diag(nrow(phen))))
	vsigma <- with(mod, c(tau, sigma2))

	varcov <- lapply(seq(1, length(vmat)), function(i) vsigma[i] * vmat[[i]])
	varcov <- Reduce("+", varcov)
	rownames(varcov) <- ids_phen
	colnames(varcov) <- ids_phen

	### Step 5: run association scan using GLS
	decomp <- matlm::decompose_varcov(varcov, method = "evd", output = "all")
	transform <- decomp$transform

	assoc <- matlm::matlm(form, phen, pred = acs, int = "SmokCigNow", transform = transform, ids = ids_phen)
	# for each predictor (say `x`) stored in columns of `pred`
	# - added three terms in the model formula `form`: main effect `x`, main effect `int` and interaction `x*int`
	# - fitted model with the updated formula and transformation (that corresponds to GLS)
	# - performed test for the interaction `x*int` term

	### tep 6: save results
	saveRDS(assoc, file = "assoc_int_SmokCigNow_FEV1pp_utah.rds")