Skip to content

Instantly share code, notes, and snippets.

@ben-domingue
Last active August 29, 2015 14:05
Show Gist options
  • Select an option

  • Save ben-domingue/f29c74f54dc48da99066 to your computer and use it in GitHub Desktop.

Select an option

Save ben-domingue/f29c74f54dc48da99066 to your computer and use it in GitHub Desktop.
make_grs<-function(nm,df,fam.id="family",id="subjectID",
threshold.seq,
plink="/hrsshare/analytic_sample/hrs_geno_final",
resid.fm=NULL) {
dir.nm<-paste0("/tmp/",nm)
system(paste("mkdir",dir.nm))
getwd()->dir.save
setwd(dir.nm)
if (!is.null(resid.fm)) {
paste(nm,"~",resid.fm)->resid.fm
df[,all.vars(formula(resid.fm))]->tmp
df[rowSums(is.na(tmp))==0,]->df
lm(resid.fm,df)->mod
mod$residuals->df$res
} else df[[nm]]->df$res
write.table(df[,c(fam.id,id)],file="people.txt",quote=FALSE,row.names=FALSE,col.names=FALSE)
#split sample for cv
sample(1:nrow(df),round(nrow(df)/2))->index
df[index,]->df1
df[!(1:nrow(df) %in% index),]->df2
#
write.table(df1[,c(fam.id,id,"res")],file="pheno1.txt",quote=FALSE,row.names=FALSE,col.names=FALSE)
write.table(df1[,c(fam.id,id)],file="people1.txt",quote=FALSE,row.names=FALSE,col.names=FALSE)
paste0("plink --noweb --bfile ",plink," --keep people1.txt --pheno pheno1.txt --assoc --out gwa1")->cmd
system(cmd)
max(threshold.seq)->threshold.M
system(paste0("awk '$9<",threshold.M," {print $0}' gwa1.qassoc | sort -k9 -g > top_hits.txt"))
read.table("top_hits.txt")->weights
write.table(df2[,c(fam.id,id,"res")],file="pheno2.txt",quote=FALSE,row.names=FALSE,col.names=FALSE)
write.table(df2[,c(fam.id,id)],file="people2.txt",quote=FALSE,row.names=FALSE,col.names=FALSE)
write.table(weights[,2],file="snps.txt",quote=FALSE,row.names=FALSE,col.names=FALSE)
paste0("plink --noweb --bfile ",plink," --keep people2.txt --extract snps.txt --pheno pheno2.txt --assoc --out gwa2")->cmd
system(cmd)
grs<-list()
for (threshold in threshold.seq) {
read.table("top_hits.txt")->weights
weights[,2][weights[,9]<threshold]->snps
read.table("gwa2.qassoc",header=TRUE)->hits
hits[!is.na(hits$P),]->hits
hits[hits$SNP %in% snps,]->weights
if (nrow(weights)>0) {
write.table(weights[,2],file="top_snps.txt",quote=FALSE,row.names=FALSE,col.names=FALSE)
paste0("plink --noweb --bfile ",plink," --keep people.txt --extract top_snps.txt --recode --transpose --recode12 --out top_hits")->cmd
system(cmd)
#
read.table("top_hits.tfam")->fam
nrow(fam)->N
readLines("top_hits.tped")->tped
snp.df<-list()
for (i in 1:length(tped)) {
tped[i]->y
seq(1,N*2,by=2)->odd.index
odd.index+1->even.index
strsplit(y," ")[[1]]->y
y[2]->tr.rs
as.numeric(y[-(1:4)])-1->y
ifelse(!(y %in% 0:1),NA,y)->y
if (length(y)==N*2 & length(unique(y))>1) {
y[odd.index]+y[even.index]->y
y->snp.df[[tr.rs]]
}
}
do.call("cbind",snp.df)->snp.df
rownames(snp.df)<-fam[,2]
#
match(colnames(snp.df),weights[,2])->index
weights[index,]->weights
weights[,5]<0 -> flip.test
for (i in which(flip.test)) abs(2-snp.df[,i])->snp.df[,i]
weights[,7]->wt
wt/sum(wt)->wt
apply(snp.df,1,weighted.mean,w=wt,na.rm=TRUE)->tab
(tab-mean(tab,na.rm=TRUE))/sd(tab,na.rm=TRUE)->grs[[as.character(threshold)]]
} else NULL->grs[[as.character(threshold)]]
}
#
for (i in 1:length(grs)) {
grs[[i]]->y
data.frame(subjectID=names(y),y)->z
names(z)[2]<-paste("thr_",names(grs)[i],sep="")
z->grs[[i]]
}
grs[[1]]->z
for (i in 2:length(grs)) merge(z,grs[[i]],all=TRUE)->z
setwd(dir.save)
z
}
## library(HeritHelper)
## setwd("~/hrs/paa_poster")
## load(file="pheno_data.Rdata")
## make_grs("r8weight",df,
## resid.fm=paste("sex+rabyear+",paste0("C",1:5,collapse="+")),
## threshold.seq=c(10e-3,10e-4,10e-5,10e-6)
## )->grs
## save(grs,file="/tmp/weight_grs.Rdata")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment