Skip to content

Instantly share code, notes, and snippets.

@brevans
Last active April 8, 2016 17:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brevans/75fefa79710e539a63052682cb7b80eb to your computer and use it in GitHub Desktop.
Save brevans/75fefa79710e539a63052682cb7b80eb to your computer and use it in GitHub Desktop.
### Simulations of DAPC analysis to determine probability of seeing random loading values as high as real ones.
setwd("/home/be59/working/jacobina_adegenet")
library('adegenet')
x.pairs.gl = read.PLINK("sources_only.raw", map.file="4ade.map")
dapc.gen1 = dapc(x.pairs.gl,x.pairs.gl$pop,n.pca=7,n.da=1)
sim.list = list()
for (i in 1:100) ## 100 simulations
{
x.gen = as.data.frame(x.pairs.gl) ## make data frame from the genlight object
x.gen.rand = apply(x.gen,2,function(x)sample(x)) ## Rearrange genotype assignments at each locus
row.names(x.gen.rand) = row.names(x.gen) ## Assign row names
new.gen=new("genlight",x.gen.rand,ploidy=2) ## Make genlight from randomized data frame
grp = x.pairs.gl$pop ## grouping factor
gen = new.gen
dapc.gen.rand = dapc(gen,grp,n.pca=7,n.da=1) ## Do preliminary dapc, with high number of PCs
dapc_rand.optim = optim.a.score(dapc.gen.rand,n.sim=1000,smart=F) ## A-score optimization to determine number of PCs that should be retained.
dapc.gen.rand = dapc(gen,grp,n.pca=dapc_rand.optim$best,n.da=1) ## perform dapc with right number of PCs
sim.list[[i]]=dapc.gen.rand$var.contr ## Add variable contributions to list
}
x = dapc.gen1$var.contr ## The true list of variable contributions.
x.rand = unlist(sim.list)
x.rand.o=order(x.rand)
## Determine minimum FDR
v.p.list = c()
hits.list = c()
false.list = c()
fdr.list = c()
for (i in 1:400)
{
p = i * 0.00005 ## loading value
v.p.list[i] = x.rand[x.rand.o[round(length(x.rand) - (length(x.rand)*p))]] ## Loading value corresponding to p, but what is x.rand.o??
hits.list[i] = length(which(x>=v.p.list[i])) ## Number of hits at p in real data
false.list[i] = p*length(x) ## Expected number of false hits in real data.
fdr.list[i] = false.list[i]/hits.list[i]
}
pdf("fdr.pdf")
x.tab=cbind(v.p.list,hits.list,false.list,fdr.list)
plot(x.tab[,1],x.tab[,2],type="l",ylab="# of Hits",xlab="Variable Contributions",main="Hits(black) and False hits(red) at different loading values")
lines(x.tab[,1],x.tab[,3],col = "red")
abline(v=x.tab[which(fdr.list==min(fdr.list)),1],col="blue")
hit.thresh=x.tab[which(x.tab[,4]==min(x.tab[,4])),1] ## threshold corresponding to minimum fdr
dev.off()
#tiff(filename=paste(folder,"figure3t.tiff",sep=""),compression=c("lzw"),res=400,height=5,width=5,units="in",pointsize=10)
#pdf()
#par(mfrow=c(2,2)) ## two windows in pdf
## Want to use minimum amount of PCs
#dapc.gen1 = dapc(gen,grp,n.pca=dapc_pairs.optim$best,n.da=1) ## Re-run dapc with the best number of PCs
#scatter(dapc.gen1)
#loadingplot(dapc.gen1$var.contr,axis=1,threshold=hit.thresh)
#dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment