RyanSchu/PCA.md

## eigenplot.R
library(dplyr)
library(tidyr)
library(ggplot2)
library(argparse)

parser <- ArgumentParser()
parser$add_argument("--val",help="full path to eigenvalue file")
parser$add_argument("--vec",help="full path to eigenvector file")
parser$add_argument("-o", "--outputdir", help="directory where you would like to output your plots")
args <- parser$parse_args()
"%&%" = function(a,b) paste (a,b,sep="")

pcaplots <- args$outputdir %&% "/pca_plots.pdf"
pcs <- read.table(args$vec,header=T)
eval <- scan(args$val)[1:10]
skree<-round(eval/sum(eval),3)
skree<-cbind.data.frame(skree,c(1,2,3,4,5,6,7,8,9,10))
colnames(skree)<-c("percent_var", "PC")

pdf(pcaplots)

ggplot(data=skree, aes(x=PC, y=percent_var)) + geom_point() + geom_line() + scale_x_continuous(breaks = 1:10) + ggtitle("Proportion of variance explained")

#PCA Plot 1 (PC1 vs PC2)
ggplot() + geom_point(data=pcs,aes(x=PC1,y=PC2)) + theme_bw() + scale_colour_brewer(palette="Set1") + ggtitle("PC1 vs PC2")

#PCA Plot 2 (PC1 vs PC3)
ggplot() + geom_point(data=pcs,aes(x=PC1,y=PC3)) + theme_bw() + scale_colour_brewer(palette="Set1") + ggtitle("PC1 vs PC3")

#PCA Plot 1 (PC2 vs PC3)
ggplot() + geom_point(data=pcs,aes(x=PC2,y=PC3)) + theme_bw() + scale_colour_brewer(palette="Set1") + ggtitle("PC2 vs PC3")

dev.off()
#write.table(samples, args$QCdir %&% "/PCA/GWAS_PCA.txt",quote=F,row.names=F,col.names=F)

#afrpcs <- read.table("/home/angela/px_yri_chol/QC/QCStep6/QCStep6e/QCStep6e.evec",skip=1)
#afrcdf <- afrpcs %>% rename(PC1=V2,PC2=V3,PC3=V4,PC4=V5,PC5=V6,PC6=V7,PC7=V8,PC8=V9,PC9=V10,PC10=V11) %>% mutate(pop=ifelse(grepl("TC",V1),"GWAS","GWAS"))
#eval <- scan("/home/angela/px_yri_chol/QC/QCStep6/QCStep6e/QCStep6e.eval")[1:10]
#round(eval/sum(eval),3)

## PCA.md

      
    Raw
  

              PCA.md
            
          
    These scripts are meant to be run from the command line. Save the eigenplot.R script attached. You can then run it from the command line as such
Rscript eigenplot.R --val Path/to/pca.eigencal --vec Path/to/pca.eigenvec --o /directory/to/output

This will make the pdf file pca_plots in the specified output directory.
	library(dplyr)
	library(tidyr)
	library(ggplot2)
	library(argparse)

	parser <- ArgumentParser()
	parser$add_argument("--val",help="full path to eigenvalue file")
	parser$add_argument("--vec",help="full path to eigenvector file")
	parser$add_argument("-o", "--outputdir", help="directory where you would like to output your plots")
	args <- parser$parse_args()
	"%&%" = function(a,b) paste (a,b,sep="")

	pcaplots <- args$outputdir %&% "/pca_plots.pdf"
	pcs <- read.table(args$vec,header=T)
	eval <- scan(args$val)[1:10]
	skree<-round(eval/sum(eval),3)
	skree<-cbind.data.frame(skree,c(1,2,3,4,5,6,7,8,9,10))
	colnames(skree)<-c("percent_var", "PC")

	pdf(pcaplots)

	ggplot(data=skree, aes(x=PC, y=percent_var)) + geom_point() + geom_line() + scale_x_continuous(breaks = 1:10) + ggtitle("Proportion of variance explained")

	#PCA Plot 1 (PC1 vs PC2)
	ggplot() + geom_point(data=pcs,aes(x=PC1,y=PC2)) + theme_bw() + scale_colour_brewer(palette="Set1") + ggtitle("PC1 vs PC2")

	#PCA Plot 2 (PC1 vs PC3)
	ggplot() + geom_point(data=pcs,aes(x=PC1,y=PC3)) + theme_bw() + scale_colour_brewer(palette="Set1") + ggtitle("PC1 vs PC3")

	#PCA Plot 1 (PC2 vs PC3)
	ggplot() + geom_point(data=pcs,aes(x=PC2,y=PC3)) + theme_bw() + scale_colour_brewer(palette="Set1") + ggtitle("PC2 vs PC3")

	dev.off()
	#write.table(samples, args$QCdir %&% "/PCA/GWAS_PCA.txt",quote=F,row.names=F,col.names=F)

	#afrpcs <- read.table("/home/angela/px_yri_chol/QC/QCStep6/QCStep6e/QCStep6e.evec",skip=1)
	#afrcdf <- afrpcs %>% rename(PC1=V2,PC2=V3,PC3=V4,PC4=V5,PC5=V6,PC6=V7,PC7=V8,PC8=V9,PC9=V10,PC10=V11) %>% mutate(pop=ifelse(grepl("TC",V1),"GWAS","GWAS"))
	#eval <- scan("/home/angela/px_yri_chol/QC/QCStep6/QCStep6e/QCStep6e.eval")[1:10]
	#round(eval/sum(eval),3)