Created
January 25, 2016 11:10
-
-
Save jwaage/82b61ee713eac7b4fd10 to your computer and use it in GitHub Desktop.
Get genotypes from SQL server (must be on COPSAC net)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
getSNPImputed <- function(SNPs) | |
{ | |
#Counting allele 1 | |
if (!require("data.table")) stop("Install packages data.table") | |
if (!require("RMySQL")) stop("Install packages RMySQL") | |
if (!require("rio")) stop("Install packages rio") | |
if (!require("gtools")) stop("Install packages gtools") | |
convertProbsToDoses <- function(props, alleleToReturnAorB="A") | |
{ | |
doseSplit <- apply(props, FUN=function(x){strsplit(x["doses"],split=";")}, MARGIN=1) | |
doseSplit <- lapply(doseSplit, FUN=function(x){as.numeric(x$"doses")}) | |
doses <- do.call("rbind", doseSplit) | |
sequentialIndex <- seq(1,ncol(doses),3) | |
AADoses <- doses[,sequentialIndex] | |
ABDoses <- doses[,sequentialIndex+1] | |
BBDoses <- doses[,sequentialIndex+2] | |
if (alleleToReturnAorB=="A") return ((AADoses*2)+(ABDoses)) | |
if (alleleToReturnAorB=="B") return ((BBDoses*2)+(ABDoses)) | |
} | |
SNPs_formatted <- paste("'", SNPs, "'", sep="", collapse=",") | |
get_probs_string <- paste("SELECT * FROM doses WHERE rs_id in (", SNPs_formatted, ")") | |
get_info_string <- paste("SELECT * FROM info WHERE rs_id in (", SNPs_formatted, ")") | |
message("Connecting to sql-server..") | |
con <- dbConnect(RMySQL::MySQL(), dbname = "EE1000g", user="copsac", password="copsac", host="192.168.1.50") | |
message("Fetching doses..") | |
rs <- dbSendQuery(con, get_probs_string) | |
probs <- dbFetch(rs) | |
if(!nrow(probs)) stop("No SNPs are imputed...") | |
doses <- convertProbsToDoses(probs) | |
message("Fetching metadata") | |
rs <- dbSendQuery(con, get_info_string) | |
info <- dbFetch(rs, n=-1) | |
rs <- dbSendQuery(con, "SELECT * FROM samples") | |
samples <- dbFetch(rs, n=-1) | |
if(nrow(info)) | |
{ | |
message("Exporting data...") | |
export(info, "info.xlsx") | |
if (is.null(nrow(doses))) | |
{ doses_DT <- data.table(doses) } else | |
{ doses_DT <- data.table(t(doses)) } | |
colnames(doses_DT) <- info$rs_id | |
final_DT <- data.table(samples, doses_DT) | |
final_DT <- final_DT[mixedorder(final_DT$ID)] | |
final_DT[ nchar(ID)<4, source:="COPSAC2000"] | |
final_DT[ nchar(ID)==4, source:="COPSAC2010"] | |
final_DT[ nchar(ID)>4, source:="PKU_SEVERE_ASTHMA"] | |
colnames(final_DT)[9] <- paste(colnames(final_DT)[9], as.character(probs[5]), sep="_") | |
export(final_DT, "genotypes.xlsx") | |
} else | |
{ | |
stop("No rows returned") | |
} | |
} | |
getSNPGenotyped <- function(SNPs) | |
{ | |
if (!require("snpStats")) stop("Install packages snpStats from BioConductor") | |
message("Loading genotypes...") | |
load('~/Dropbox/dbac/01_projekter/major projects/copsac/gwas/50_exome_express_source/source_V3.RData') | |
#load("/media/data/original_data/Exome_express/genotyped_qced_for_imputation/source_V3.RData") | |
message("Processing genotypes...") | |
geno <- genotypes@.Data | |
# geno <- genotypesOK@.Data | |
geno <- as.numeric(geno) | |
dim(geno) = c(nrow(genotypes), ncol(genotypes)) | |
gc() | |
if(FALSE%in%(SNPs%in%(colnames(genotypes)))) stop ("1 or more SNPs not genotypes") else | |
{ | |
chip <- data.frame(id=fam_info, | |
geno=as.integer(genotypes[,SNPs])-1, | |
stringsAsFactors=F) | |
} | |
return (chip) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment