Skip to content

Instantly share code, notes, and snippets.

@jwaage
Created January 25, 2016 11:10
Show Gist options
  • Save jwaage/82b61ee713eac7b4fd10 to your computer and use it in GitHub Desktop.
Save jwaage/82b61ee713eac7b4fd10 to your computer and use it in GitHub Desktop.
Get genotypes from SQL server (must be on COPSAC net)
getSNPImputed <- function(SNPs)
{
#Counting allele 1
if (!require("data.table")) stop("Install packages data.table")
if (!require("RMySQL")) stop("Install packages RMySQL")
if (!require("rio")) stop("Install packages rio")
if (!require("gtools")) stop("Install packages gtools")
convertProbsToDoses <- function(props, alleleToReturnAorB="A")
{
doseSplit <- apply(props, FUN=function(x){strsplit(x["doses"],split=";")}, MARGIN=1)
doseSplit <- lapply(doseSplit, FUN=function(x){as.numeric(x$"doses")})
doses <- do.call("rbind", doseSplit)
sequentialIndex <- seq(1,ncol(doses),3)
AADoses <- doses[,sequentialIndex]
ABDoses <- doses[,sequentialIndex+1]
BBDoses <- doses[,sequentialIndex+2]
if (alleleToReturnAorB=="A") return ((AADoses*2)+(ABDoses))
if (alleleToReturnAorB=="B") return ((BBDoses*2)+(ABDoses))
}
SNPs_formatted <- paste("'", SNPs, "'", sep="", collapse=",")
get_probs_string <- paste("SELECT * FROM doses WHERE rs_id in (", SNPs_formatted, ")")
get_info_string <- paste("SELECT * FROM info WHERE rs_id in (", SNPs_formatted, ")")
message("Connecting to sql-server..")
con <- dbConnect(RMySQL::MySQL(), dbname = "EE1000g", user="copsac", password="copsac", host="192.168.1.50")
message("Fetching doses..")
rs <- dbSendQuery(con, get_probs_string)
probs <- dbFetch(rs)
if(!nrow(probs)) stop("No SNPs are imputed...")
doses <- convertProbsToDoses(probs)
message("Fetching metadata")
rs <- dbSendQuery(con, get_info_string)
info <- dbFetch(rs, n=-1)
rs <- dbSendQuery(con, "SELECT * FROM samples")
samples <- dbFetch(rs, n=-1)
if(nrow(info))
{
message("Exporting data...")
export(info, "info.xlsx")
if (is.null(nrow(doses)))
{ doses_DT <- data.table(doses) } else
{ doses_DT <- data.table(t(doses)) }
colnames(doses_DT) <- info$rs_id
final_DT <- data.table(samples, doses_DT)
final_DT <- final_DT[mixedorder(final_DT$ID)]
final_DT[ nchar(ID)<4, source:="COPSAC2000"]
final_DT[ nchar(ID)==4, source:="COPSAC2010"]
final_DT[ nchar(ID)>4, source:="PKU_SEVERE_ASTHMA"]
colnames(final_DT)[9] <- paste(colnames(final_DT)[9], as.character(probs[5]), sep="_")
export(final_DT, "genotypes.xlsx")
} else
{
stop("No rows returned")
}
}
getSNPGenotyped <- function(SNPs)
{
if (!require("snpStats")) stop("Install packages snpStats from BioConductor")
message("Loading genotypes...")
load('~/Dropbox/dbac/01_projekter/major projects/copsac/gwas/50_exome_express_source/source_V3.RData')
#load("/media/data/original_data/Exome_express/genotyped_qced_for_imputation/source_V3.RData")
message("Processing genotypes...")
geno <- genotypes@.Data
# geno <- genotypesOK@.Data
geno <- as.numeric(geno)
dim(geno) = c(nrow(genotypes), ncol(genotypes))
gc()
if(FALSE%in%(SNPs%in%(colnames(genotypes)))) stop ("1 or more SNPs not genotypes") else
{
chip <- data.frame(id=fam_info,
geno=as.integer(genotypes[,SNPs])-1,
stringsAsFactors=F)
}
return (chip)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment