Created
October 16, 2013 21:48
-
-
Save zkamvar/7015503 to your computer and use it in GitHub Desktop.
A function for converting hapmap formatted files to data frames for the purpose of importing them into adegenet's genlight object.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#==============================================================================# | |
# hapmap2df.r Convert hapmap files to a data frame. | |
# | |
# Author: Zhian N. Kamvar | |
# License: GPLv3 | |
# Year: 2013 | |
# Waranty: NONE | |
# | |
# This function will read in hapmap formatted files and convert them to a data | |
# frame for the purposes of importing them to genlight objects. Example: | |
# | |
# if(!require(adegenet)){ | |
# install.packages("adegenet") | |
# } | |
# hmp <- read.table(my_hapmap_file.hmp, sep = "\t", skip = 1) | |
# names(hmp) <- strsplit(readLines(my_hapmap_file.hmp, 1), "\t")[[1]] | |
# hmp.df <- hapmap2df(hmp) | |
# | |
# # If you have windows: | |
# hmp.gl <- new("genlight", hmp.df, mc.cores = 1L) | |
# | |
# # Else: | |
# hmp.gl <- new("genlight", hmp.df) | |
# | |
# # Visual stuff from the adegenet genomics manual: | |
# plot(hmp.gl) | |
# myFreq <- glMean(hmp.gl) | |
# myFreq <- c(myFreq, 1-myFreq) | |
# hist(myFreq, proba = TRUE, col = "darkseagreen3", xlab = "Allele frequencies", | |
# main = "Distribution of allele frequencies", nclass = 20) | |
# temp <- density(myFreq, bw = .05) | |
# lines(temp$x, temp$y*2, lwd = 3) | |
#==============================================================================# | |
hapmap2df <- function(x, gt.col = 2, first.col = 12, last.col = ncol(x)){ | |
ambi <- c("S","M","Y","R","K","W") | |
names(ambi) <- c("A/T","C/G","A/C","G/T","A/G","C/T") | |
genotypes <- levels(x[[gt.col]]) | |
major_alleles <- unlist(strsplit(genotypes, "/")) | |
major_alleles <- major_alleles[1:(length(major_alleles)) %% 2 == 1] | |
print(major_alleles) | |
levels(x[[gt.col]]) <- major_alleles | |
gtlevs <- unique(levels(x[[gt.col]])) | |
print(gtlevs) | |
x[c(gt.col, first.col:last.col)] <- lapply(x[c(gt.col, first.col:last.col)], as.character) | |
x[first.col:last.col][apply(x[first.col:last.col], 2, `==`, x[[gt.col]])] <- 2 | |
x[first.col:last.col][apply(x[first.col:last.col], 2, `%in%`, gtlevs)] <- 0 | |
x[first.col:last.col][apply(x[first.col:last.col], 2, `%in%`, "N")] <- NA | |
x[first.col:last.col][apply(x[first.col:last.col], 2, `%in%`, ambi)] <- 1 | |
out_matrix <- t(x[first.col:last.col]) | |
out_matrix <- apply(out_matrix, 2, as.numeric) | |
colnames(out_matrix) <- x[[1]] | |
rownames(out_matrix) <- names(x)[first.col:last.col] | |
return(out_matrix) | |
} | |
if ((.Platform)$OS.type == "windows"){ | |
mclapply <- function (X, FUN, ..., mc.preschedule = TRUE, mc.set.seed = TRUE, | |
mc.silent = FALSE, mc.cores = 1L, mc.cleanup = TRUE, | |
mc.allow.recursive = TRUE){ | |
cores <- as.integer(mc.cores) | |
if (cores < 1L) | |
stop("'mc.cores' must be >= 1") | |
if (cores > 1L) | |
lapply(X, FUN, ...) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
installation