Skip to content

Instantly share code, notes, and snippets.

@zkamvar
Created October 16, 2013 21:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zkamvar/7015503 to your computer and use it in GitHub Desktop.
Save zkamvar/7015503 to your computer and use it in GitHub Desktop.
A function for converting hapmap formatted files to data frames for the purpose of importing them into adegenet's genlight object.
#==============================================================================#
# hapmap2df.r Convert hapmap files to a data frame.
#
# Author: Zhian N. Kamvar
# License: GPLv3
# Year: 2013
# Waranty: NONE
#
# This function will read in hapmap formatted files and convert them to a data
# frame for the purposes of importing them to genlight objects. Example:
#
# if(!require(adegenet)){
# install.packages("adegenet")
# }
# hmp <- read.table(my_hapmap_file.hmp, sep = "\t", skip = 1)
# names(hmp) <- strsplit(readLines(my_hapmap_file.hmp, 1), "\t")[[1]]
# hmp.df <- hapmap2df(hmp)
#
# # If you have windows:
# hmp.gl <- new("genlight", hmp.df, mc.cores = 1L)
#
# # Else:
# hmp.gl <- new("genlight", hmp.df)
#
# # Visual stuff from the adegenet genomics manual:
# plot(hmp.gl)
# myFreq <- glMean(hmp.gl)
# myFreq <- c(myFreq, 1-myFreq)
# hist(myFreq, proba = TRUE, col = "darkseagreen3", xlab = "Allele frequencies",
# main = "Distribution of allele frequencies", nclass = 20)
# temp <- density(myFreq, bw = .05)
# lines(temp$x, temp$y*2, lwd = 3)
#==============================================================================#
hapmap2df <- function(x, gt.col = 2, first.col = 12, last.col = ncol(x)){
ambi <- c("S","M","Y","R","K","W")
names(ambi) <- c("A/T","C/G","A/C","G/T","A/G","C/T")
genotypes <- levels(x[[gt.col]])
major_alleles <- unlist(strsplit(genotypes, "/"))
major_alleles <- major_alleles[1:(length(major_alleles)) %% 2 == 1]
print(major_alleles)
levels(x[[gt.col]]) <- major_alleles
gtlevs <- unique(levels(x[[gt.col]]))
print(gtlevs)
x[c(gt.col, first.col:last.col)] <- lapply(x[c(gt.col, first.col:last.col)], as.character)
x[first.col:last.col][apply(x[first.col:last.col], 2, `==`, x[[gt.col]])] <- 2
x[first.col:last.col][apply(x[first.col:last.col], 2, `%in%`, gtlevs)] <- 0
x[first.col:last.col][apply(x[first.col:last.col], 2, `%in%`, "N")] <- NA
x[first.col:last.col][apply(x[first.col:last.col], 2, `%in%`, ambi)] <- 1
out_matrix <- t(x[first.col:last.col])
out_matrix <- apply(out_matrix, 2, as.numeric)
colnames(out_matrix) <- x[[1]]
rownames(out_matrix) <- names(x)[first.col:last.col]
return(out_matrix)
}
if ((.Platform)$OS.type == "windows"){
mclapply <- function (X, FUN, ..., mc.preschedule = TRUE, mc.set.seed = TRUE,
mc.silent = FALSE, mc.cores = 1L, mc.cleanup = TRUE,
mc.allow.recursive = TRUE){
cores <- as.integer(mc.cores)
if (cores < 1L)
stop("'mc.cores' must be >= 1")
if (cores > 1L)
lapply(X, FUN, ...)
}
}
@zkamvar
Copy link
Author

zkamvar commented Feb 13, 2015

installation

devtools::source_gist(7015503)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment