Skip to content

Instantly share code, notes, and snippets.

@zkamvar
Last active Aug 29, 2015
Embed
What would you like to do?
Polyploid data generator
library("stringr")
library("adegenet")
stopifnot(packageVersion("adegenet") >= 2)
generate_polyploid_data <- function(nind = 10, nloc = 2, maxploid = 4, sep = "/", genind = TRUE){
locnames <- paste("locus", seq(nloc), sep = "_")
indnames <- paste("sample", seq(nind), sep = "_")
res <- lapply(seq(nloc), generate_locus, nind, maxploid, sep)
names(res) <- locnames
res <- data.frame(res, row.names = indnames, stringsAsFactors = FALSE)
ploidy <- get_sample_ploidy(res)
if (genind){
res <- df2genind(res, sep = "/", ploidy = ploidy)
}
return(res)
}
generate_locus <- function(x, nind = 10, maxploid = 4, sep = "/"){
vapply(seq(nind), locus_sampler, character(1), maxploid, sep)
}
locus_sampler <- function(x, maxploid, sep = "/"){
nalleles <- sample(maxploid, 1)
paste(sample(100, nalleles, replace = TRUE), collapse = sep)
}
get_sample_ploidy <- function(z){
res <- apply(z, 1, function(i) vapply(str_extract_all(i, "/"), length, integer(1)))
apply(res, 2, max) + 1
}
fill_zero <- function(x, maxploid, mat = FALSE){
if (length(x) < maxploid){
if (!mat){
zeroes <- paste(rep(0, maxploid - length(x)), collapse = "/")
res <- paste(x, collapse = "/")
res <- paste(zeroes, res, sep = "/")
} else {
res <- c(rep(0.0, maxploid - length(x)), as.numeric(x))
}
} else {
if (!mat){
res <- paste(x, collapse = "/")
} else {
res <- as.numeric(x)
}
}
return(res)
}
fill_zero_locus <- function(x, sep = "/", maxploid, mat = FALSE){
x <- strsplit(x, sep)
if (mat){
result <- numeric(maxploid)
} else {
result <- character(1)
}
return(t(vapply(x, fill_zero, result, maxploid, mat)))
}
generate_bruvo_mat <- function(x, maxploid, sep = "/", mat = FALSE){
if (mat){
result <- matrix(numeric(nrow(x)*maxploid), ncol = maxploid, nrow = nrow(x))
} else {
result <- character(nrow(x))
}
res <- vapply(x, fill_zero_locus, result, sep, maxploid, mat)
if (length(dim(res)) > 2){
redim <- dim(res)
dim(res) <- c(redim[1], redim[2]*redim[3])
xcols <- colnames(x)
maxseq <- seq(maxploid)
colnames(res) <- vapply(xcols, FUN = paste,
FUN.VALUE = character(maxploid), maxseq, sep = ".")
} else {
colnames(res) <- colnames(x)
}
rownames(res) <- rownames(x)
return(res)
}
@zkamvar
Copy link
Author

zkamvar commented Apr 3, 2015

Example:

generate_bruvo_mat(generate_polyploid_data(genind = FALSE), 4, mat = TRUE)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment