Last active
October 30, 2019 03:06
-
-
Save geneva/5777325 to your computer and use it in GitHub Desktop.
A collection of R Functions for analysis of AFLP data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################### | |
# Functions for the analysis of AFLP data # | |
# Anthony J Geneva # | |
# 9 April 2013 # | |
########################################### | |
########################################### | |
# This function converts the binary table | |
# output from RawGeno to a format that can | |
# be read by AFLPScore | |
# To use type | |
# RawGeno_2_AFLPscore("outfile.txt") | |
# to create a coverted file with | |
# name outfile.txt | |
########################################### | |
RawGeno_2_AFLPscore <- function(outfilename) | |
{ | |
dat <- data.binary$data.height.raw | |
tdat <- t(dat) | |
names <- vector(length = ncol(tdat)) | |
names[1] <-"" | |
for (i in 2:ncol(tdat)) | |
{ | |
names[i] <- paste("Loc_",i,sep="") | |
} | |
colnames(tdat) <- names | |
tdat[which(tdat==0)] = NA | |
write.table(tdat, outfilename, na="", quote=FALSE, sep="\t") | |
} | |
########################################### | |
# end function | |
########################################### | |
########################################### | |
# BEGIN Error Rate Functions | |
# | |
# the makeTable function borrows heavily | |
# from AFLPscore v1.4 by Raj Whitlock | |
# shef.ac.uk/molecol/software/aflpscore | |
# | |
# Whitlock et al. 2008 Mol Ecol Res. 8:725-735 | |
########################################### | |
makeTable <- function(filename,binthresh,allelethresh){ | |
gen<-read.delim(paste(filename),header=TRUE) | |
nloci0<-dim(gen)[2] | |
gen[is.na(gen)] <- 0 | |
empty_rows<-as.vector(gen[rowSums(gen)==0,1]) | |
emptyindex<-as.numeric(rownames(gen[rowSums(gen)==0,])) | |
if((length(emptyindex)>0)==T){gen<-gen[-emptyindex,]} | |
gen<-as.data.frame(gen) | |
nloci<-dim(gen)[2] | |
rowss<-as.vector(rowSums(gen,na.rm=TRUE)) | |
medianintense<-median(rowss) | |
intenseweight<-medianintense/rowss | |
gen2<-gen*intenseweight | |
locusintense2<-apply(gen2,2,FUN=function(x)((sum(x[x>0]))/(length(x[x>0])))) | |
locusintense2[is.na(locusintense2)] <- 0 | |
locusintense2[locusintense2<binthresh]<-0 | |
locusintense2[locusintense2>=binthresh]<-1 | |
ret_loci<-locusintense2[locusintense2==1] | |
gen3<-gen2[,names(ret_loci)] | |
gen3[gen3<allelethresh]<-0 | |
gen3[gen3>=allelethresh]<-1 | |
row.names(gen3) <- row.names(gen) | |
write.table(gen3, file=paste(filename,"_L",binthresh,"_P",allelethresh,".txt",sep=""), sep="\t", col.names=TRUE, row.names=TRUE, quote=FALSE) | |
} | |
errorRate <- function(filename, duplicates){ | |
dat <- read.table(filename, header=T) | |
set <- matrix(ncol=ncol(dat), nrow=nrow(dat)) | |
i <- 3 | |
set <- dat[1,]+dat[2,] | |
while(i<((2*duplicates)+1)) | |
{ | |
x <- dat[i,]+dat[(i+1),] | |
set <- rbind(set,x) | |
i <- i + 2 | |
} | |
error <- sum(set[,]==1)/(ncol(dat)*duplicates) | |
return(error) | |
} | |
########################################### | |
# END Error Rate Functions | |
########################################### | |
########################################### | |
# BEGIN Error Rate Script | |
# | |
# this script creates AFLP scoring matrices | |
# using a range of locus and phenotype | |
# thersholds then assesses error rate given | |
# a set of duplicate samples at the top of | |
# a phenotype table | |
########################################### | |
#setwd(###working directory where the files are###) | |
files <- c("DM_Plate1_m47_e1_RawGeno.txt", "DM_Plate1_m47_e2_RawGeno.txt", | |
"DM_Plate1_m49_e1_RawGeno.txt", "DM_Plate1_m49_e2_RawGeno.txt", | |
"DM_Plate1_m51_e1_RawGeno.txt", "DM_Plate1_m51_e2_RawGeno.txt") # list filenames | |
dups <- c(16,16,16,16,16,16) #number of duplicate pairs per file | |
locus_list <- c(100,250,500,750) | |
phenotype_list <- c(100,250,500,750,1000) | |
write.table("Error Rates", file="error_Tables.csv", col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",") | |
write.table("\n", file="error_Tables.csv", append=T,col.names=FALSE, row.names=FALSE, sep=",", quote=F) | |
for (i in 1:length(files)){ | |
err <- matrix(ncol=length(locus_list), nrow=length(phenotype_list)+1) | |
colnames(err) <- locus_list | |
row.names(err) <- c(phenotype_list,"loci") | |
for (j in 1:length(locus_list)){ | |
for (k in 1:length(phenotype_list)){ | |
try(makeTable(files[i],locus_list[j],phenotype_list[k])) | |
try(err[k,j] <- errorRate(paste(files[i],"_L",locus_list[j],"_P",phenotype_list[k],".txt",sep=""),dups[i])) | |
} | |
try(err[nrow(err),j] <- ncol(read.table(paste(files[i],"_L",locus_list[j],"_P",phenotype_list[k],".txt",sep=""), header=T))) | |
} | |
write.table(paste(files[i]), file="error_Tables.csv", append=T,col.names=FALSE, row.names=FALSE, sep=",", quote=F) | |
write.table(err, file="error_Tables.csv", quote=FALSE, append=T, sep=",") | |
write.table("\n", file="error_Tables.csv", append=T,col.names=FALSE, row.names=FALSE, sep=",", quote=F) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment