Skip to content

Instantly share code, notes, and snippets.

@geneva
Last active October 30, 2019 03:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save geneva/5777325 to your computer and use it in GitHub Desktop.
Save geneva/5777325 to your computer and use it in GitHub Desktop.
A collection of R Functions for analysis of AFLP data
###########################################
# Functions for the analysis of AFLP data #
# Anthony J Geneva #
# 9 April 2013 #
###########################################
###########################################
# This function converts the binary table
# output from RawGeno to a format that can
# be read by AFLPScore
# To use type
# RawGeno_2_AFLPscore("outfile.txt")
# to create a coverted file with
# name outfile.txt
###########################################
RawGeno_2_AFLPscore <- function(outfilename)
{
dat <- data.binary$data.height.raw
tdat <- t(dat)
names <- vector(length = ncol(tdat))
names[1] <-""
for (i in 2:ncol(tdat))
{
names[i] <- paste("Loc_",i,sep="")
}
colnames(tdat) <- names
tdat[which(tdat==0)] = NA
write.table(tdat, outfilename, na="", quote=FALSE, sep="\t")
}
###########################################
# end function
###########################################
###########################################
# BEGIN Error Rate Functions
#
# the makeTable function borrows heavily
# from AFLPscore v1.4 by Raj Whitlock
# shef.ac.uk/molecol/software/aflpscore
#
# Whitlock et al. 2008 Mol Ecol Res. 8:725-735
###########################################
makeTable <- function(filename,binthresh,allelethresh){
gen<-read.delim(paste(filename),header=TRUE)
nloci0<-dim(gen)[2]
gen[is.na(gen)] <- 0
empty_rows<-as.vector(gen[rowSums(gen)==0,1])
emptyindex<-as.numeric(rownames(gen[rowSums(gen)==0,]))
if((length(emptyindex)>0)==T){gen<-gen[-emptyindex,]}
gen<-as.data.frame(gen)
nloci<-dim(gen)[2]
rowss<-as.vector(rowSums(gen,na.rm=TRUE))
medianintense<-median(rowss)
intenseweight<-medianintense/rowss
gen2<-gen*intenseweight
locusintense2<-apply(gen2,2,FUN=function(x)((sum(x[x>0]))/(length(x[x>0]))))
locusintense2[is.na(locusintense2)] <- 0
locusintense2[locusintense2<binthresh]<-0
locusintense2[locusintense2>=binthresh]<-1
ret_loci<-locusintense2[locusintense2==1]
gen3<-gen2[,names(ret_loci)]
gen3[gen3<allelethresh]<-0
gen3[gen3>=allelethresh]<-1
row.names(gen3) <- row.names(gen)
write.table(gen3, file=paste(filename,"_L",binthresh,"_P",allelethresh,".txt",sep=""), sep="\t", col.names=TRUE, row.names=TRUE, quote=FALSE)
}
errorRate <- function(filename, duplicates){
dat <- read.table(filename, header=T)
set <- matrix(ncol=ncol(dat), nrow=nrow(dat))
i <- 3
set <- dat[1,]+dat[2,]
while(i<((2*duplicates)+1))
{
x <- dat[i,]+dat[(i+1),]
set <- rbind(set,x)
i <- i + 2
}
error <- sum(set[,]==1)/(ncol(dat)*duplicates)
return(error)
}
###########################################
# END Error Rate Functions
###########################################
###########################################
# BEGIN Error Rate Script
#
# this script creates AFLP scoring matrices
# using a range of locus and phenotype
# thersholds then assesses error rate given
# a set of duplicate samples at the top of
# a phenotype table
###########################################
#setwd(###working directory where the files are###)
files <- c("DM_Plate1_m47_e1_RawGeno.txt", "DM_Plate1_m47_e2_RawGeno.txt",
"DM_Plate1_m49_e1_RawGeno.txt", "DM_Plate1_m49_e2_RawGeno.txt",
"DM_Plate1_m51_e1_RawGeno.txt", "DM_Plate1_m51_e2_RawGeno.txt") # list filenames
dups <- c(16,16,16,16,16,16) #number of duplicate pairs per file
locus_list <- c(100,250,500,750)
phenotype_list <- c(100,250,500,750,1000)
write.table("Error Rates", file="error_Tables.csv", col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",")
write.table("\n", file="error_Tables.csv", append=T,col.names=FALSE, row.names=FALSE, sep=",", quote=F)
for (i in 1:length(files)){
err <- matrix(ncol=length(locus_list), nrow=length(phenotype_list)+1)
colnames(err) <- locus_list
row.names(err) <- c(phenotype_list,"loci")
for (j in 1:length(locus_list)){
for (k in 1:length(phenotype_list)){
try(makeTable(files[i],locus_list[j],phenotype_list[k]))
try(err[k,j] <- errorRate(paste(files[i],"_L",locus_list[j],"_P",phenotype_list[k],".txt",sep=""),dups[i]))
}
try(err[nrow(err),j] <- ncol(read.table(paste(files[i],"_L",locus_list[j],"_P",phenotype_list[k],".txt",sep=""), header=T)))
}
write.table(paste(files[i]), file="error_Tables.csv", append=T,col.names=FALSE, row.names=FALSE, sep=",", quote=F)
write.table(err, file="error_Tables.csv", quote=FALSE, append=T, sep=",")
write.table("\n", file="error_Tables.csv", append=T,col.names=FALSE, row.names=FALSE, sep=",", quote=F)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment