jimmyodonnell/heterozygote_combine

## heterozygote_combine
# For any two DNA sequences, this code will generate a single sequence which represents differences among them with the IUPAC ambiguities.

het.cbm <- function(x1, x2){
	if (length(x1)!=length(x2)) stop("sequences are not the same length!")
	oot <- paste(x1, x2, sep="")
# define nucleotides
	nucleotides<-c("A", "C", "G", "T")

# make a matrix of all possible combinations of nucleotides
	combos <- matrix(NA, 4,4)
	dimnames(combos)<-list(nucleotides,nucleotides)
	for(i in 1:length(nucleotides)){
		for(j in 1:length(nucleotides)){
			combos[i,j] <- paste(nucleotides[i], nucleotides[j], sep="")
		}
	}

# The IUPAC ambiguity codes (ordered appropriately according to our matrix- by columns then rows)
IUPAC <- c("A", "M", "R", "W", "M", "C", "S", "Y", "R", "S", "G", "K", "W", "Y", "K", "T")

# put that into a matrix format (might not be necessary)
outie <- matrix(IUPAC, 4,4)
dimnames(outie) <- list(nucleotides,nucleotides)

# What is the IUPAC code for each of the elements of our pasted together sequence?
# outie[match(oot, combos)]
# IUPAC[match(oot, combos)]
yep <- outie[match(oot, combos)]
yep
}

# Example:
fakedata<-c("A","C","G","T","A","T") # Make up a DNA sequence
seq1<-c(fakedata,fakedata) # paste it end to end
seq2<-c(fakedata,rev(fakedata)) # paste the forward and reverse sequence end to end (so we're sure we'll have differences)
het.cbm(seq1,seq2)


# THE IUPAC CODES:
# R	A or G
# Y	C or T
# S	G or C
# W	A or T
# K	G or T
# M	A or C
# N	any base
	# For any two DNA sequences, this code will generate a single sequence which represents differences among them with the IUPAC ambiguities.

	het.cbm <- function(x1, x2){
	if (length(x1)!=length(x2)) stop("sequences are not the same length!")
	oot <- paste(x1, x2, sep="")
	# define nucleotides
	nucleotides<-c("A", "C", "G", "T")

	# make a matrix of all possible combinations of nucleotides
	combos <- matrix(NA, 4,4)
	dimnames(combos)<-list(nucleotides,nucleotides)
	for(i in 1:length(nucleotides)){
	for(j in 1:length(nucleotides)){
	combos[i,j] <- paste(nucleotides[i], nucleotides[j], sep="")
	}
	}

	# The IUPAC ambiguity codes (ordered appropriately according to our matrix- by columns then rows)
	IUPAC <- c("A", "M", "R", "W", "M", "C", "S", "Y", "R", "S", "G", "K", "W", "Y", "K", "T")

	# put that into a matrix format (might not be necessary)
	outie <- matrix(IUPAC, 4,4)
	dimnames(outie) <- list(nucleotides,nucleotides)

	# What is the IUPAC code for each of the elements of our pasted together sequence?
	# outie[match(oot, combos)]
	# IUPAC[match(oot, combos)]
	yep <- outie[match(oot, combos)]
	yep
	}

	# Example:
	fakedata<-c("A","C","G","T","A","T") # Make up a DNA sequence
	seq1<-c(fakedata,fakedata) # paste it end to end
	seq2<-c(fakedata,rev(fakedata)) # paste the forward and reverse sequence end to end (so we're sure we'll have differences)
	het.cbm(seq1,seq2)


	# THE IUPAC CODES:
	# R A or G
	# Y C or T
	# S G or C
	# W A or T
	# K G or T
	# M A or C
	# N any base