Last active
August 29, 2015 14:26
-
-
Save jaredsampson/afe24bd0f002d818bb90 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Bio | |
print(Bio.__version__) | |
from Bio.Seq import Seq | |
# Codons that are resolved directly, without translation:
#   * the minority codon families of the six-fold degenerate amino acids
#     (Ser AGT/AGC, Leu TTA/TTG, Arg AGA/AGG), which need a different
#     degenerate codon than the four-fold family of the same amino acid;
#   * the placeholder codons '???' (unknown) and '---' (gap).
_DIRECT_DEGENERATE_CODONS = {
    'AGT': 'AGY', 'AGC': 'AGY',
    'TTA': 'TTR', 'TTG': 'TTR',
    'AGA': 'AGR', 'AGG': 'AGR',
    '???': 'NNN',
    '---': '---',
}

# Amino acid (one-letter code) -> degenerate codon. For S, L and R this is
# the four-fold family only; their remaining codons are handled by the
# direct table above before translation is attempted.
_AMINO_ACID_TO_DEGENERATE_CODON = {
    'A': 'GCN', 'C': 'TGY', 'D': 'GAY', 'E': 'GAR',
    'F': 'TTY', 'G': 'GGN', 'H': 'CAY', 'I': 'ATH',
    'K': 'AAR', 'L': 'CTN', 'M': 'ATG', 'N': 'AAY',
    'P': 'CCN', 'Q': 'CAR', 'R': 'CGN', 'S': 'TCN',
    'T': 'ACN', 'V': 'GTN', 'W': 'TGG', 'X': 'NNN',
    'Y': 'TAY',
}


def degenerate_codon(codon):
    """Return the degenerate codon that covers the synonyms of ``codon``.

    The codon is translated with Biopython's ``Seq.translate`` and the
    resulting amino acid is mapped to a degenerate codon written with
    nucleotide ambiguity letters (N, Y, R, H). Serine, leucine and arginine
    each have two codon families; the family is chosen from the input codon
    itself (e.g. ``AGT``/``AGC`` -> ``AGY``, other serine codons -> ``TCN``).

    Args:
        codon: A codon as a string. Matching is case-insensitive.

    Returns:
        The degenerate codon as an upper-case string. A codon whose
        translation is not in the amino-acid table (for example a stop
        codon) is returned upper-cased. ``'???'`` yields ``'NNN'`` and
        ``'---'`` is returned as is. Input that cannot be translated is
        returned unchanged.
    """
    key = str(codon).upper()

    # Family-specific and placeholder codons never need translating.
    if key in _DIRECT_DEGENERATE_CODONS:
        return _DIRECT_DEGENERATE_CODONS[key]

    try:
        aa = str(Seq(codon).translate())
    except ValueError:
        # Biopython reports an untranslatable codon with TranslationError,
        # a ValueError subclass; pass such input through untouched.
        return codon

    # Anything without a table entry (e.g. '*' for a stop codon) is
    # returned as the upper-cased input codon.
    return _AMINO_ACID_TO_DEGENERATE_CODON.get(aa, key)
def degenerate_sequence(seq):
    """Degenerate a nucleotide sequence codon by codon.

    ``seq`` is walked in consecutive three-character chunks; each chunk is
    passed to ``degenerate_codon`` and the results are concatenated in
    order. Nothing is trimmed: when ``len(seq)`` is not a multiple of three
    the final chunk is shorter than three characters and is handed to
    ``degenerate_codon`` like any other chunk.

    Args:
        seq: The sequence to process; it is sliced in steps of three.

    Returns:
        The concatenation of the per-chunk results (``""`` for empty input).
    """
    result = ""
    for start in range(0, len(seq), 3):
        result += degenerate_codon(seq[start:start + 3])
    return result
# Smoke-test input: a run of standard codons followed by the placeholder
# codons NNN, --- and ???.
test_seq = 'TTTTTCTTATTGCTTCTCCTACTGATTATCATAATGGTTGTTGTCGTAGTGTCTTCCTCATCGCCTCCCCCACCGACTACCACAACGGCTGCCGCAGCGTATTACTAATAGCATCACCAACAGAATAACAAAAAGGATGACGAAGAGTGTTGCTGATGGCGTCGCCGACGGAGTAGCAGAAGGGGTGGCGGAGGGNNN---???'
test_output = degenerate_sequence(test_seq)

# Reference results kept for manual comparison (uncomment the prints to
# inspect them). perl_output was produced for test_seq by the Perl web tool
# at http://www.phylotools.com/ptdegenwebservice.htm; the commented-out
# test_output is a recorded result for the same input. The two recorded
# strings differ at the AGC codon: AGY in the Perl output, TCN in the
# recorded test_output.
#perl_output = 'TTYTTYYTNYTNYTNYTNYTNYTNATHATHATHATGGTNGTNGTNGTNGTNTCNTCNTCNTCNCCNCCNCCNCCNACNACNACNACNGCNGCNGCNGCNTAYTAYTAATAGCAYCAYCARCARAAYAAYAARAARGAYGAYGARGARTGYTGYTGATGGMGNMGNMGNMGNAGYAGYMGNMGNGGNGGNGGNGGNNNN---NNN'
#test_output = 'TTYTTYYTNYTNYTNYTNYTNYTNATHATHATHATGGTNGTNGTNGTNGTNTCNTCNTCNTCNCCNCCNCCNCCNACNACNACNACNGCNGCNGCNGCNTAYTAYTAATAGCAYCAYCARCARAAYAAYAARAARGAYGAYGARGARTGYTGYTGATGGMGNMGNMGNMGNAGYTCNMGNMGNGGNGGNGGNGGNNNN---NNN'
#print(test_output)
#print(perl_output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment