Last active
August 29, 2015 13:57
-
-
Save lmmx/9871296 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Sequence records pasted together with their position numbering. Every
// character that is not a letter (the running position numbers and the spaces)
// is stripped so that only the residues / bases remain.
// All script-level names are declared explicitly: assigning to an undeclared
// identifier creates an implicit global and throws in strict mode / ES modules.
var peptide = '1MKLHYVAVLTLAILMFLTWLPESLSCNKALCASDVSKCLIQELCQCRPGEGNCACCKECM6061LCLGALWDECCDCVGMCNPRNYADTPPTSKSTVEELHEPIPSLFRALTEGDTQLNWNIVS120121FPVAEELSHHENLVSFLETVNQPHHQNVSVPSNNVHAPYSSDKEHMCTVVYFDDCMSIHQ180181CKISCESMGASKYRWFHNACCECIGPECIDYGSKTVKCMNCMF223'.replace(/[^a-zA-Z]/g, '');
var dna = '1CGGCGGGAGGCGCGGCCTGGCCTCGCACTCAAAGCCGCCGCAGCGCGCCCCGGGCTCGGC60 61CGACCCGGCGGGGATCTAGGGGTGGGCGACTTCGCGGGACCGTGGCGCATGTTTCCTGGG120 121AGTTACTGATCATCTTCTTTGAAGAAACATGAAGTTACACTATGTTGCTGTGCTTACTCT180 181AGCCATCCTGATGTTCCTGACATGGCTTCCAGAATCACTGAGCTGTAACAAAGCACTCTG240 241TGCTAGTGATGTGAGCAAATGCCTCATTCAGGAGCTCTGCCAGTGCCGGCCGGGAGAAGG300 301CAATTGCGCCTGCTGTAAGGAGTGCATGCTGTGTCTTGGGGCCCTTTGGGACGAGTGCTG360 361TGACTGTGTTGGTATGTGTAATCCTCGAAATTATAGTGACACACCTCCAACTTCAAAGAG420 421CACAGTGGAGGAGCTGCATGAACCGATCCCTTCTCTCTTCCGGGCACTCACAGAAGGAGA480 481TACTCAGTTGAATTGGAACATCGTTTCTTTCCCTGTTGCAGAAGAACTTTCACATCATGA540 541GAATCTGGTTTCATTTTTAGAAACTGTGAACCAGCCACACCACCAGAATGTGTCTGTCCC600 601CAGCAATAATGTTCACGCGCCTTATTCCAGTGACAAAGAACACATGTGTACTGTGGTTTA660 661TTTTGATGACTGCATGTCCATACATCAGTGTAAAATATCCTGTGAGTCCATGGGAGCATC720 721CAAATATCGCTGGTTTCATAATGCCTGCTGCGAGTGCATTGGTCCAGAATGTATTGACTA780 781TGGTAGTAAAACTGTCAAATGTATGAACTGCATGTTTTAAAGAAGACAAATGCAAACCAA840 841AGCAACTTAGTAAAATAATAGGTATAAAAAGTTATTCTGTAAGTCTGTTGGTTGTATCTT900 901GTATCAGAATCCCAGTAAGTTAAGTTGTAAAGACTTTGGAATAAGTTTCTTTTAAAAATA960 961TGACATAGCCAGTGATGTGTTTAATTATATAACTGTTCTT1000'.replace(/[^a-zA-Z]/g, '');
// Codon table text supplied by the user (a table in the expected layout is
// reproduced in the comment block at the end of this file).
var code = prompt('Insert codon table');
var startCodon = "ATG";
// Filled by the start-codon scan that follows.
var startList = [];
// Record the 0-based position of every "ATG" in the transcript.
// indexOf is given a fromIndex so it returns absolute positions directly.
// The earlier version searched a substring starting at (previous + 3) but added
// the resulting offset to `previous` itself, so every entry after the first was
// stuck 3 bases upstream of the second ATG - which is why the ORF appeared to
// "start 3 bases early" and needed a .substr(3) workaround.
var nextStart = dna.indexOf("ATG");
while (nextStart !== -1) {
  startList.push(nextStart);
  nextStart = dna.indexOf("ATG", nextStart + 3);
}

/**
 * A candidate open reading frame.
 * @constructor
 * @param {string} orf sequence from the start codon onwards
 * @param {number} at  0-based position of the start codon in `dna`
 */
function ORF(orf, at) {
  this.orf = orf;
  this.at = at;
  this.len = orf.length;
}

// One candidate per start codon, each running to the end of `dna`.
var ORFlist = [];
for (var s = 0; s < startList.length; s++) {
  ORFlist.push(new ORF(dna.substr(startList[s]), startList[s]));
}

/**
 * A match of the modification-site pattern in the peptide.
 * @constructor
 * @param {string} ptm matched residues
 * @param {number} at  0-based position of the match in `peptide`
 */
function PTM(ptm, at) {
  this.ptm = ptm;
  this.at = at;
  this.len = ptm.length;
}

var PTMlist = [];
var PTMtarget = /N.[ST]/g; // for NX(ST)
// Walk the peptide with exec() so each site is recorded with its own text and
// position (the earlier loop always re-read the first match), and so that a
// peptide without any site yields an empty list instead of a TypeError.
var site;
PTMtarget.lastIndex = 0;
while ((site = PTMtarget.exec(peptide)) !== null) {
  PTMlist.push(new PTM(site[0], site.index));
  // Resume one residue later so overlapping sites are not skipped.
  PTMtarget.lastIndex = site.index + 1;
}

// The frame opened by the second ATG is the one used by the rest of the script.
// With the positions above now correct it no longer needs trimming.
var theORF = ORFlist[1].orf;
// First 25 codons of that frame (presumably the signal peptide - confirm).
var signal = theORF.substr(0, 25 * 3);
/**
 * Describe the codon of theORF that encodes a given residue.
 * @param {number} residueAt 0-based residue index within theORF
 * @returns {string} "<codon> at <0-based base offset of the codon>"
 */
function codonFor(residueAt) {
  var baseOffset = residueAt * 3;
  var triplet = theORF.substr(baseOffset, 3);
  return triplet + " at " + baseOffset;
}
//////////////////////////////////////////////////////////////////// | |
// Every " (X)" one-letter annotation found in the pasted codon table.
// Declared explicitly (it used to be an implicit global, which throws in strict
// mode); no other code visible in this file reads it.
var singles = code.match(/ [(].[)]/g);
/**
 * One row of the genetic-code table.
 * @constructor
 * @param {string} letter  one-letter code stored for the row
 * @param {string} codon   three-base codon
 * @param {string} residue residue name stored for the row
 */
function Codon(letter, codon, residue) {
  var row = this;
  row.letter = letter;
  row.codon = codon;
  row.residue = residue;
}
// Parse the pasted codon table in one pass.
// Rows are expected in the layout "TTT Phe (F)" / "TTC Phe" / "TAA Stop":
// the codon at offset 0, the residue name at offset 4, and - on the first row
// of each residue - the one-letter code in parentheses at offset 9.
// The earlier version re-ran the regex over the whole text several times per
// iteration and located each row with indexOf (first occurrence of the text);
// exec() gives the real position of every match and copes with empty input.
var shortList = [];  // residue names, one per codon
var codonList = [];  // codons in table order
var letters = [];    // one-letter codes, one per codon
var myTable = [];    // Codon objects combining the three lists
var codonPattern = /[A-Z][A-Z][A-Z]/g;
var codonHit;
while ((codonHit = codonPattern.exec(code)) !== null) {
  var rowAt = codonHit.index;

  // "Stop" is the only four-letter name: its 4th character sits at offset 7.
  var residueName = code.substr(rowAt + 4, code[rowAt + 7] === 'p' ? 4 : 3);

  var letter;
  if (residueName === "Stop") {
    letter = "—";
  } else if (code[rowAt + 8] === "(") {
    letter = code[rowAt + 9];
  } else {
    // Rows without their own "(X)" inherit the letter of the row above.
    letter = letters[letters.length - 1];
  }

  shortList.push(residueName);
  codonList.push(codonHit[0]);
  letters.push(letter);

  var newCodon = new Codon(letter, codonHit[0], residueName);
  myTable.push(newCodon);
  console.log(newCodon);
}
// Earlier version split the whole of theORF instead:
// var codons = theORF.match(/.{1,3}/g);
// Keep only the first 223 codons (223 is the final position number in the
// peptide record) and cut them into triplets.
var realORF = theORF.slice(0, 3 * 223);
var codons = realORF.match(/.{1,3}/g);
/**
 * List every codon in myTable stored under the given residue name.
 * @param {string} residue residue name as stored in the table (e.g. "Thr", "Stop")
 * @returns {string[]} matching codons in table order; empty when there are none
 */
function ResToCodon(residue) {
  var results = [];
  // The counter is local: a shared global counter was overwritten whenever one
  // of these lookup helpers was called from inside another loop.
  for (var row = 0; row < myTable.length; row++) {
    if (myTable[row].residue == residue) {
      results.push(myTable[row].codon);
    }
  }
  return results;
}
// Codons listed under "Stop" in the parsed table.
// A loop that followed here only evaluated `codons[i] == stopCodons` (a string
// compared with an array) and discarded the result, so it has been removed.
var stopCodons = ResToCodon("Stop");
/**
 * List every codon in myTable stored under the given one-letter code.
 * @param {string} letter one-letter code as stored in the table
 * @returns {string[]} matching codons in table order; empty when there are none
 */
function LetterToCodon(letter) {
  var results = [];
  // Local counter, so calling this from inside another loop cannot disturb it.
  for (var row = 0; row < myTable.length; row++) {
    if (myTable[row].letter == letter) {
      results.push(myTable[row].codon);
    }
  }
  return results;
}
/**
 * Look up the residue name stored for a one-letter code.
 * The earlier body was a copy of LetterToCodon that collected codons and then
 * returned nothing; it now returns the residue, mirroring ResToLetter.
 * @param {string} letter one-letter code as stored in the table
 * @returns {string|undefined} residue name of the first matching row, or
 *   undefined when the letter is not in myTable
 */
function LetterToRes(letter) {
  for (var row = 0; row < myTable.length; row++) {
    if (myTable[row].letter == letter) return myTable[row].residue;
  }
}
/**
 * Look up the one-letter code stored for a residue name.
 * @param {string} residue residue name as stored in the table
 * @returns {string|undefined} letter of the first matching row, or undefined
 *   when the residue is not in myTable
 */
function ResToLetter(residue) {
  // Local counter: a shared global one was clobbered across nested calls.
  for (var row = 0; row < myTable.length; row++) {
    if (myTable[row].residue == residue) return myTable[row].letter;
  }
}
/**
 * Look up the residue name stored for a codon.
 * @param {string} codon three-base codon
 * @returns {string|undefined} residue name, or undefined when the codon is not
 *   in myTable
 */
function CodonToRes(codon) {
  // Local counter: a shared global one was clobbered across nested calls.
  for (var row = 0; row < myTable.length; row++) {
    if (myTable[row].codon == codon) return myTable[row].residue;
  }
}
/**
 * Look up the one-letter code stored for a codon.
 * @param {string} codon three-base codon
 * @returns {string|undefined} one-letter code, or undefined when the codon is
 *   not in myTable
 */
function CodonToLetter(codon) {
  // Local counter: translate() calls this inside its own loop, and a shared
  // global counter made that loop jump to the table index of each codon.
  for (var row = 0; row < myTable.length; row++) {
    if (myTable[row].codon == codon) return myTable[row].letter;
  }
}
/**
 * Translate a nucleotide sequence that has already been split into 3-mers.
 * @param {string[]} seq array of codons
 * @returns {Array<string|undefined>} one entry per codon, as returned by
 *   CodonToLetter (undefined for codons it does not know)
 */
function translate(seq) { // for a nucleotide sequence split into an array of 3mers
  var translation = [];
  // The position counter must be local: it used to be the same global `i` that
  // the lookup helper iterates with, so every lookup moved this loop's position
  // (skipping codons, or never terminating).
  for (var pos = 0; pos < seq.length; pos++) {
    translation.push(CodonToLetter(seq[pos]));
  }
  return translation;
}
// Find the stop codons | |
/**
 * The leading part of theORF that ends before a given codon position.
 * @constructor
 * @param {number} atCodon codon position; the first (atCodon - 1) codons of
 *   theORF are kept
 * NOTE(review): callers in this file pass the 0-based index of a stop codon, in
 * which case the codon immediately before the stop is dropped as well - confirm
 * whether atCodon is meant to be 1-based.
 */
function stop(atCodon)
{
  var basesKept = (atCodon - 1) * 3;
  var truncated = theORF.substring(0, basesKept);
  this.orf = truncated;
  this.at = atCodon;
  this.len = truncated.length;
}
// Build a truncated-ORF record for every in-frame codon that is a stop codon.
// Membership is tested against the whole stopCodons list; the earlier check
// compared against stopCodons[0..2] only, which silently assumed exactly three.
var allORFs = [];
for (var c = 0; c < codons.length; c++) {
  if (stopCodons.indexOf(codons[c]) !== -1) {
    allORFs.push(new stop(c));
  }
}

// Translate each truncated ORF.
var transcripts = [];
for (var r = 0; r < allORFs.length; r++) {
  // must be in 3mers to translate! An empty ORF makes match() return null, so
  // fall back to an empty codon list instead of handing null to translate().
  var orfCodons = allORFs[r].orf.match(/.{1,3}/g) || [];
  transcripts.push(translate(orfCodons));
}
// Install String.prototype.editAt once, unless something has already defined it.
if (typeof String.prototype.editAt !== 'function') {
  /**
   * Return a copy of the string in which the character at `index` is replaced
   * by `mut`.
   * @param {number} index 0-based position of the character to replace
   * @param {string} mut   replacement text
   * @returns {string} text before `index`, then `mut`, then text after `index`
   */
  String.prototype.editAt = function (index, mut) {
    var before = this.substring(0, index);
    var after = this.slice(index + 1);
    return before + mut + after;
  };
}
// Point mutation in the ORF, then the same sequence cut into codons.
// NOTE(review): editAt() takes a 0-based index, so index 439 is the 440th base
// of theORF - confirm that "439A>C" is not meant as a 1-based position.
var mutORF = theORF.editAt(439, "C"); // 439A>C
var mutCodons = mutORF.match(/.{1,3}/g);

// code from previous tutorial, when var seq = 'GGGGCCCTTTGGGACGAGTGCTGTGACTGTGTTGG';
var seq = mutORF;
var mismatch = 1;
// match() returns null when there is no G or C at all; count that as zero.
var GCcount = (seq.match(/[GC]/g) || []).length;
var GCper = GCcount / seq.length * 100;
// Melting-temperature estimate from GC percentage, length and mismatches.
// NOTE(review): GCper is a percentage whereas mismatch/seq.length is a
// fraction - confirm the intended units of the mismatch term.
var gcTerm = 0.41 * GCper;
var lengthTerm = 675 / seq.length;
var mismatchTerm = mismatch / seq.length;
var Tm = 81.5 + gcTerm - lengthTerm - mismatchTerm;
console.log(Tm);

// Base-by-base complement of seq, in the same orientation (it is not reversed).
var complementOf = { G: 'C', C: 'G', A: 'T', T: 'A' };
var anti = seq.replace(/[GCAT]/g, function (base) {
  return complementOf[base];
}).toUpperCase();
/* | |
This kills consoles :-( | |
translation = []; | |
translate(realORF.match(/.{1,3}/g)); | |
var realTranscript = translation.join(''); | |
*/ | |
// Patterns for the codon-usage table text. They are declared explicitly: they
// used to be implicit globals, which throw in strict mode / ES modules.
// One table row: a codon followed by three decimal columns, each separated by
// 8-9 whitespace characters.
var codonUsageRegEx = /[A-Z]{3}\s{8,9}\d{1,2}\.\d\d\s{8,9}\d{1,2}\.\d\d\s{8,9}\d{1,2}\.\d\d/g;
// The codon at the start of a row.
var codonRegEx = /^[A-Z]{3}/;
// The last column of a row, when it has a single digit before the point.
var usageRegEx = /\d\.\d\d$/;
// Input from geneinfinity.org/sms/sms_codonusage.html | |
// usageTable = prompt().match(codonUsageRegEx); | |
/**
 * A codon paired with a usage figure.
 * @constructor
 * @param {string} codon three-base codon
 * @param {number} usage usage value; its unit is whatever the caller computed
 */
function codonUsage(codon, usage)
{
  var entry = this;
  entry.codon = codon;
  entry.usage = usage;
}
// Usage of each listed codon within theORF, as a percentage of its codons.
// usageTable was never assigned (its prompt is commented out above), so it is
// read here unless a value already exists, e.g. one set by hand in the console.
var usageTable = usageTable || (prompt('Insert codon usage table') || '').match(codonUsageRegEx) || [];

// Count codons in the reading frame. The earlier expression used match() with a
// non-global pattern, which yields at most one hit (so every count was 1) and
// threw a TypeError for codons that do not occur at all.
// NOTE(review): theORF runs to the end of `dna`; use realORF here instead if
// only the coding region should be counted.
var frameCodons = theORF.match(/.{3}/g) || [];
var frameCounts = {};
for (var f = 0; f < frameCodons.length; f++) {
  frameCounts[frameCodons[f]] = (frameCounts[frameCodons[f]] || 0) + 1;
}

var codonDB = [];
for (var b = 0; b < usageTable.length; b++) {
  var usageCodon = usageTable[b].match(codonRegEx)[0];
  var seen = frameCounts[usageCodon] || 0;
  // Percentage of codons, rounded to two decimals.
  var usagePer = frameCodons.length ? Math.round((seen / frameCodons.length) * 10000) / 100 : 0;
  codonDB.push(new codonUsage(usageCodon, usagePer));
}
// Codon Usage Tabulated from GenBank tables at kazusa.or.jp/codon | |
// One table entry: codon, a one-decimal figure, and a bracketed count.
var CUTGregex = /[A-Z]{3}\s+\d+\.\d\(\s+\d+\)/g;
// The one-decimal figure inside an entry.
var CUTusageRegEx = /\d{1,2}\.\d/;
// A cancelled prompt (null) or text without any entry gives an empty table
// instead of a TypeError.
var K12table = (prompt() || '').match(CUTGregex) || []; // kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=83333
var K12DB = [];
// Iterate over K12table itself: the loop used to be bounded by
// usageTable.length, a different (and possibly undefined) table.
for (var k = 0; k < K12table.length; k++) {
  // The source table is written with U; the rest of the script uses T.
  var k12Codon = K12table[k].match(codonRegEx)[0].replace(/U/g, 'T');
  // The figure is rounded to a whole number and then divided by 1000
  // (presumably converting a per-thousand frequency to a fraction - confirm).
  var k12Usage = Math.round(Number(K12table[k].match(CUTusageRegEx)[0])) / 1000;
  K12DB.push(new codonUsage(k12Codon, k12Usage));
}
/**
 * The usage of one codon in two different tables, side by side.
 * @constructor
 * @param {string} codon  three-base codon
 * @param {number} usage1 usage taken from the first table
 * @param {number} usage2 usage taken from the second table
 */
function usageComp(codon, usage1, usage2)
{
  var pair = this;
  pair.codon = codon;
  pair.usage1 = usage1;
  pair.usage2 = usage2;
}
/**
 * Pair up the entries of two usage tables that refer to the same codon and
 * append one usageComp per pair to the script-level compResults array.
 * @param {Array<{codon: string, usage: number}>} DB1 first table (becomes usage1)
 * @param {Array<{codon: string, usage: number}>} DB2 second table (becomes usage2)
 * @returns {undefined} results are delivered through compResults, which must
 *   already exist when this is called
 */
function compareUsage(DB1, DB2) {
  // Both counters are local; they used to leak out as implicit globals.
  for (var z = 0; z < DB1.length; z++) {
    for (var d = 0; d < DB2.length; d++) {
      if (DB1[z].codon == DB2[d].codon) {
        compResults.push(new usageComp(DB1[z].codon, DB1[z].usage, DB2[d].usage));
      }
    }
  }
}
// This is comparing chalk with cheese, as the usage info for the protein is relative to other codons for the same AA
// To compare properly, take the relative uses of different amino acids! This was done for stop codons already
// That is, construct a new variable each time you want to use the codons, containing ResToCodon(<residue 3-letter code>);
// I don't have time to make this; below is the basic structure of a parser for such a table. Divide each K12 codon's use by the total of the others' uses.
/**
 * Log "<codon> <usage>" for every K12DB entry whose codon is stored under the
 * given residue name.
 * @param {string} res residue name understood by ResToCodon (e.g. "Thr")
 * @returns {undefined} output goes to the console only
 */
function codonRatesFor(res) {
  // Look the codons up once, before the loop. The lookup used to run inside the
  // loop while sharing the global counter `i`, so it reset `i` to the size of
  // the codon table just before K12DB[i] was read.
  var synonymous = ResToCodon(res);
  for (var k = 0; k < K12DB.length; k++) {
    if (synonymous.indexOf(K12DB[k].codon) !== -1) {
      console.log(K12DB[k].codon + " " + K12DB[k].usage);
    }
  }
}
// E.g. for Thr codonRatesFor('Thr') ACT 0.008 + ACC 0.023 + ACA 0.006 + ACG 0.012 = 0.049 | |
// | |
/* | |
compResults = []; | |
compareUsage(codonDB,K12DB); | |
for (i=0;i<compResults.length;i++) { | |
if (compResults[i].usage1 != "0.00") { | |
console.log(compResults[i].codon+"\t"+compResults[i].usage1+"\t"+compResults[i].usage2); | |
} | |
} | |
*/ | |
// Build the comparison list before reporting on it: compResults was only ever
// created inside the commented-out block above, so this loop used to fail with
// a ReferenceError.
var compResults = [];
compareUsage(codonDB, K12DB);
// Report codons whose two usage figures differ by more than 0.5 while the
// second one is below 0.1.
// NOTE(review): confirm both tables use the same unit before relying on these
// thresholds.
for (var n = 0; n < compResults.length; n++) {
  var entry = compResults[n];
  if (Math.abs(entry.usage1 - entry.usage2) > 0.5 && entry.usage2 < 0.1) {
    console.log(entry.codon + "\t" + entry.usage1 + "\t" + entry.usage2);
  }
}
/* | |
GCG 1.00 4.48 0.09 | |
Ala GCA 4.00 17.94 0.36 | |
Ala GCT 2.00 8.97 0.18 | |
Ala GCC 4.00 17.94 0.36 | |
Cys TGT 12.00 53.81 0.50 | |
Cys TGC 12.00 53.81 0.50 | |
Asp GAT 3.00 13.45 0.33 | |
Asp GAC 6.00 26.91 0.67 | |
Glu GAG 8.00 35.87 0.47 | |
Glu GAA 9.00 40.36 0.53 | |
Phe TTT 4.00 17.94 0.57 | |
Phe TTC 3.00 13.45 0.43 | |
Gly GGG 1.00 4.48 0.13 | |
Gly GGA 3.00 13.45 0.38 | |
Gly GGT 3.00 13.45 0.38 | |
Gly GGC 1.00 4.48 0.13 | |
His CAT 5.00 22.42 0.50 | |
His CAC 5.00 22.42 0.50 | |
Ile ATA 2.00 8.97 0.25 | |
Ile ATT 3.00 13.45 0.38 | |
Ile ATC 3.00 13.45 0.38 | |
Lys AAG 3.00 13.45 0.30 | |
Lys AAA 7.00 31.39 0.70 | |
Leu TTG 1.00 4.48 0.05 | |
Leu TTA 2.00 8.97 0.10 | |
Leu CTG 6.00 26.91 0.30 | |
Leu CTA 1.00 4.48 0.05 | |
Leu CTT 5.00 22.42 0.25 | |
Leu CTC 5.00 22.42 0.25 | |
Met ATG 9.00 40.36 1.00 | |
Asn AAT 9.00 40.36 0.69 | |
Asn AAC 4.00 17.94 0.31 | |
Pro CCG 2.00 8.97 0.17 | |
Pro CCA 4.00 17.94 0.33 | |
Pro CCT 5.00 22.42 0.42 | |
Pro CCC 1.00 4.48 0.08 | |
Gln CAG 6.00 26.91 1.00 | |
Gln CAA 0.00 0.00 0.00 | |
Arg AGG 0.00 0.00 0.00 | |
Arg AGA 0.00 0.00 0.00 | |
Arg CGG 2.00 8.97 0.50 | |
Arg CGA 1.00 4.48 0.25 | |
Arg CGT 0.00 0.00 0.00 | |
Arg CGC 1.00 4.48 0.25 | |
Ser AGT 4.00 17.94 0.20 | |
Ser AGC 4.00 17.94 0.20 | |
Ser TCG 0.00 0.00 0.00 | |
Ser TCA 4.00 17.94 0.20 | |
Ser TCT 3.00 13.45 0.15 | |
Ser TCC 5.00 22.42 0.25 | |
Thr ACG 0.00 0.00 0.00 | |
Thr ACA 4.00 17.94 0.40 | |
Thr ACT 6.00 26.91 0.60 | |
Thr ACC 0.00 0.00 0.00 | |
Val GTG 6.00 26.91 0.40 | |
Val GTA 0.00 0.00 0.00 | |
Val GTT 7.00 31.39 0.47 | |
Val GTC 2.00 8.97 0.13 | |
Trp TGG 4.00 17.94 1.00 | |
Tyr TAT 6.00 26.91 1.00 | |
Tyr TAC 0.00 0.00 0.00 | |
End TGA 0.00 0.00 0.00 | |
End TAG 0.00 0.00 0.00 | |
End TAA 0.00 0.00 0.00 | |
*/ | |
/* | |
TTT Phe (F) | |
TTC Phe | |
TTA Leu (L) | |
TTG Leu | |
TCT Ser (S) | |
TCC Ser | |
TCA Ser | |
TCG Ser | |
TAT Tyr (Y) | |
TAC Tyr | |
TAA Stop | |
TAG Stop | |
TGT Cys (C) | |
TGC Cys | |
TGA Stop | |
TGG Trp (W) | |
CTT Leu (L) | |
CTC Leu | |
CTA Leu | |
CTG Leu | |
CCT Pro (P) | |
CCC Pro | |
CCA Pro | |
CCG Pro | |
CAT His (H) | |
CAC His | |
CAA Gln (Q) | |
CAG Gln | |
CGT Arg (R) | |
CGC Arg | |
CGA Arg | |
CGG Arg | |
ATT Ile (I) | |
ATC Ile | |
ATA Ile | |
ATG Met (M) | |
ACT Thr (T) | |
ACC Thr | |
ACA Thr | |
ACG Thr | |
AAT Asn (N) | |
AAC Asn | |
AAA Lys (K) | |
AAG Lys | |
AGT Ser (S) | |
AGC Ser | |
AGA Arg (R) | |
AGG Arg | |
GTT Val (V) | |
GTC Val | |
GTA Val | |
GTG Val | |
GCT Ala (A) | |
GCC Ala | |
GCA Ala | |
GCG Ala | |
GAT Asp (D) | |
GAC Asp | |
GAA Glu (E) | |
GAG Glu | |
GGT Gly (G) | |
GGC Gly | |
GGA Gly | |
GGG Gly | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment