Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@lmmx
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lmmx/9871296 to your computer and use it in GitHub Desktop.
Save lmmx/9871296 to your computer and use it in GitHub Desktop.
// Protein sequence pasted with its position numbers still embedded;
// .match(/[a-zA-Z]/g).join('') keeps only the letters, leaving the bare residue string.
peptide = '1MKLHYVAVLTLAILMFLTWLPESLSCNKALCASDVSKCLIQELCQCRPGEGNCACCKECM6061LCLGALWDECCDCVGMCNPRNYADTPPTSKSTVEELHEPIPSLFRALTEGDTQLNWNIVS120121FPVAEELSHHENLVSFLETVNQPHHQNVSVPSNNVHAPYSSDKEHMCTVVYFDDCMSIHQ180181CKISCESMGASKYRWFHNACCECIGPECIDYGSKTVKCMNCMF223'.match(/[a-zA-Z]/g).join('');
// Nucleotide sequence pasted with position numbers and spaces between the 60-base rows;
// the same letter filter strips them so dna is one continuous string of bases.
dna = '1CGGCGGGAGGCGCGGCCTGGCCTCGCACTCAAAGCCGCCGCAGCGCGCCCCGGGCTCGGC60 61CGACCCGGCGGGGATCTAGGGGTGGGCGACTTCGCGGGACCGTGGCGCATGTTTCCTGGG120 121AGTTACTGATCATCTTCTTTGAAGAAACATGAAGTTACACTATGTTGCTGTGCTTACTCT180 181AGCCATCCTGATGTTCCTGACATGGCTTCCAGAATCACTGAGCTGTAACAAAGCACTCTG240 241TGCTAGTGATGTGAGCAAATGCCTCATTCAGGAGCTCTGCCAGTGCCGGCCGGGAGAAGG300 301CAATTGCGCCTGCTGTAAGGAGTGCATGCTGTGTCTTGGGGCCCTTTGGGACGAGTGCTG360 361TGACTGTGTTGGTATGTGTAATCCTCGAAATTATAGTGACACACCTCCAACTTCAAAGAG420 421CACAGTGGAGGAGCTGCATGAACCGATCCCTTCTCTCTTCCGGGCACTCACAGAAGGAGA480 481TACTCAGTTGAATTGGAACATCGTTTCTTTCCCTGTTGCAGAAGAACTTTCACATCATGA540 541GAATCTGGTTTCATTTTTAGAAACTGTGAACCAGCCACACCACCAGAATGTGTCTGTCCC600 601CAGCAATAATGTTCACGCGCCTTATTCCAGTGACAAAGAACACATGTGTACTGTGGTTTA660 661TTTTGATGACTGCATGTCCATACATCAGTGTAAAATATCCTGTGAGTCCATGGGAGCATC720 721CAAATATCGCTGGTTTCATAATGCCTGCTGCGAGTGCATTGGTCCAGAATGTATTGACTA780 781TGGTAGTAAAACTGTCAAATGTATGAACTGCATGTTTTAAAGAAGACAAATGCAAACCAA840 841AGCAACTTAGTAAAATAATAGGTATAAAAAGTTATTCTGTAAGTCTGTTGGTTGTATCTT900 901GTATCAGAATCCCAGTAAGTTAAGTTGTAAAGACTTTGGAATAAGTTTCTTTTAAAAATA960 961TGACATAGCCAGTGATGTGTTTAATTATATAACTGTTCTT1000'.match(/[a-zA-Z]/g).join('');
// Codon table text supplied by the user; it is parsed further down the script.
var code = prompt('Insert codon table');
var startCodon = "ATG";

// Offsets of every start codon in dna, scanning left to right.
// indexOf with a fromIndex returns absolute positions, so no offset arithmetic
// is needed. (The previous substr-based search stored the second hit 3 bases
// early and then repeated that same position for every later hit, which is
// why the reading frame used to need an extra .substr(3).)
var startList = [];
var startAt = dna.indexOf(startCodon);
while (startAt !== -1) {
  startList.push(startAt);
  startAt = dna.indexOf(startCodon, startAt + startCodon.length);
}

/**
 * A candidate open reading frame.
 * @param {string} orf - bases from the start codon to the end of dna
 * @param {number} at  - 0-based offset of the start codon within dna
 */
function ORF(orf,at)
{
  this.orf=orf;
  this.at=at;
  this.len=orf.length;
}

// One candidate ORF per start codon, each running to the end of dna.
var ORFlist = [];
for (var i=0;i<startList.length;i++) {
  var newORF = dna.substr(startList[i]);
  ORFlist.push(new ORF(newORF,startList[i]));
}

/**
 * A match of the modification consensus pattern in the peptide.
 * @param {string} ptm - the matched residues
 * @param {number} at  - 0-based offset of the match within peptide
 */
function PTM(ptm,at)
{
  this.ptm=ptm;
  this.at=at;
  this.len=ptm.length;
}

// Record every N-x-[S/T] match together with its own position. exec() with a
// global regex walks the matches one by one, so repeated sites are no longer
// all reported as the first one, and a peptide with no site yields an empty list.
var PTMlist = [];
var PTMtarget = /N.[ST]/g; // for NX(ST)
var ptmHit;
PTMtarget.lastIndex = 0;
while ((ptmHit = PTMtarget.exec(peptide)) !== null) {
  PTMlist.push(new PTM(ptmHit[0],ptmHit.index));
}

// The second start codon is the one this script treats as the real start of
// the reading frame; startList now holds true offsets, so no correction is needed.
var theORF = ORFlist[1].orf;
// First 25 codons (75 bases) of the frame — presumably the signal peptide; confirm.
var signal = theORF.substr(0,25*3);
/**
 * Describe the codon for a residue position in theORF.
 * @param {number} residueAt - 0-based codon index
 * @returns {string} the three bases and their base offset, e.g. "ATG at 0"
 */
function codonFor(residueAt) {
  var baseOffset = residueAt * 3;
  var triplet = theORF.substr(baseOffset, 3);
  return triplet + " at " + baseOffset;
}
////////////////////////////////////////////////////////////////////
// Every " (X)" one-letter annotation found in the pasted codon table
// (a space, an opening parenthesis, any single character, a closing parenthesis).
// Not referenced again in the visible part of this file.
singles = code.match(/ [(].[)]/g);
/**
 * One row of the codon table.
 * @param {string} letter  - one-letter symbol stored for the row
 * @param {string} codon   - three-base codon
 * @param {string} residue - residue name as written in the table
 */
function Codon(letter, codon, residue) {
  var row = this;
  row.letter = letter;
  row.codon = codon;
  row.residue = residue;
}
// Parse the pasted codon table in a single pass.
// Each entry is expected to look like "TTT Phe (F)", "TTC Phe" or "TAA Stop":
// codon at +0..+2, residue name from +4, optional "(X)" letter at +8..+10.
// exec() supplies each codon's own position, so the text is scanned once
// instead of being re-matched and re-searched on every loop iteration, and a
// table with no codons simply produces empty lists.
var shortList = [];  // residue names ("Phe", "Stop", ...), one per codon
var codonList = [];  // the codons themselves, in table order
var letters = [];    // one-letter symbols, one per codon
var myTable = [];    // Codon rows combining the three lists
var codonPattern = /[A-Z][A-Z][A-Z]/g;
var codonHit;
while ((codonHit = codonPattern.exec(code)) !== null) {
  var entryAt = codonHit.index;
  // "Stop" is four characters long; every other residue name is three.
  var residueName = code.charAt(entryAt + 7) === 'p'
    ? code.substr(entryAt + 4, 4)
    : code.substr(entryAt + 4, 3);
  var residueLetter;
  if (residueName === "Stop") {
    residueLetter = "—";
  } else if (code.charAt(entryAt + 8) === "(") {
    residueLetter = code.charAt(entryAt + 9);
  } else {
    // Entries without "(X)" inherit the letter of the entry before them.
    residueLetter = letters[letters.length - 1];
  }
  shortList.push(residueName);
  codonList.push(codonHit[0]);
  letters.push(residueLetter);
}
for (var t = 0; t < codonList.length; t++) {
  var newCodon = new Codon(letters[t], codonList[t], shortList[t]);
  myTable.push(newCodon);
  console.log(newCodon);
}
// Restrict the frame to the first 223 codons (the peptide literal is numbered
// up to 223) rather than everything downstream of the start codon.
var realORF = theORF.substring(0, 223 * 3);
var codons = realORF.match(/.{1,3}/g);
/**
 * List every codon in myTable annotated with the given residue name.
 * @param {string} residue - residue name as stored in the table, e.g. "Stop"
 * @returns {string[]} matching codons in table order (empty when none match)
 */
function ResToCodon(residue) {
  var results = [];
  // The counter is local so callers that loop with their own "i" are not disturbed.
  for (var i = 0; i < myTable.length; i++) {
    if (myTable[i].residue === residue) {
      results.push(myTable[i].codon);
    }
  }
  return results;
}
// Codons whose residue is annotated "Stop" in the parsed table.
// The loop that used to follow only evaluated `codons[i] == stopCodons` and
// discarded the result, so it has been dropped; the stop-codon scan further
// down does the actual comparison.
var stopCodons = ResToCodon("Stop");
/**
 * List every codon in myTable stored under the given one-letter symbol.
 * @param {string} letter - one-letter symbol, e.g. "F"
 * @returns {string[]} matching codons in table order (empty when none match)
 */
function LetterToCodon(letter) {
  var results = [];
  // The counter is local so callers that loop with their own "i" are not disturbed.
  for (var i = 0; i < myTable.length; i++) {
    if (myTable[i].letter === letter) {
      results.push(myTable[i].codon);
    }
  }
  return results;
}
/**
 * Look up the residue name stored under a one-letter symbol.
 * Mirrors ResToLetter: the first matching table row wins.
 * (This used to be a copy of LetterToCodon that gathered codons and then
 * returned nothing.)
 * @param {string} letter - one-letter symbol, e.g. "F"
 * @returns {string|undefined} residue name, or undefined when the letter is unknown
 */
function LetterToRes(letter) {
  for (var i = 0; i < myTable.length; i++) {
    if (myTable[i].letter === letter) return myTable[i].residue;
  }
}
/**
 * Look up the one-letter symbol stored for a residue name.
 * @param {string} residue - residue name as stored in the table, e.g. "Phe"
 * @returns {string|undefined} letter of the first matching row, or undefined when none match
 */
function ResToLetter(residue) {
  // Local counter: an early return must not leave a shared "i" at an arbitrary value.
  for (var i = 0; i < myTable.length; i++) {
    if (myTable[i].residue === residue) return myTable[i].letter;
  }
}
/**
 * Look up the residue name stored for a codon.
 * @param {string} codon - three-base codon, e.g. "TTT"
 * @returns {string|undefined} residue name, or undefined when the codon is not in the table
 */
function CodonToRes(codon) {
  // Local counter: an early return must not leave a shared "i" at an arbitrary value.
  for (var i = 0; i < myTable.length; i++) {
    if (myTable[i].codon === codon) return myTable[i].residue;
  }
}
/**
 * Look up the one-letter symbol stored for a codon.
 * @param {string} codon - three-base codon, e.g. "TTT"
 * @returns {string|undefined} letter, or undefined when the codon is not in the table
 */
function CodonToLetter(codon) {
  // Local counter: translate() calls this from inside its own loop, and a
  // shared "i" would be overwritten here on every lookup.
  for (var i = 0; i < myTable.length; i++) {
    if (myTable[i].codon === codon) return myTable[i].letter;
  }
}
/**
 * Translate a nucleotide sequence that has already been split into 3-mers.
 * @param {string[]|null} seq - array of codons; null (what String.match gives
 *   for an empty sequence) is treated as no codons
 * @returns {Array<string|undefined>} one letter per codon, undefined for codons
 *   missing from the table
 */
function translate(seq) {
  var translation = [];
  if (!seq) return translation;
  // The counter must be local: CodonToLetter runs its own loop, and sharing a
  // single global "i" let every lookup reset this loop's position, which could
  // keep it running forever.
  for (var i = 0; i < seq.length; i++) {
    translation.push(CodonToLetter(seq[i]));
  }
  return translation;
}
// Find the stop codons
/**
 * The stretch of theORF that precedes a stop codon.
 * @param {number} atCodon - codon index at which the stop codon was found
 * NOTE(review): the kept length is (atCodon-1)*3 bases; with the 0-based index
 * passed by the caller this leaves out the codon immediately before the stop —
 * confirm whether a 1-based position was intended.
 */
function stop(atCodon)
{
  var upstreamBases = (atCodon - 1) * 3;
  var upstream = theORF.substr(0, upstreamBases);
  this.orf = upstream;
  this.at = atCodon;
  this.len = upstream.length;
}
// Record a stop entry for every in-frame codon that is one of the table's
// stop codons. Membership is tested against the whole stopCodons list, so the
// scan works however many stop codons the pasted table defines.
var allORFs = [];
var frameCodons = codons || []; // String.match yields null for an empty frame
for (var c = 0; c < frameCodons.length; c++) {
  if (stopCodons.indexOf(frameCodons[c]) !== -1) {
    var newbie = new stop(c);
    allORFs.push(newbie);
  }
}
// Translate the sequence upstream of each stop codon.
var transcripts = [];
for (var r = 0; r < allORFs.length; r++) {
  // translate() needs 3-mers; an empty upstream sequence gives no codons
  // (match returns null for it), so fall back to an empty list.
  var transcript = translate(allORFs[r].orf.match(/.{1,3}/g) || []);
  transcripts.push(transcript);
}
// Install String.prototype.editAt unless something already provides it.
// editAt(index, mut) returns a copy of the string in which the single
// character at `index` is replaced by `mut`.
if (typeof String.prototype.editAt !== 'function') {
  String.prototype.editAt = function(index,mut) {
    var head = this.substring(0, index); // everything before the edited position
    var tail = this.slice(index + 1);    // everything after it
    return head + mut + tail;
  };
}
// Point mutation: replace the base at 0-based index 439 of theORF with C.
// NOTE(review): the label "439A>C" reads like a 1-based position, which would
// be index 438 — confirm which base is meant before relying on this.
var mutORF = theORF.editAt(439,"C"); // 439A>C
var mutCodons = mutORF.match(/.{1,3}/g);

// Melting-temperature estimate, carried over from a tutorial where seq was a
// short primer ('GGGGCCCTTTGGGACGAGTGCTGTGACTGTGTTGG'); here it is applied to
// the whole mutated frame.
var seq = mutORF;
var mismatch = 1;
var gcHits = seq.match(/[GC]/g);           // null when the sequence has no G or C
var GCcount = gcHits ? gcHits.length : 0;
var GCper = GCcount/seq.length*100;
// The formula takes %GC and %mismatch as percentages, so the mismatch term is
// scaled by 100 just like GCper (it was previously left as a bare fraction).
var mismatchPer = mismatch/seq.length*100;
var Tm = 81.5+(0.41*GCper)-(675/seq.length)-mismatchPer;
console.log(Tm);

// Base-by-base complement of seq (same orientation; the string is not reversed).
var complementOf = { G: 'C', C: 'G', A: 'T', T: 'A' };
var anti = seq.replace(/[GCAT]/g, function (base) {
  return complementOf[base];
}).toUpperCase();
/*
This kills consoles :-(
translation = [];
translate(realORF.match(/.{1,3}/g));
var realTranscript = translation.join('');
*/
// Matches one row of a codon-usage table: three capital letters followed by
// three decimal numbers (1-2 digits, a dot, two digits), each preceded by 8-9
// whitespace characters. Only used by the commented-out prompt line below.
codonUsageRegEx = /[A-Z]{3}\s{8,9}\d{1,2}\.\d\d\s{8,9}\d{1,2}\.\d\d\s{8,9}\d{1,2}\.\d\d/g;
// Matches the three capital letters at the very start of a table row (the codon).
codonRegEx = /^[A-Z]{3}/;
// Matches a "d.dd" number at the very end of a row; not referenced in the visible part of this file.
usageRegEx = /\d\.\d\d$/;
// Input from geneinfinity.org/sms/sms_codonusage.html
// NOTE(review): with the next line commented out, nothing in the visible file
// assigns usageTable, yet later loops read usageTable.length.
// usageTable = prompt().match(codonUsageRegEx);
/**
 * A codon paired with a usage figure.
 * @param {string} codon - three-base codon
 * @param {number} usage - usage value as computed by the caller
 */
function codonUsage(codon, usage)
{
  var entry = this;
  entry.codon = codon;
  entry.usage = usage;
}
// Usage of each listed codon within theORF, as a percentage of its in-frame codons.
var codonDB = [];
// usageTable is only assigned by a commented-out prompt, so tolerate its absence.
var usageRows = (typeof usageTable !== 'undefined' && usageTable) ? usageTable : [];
// Complete in-frame triplets of theORF (a trailing partial codon is ignored).
var orfCodons = theORF.match(/.{3}/g) || [];
for (var b = 0; b < usageRows.length; b++) {
  var usageCodon = usageRows[b].match(codonRegEx)[0];
  // Count in-frame occurrences explicitly. The previous
  // theORF.match(<array>) built a non-global regex, so it reported 1 for any
  // codon present anywhere and threw for a codon that was absent.
  var seen = 0;
  for (var k = 0; k < orfCodons.length; k++) {
    if (orfCodons[k] === usageCodon) seen++;
  }
  // Percentage of all codons, rounded to two decimals.
  var usagePer = orfCodons.length ? Math.round((seen/orfCodons.length)*10000)/100 : 0;
  var newUsage = new codonUsage(usageCodon,usagePer);
  codonDB.push(newUsage);
}
// Codon Usage Tabulated from GenBank tables at kazusa.or.jp/codon
// One table cell: three capital letters, whitespace, a number with one decimal,
// then a parenthesised integer.
var CUTGregex = /[A-Z]{3}\s+\d+\.\d\(\s+\d+\)/g;
// First "d.d" / "dd.d" number in a cell.
var CUTusageRegEx = /\d{1,2}\.\d/;
// Paste the table from kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=83333
// A cancelled prompt (null) or text without any table cells gives an empty list.
var K12input = prompt();
var K12table = (K12input && K12input.match(CUTGregex)) || [];
var K12DB = [];
// Iterate over the rows actually parsed from this table; the loop used to be
// bounded by usageTable.length, a different (and possibly undefined) table.
for (var b = 0; b < K12table.length; b++) {
  var k12Codon = K12table[b].match(codonRegEx)[0].replace(/U/g,'T'); // RNA -> DNA alphabet
  // Round the figure to a whole number, then divide by 1000.
  var k12Usage = Math.round(K12table[b].match(CUTusageRegEx)[0])/1000;
  var newUsage = new codonUsage(k12Codon,k12Usage);
  K12DB.push(newUsage);
}
/**
 * The usage figures of one codon taken from two different tables.
 * @param {string} codon  - three-base codon
 * @param {number} usage1 - usage in the first table
 * @param {number} usage2 - usage in the second table
 */
function usageComp(codon, usage1, usage2)
{
  var pair = this;
  pair.codon = codon;
  pair.usage1 = usage1;
  pair.usage2 = usage2;
}
/**
 * Pair up the entries of two usage tables that refer to the same codon.
 * Every pair is appended to the global compResults array, which the caller
 * must have created beforehand, and is also returned.
 * @param {Array<{codon: string, usage: number}>} DB1 - first usage table
 * @param {Array<{codon: string, usage: number}>} DB2 - second usage table
 * @returns {Array} the usageComp entries added by this call
 */
function compareUsage(DB1,DB2) {
  var added = [];
  // Loop counters are local so they neither leak into nor depend on global state.
  for (var z = 0; z < DB1.length; z++) {
    for (var d = 0; d < DB2.length; d++) {
      if (DB1[z].codon === DB2[d].codon) {
        var newComp = new usageComp(DB1[z].codon,DB1[z].usage,DB2[d].usage);
        compResults.push(newComp);
        added.push(newComp);
      }
    }
  }
  return added;
}
// This compares chalk with cheese: the usage figures for the protein are relative to the other codons for the same amino acid.
// To compare properly, take the relative usage within each amino acid instead. This was already done for the stop codons.
// That is, each time you want the codons for a residue, build a variable holding ResToCodon(<three-letter residue code>).
// I don't have time to build this; the commented-out code below is the basic structure of a parser for such a table. Divide each K12 codon's usage by the total usage of that residue's codons.
/**
 * Log the K12DB usage of every codon that encodes the given residue.
 * @param {string} res - residue name as used in the codon table, e.g. 'Thr'
 */
function codonRatesFor(res) {
  // Look the synonymous codons up once. ResToCodon used to be called on every
  // pass, and because both loops shared one global "i" it also pushed this
  // loop's counter past the end after the first entry.
  var synonyms = ResToCodon(res);
  for (var i = 0; i < K12DB.length; i++) {
    if (synonyms.indexOf(K12DB[i].codon) !== -1) {
      console.log(K12DB[i].codon+" "+K12DB[i].usage);
    }
  }
}
// E.g. for Thr codonRatesFor('Thr') ACT 0.008 + ACC 0.023 + ACA 0.006 + ACG 0.012 = 0.049
//
/*
compResults = [];
compareUsage(codonDB,K12DB);
for (i=0;i<compResults.length;i++) {
if (compResults[i].usage1 != "0.00") {
console.log(compResults[i].codon+"\t"+compResults[i].usage1+"\t"+compResults[i].usage2);
}
}
*/
// Report codons whose usage differs by more than 0.5 between the two tables
// while the second table's usage is below 0.1.
// compResults is only created by the commented-out block above, so skip the
// report when it does not exist rather than failing with a ReferenceError.
if (typeof compResults !== 'undefined') {
  for (var c = 0; c < compResults.length; c++) {
    var comp = compResults[c];
    if (Math.abs(comp.usage1-comp.usage2) > 0.5 && comp.usage2 < 0.1) {
      console.log(comp.codon+"\t"+comp.usage1+"\t"+comp.usage2);
    }
  }
}
/*
GCG 1.00 4.48 0.09
Ala GCA 4.00 17.94 0.36
Ala GCT 2.00 8.97 0.18
Ala GCC 4.00 17.94 0.36
Cys TGT 12.00 53.81 0.50
Cys TGC 12.00 53.81 0.50
Asp GAT 3.00 13.45 0.33
Asp GAC 6.00 26.91 0.67
Glu GAG 8.00 35.87 0.47
Glu GAA 9.00 40.36 0.53
Phe TTT 4.00 17.94 0.57
Phe TTC 3.00 13.45 0.43
Gly GGG 1.00 4.48 0.13
Gly GGA 3.00 13.45 0.38
Gly GGT 3.00 13.45 0.38
Gly GGC 1.00 4.48 0.13
His CAT 5.00 22.42 0.50
His CAC 5.00 22.42 0.50
Ile ATA 2.00 8.97 0.25
Ile ATT 3.00 13.45 0.38
Ile ATC 3.00 13.45 0.38
Lys AAG 3.00 13.45 0.30
Lys AAA 7.00 31.39 0.70
Leu TTG 1.00 4.48 0.05
Leu TTA 2.00 8.97 0.10
Leu CTG 6.00 26.91 0.30
Leu CTA 1.00 4.48 0.05
Leu CTT 5.00 22.42 0.25
Leu CTC 5.00 22.42 0.25
Met ATG 9.00 40.36 1.00
Asn AAT 9.00 40.36 0.69
Asn AAC 4.00 17.94 0.31
Pro CCG 2.00 8.97 0.17
Pro CCA 4.00 17.94 0.33
Pro CCT 5.00 22.42 0.42
Pro CCC 1.00 4.48 0.08
Gln CAG 6.00 26.91 1.00
Gln CAA 0.00 0.00 0.00
Arg AGG 0.00 0.00 0.00
Arg AGA 0.00 0.00 0.00
Arg CGG 2.00 8.97 0.50
Arg CGA 1.00 4.48 0.25
Arg CGT 0.00 0.00 0.00
Arg CGC 1.00 4.48 0.25
Ser AGT 4.00 17.94 0.20
Ser AGC 4.00 17.94 0.20
Ser TCG 0.00 0.00 0.00
Ser TCA 4.00 17.94 0.20
Ser TCT 3.00 13.45 0.15
Ser TCC 5.00 22.42 0.25
Thr ACG 0.00 0.00 0.00
Thr ACA 4.00 17.94 0.40
Thr ACT 6.00 26.91 0.60
Thr ACC 0.00 0.00 0.00
Val GTG 6.00 26.91 0.40
Val GTA 0.00 0.00 0.00
Val GTT 7.00 31.39 0.47
Val GTC 2.00 8.97 0.13
Trp TGG 4.00 17.94 1.00
Tyr TAT 6.00 26.91 1.00
Tyr TAC 0.00 0.00 0.00
End TGA 0.00 0.00 0.00
End TAG 0.00 0.00 0.00
End TAA 0.00 0.00 0.00
*/
/*
TTT Phe (F)
TTC Phe
TTA Leu (L)
TTG Leu
TCT Ser (S)
TCC Ser
TCA Ser
TCG Ser
TAT Tyr (Y)
TAC Tyr
TAA Stop
TAG Stop
TGT Cys (C)
TGC Cys
TGA Stop
TGG Trp (W)
CTT Leu (L)
CTC Leu
CTA Leu
CTG Leu
CCT Pro (P)
CCC Pro
CCA Pro
CCG Pro
CAT His (H)
CAC His
CAA Gln (Q)
CAG Gln
CGT Arg (R)
CGC Arg
CGA Arg
CGG Arg
ATT Ile (I)
ATC Ile
ATA Ile
ATG Met (M)
ACT Thr (T)
ACC Thr
ACA Thr
ACG Thr
AAT Asn (N)
AAC Asn
AAA Lys (K)
AAG Lys
AGT Ser (S)
AGC Ser
AGA Arg (R)
AGG Arg
GTT Val (V)
GTC Val
GTA Val
GTG Val
GCT Ala (A)
GCC Ala
GCA Ala
GCG Ala
GAT Asp (D)
GAC Asp
GAA Glu (E)
GAG Glu
GGT Gly (G)
GGC Gly
GGA Gly
GGG Gly
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment