Last active
July 4, 2023 05:30
-
-
Save elvismdev/12ba4e6efc01730e193c to your computer and use it in GitHub Desktop.
A small Java class that finds all the genes in a DNA string stored in a plain text file. The class depends on the edu.duke library, which must be added to the Java IDE for it to compile without errors. Download link: http://www.dukelearntoprogram.com/downloads/archives/courserajava.jar
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Finds all the genes in a DNA string read from a file, using the StorageResource class.
 *
 * @author (Elvis Morales)
 * @version (1.0)
 */
import edu.duke.*; | |
import java.io.File; | |
public class FindMultipleGenesStorage { | |
public int findStopIndex(String dna, int index) { | |
int stop1 = dna.indexOf("tga", index); | |
if ( stop1 == -1 || ( stop1-index ) % 3 != 0 ) { | |
stop1 = dna.length(); | |
} | |
int stop2 = dna.indexOf("taa", index); | |
if ( stop2 == -1 || ( stop2-index ) % 3 != 0 ) { | |
stop2 = dna.length(); | |
} | |
int stop3 = dna.indexOf("tag", index); | |
if ( stop3 == -1 || ( stop3-index ) % 3 != 0 ) { | |
stop3 = dna.length(); | |
} | |
return Math.min( stop1, Math.min(stop2, stop3) ); | |
} | |
public StorageResource storeAll(String dna) { | |
String dnaLow = dna.toLowerCase(); | |
int start = 0; | |
StorageResource genes = new StorageResource(); | |
while (true) { | |
int loc = dnaLow.indexOf( "atg", start ); | |
if ( loc == -1 ) { | |
break; | |
} | |
int stop = findStopIndex( dnaLow, loc+3 ); | |
if ( stop != dna.length() ) { | |
genes.add( dna.substring(loc, stop+3) ); | |
start = stop + 3; | |
} else { | |
start = start + 3; | |
} | |
} | |
return genes; | |
} | |
public void testStorageFinder() { | |
FileResource dnaFile = new FileResource(); | |
StorageResource genesFound = storeAll( dnaFile.asString() ); | |
System.out.println( "Number of genes found: "+genesFound.size() ); | |
printGenes( genesFound ); | |
} | |
public float cgRatio( String dna ) { | |
String dnaLow = dna.toLowerCase(); | |
int cgCount = 0; | |
int start = 0; | |
while (true) { | |
int pos = dnaLow.indexOf("c", start); | |
if (pos == -1) { | |
start = 0; | |
break; | |
} | |
cgCount += 1; | |
start = pos + 1; | |
} | |
while (true) { | |
int pos = dnaLow.indexOf("g", start); | |
if (pos == -1) { | |
start = 0; | |
break; | |
} | |
cgCount += 1; | |
start = pos + 1; | |
} | |
return ( (float) cgCount ) / dna.length(); | |
} | |
public void printGenes( StorageResource sr ) { | |
int sixtyCharQty = 0; | |
int highCgRatioQty = 0; | |
float cgRatioConst = (float) 0.35; | |
for ( String s : sr.data() ) { | |
if ( s.length() > 60 ) { | |
System.out.println( "String longer than 60 characters: "+s ); | |
sixtyCharQty++; | |
} | |
if ( cgRatio(s) > cgRatioConst ) { | |
System.out.println( "String with C-G-ratio higher than 0.35: "+s ); | |
highCgRatioQty++; | |
} | |
} | |
System.out.println( "60 characters qty: "+sixtyCharQty ); | |
System.out.println( "Strings with C-G-ratio higher than 0.35: "+highCgRatioQty ); | |
} | |
} |
TOTAL GENES = 69
Total number of strings with length greater than 60 : 23
Total number of strings with CG Ratio greater than 0.35 : 40
Longest gene length is : 489
Total number of occurrences of CTG is : 224
Cheers!
Wrong answers
I don't understand why the answers are all ones. I'm finding some valid genes with my code and have verified them with my own eyes.
I'm telling you guys, do not learn by copy-pasting data. It's never worth it.
Try it on your own and listen to what the professor said in the video.
…On Wed, 10 Aug 2022 at 6:03 AM, Piyush Acharya ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
TOTAL GENES = 69
Total number of strings with length greater than 60 : 23
Total number of strings with CG Ratio greater than 0.35 : 40
Longest gene length is : 489
Total number of occurrences of CTG is : 224
Cheers!
Hey, can you please send the code...
Please refer to the following code for a more advanced version of the code
from the course:
https://github.com/Verisimilitude11/Algorithm-for-Analyzing-Genes-in-DNA
I am currently working on improving it and getting it approved by
biologists.
—
Reply to this email directly, view it on GitHub
<https://gist.github.com/12ba4e6efc01730e193c#gistcomment-4261648>, or
unsubscribe
<https://github.com/notifications/unsubscribe-auth/APBFUDTWJTSWWMISNZEPKCDVYL2E7ANCNFSM4I3572UQ>
.
You are receiving this because you commented. Message ID:
***@***.***>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, can you please send the code...