-
-
Save cwake/1b1e322e2b23c8cdd7a7 to your computer and use it in GitHub Desktop.
import java.io.DataOutput;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Scanner;
//CSC 142, Assignment #3DNA, DNAproject.java,Date | |
//Chloe Wake, Stephanie Woods, Stuart Pascua. | |
//This program reads the data file dna.txt and gives information | |
/**
 * Reads a DNA data file made of (name line, nucleotide line) pairs and writes a
 * report for each pair to an output file: nucleotide counts, mass percentages,
 * the codon list, and whether the sequence looks like a protein-coding gene.
 */
public class Draft2 {
    /** Number of distinct nucleotides that are counted (A, C, G, T). */
    public static final int HOW_MANY_OF_EACH = 4;
    /** Number of nucleotides in one codon. */
    public static final int NUCS_IN_CODON = 3;
    /** Minimum combined mass percentage of C and G for a protein. */
    public static final int PERCENT_MASS = 30;
    /** Minimum number of codons for a protein. */
    public static final int ARE_THERE_ENOUGH_CODONS = 5;

    /** Prints the program banner to the console. */
    public static void intro() {
        System.out.println("This program reports information about DNA");
        System.out.println("nucleotide sequences that may encode proteins");
        System.out.println("(such as Captain Picards hair growth protein).");
        System.out.println("");
    }

    /**
     * Program entry point: prints the banner, then processes the input file.
     *
     * @param args unused
     * @throws FileNotFoundException if the input file cannot be opened or the
     *         output file cannot be created
     */
    public static void main(String[] args) throws FileNotFoundException {
        intro();
        processFile();
    }

    /**
     * Prompts for the input and output file names, then writes one report per
     * (name, nucleotides) pair found in the input file. A trailing name line
     * with no nucleotide line after it is ignored; an empty input file yields
     * an empty report.
     *
     * @throws FileNotFoundException if the input file cannot be opened or the
     *         output file cannot be created
     */
    private static void processFile() throws FileNotFoundException {
        // Deliberately not closed: closing this Scanner would close System.in.
        Scanner console = new Scanner(System.in);
        File inputFile = whatFile(console);

        // try-with-resources guarantees both the reader and the report file
        // are closed (and the report flushed) even if processing fails.
        try (Scanner reader = new Scanner(inputFile);
             PrintStream report = new PrintStream(output(console))) {
            while (reader.hasNextLine()) {
                String name = reader.nextLine();
                if (!reader.hasNextLine()) {
                    break; // name line without a matching nucleotide line
                }
                String nucsInARow = reader.nextLine().toUpperCase();
                int[] numberOfNucs = howManyNucs(nucsInARow);
                double[] massP = massPercentage(numberOfNucs);
                // Junk dashes are stripped before grouping into codons.
                String[] codons = codon(nucsInARow.replace("-", ""));
                boolean protein = isItAProtein(codons, massP);
                outputToFile(name, nucsInARow, numberOfNucs, massP, codons,
                        report, protein);
            }
        }
    }

    /**
     * Prompts for the input file name and reads it from the console.
     *
     * @param console scanner to read the answer from
     * @return a File for the name the user typed
     */
    public static File whatFile(Scanner console) {
        System.out.print("Input file name: ");
        return new File(console.nextLine());
    }

    /**
     * Prompts for the output file name and reads it from the console.
     *
     * @param console scanner to read the answer from
     * @return a File for the name the user typed
     */
    public static File output(Scanner console) {
        System.out.print("Output file name: ");
        return new File(console.nextLine());
    }

    /**
     * Counts the occurrences of A, C, G and T in a nucleotide string. Any other
     * character (including the junk dash) is skipped.
     *
     * @param lineNucs nucleotide string; expected to be upper case already
     * @return counts in the order A, C, G, T
     */
    public static int[] howManyNucs(String lineNucs) {
        int[] nucCount = new int[HOW_MANY_OF_EACH];
        for (int i = 0; i < lineNucs.length(); i++) {
            // Position in "ACGT" doubles as the index into the count array.
            int index = "ACGT".indexOf(lineNucs.charAt(i));
            if (index >= 0) {
                nucCount[index]++;
            }
        }
        return nucCount;
    }

    /**
     * Converts nucleotide counts into mass percentages rounded to one decimal.
     *
     * @param nucCount counts in the order A, C, G, T
     * @return percentage of the total mass for A, C, G, T; all zeros when every
     *         count is zero
     */
    public static double[] massPercentage(int[] nucCount) {
        // Per-nucleotide masses in the order A, C, G, T.
        // NOTE(review): junk dashes contribute nothing to the total here;
        // confirm against the assignment spec whether they should.
        double[] mass = {135.128, 111.103, 151.128, 125.107};
        double[] massOfEach = new double[HOW_MANY_OF_EACH];
        double massSum = 0;
        for (int m = 0; m < HOW_MANY_OF_EACH; m++) {
            massOfEach[m] = nucCount[m] * mass[m];
            massSum += massOfEach[m];
        }

        double[] massPercent = new double[HOW_MANY_OF_EACH];
        if (massSum == 0) {
            return massPercent; // nothing to weigh: avoid dividing 0 by 0
        }
        for (int p = 0; p < HOW_MANY_OF_EACH; p++) {
            massPercent[p] = Math.round((massOfEach[p] / massSum) * 1000.0) / 10.0;
        }
        return massPercent;
    }

    /**
     * Splits a nucleotide string into consecutive codons of NUCS_IN_CODON
     * characters. Leftover characters that do not fill a codon are dropped.
     *
     * @param lineOfNucs nucleotide string with junk already removed
     * @return the codons in order; empty if the string is shorter than a codon
     */
    public static String[] codon(String lineOfNucs) {
        String[] codon = new String[lineOfNucs.length() / NUCS_IN_CODON];
        for (int c = 0; c < codon.length; c++) {
            int start = c * NUCS_IN_CODON;
            codon[c] = lineOfNucs.substring(start, start + NUCS_IN_CODON);
        }
        return codon;
    }

    /**
     * Decides whether a codon list looks like a protein: at least
     * ARE_THERE_ENOUGH_CODONS codons, starting with ATG, ending with TAA, TAG
     * or TGA, and with C plus G making up at least PERCENT_MASS percent of the
     * mass.
     *
     * @param codon       codons of the sequence
     * @param massPercent mass percentages in the order A, C, G, T
     * @return true if every protein requirement is met
     */
    public static boolean isItAProtein(String[] codon, double[] massPercent) {
        // Length is checked first so an empty codon list is rejected instead
        // of failing on codon[0].
        if (codon.length < ARE_THERE_ENOUGH_CODONS) {
            return false;
        }
        if (!codon[0].equals("ATG")) {
            return false;
        }
        String last = codon[codon.length - 1];
        if (!last.equals("TAA") && !last.equals("TAG") && !last.equals("TGA")) {
            return false;
        }
        // Indices 1 and 2 are C and G.
        return (massPercent[1] + massPercent[2]) >= PERCENT_MASS;
    }

    /**
     * Writes the report for one sequence, followed by a blank line.
     *
     * @param nName       name line of the sequence
     * @param nNucs       nucleotide string as read (upper-cased)
     * @param nucCounts   counts in the order A, C, G, T
     * @param massPercent mass percentages in the order A, C, G, T
     * @param codons      codon list
     * @param output      stream to write the report to
     * @param protein     whether the sequence was judged to encode a protein
     */
    public static void outputToFile(String nName, String nNucs, int[] nucCounts,
            double[] massPercent, String[] codons, PrintStream output, boolean protein) {
        output.println("Name: " + nName);
        output.println("Nucleotides: " + nNucs);
        output.println("Nucleotide counts: " + Arrays.toString(nucCounts));
        output.println("Mass percentages: " + Arrays.toString(massPercent));
        output.println("Codons: " + Arrays.toString(codons));
        output.print("Encodes a protein: ");
        output.println(protein ? "yes" : "no");
        output.println("");
    }
}
Found a similar project online : http://www.cs.kzoo.edu/cs210/Labs/IndexSubstring/DNADataReader.java
The draft looks good. I see you have 5 methods listed above, and we can create one more method to print all of the output. I did an exercise that ignored blank spaces and special characters, so I will find it and post an update here.
From your comment about the 4 things we need, I think we make them constants like this?
public static final int NUMOFCODONS = 5;
public static final int PERCENT = 30;
public static final int UNIQUENUCLEO = 4;
public static final int NUCLEOPERCODON = 3;
I made some updates to my fork if you want to check it out. I'm able to read everything in dna.txt into a String s;
now I need to process String s with arrays.
Very useful link! Thanks. I'm going to work on it until I get sleepy; I'll post updates later tonight.
Shall we meet up tomorrow morning any time before 11?
Yeah I can meet up, what time?
9am work somewhere in the library?
Also super helpful: http://www.dreamincode.net/forums/topic/37297-homework-for-decoding-dna-codons/
9am sounds good let's meet around the information desk
oh my gosh this project just got easier after that link. Remember what Ravi said? "why create code from scratch when there is already existing code out there to use". Thank you ChelseaLura
Sure did.
I uploaded the code I wrote for identifying and counting nucleotides, it looks like the link did it all in one line of code while I did each letter separately. What do you guys think?
That sounds great; the simpler the better. I just got out of Hour of Code, so I'm going to get back to the Java now.
Glad that code link helped! I wasn't sure if it was exactly what we needed but it looked pretty darn close.
Hello,
My name is Jonathan and I work with the introductory computer science courses at the University of Washington. This post contains a solution to a DNA homework assignment identical to the one we use in our class.
This post's code is indexed by Google, enabling our current students to easily find it and submit it as their own work.
Would you please remove this post or make it secret in order to help us enforce our academic policy?
By removing this post, you would be removing a tool that our students could use in academic misconduct.
Thank you,
Jonathan Sanders
Intro Support, Computer Science & Engineering
University of Washington
jsanders@cs.washington.edu
Notes on useful methods: use indexOf(str) to find where something is in a string; substring(index1, index2) returns the part of the string that starts at index1 and ends just before index2; length() gives how long a string is; file.substring(#, #) reads starting at a certain character and ending at a certain character; the nextLine() method reads a line of input as a String; Arrays.toString prints arrays; the charAt method gets individual characters; double rounded = Math.round(num * 10.0) / 10.0 rounds the masses to one decimal place; use the "replace" method on the nucleotide string to eliminate the dashes from the junk strands; and use PrintStream so that the information being printed to the console can be stored in a new file that we can access and then share if we want.