Created
October 24, 2010 19:06
-
-
Save lindenb/643851 to your computer and use it in GitHub Desktop.
search for alternative Reading frames in human Genome (hg18) ( http://biostar.stackexchange.com/questions/3034)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* search for alternative Reading frames in human Genome (hg18) | |
* Pierre Lindenbaum 2010 | |
* http://biostar.stackexchange.com/questions/3034 | |
*/ | |
import java.io.BufferedReader; | |
import java.io.InputStreamReader; | |
import java.net.URL; | |
import java.util.Arrays; | |
import java.util.zip.GZIPInputStream; | |
public class BioStar3034 | |
{ | |
private static final byte NIL=(byte)99; | |
private static final byte ALT_FRAME=(byte)80; | |
/** scans each transcript from knownGene.txt.gz */ | |
private void run() throws Exception | |
{ | |
//an array of frame 0/1/2 or NIL if not filled | |
byte genome[]=new byte[247249800];//~ size of human chr1 | |
//download UCSC knownGene | |
BufferedReader r=new BufferedReader(new InputStreamReader( | |
new GZIPInputStream( new URL( | |
"http://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz").openStream()) | |
)); | |
String line; | |
String prevChrom=null; | |
while((line=r.readLine())!=null) | |
{ | |
String tokens[]=line.split("[\t]"); | |
String chrom=tokens[1]; | |
if(prevChrom==null || !(prevChrom.equals(chrom))) | |
{ | |
Arrays.fill(genome,0,genome.length,NIL);//all the frame | |
prevChrom=chrom; | |
} | |
int cdsStart=Integer.parseInt(tokens[5]); | |
int cdsEnd=Integer.parseInt(tokens[6]); | |
int exonsCount=Integer.parseInt(tokens[7]); | |
String exonStarts[]=tokens[8].split("[,]"); | |
String exonEnds[]=tokens[9].split("[,]"); | |
int starts[]=new int[exonsCount]; | |
int ends[]=new int[exonsCount]; | |
for(int i=0;i< exonsCount;++i) | |
{ | |
starts[i]=Integer.parseInt(exonStarts[i]); | |
ends[i]=Integer.parseInt(exonEnds[i]); | |
} | |
int cDNA=0; | |
//orientation is forward | |
if(tokens[2].equals("+")) | |
{ | |
int position=cdsStart; | |
//loop over the cDNA and flag the frames 0/1/2 | |
while(position<cdsEnd) | |
{ | |
//check position is in an exon | |
for(int exon=0;exon < exonsCount;++exon) | |
{ | |
if(!(starts[exon]<=position && position< ends[exon])) continue; | |
byte frame=(byte)(cDNA%3); | |
//we have been there before and it was not the same frame ! | |
if( genome[position]!=NIL && | |
genome[position]!=frame) | |
{ | |
genome[position]=ALT_FRAME; | |
System.out.println(chrom+":"+(position+1)+"-"+(position+2)+" (+)"); | |
} | |
else | |
{ | |
genome[position]=frame; | |
} | |
cDNA++; | |
break; | |
} | |
++position; | |
} | |
} | |
else //gene on reverse strand' | |
{ | |
int position=cdsEnd-1; | |
//loop over the cDNA and flag the frames 0/1/2 | |
while(position>=cdsStart) | |
{ | |
//check position is in an exon | |
for(int exon=0;exon < exonsCount;++exon) | |
{ | |
if(!(starts[exon]<=position && position< ends[exon])) continue; | |
byte frame=(byte)(10+cDNA%3);//add 10 to flag reverse orientation | |
//we have been there before and it was not the same frame ! | |
if( genome[position]!=NIL && | |
genome[position]!=frame) | |
{ | |
genome[position]=ALT_FRAME; | |
System.out.println(chrom+":"+(position+1)+"-"+(position+2)+" (-)"); | |
} | |
else | |
{ | |
genome[position]=frame; | |
} | |
genome[position]=frame; | |
cDNA++; | |
break; | |
} | |
--position; | |
} | |
} | |
} | |
r.close(); | |
} | |
public static void main(String[] args) | |
{ | |
try { | |
BioStar3034 app=new BioStar3034(); | |
app.run(); | |
} | |
catch (Exception e) | |
{ | |
e.printStackTrace(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment