Skip to content

Instantly share code, notes, and snippets.

@lindenb
Created October 24, 2010 19:06
Show Gist options
  • Save lindenb/643851 to your computer and use it in GitHub Desktop.
Save lindenb/643851 to your computer and use it in GitHub Desktop.
search for alternative Reading frames in human Genome (hg18) ( http://biostar.stackexchange.com/questions/3034)
/**
* search for alternative Reading frames in human Genome (hg18)
* Pierre Lindenbaum 2010
* http://biostar.stackexchange.com/questions/3034
*/
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
public class BioStar3034
{
private static final byte NIL=(byte)99;
private static final byte ALT_FRAME=(byte)80;
/** scans each transcript from knownGene.txt.gz */
private void run() throws Exception
{
//an array of frame 0/1/2 or NIL if not filled
byte genome[]=new byte[247249800];//~ size of human chr1
//download UCSC knownGene
BufferedReader r=new BufferedReader(new InputStreamReader(
new GZIPInputStream( new URL(
"http://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz").openStream())
));
String line;
String prevChrom=null;
while((line=r.readLine())!=null)
{
String tokens[]=line.split("[\t]");
String chrom=tokens[1];
if(prevChrom==null || !(prevChrom.equals(chrom)))
{
Arrays.fill(genome,0,genome.length,NIL);//all the frame
prevChrom=chrom;
}
int cdsStart=Integer.parseInt(tokens[5]);
int cdsEnd=Integer.parseInt(tokens[6]);
int exonsCount=Integer.parseInt(tokens[7]);
String exonStarts[]=tokens[8].split("[,]");
String exonEnds[]=tokens[9].split("[,]");
int starts[]=new int[exonsCount];
int ends[]=new int[exonsCount];
for(int i=0;i< exonsCount;++i)
{
starts[i]=Integer.parseInt(exonStarts[i]);
ends[i]=Integer.parseInt(exonEnds[i]);
}
int cDNA=0;
//orientation is forward
if(tokens[2].equals("+"))
{
int position=cdsStart;
//loop over the cDNA and flag the frames 0/1/2
while(position<cdsEnd)
{
//check position is in an exon
for(int exon=0;exon < exonsCount;++exon)
{
if(!(starts[exon]<=position && position< ends[exon])) continue;
byte frame=(byte)(cDNA%3);
//we have been there before and it was not the same frame !
if( genome[position]!=NIL &&
genome[position]!=frame)
{
genome[position]=ALT_FRAME;
System.out.println(chrom+":"+(position+1)+"-"+(position+2)+" (+)");
}
else
{
genome[position]=frame;
}
cDNA++;
break;
}
++position;
}
}
else //gene on reverse strand'
{
int position=cdsEnd-1;
//loop over the cDNA and flag the frames 0/1/2
while(position>=cdsStart)
{
//check position is in an exon
for(int exon=0;exon < exonsCount;++exon)
{
if(!(starts[exon]<=position && position< ends[exon])) continue;
byte frame=(byte)(10+cDNA%3);//add 10 to flag reverse orientation
//we have been there before and it was not the same frame !
if( genome[position]!=NIL &&
genome[position]!=frame)
{
genome[position]=ALT_FRAME;
System.out.println(chrom+":"+(position+1)+"-"+(position+2)+" (-)");
}
else
{
genome[position]=frame;
}
genome[position]=frame;
cDNA++;
break;
}
--position;
}
}
}
r.close();
}
public static void main(String[] args)
{
try {
BioStar3034 app=new BioStar3034();
app.run();
}
catch (Exception e)
{
e.printStackTrace();
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment