Skip to content

Instantly share code, notes, and snippets.

@lindenb
Last active March 6, 2019 13:29
Show Gist options
  • Save lindenb/293389 to your computer and use it in GitHub Desktop.
Save lindenb/293389 to your computer and use it in GitHub Desktop.
/**
Author: Pierre Lindenbaum PhD
http://plindenbaum.blogspot.com
January 2010
About: this code makes a Fasta File a source of Triples for a Jena RDF Graph
*/
package test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PushbackReader;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Stack;
import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.graph.TripleMatch;
import com.hp.hpl.jena.graph.impl.GraphBase;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.JenaException;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
import com.hp.hpl.jena.util.iterator.NiceIterator;
import com.hp.hpl.jena.vocabulary.DC;
import com.hp.hpl.jena.vocabulary.RDF;
/**
 * A Jena graph whose triples are computed on the fly from a FASTA file.
 *
 * Every call to {@link #graphBaseFind(TripleMatch)} re-opens the file and
 * streams through it; nothing is cached in memory between calls.
 * Only records whose header starts with {@code gi|<id>|} produce triples.
 * For each such record the following triples are generated, with the subject
 * {@code http://www.ncbi.nlm.nih.gov/nuccore/<id>}:
 * <ul>
 * <li>{@code rdf:type urn:lindenb:ontology:Sequence}</li>
 * <li>{@code dc:title} : the full header line (without the leading '&gt;')</li>
 * <li>{@code urn:lindenb:ontology:sequence} : the letters of the sequence</li>
 * <li>{@code urn:lindenb:ontology:length} : the number of letters, typed as xsd:int</li>
 * </ul>
 */
public class FastaModel
    extends GraphBase
{
    /** the source of the fasta sequences */
    private final File fastaFile;

    /** a simple fasta sequence: its header line and its residues */
    private static class FastaSequence
    {
        /** the header line, without the leading '&gt;' and without the line terminator */
        final StringBuilder name=new StringBuilder();
        /** the letters found between this header and the next one */
        final StringBuilder sequence=new StringBuilder();
    }

    /**
     * An iterator scanning the fasta file and returning the
     * com.hp.hpl.jena.graph.Triple matching a given pattern.
     * The file is closed when the end of the stream is reached,
     * when a read error occurs, or when {@link #close()} is called.
     */
    private class FastaIterator
        extends NiceIterator<Triple>
    {
        /** the file reader; null once the stream has been closed */
        private PushbackReader reader;
        /** a filter for the triples */
        private final Triple filter;
        /**
         * the triples waiting to be returned. They are appended at the tail and
         * also consumed from the tail, so the triples of one sequence come out
         * in the reverse order of their creation.
         */
        private final Deque<Triple> pendingTriples=new ArrayDeque<Triple>();

        /**
         * constructor opens the file reader
         * @param matcher the filter of triples
         * @throws JenaException if the fasta file cannot be opened
         */
        FastaIterator(TripleMatch matcher)
        {
            this.filter=matcher.asTriple();
            try
            {
                // buffered: the parser below reads one character at a time
                this.reader=new PushbackReader(
                    new BufferedReader(new FileReader(FastaModel.this.fastaFile))
                    );
            }
            catch (IOException e)
            {
                throw new JenaException(e);
            }
        }

        /**
         * read the next FastaSequence in the stream.
         * Any character found before the first '&gt;' is ignored.
         * The stream is closed as soon as its end is reached.
         * @return the next sequence, or null if there is none left
         * @throws IOException if the file cannot be read
         */
        private FastaSequence readNext() throws IOException
        {
            if(this.reader==null) return null;
            int c;
            FastaSequence seq=null;
            while((c=this.reader.read())!=-1)
            {
                if(c=='>')
                {
                    if(seq!=null)
                    {
                        // this '>' starts the NEXT record: give it back to the reader
                        this.reader.unread(c);
                        return seq;
                    }
                    seq=new FastaSequence();
                    // the header runs to the end of the line. Stopping on '\r' too
                    // keeps the carriage return of CRLF / CR files out of the name;
                    // a following '\n' is skipped below as it is not a letter.
                    while((c=this.reader.read())!=-1)
                    {
                        if(c=='\n' || c=='\r') break;
                        seq.name.append((char)c);
                    }
                }
                else if(seq!=null && Character.isLetter(c))
                {
                    // digits, spaces, line breaks... are not part of the sequence
                    seq.sequence.append((char)c);
                }
            }
            // end of file
            this.close();
            return seq;
        }

        /** append a triple to the pending triples if it is accepted by this.filter */
        private void offer(Triple triple)
        {
            if(this.filter.matches(triple))
            {
                this.pendingTriples.addLast(triple);
            }
        }

        /**
         * transform a FastaSequence to a set of Triples and keep those
         * matching this.filter. Nothing is produced if the header is not
         * a genbank header starting with "gi|" followed by an id and a '|'.
         */
        private void enqueueTriples(FastaSequence seq)
        {
            String name=seq.name.toString();
            // check it is a genbank record with a gi
            if(!name.startsWith("gi|")) return;
            int i=name.indexOf('|',3);
            if(i==-1) return;
            // create the subject
            Node subject=Node.createURI("http://www.ncbi.nlm.nih.gov/nuccore/"+name.substring(3,i));
            // rdf:type
            offer(new Triple(
                subject,
                RDF.type.asNode(),
                Node.createURI("urn:lindenb:ontology:Sequence")
                ));
            // dc:title
            offer(new Triple(
                subject,
                DC.title.asNode(),
                Node.createLiteral(name)
                ));
            // the DNA sequence
            offer(new Triple(
                subject,
                Node.createURI("urn:lindenb:ontology:sequence"),
                Node.createLiteral(seq.sequence.toString())
                ));
            // the size of this sequence
            offer(new Triple(
                subject,
                Node.createURI("urn:lindenb:ontology:length"),
                Node.createLiteral(String.valueOf(seq.sequence.length()),null,XSDDatatype.XSDint)
                ));
        }

        /**
         * check if there are some pending triples.
         * If not, read the FastaSequences of the file until one of them
         * produces at least one triple matching this.filter.
         * @return true if next() can return a triple
         * @throws JenaException if the file cannot be read; the stream is then closed
         */
        @Override
        public boolean hasNext()
        {
            if(!this.pendingTriples.isEmpty()) return true;
            if(this.reader==null) return false;
            try
            {
                /* loop until a triple is available or the stream is exhausted */
                while(this.pendingTriples.isEmpty())
                {
                    FastaSequence seq=readNext();
                    if(seq==null) return false;
                    enqueueTriples(seq);
                }
            }
            catch (IOException e)
            {
                try
                {
                    close();
                }
                catch (JenaException ignored)
                {
                    // report the original read failure rather than the failure to close
                }
                throw new JenaException(e);
            }
            return true;
        }

        /**
         * @return the next triple matching the filter
         * @throws NoSuchElementException if there is no more triple
         */
        @Override
        public Triple next()
        {
            if(!hasNext())
            {
                throw new NoSuchElementException("no more triple in "+FastaModel.this.fastaFile);
            }
            return this.pendingTriples.removeLast();
        }

        /**
         * close the underlying reader. Can safely be called more than once.
         * @throws JenaException if closing the reader fails
         */
        @Override
        public void close()
        {
            try
            {
                if(this.reader!=null) this.reader.close();
            }
            catch (IOException e)
            {
                throw new JenaException(e);
            }
            finally
            {
                this.reader=null;
                super.close();
            }
        }
    }

    /**
     * open a new scan of the fasta file
     * @param matcher the pattern the returned triples must match
     * @return an iterator over the matching triples
     * @throws JenaException if the fasta file cannot be opened
     */
    @Override
    protected ExtendedIterator<Triple> graphBaseFind(TripleMatch matcher)
    {
        return new FastaIterator(matcher);
    }

    /**
     * @param fastaFile the fasta file used as the source of the triples.
     *        It is not opened here but each time the graph is searched.
     * @throws NullPointerException if fastaFile is null
     */
    public FastaModel(File fastaFile)
    {
        // fail here rather than later, deep inside a query
        if(fastaFile==null) throw new NullPointerException("fastaFile is null");
        this.fastaFile=fastaFile;
    }

    /** close this graph and trace the call on stderr */
    @Override
    public void close()
    {
        super.close();
        System.err.println("Close called");
    }

    /**
     * Demo: list the statements, the subjects and the titles of a fasta file
     * and run a SPARQL query comparing the lengths of the sequences.
     * @param args optional: args[0] is the path to the fasta file,
     *        default is "rotavirus.fa"
     */
    public static void main(String[] args)
    {
        QueryExecution execution=null;
        try
        {
            File fasta=new File(args.length>0 ? args[0] : "rotavirus.fa");
            Model m=ModelFactory.createModelForGraph(new FastaModel(fasta));

            StmtIterator i=m.listStatements();
            while(i.hasNext())
            {
                System.err.println(i.next());
            }
            i.close();
            System.err.println("OK");

            ResIterator r=m.listSubjects();
            while(r.hasNext())
            {
                System.err.println(r.next());
            }
            r.close();

            i=m.listStatements(null,DC.title,(Literal)null);
            while(i.hasNext())
            {
                System.err.println(i.next());
            }
            i.close();
            System.err.println("OK");

            Query query=QueryFactory.create(
                "SELECT ?Seq1 ?Len1 ?Seq2 ?Len2" +
                "{" +
                "?Seq1 a <urn:lindenb:ontology:Sequence> . " +
                "?Seq1 <urn:lindenb:ontology:length> ?Len1 . " +
                "?Seq2 a <urn:lindenb:ontology:Sequence> . " +
                "?Seq2 <urn:lindenb:ontology:length> ?Len2 . " +
                "FILTER (?Seq1!=?Seq2 && ?Len1 < ?Len2) "+
                "}"
                );
            execution = QueryExecutionFactory.create(query, m);
            ResultSet row=execution.execSelect();
            while(row.hasNext())
            {
                QuerySolution solution=row.next();
                for(Iterator<String> si=solution.varNames();si.hasNext();)
                {
                    String name=si.next();
                    System.out.println(name+" : "+solution.get(name));
                }
                System.out.println();
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            // release the resources held by the query, even on failure
            if(execution!=null) execution.close();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment