Skip to content

Instantly share code, notes, and snippets.

@maulikkamdar
Last active August 29, 2015 14:01
Show Gist options
  • Save maulikkamdar/06c85b8555a543643f71 to your computer and use it in GitHub Desktop.
Save maulikkamdar/06c85b8555a543643f71 to your computer and use it in GitHub Desktop.
A Java program written to query multiple data sources - Coexpress, Corum, Gene Ontology (Biological Processes, Cellular Compartments, Molecular Functions), Gene Co-occurrence (Publications, KEGG Pathways, OMIM Diseases), Protein Domain-domain interactions and Gene Ideogram-Ideogram Interactions, which are stored as RDF in a Virtuoso triple store…
package org.deri.ppi;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Locale;
/**
 * Scores candidate protein pairs against several evidence sources stored as RDF in a
 * Virtuoso triple store, and writes one tab-separated score row per pair.
 *
 * <p>Inputs, all read from the working directory:
 * <ul>
 *   <li>{@code hgncConvertor.tsv} - header line, then rows whose second column is used as the
 *       lookup key (Entrez gene id) and whose first column is the mapped HGNC value;</li>
 *   <li>{@code constrictedGo_rel.tsv} - custom GO file with parent nodes removed; column 0 is the
 *       key, columns 2, 3 and 4 hold ';'-separated BP, CC and MF terms;</li>
 *   <li>{@code interactorsIdeogramDomainRef.tsv} - one protein pair per row:
 *       EntrezID_A | UniprotID_A | EnsemblID_A | ChrStart_A | ChrStop_A | ChrLength_A |
 *       ChrNumber_A | ChrStrand_A | ... same eight columns for B ... |
 *       Ideogram_A | Ideogram_B | Domains_A | Domains_B.</li>
 * </ul>
 *
 * <p>Output: {@code interactorsIdeogramDomainRef_scores.tsv} with the columns
 * UniprotID_A, UniprotID_B, domain-domain score, ideogram score, co-expression score,
 * CORUM complex count, publication / pathway / disease co-occurrence weights and the
 * GO BP / CC / MF shared-term counts.
 */
public class Interactors {

    /** Fixed parts of the SPARQL endpoint URL; the URL-encoded query goes in between. */
    private static final String SPARQL_URL_PREFIX =
            "http://srvgal78.deri.ie/sparql/?default-graph-uri=&query=";
    private static final String SPARQL_URL_SUFFIX =
            "&format=text%2Ftab-separated-values&timeout=0&debug=on";

    /** Column positions inside a row of the protein-pair file (see class comment). */
    private static final int ENTREZ_A = 0;
    private static final int UNIPROT_A = 1;
    private static final int CHROMOSOME_A = 6;
    private static final int ENTREZ_B = 8;
    private static final int UNIPROT_B = 9;
    private static final int CHROMOSOME_B = 14;
    private static final int IDEOGRAM_A = 16;
    private static final int IDEOGRAM_B = 17;
    private static final int DOMAINS_A = 18;
    private static final int DOMAINS_B = 19;
    /** Rows with fewer columns than this are skipped. */
    private static final int MIN_PAIR_COLUMNS = 20;

    static final HashMap<String, String> geneList = new HashMap<String, String>();
    static final HashMap<String, String> goBPList = new HashMap<String, String>();
    static final HashMap<String, String> goCCList = new HashMap<String, String>();
    static final HashMap<String, String> goMFList = new HashMap<String, String>();

    /**
     * Loads the lookup tables, then scores every pair in the input file.
     *
     * @param args unused
     * @throws IOException if an input file cannot be read or the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        loadGeneList("hgncConvertor.tsv");
        loadGoLists("constrictedGo_rel.tsv");

        BufferedReader pairs = null;
        BufferedWriter out = null;
        try {
            pairs = new BufferedReader(new InputStreamReader(
                    new FileInputStream("interactorsIdeogramDomainRef.tsv")));
            out = new BufferedWriter(new FileWriter("interactorsIdeogramDomainRef_scores.tsv"));

            String pathLine;
            int count = 0;
            while ((pathLine = pairs.readLine()) != null) {
                String[] row = pathLine.split("\t");
                if (row.length < MIN_PAIR_COLUMNS) {
                    continue;
                }

                double coExpressScore = coexpressionScore(row[ENTREZ_A], row[ENTREZ_B]);
                int corumScore = sharedComplexCount(row[UNIPROT_A], row[UNIPROT_B]);

                // GO processing only looks at leaf terms (the GO file has parents removed).
                int goCCScore = countSharedTerms(goCCList, row[UNIPROT_A], row[UNIPROT_B]);
                int goBPScore = countSharedTerms(goBPList, row[UNIPROT_A], row[UNIPROT_B]);
                int goMFScore = countSharedTerms(goMFList, row[UNIPROT_A], row[UNIPROT_B]);

                // Co-occurrence connectors are keyed by the mapped HGNC values, so they can
                // only be looked up when both genes are present in the conversion table.
                int cooccurPub = 0;
                int cooccurPath = 0;
                int cooccurDis = 0;
                String hgncA = geneList.get(row[ENTREZ_A]);
                String hgncB = geneList.get(row[ENTREZ_B]);
                if (hgncA != null && hgncB != null) {
                    // Locale.ROOT keeps the URI stable regardless of the JVM default locale.
                    String forward = (hgncA + "_" + hgncB).toLowerCase(Locale.ROOT);
                    String reverse = (hgncB + "_" + hgncA).toLowerCase(Locale.ROOT);
                    cooccurPath = connectorWeight("pathwayconnectors", "keggCon", forward, reverse);
                    cooccurPub = connectorWeight("pubmedconnectors", "pubmedCon", forward, reverse);
                    cooccurDis = connectorWeight("diseaseconnectors", "omimCon", forward, reverse);
                }

                double ideoScore = ideogramScore(row);
                double ddScore = domainDomainScore(row[DOMAINS_A], row[DOMAINS_B]);

                out.write(row[UNIPROT_A] + "\t" + row[UNIPROT_B] + "\t" + ddScore + "\t" + ideoScore
                        + "\t" + coExpressScore + "\t" + corumScore + "\t" + cooccurPub + "\t"
                        + cooccurPath + "\t" + cooccurDis + "\t" + goBPScore + "\t" + goCCScore
                        + "\t" + goMFScore + "\n");
                System.out.println(count++); // progress indicator
            }
        } finally {
            // Close both files even when scoring aborts, so written rows are flushed.
            try {
                if (pairs != null) {
                    pairs.close();
                }
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }
    }

    /**
     * Performs an HTTP GET and returns the response body with every line terminated by a tab
     * (instead of a line break), so that a tab-separated result can be split into a flat
     * list of cells.
     *
     * @param urlStr the complete request URL
     * @return the flattened body, or an empty string if the URL is malformed, the request
     *         fails, or the server answers with a status other than 200; never {@code null}
     */
    public static String httpGet(String urlStr) {
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(urlStr).openConnection();
            int status = conn.getResponseCode();
            if (status != 200) {
                // Same "no data" result as the I/O failure path below; callers split the
                // returned string right away, so null would crash them.
                System.err.println("HTTP " + status + " for " + urlStr);
                return "";
            }
            BufferedReader rd = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "UTF-8"));
            try {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = rd.readLine()) != null) {
                    sb.append(line).append('\t');
                }
                return sb.toString();
            } finally {
                rd.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /** Fills {@link #geneList} from the HGNC conversion file, skipping its header line. */
    private static void loadGeneList(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
        try {
            String line;
            boolean header = true;
            while ((line = reader.readLine()) != null) {
                if (header) {
                    header = false;
                    continue;
                }
                String[] cols = line.split("\t");
                if (cols.length > 1) {
                    geneList.put(cols[1], cols[0]);
                }
            }
        } finally {
            reader.close();
        }
    }

    /** Fills the three GO maps from the custom GO file (columns 2, 3, 4 = BP, CC, MF). */
    private static void loadGoLists(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] cols = line.split("\t");
                if (cols.length > 2) {
                    goBPList.put(cols[0], cols[2]);
                }
                if (cols.length > 3) {
                    goCCList.put(cols[0], cols[3]);
                }
                if (cols.length > 4) {
                    goMFList.put(cols[0], cols[4]);
                }
            }
        } finally {
            reader.close();
        }
    }

    /** Sends a SPARQL query to the endpoint and returns the flattened TSV cells, header first. */
    private static String[] runQuery(String sparql) throws IOException {
        String url = SPARQL_URL_PREFIX + URLEncoder.encode(sparql, "UTF-8") + SPARQL_URL_SUFFIX;
        return httpGet(url).split("\t");
    }

    /** Removes one pair of surrounding double quotes, if present, from a TSV cell. */
    private static String stripQuotes(String cell) {
        int len = cell.length();
        if (len >= 2 && cell.charAt(0) == '"' && cell.charAt(len - 1) == '"') {
            return cell.substring(1, len - 1);
        }
        return cell;
    }

    /**
     * Returns the Pearson correlation ({@code ns1:pcc}) of the first co-expression record
     * found for the two genes in either orientation, or 0.0 when none is returned.
     */
    private static double coexpressionScore(String geneA, String geneB) throws IOException {
        // The second UNION branch swaps the genes; previously both branches described the
        // same orientation, so records stored as (B, A) were never found.
        String query = "PREFIX ppi: <http://data.bioinfo.deri.ie/>"
                + "PREFIX ns1: <http://coxpresdb.jp/rdf/def/0.1/>"
                + "PREFIX geneid: <http://bio2rdf.org/geneid:>"
                + "SELECT DISTINCT * WHERE {{"
                + "?coexpressInt ns1:gene_id_1 geneid:" + geneA + ";ns1:gene_id_2 geneid:" + geneB + ";ns1:pcc ?pcc"
                + "} UNION {"
                + "?coexpressInt ns1:gene_id_1 geneid:" + geneB + ";ns1:gene_id_2 geneid:" + geneA + ";ns1:pcc ?pcc"
                + "}}";
        String[] cells = runQuery(query);
        // Cells 0-1 are the header; the first data row is expected to be (interaction URI, pcc),
        // so the value sits at index 3 and the array must have at least four cells.
        if (cells.length > 3 && !cells[3].isEmpty()) {
            return Double.parseDouble(stripQuotes(cells[3]));
        }
        return 0.0;
    }

    /** Counts the CORUM complexes that list both proteins as components (0 if no answer). */
    private static int sharedComplexCount(String uniprotA, String uniprotB) throws IOException {
        // "componets" is the predicate spelling used by the query as originally written; it
        // has to match the stored data, so it is left untouched.
        String query = "SELECT (COUNT (DISTINCT ?s) AS ?complexes) WHERE {"
                + "?s <http://data.bioinfo.deri.ie/componets> <http://purl.uniprot.org/uniprot/" + uniprotA
                + "> , <http://purl.uniprot.org/uniprot/" + uniprotB + "> ."
                + "}";
        String[] cells = runQuery(query);
        if (cells.length > 1 && !cells[1].isEmpty()) {
            return Integer.parseInt(stripQuotes(cells[1]));
        }
        return 0;
    }

    /**
     * Counts the (termA, termB) combinations that are equal between the ';'-separated term
     * lists registered for the two ids; 0 when either id has no terms.
     */
    private static int countSharedTerms(HashMap<String, String> annotations, String idA, String idB) {
        String listA = annotations.get(idA);
        String listB = annotations.get(idB);
        if (listA == null || listB == null || listA.isEmpty() || listB.isEmpty()) {
            return 0;
        }
        String[] termsB = listB.split(";");
        int shared = 0;
        for (String termA : listA.split(";")) {
            for (String termB : termsB) {
                // Plain equality: the terms are data, not regular expressions.
                if (termA.equals(termB)) {
                    shared++;
                }
            }
        }
        return shared;
    }

    /**
     * Looks up the weight of a gene-gene connector in one of the co-occurrence graphs,
     * trying both name orders.
     *
     * @param graph         graph name under http://tcga.deri.ie/, e.g. "pubmedconnectors"
     * @param connectorType connector class under http://tcga.deri.ie/schema/, e.g. "pubmedCon"
     * @return the first weight returned, or 0 when there is none
     */
    private static int connectorWeight(String graph, String connectorType, String forward,
            String reverse) throws IOException {
        String query = "SELECT * WHERE {"
                + "GRAPH <http://tcga.deri.ie/" + graph + "/> { {"
                + "<http://tcga.deri.ie/con:" + forward + "> a <http://tcga.deri.ie/schema/" + connectorType + "> ;"
                + "<http://tcga.deri.ie/schema/weight> ?weight"
                + "} UNION { "
                + "<http://tcga.deri.ie/con:" + reverse + "> a <http://tcga.deri.ie/schema/" + connectorType + "> ;"
                + "<http://tcga.deri.ie/schema/weight> ?weight"
                + "}}}";
        String[] cells = runQuery(query);
        // Cell 0 is the header, cell 1 the first weight.
        if (cells.length > 1 && !cells[1].isEmpty()) {
            return Integer.parseInt(stripQuotes(cells[1]));
        }
        return 0;
    }

    /**
     * Returns the inferred score of the ideogram-ideogram interaction between the two
     * chromosome bands of the pair (either order), or 0.0 when a band is missing or no
     * score is returned.
     */
    private static double ideogramScore(String[] row) throws IOException {
        if (row[IDEOGRAM_A].isEmpty() || row[IDEOGRAM_B].isEmpty()) {
            return 0.0;
        }
        // Interaction URIs look like IIinter:chr1_p13.3-chr11_q22.3.
        String bandA = "chr" + row[CHROMOSOME_A] + "_" + row[IDEOGRAM_A];
        String bandB = "chr" + row[CHROMOSOME_B] + "_" + row[IDEOGRAM_B];
        String query = "SELECT * WHERE {{ <http://data.bioinfo.deri.ie/IIinter:" + bandA + "-" + bandB
                + "> <http://data.bioinfo.deri.ie/inferredScore> ?inferredScore "
                + "} UNION { <http://data.bioinfo.deri.ie/IIinter:" + bandB + "-" + bandA
                + "> <http://data.bioinfo.deri.ie/inferredScore> ?inferredScore"
                + "}}";
        String[] cells = runQuery(query);
        if (cells.length > 1 && !cells[1].isEmpty()) {
            return Double.parseDouble(stripQuotes(cells[1]));
        }
        return 0.0;
    }

    /**
     * Sums, over every combination of a domain of A with a domain of B, the highest score
     * returned for that domain pair. Returns 0.0 when either domain list is "NA".
     */
    private static double domainDomainScore(String domainsA, String domainsB) throws IOException {
        if ("NA".equals(domainsA) || "NA".equals(domainsB)) {
            return 0.0;
        }
        String[] partsB = domainsB.split(";");
        double total = 0.0;
        for (String domainA : domainsA.split(";")) {
            for (String domainB : partsB) {
                total += bestDomainPairScore(domainA, domainB);
            }
        }
        return total;
    }

    /** Returns the largest score / inferredScore value found for one domain pair, or 0.0. */
    private static double bestDomainPairScore(String domainA, String domainB) throws IOException {
        String query = "SELECT * WHERE { { " + domainPairPattern(domainA, domainB)
                + "} UNION { " + domainPairPattern(domainB, domainA)
                + "}}";
        String[] cells = runQuery(query);
        double best = 0.0;
        // Cells 0-1 are the header; every later non-empty cell is a score of some result row.
        for (int k = 2; k < cells.length; k++) {
            if (!cells[k].isEmpty()) {
                best = Math.max(best, Double.parseDouble(stripQuotes(cells[k])));
            }
        }
        return best;
    }

    /** Graph pattern selecting the optional score and inferredScore of one DDinter resource. */
    private static String domainPairPattern(String first, String second) {
        String uri = "<http://data.bioinfo.deri.ie/DDinter:" + first + "_" + second + ">";
        return uri + " a <http://data.bioinfo.deri.ie/DDinter> ."
                + "OPTIONAL {" + uri + " <http://data.bioinfo.deri.ie/score> ?score} ."
                + "OPTIONAL {" + uri + " <http://data.bioinfo.deri.ie/inferredScore> ?inferredScore }";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment