public
Created

Evaluate fingerprint accuracy ...

  • Download Gist
FPAccuracy.java
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
 
package net.sf.cdk.tools;
 
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.fingerprint.ExtendedFingerprinter;
import org.openscience.cdk.fingerprint.Fingerprinter;
import org.openscience.cdk.fingerprint.FingerprinterTool;
import org.openscience.cdk.fingerprint.IFingerprinter;
import org.openscience.cdk.fingerprint.GraphOnlyFingerprinter;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.smiles.SmilesParser;
 
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.BitSet;
 
/**
* Testing accuracy of CDK fingerprints.
*
* @author Rajarshi Guha
*/
public class FPAccuracy {
SmilesParser sp;
 
public FPAccuracy() {
sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
}
 
private IAtomContainer getMolecule(String smi) throws CDKException {
IAtomContainer frag = sp.parseSmiles(smi);
CDKHueckelAromaticityDetector.detectAromaticity(frag);
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(frag);
return frag;
}
 
private void process(IFingerprinter fp, String ofile) throws IOException, CDKException {
BufferedReader reader = new BufferedReader(new FileReader("/Users/guhar/work/cdkfp/small_molecule-std.fragments2"));
BufferedWriter writer = new BufferedWriter(new FileWriter(ofile));
 
String line;
int n = 0;
while ((line = reader.readLine()) != null) {
String[] toks = line.trim().split("\\s+");
String fragsmi = toks[0];
String parentsmi = toks[1];
 
BitSet frag = fp.getFingerprint(getMolecule(fragsmi));
BitSet parent = fp.getFingerprint(getMolecule(parentsmi));
boolean issubset = FingerprinterTool.isSubset(parent, frag);
writer.write(issubset+"\n");
 
n++;
if (n % 100 == 0) {
System.out.print("Processed "+n+"\n");
}
}
writer.close();
reader.close();
}
 
private void checkStandard() throws IOException, CDKException {
IFingerprinter fp = new Fingerprinter();
process(fp, "/Users/guhar/fpacc.std");
}
 
private void checkExtended() throws IOException, CDKException {
IFingerprinter fp = new ExtendedFingerprinter();
process(fp, "/Users/guhar/fpacc.ext");
}
 
private void checkGraphOnly() throws IOException, CDKException {
IFingerprinter fp = new GraphOnlyFingerprinter();
process(fp, "/Users/guhar/fpacc.graph");
}
 
public static void main(String[] args) throws IOException, CDKException {
FPAccuracy fpa = new FPAccuracy();
fpa.checkExtended();
}
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.