Created
November 27, 2010 17:32
-
-
Save rajarshi/718099 to your computer and use it in GitHub Desktop.
Evaluate fingerprint accuracy ...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.sf.cdk.tools; | |
import org.openscience.cdk.DefaultChemObjectBuilder; | |
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; | |
import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector; | |
import org.openscience.cdk.exception.CDKException; | |
import org.openscience.cdk.fingerprint.ExtendedFingerprinter; | |
import org.openscience.cdk.fingerprint.Fingerprinter; | |
import org.openscience.cdk.fingerprint.FingerprinterTool; | |
import org.openscience.cdk.fingerprint.IFingerprinter; | |
import org.openscience.cdk.fingerprint.GraphOnlyFingerprinter; | |
import org.openscience.cdk.interfaces.IAtomContainer; | |
import org.openscience.cdk.smiles.SmilesParser; | |
import java.io.BufferedReader; | |
import java.io.BufferedWriter; | |
import java.io.FileReader; | |
import java.io.FileWriter; | |
import java.io.IOException; | |
import java.util.BitSet; | |
/** | |
* Testing accuracy of CDK fingerprints. | |
* | |
* @author Rajarshi Guha | |
*/ | |
public class FPAccuracy { | |
SmilesParser sp; | |
public FPAccuracy() { | |
sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); | |
} | |
private IAtomContainer getMolecule(String smi) throws CDKException { | |
IAtomContainer frag = sp.parseSmiles(smi); | |
CDKHueckelAromaticityDetector.detectAromaticity(frag); | |
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(frag); | |
return frag; | |
} | |
private void process(IFingerprinter fp, String ofile) throws IOException, CDKException { | |
BufferedReader reader = new BufferedReader(new FileReader("/Users/guhar/work/cdkfp/small_molecule-std.fragments2")); | |
BufferedWriter writer = new BufferedWriter(new FileWriter(ofile)); | |
String line; | |
int n = 0; | |
while ((line = reader.readLine()) != null) { | |
String[] toks = line.trim().split("\\s+"); | |
String fragsmi = toks[0]; | |
String parentsmi = toks[1]; | |
BitSet frag = fp.getFingerprint(getMolecule(fragsmi)); | |
BitSet parent = fp.getFingerprint(getMolecule(parentsmi)); | |
boolean issubset = FingerprinterTool.isSubset(parent, frag); | |
writer.write(issubset+"\n"); | |
n++; | |
if (n % 100 == 0) { | |
System.out.print("Processed "+n+"\n"); | |
} | |
} | |
writer.close(); | |
reader.close(); | |
} | |
private void checkStandard() throws IOException, CDKException { | |
IFingerprinter fp = new Fingerprinter(); | |
process(fp, "/Users/guhar/fpacc.std"); | |
} | |
private void checkExtended() throws IOException, CDKException { | |
IFingerprinter fp = new ExtendedFingerprinter(); | |
process(fp, "/Users/guhar/fpacc.ext"); | |
} | |
private void checkGraphOnly() throws IOException, CDKException { | |
IFingerprinter fp = new GraphOnlyFingerprinter(); | |
process(fp, "/Users/guhar/fpacc.graph"); | |
} | |
public static void main(String[] args) throws IOException, CDKException { | |
FPAccuracy fpa = new FPAccuracy(); | |
fpa.checkExtended(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment