mnowotka/example.py

## example.py
# necessary imports first:
from chembl_webresource_client import *

# create resource object:
compounds = CompoundResource()

# before you do anything else, please check the web services status:
print compounds.status()
True

# If you get 'False' instead, this could mean that your internet connection is broken,
# your settings are pointing to the wrong server or the server itself is down.

# get a single compound by its ChEMBL ID:
c = compounds.get('CHEMBL1')

# `c` is now a python dictionary:
print c
{u'smiles': u'COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56',
u'chemblId': u'CHEMBL1', u'passesRuleOfThree': u'No',
u'molecularWeight': 544.59, u'molecularFormula': u'C32H32O8',
u'acdLogp': 7.67, u'stdInChiKey': u'GHBOEFUAGSHXPO-XZOTUCIWSA-N',
u'knownDrug': u'No', u'medChemFriendly': u'Yes', u'rotatableBonds': 2,
u'alogp': 3.63, u'numRo5Violations': 1, u'acdLogd': 7.67}

# for those who prefer XML, this will return an XML string:
# question for the reader: why did we choose the parameter name 'frmt' instead of 'format'?
c = compounds.get('CHEMBL1', frmt='xml')
print c
"<?xml version='1.0' encoding='utf-8'?>\n<compound><smiles>COc1ccc2...</compound>"

# we can get a compound by its standard InChI key as well:
c = compounds.get(stdinchikey='QFFGVLORLPOAEC-SNVBAGLBSA-N')
print c['molecularFormula']
'C19H21ClFN3O3'

# ... or by SMILES, in which case, a list of dictionaries will be returned:
cs = compounds.get(smiles='COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56')
print cs[0]['stdInChiKey']
'GHBOEFUAGSHXPO-UWXQAFAOSA-N'

# it's so easy to perform substructure search:
cs = compounds.substructure('COcccc')

# similarity search is super easy as well:
cs = compounds.similar_to('COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56', 70)

# OK, some more complex stuff now - let's get all compounds with IDs from CHEMBL1 to CHEMBL300:
cs = compounds.get(['CHEMBL%s' % x for x in range(1,301)])

# do it again and you will notice it's faster now - cache is working:
cs = compounds.get(['CHEMBL%s' % x for x in range(1,301)])

# let's see how many compounds we got back:
len(cs)
300

# hmm, this is strange because there is no compound with chemblId = CHEMBL300...
cs[299]
404

# OK, so for some compounds the server will return an error, e.g. 404 = compound does not exist
# This is fine, as we can remove the 'invalid' compounds with a filter:

valid_compounds = filter(lambda x: not isinstance(x, int), cs)
len(valid_compounds)
170

# new stuff is working as well, so you can get compound forms...:
print compounds.forms('CHEMBL415863')
[{u'chemblId': u'CHEMBL1207563', u'parent': True}, {u'chemblId': u'CHEMBL415863', u'parent': False}]

# ... and drug mechanisms:
print compounds.drug_mechnisms('CHEMBL1642')
[{u'chemblId': u'CHEMBL1862', ..., u'mechanismOfAction': u'Stem cell growth factor receptor inhibitor'}]

# displaying compound image looks like this:
from StringIO import StringIO
from PIL import Image

buf = StringIO()
buf.write(compounds.image('CHEMBL1'))
buf.seek(0)
im = Image.open(buf)
im.show()
	# necessary imports first:
	from chembl_webresource_client import *

	# create resource object:
	compounds = CompoundResource()

	# before you do anything else, please check the web services status:
	print compounds.status()
	True

	# If you get 'False' instead, this could mean that your internet connection is broken,
	# your settings are pointing to the wrong server or the server itself is down.

	# get a single compound by its ChEMBL ID:
	c = compounds.get('CHEMBL1')

	# `c` is now a python dictionary:
	print c
	{u'smiles': u'COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56',
	u'chemblId': u'CHEMBL1', u'passesRuleOfThree': u'No',
	u'molecularWeight': 544.59, u'molecularFormula': u'C32H32O8',
	u'acdLogp': 7.67, u'stdInChiKey': u'GHBOEFUAGSHXPO-XZOTUCIWSA-N',
	u'knownDrug': u'No', u'medChemFriendly': u'Yes', u'rotatableBonds': 2,
	u'alogp': 3.63, u'numRo5Violations': 1, u'acdLogd': 7.67}

	# for those who prefer XML, this will return an XML string:
	# question for the reader: why did we choose the parameter name 'frmt' instead of 'format'?
	c = compounds.get('CHEMBL1', frmt='xml')
	print c
	"<?xml version='1.0' encoding='utf-8'?>\n<compound><smiles>COc1ccc2...</compound>"

	# we can get a compound by its standard InChI key as well:
	c = compounds.get(stdinchikey='QFFGVLORLPOAEC-SNVBAGLBSA-N')
	print c['molecularFormula']
	'C19H21ClFN3O3'

	# ... or by SMILES, in which case, a list of dictionaries will be returned:
	cs = compounds.get(smiles='COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56')
	print cs[0]['stdInChiKey']
	'GHBOEFUAGSHXPO-UWXQAFAOSA-N'

	# it's so easy to perform substructure search:
	cs = compounds.substructure('COcccc')

	# similarity search is super easy as well:
	cs = compounds.similar_to('COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56', 70)

	# OK, some more complex stuff now - let's get all compounds with IDs from CHEMBL1 to CHEMBL300:
	cs = compounds.get(['CHEMBL%s' % x for x in range(1,301)])

	# do it again and you will notice it's faster now - cache is working:
	cs = compounds.get(['CHEMBL%s' % x for x in range(1,301)])

	# let's see how many compounds we got back:
	len(cs)
	300

	# hmm, this is strange because there is no compound with chemblId = CHEMBL300...
	cs[299]
	404

	# OK, so for some compounds the server will return an error, e.g. 404 = compound does not exist
	# This is fine, as we can remove the 'invalid' compounds with a filter:

	valid_compounds = filter(lambda x: not isinstance(x, int), cs)
	len(valid_compounds)
	170

	# new stuff is working as well, so you can get compound forms...:
	print compounds.forms('CHEMBL415863')
	[{u'chemblId': u'CHEMBL1207563', u'parent': True}, {u'chemblId': u'CHEMBL415863', u'parent': False}]

	# ... and drug mechanisms:
	print compounds.drug_mechnisms('CHEMBL1642')
	[{u'chemblId': u'CHEMBL1862', ..., u'mechanismOfAction': u'Stem cell growth factor receptor inhibitor'}]

	# displaying compound image looks like this:
	from StringIO import StringIO
	from PIL import Image

	buf = StringIO()
	buf.write(compounds.image('CHEMBL1'))
	buf.seek(0)
	im = Image.open(buf)
	im.show()