Last active
August 29, 2015 14:01
-
-
Save mnowotka/8760a8a05ebb8ae688bb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from chembl_webresource_client import * | |
# For some special SMILES, using the GET method against the web services will fail: | |
# (Lines below that are not statements, such as `False` and `400`, appear to be the
# output printed by the statement directly above them.)
# NOTE: '\C' is not a recognized Python escape sequence, so the backslashes in the
# SMILES stay in the string literally.
res = requests.get('https://www.ebi.ac.uk/chemblws/compounds/smiles/CN1C\C(=C/c2ccc(C)cc2)\C3=C(C1)C(C(=C(N)O3)C#N)c4ccc(C)cc4') | |
print res.ok | |
False | |
print res.status_code | |
400 | |
# This is because the SMILES contains a slash '/' character, which has a special meaning in a URL. | |
# The solution to this problem is to use a POST request, which looks like this: | |
# Here the SMILES is passed via `data=` instead of being embedded in the URL path,
# and an XML response is requested through the Accept header.
# NOTE(review): the stray character after the closing parenthesis on the next line
# looks like an extraction artifact - confirm and remove it before running.
res = requests.post('https://www.ebi.ac.uk/chemblws/compounds/smiles', data={'smiles':'CN1C\C(=C/c2ccc(C)cc2)\C3=C(C1)C(C(=C(N)O3)C#N)c4ccc(C)cc4'}, headers={'Accept':'application/xml'})� | |
print res.ok | |
True | |
print res.content | |
"<?xml version='1.0' encoding='utf-8'?>\n<list><compound><smiles>CN1C\\C(=C/c2ccc(C)cc2)\\C3=C(C1)C(C(=C(N)O3)C#N)c4ccc(C)cc4</smiles>..." | |
# In that case maybe it's better to use POST for all requests to ChEMBL? | |
# Unfortunately (currently) there are only a few methods (search by SMILES, substructure and similarity search) | |
# which support POST. Besides, GET requests contain all necessary data in the URL and such a URL can be embedded | |
# on a website, sent by email or chat, so in some cases they are more useful than POST. | |
# But if we have a large number of SMILES, deciding whether GET or POST should be used for each one of them can be | |
# problematic. Our client can transparently handle this problem: | |
# Create the client-side resource object used for the SMILES lookups below.
compounds = CompoundResource() | |
# Getting a compound by a SMILES containing only safe characters... | |
cs = compounds.get(smiles='COc1ccc2[C@@H]3[C@H](COc2c1)C(C)(C)OC4=C3C(=O)C(=O)C5=C4OC(C)(C)[C@@H]6COc7cc(OC)ccc7[C@H]56') | |
# The result is indexed like a list of dict-like records; the first match is used here.
print cs[0]['molecularFormula'] | |
'C32H32O8' | |
# ...looks exactly the same as using a SMILES with some unsafe characters such as a slash: | |
cs = compounds.get(smiles="C\C(=C/C=C/C(=C/C(=O)O)/C)\C=C\C1=C(C)CCCC1(C)C") | |
# NOTE(review): the value shown below has the shape of an InChIKey rather than a
# compound name - confirm that 'preferredCompoundName' is the intended key.
print cs[0]['preferredCompoundName'] | |
'MMAOIAFUZKMAOY-UHFFFAOYSA-N' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment