Skip to content

Instantly share code, notes, and snippets.

@leelasd
Forked from PatWalters/split_complex_v2.py
Created November 29, 2018 19:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leelasd/06befb47a5309bead05bc46c6ac9ef25 to your computer and use it in GitHub Desktop.
Save leelasd/06befb47a5309bead05bc46c6ac9ef25 to your computer and use it in GitHub Desktop.
An improved script to extract a ligand from a protein-ligand complex and assign bond orders
#!/usr/bin/env python
import sys
from prody import *
from rdkit import Chem
from rdkit.Chem import AllChem
from io import StringIO
import pypdb
def get_pdb_components(pdb_id):
"""
Split a protein-ligand pdb into protein and ligand components
:param pdb_id:
:return:
"""
pdb = parsePDB(pdb_id)
protein = pdb.select('protein')
ligand = pdb.select('not protein and not water')
return protein, ligand
def process_ligand(ligand, res_name):
"""
Add bond orders to a pdb ligand
1. Select the ligand component with name "res_name"
2. Get the corresponding SMILES from pypdb
3. Create a template molecule from the SMILES in step 2
4. Write the PDB file to a stream
5. Read the stream into an RDKit molecule
6. Assign the bond orders from the template from step 3
:param ligand: ligand as generated by prody
:param res_name: residue name of ligand to extract
:return: molecule with bond orders assigned
"""
output = StringIO()
sub_mol = ligand.select(f"resname {res_name}")
chem_desc = pypdb.describe_chemical(f"{res_name}")
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
template = AllChem.MolFromSmiles(sub_smiles)
writePDBStream(output, sub_mol)
pdb_string = output.getvalue()
rd_mol = AllChem.MolFromPDBBlock(pdb_string)
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol)
return new_mol
def write_pdb(protein, pdb_name):
"""
Write a prody protein to a pdb file
:param protein: protein object from prody
:param pdb_name: base name for the pdb file
:return: None
"""
output_pdb_name = f"{pdb_name}_protein.pdb"
writePDB(f"{output_pdb_name}", protein)
print(f"wrote {output_pdb_name}")
def write_sdf(new_mol, pdb_name, res_name):
"""
Write an RDKit molecule to an SD file
:param new_mol:
:param pdb_name:
:param res_name:
:return:
"""
outfile_name = f"{pdb_name}_{res_name}_ligand.sdf"
writer = Chem.SDWriter(f"{outfile_name}")
writer.write(new_mol)
print(f"wrote {outfile_name}")
def main(pdb_name):
"""
Read Ligand Expo data, split pdb into protein and ligands,
write protein pdb, write ligand sdf files
:param pdb_name: id from the pdb, doesn't need to have an extension
:return:
"""
protein, ligand = get_pdb_components(pdb_name)
write_pdb(protein, pdb_name)
res_name_list = list(set(ligand.getResnames()))
for res in res_name_list:
new_mol = process_ligand(ligand, res)
write_sdf(new_mol, pdb_name, res)
if __name__ == "__main__":
if len(sys.argv) == 2:
main(sys.argv[1])
else:
print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment