Skip to content

Instantly share code, notes, and snippets.

@padix-key
Created September 27, 2022 10:56
Show Gist options
  • Save padix-key/ecd5c5b494242d7eb1c4004e677d8191 to your computer and use it in GitHub Desktop.
Save padix-key/ecd5c5b494242d7eb1c4004e677d8191 to your computer and use it in GitHub Desktop.
Removal of incomplete residues
import numpy as np
import biotite.structure as struc
import biotite.structure.io.mmtf as mmtf
import biotite.structure.info as info
import biotite.database.rcsb as rcsb
def remove_incomplete_residues(atoms):
include_mask = np.zeros(atoms.array_length(), dtype=bool)
residue_starts = struc.get_residue_starts(atoms, add_exclusive_stop=True)
# Iterate over each residue in the input structure
for i in range(len(residue_starts) - 1):
start = residue_starts[i]
stop = residue_starts[i+1]
residue_length = stop - start
# Obtain the reference residue
# from Chemical Component Dictionary
ref_residue = info.residue(atoms.res_name[start])
ref_residue = ref_residue[ref_residue.element != "H"]
if stop != atoms.array_length():
# If this residue is not the final residue,
# the 'OXT' atom is missing due to the peptide bond
ref_residue = ref_residue[ref_residue.atom_name != "OXT"]
ref_length = ref_residue.array_length()
# Expect at least as many atoms in the residue as in the reference
# If this is not the case, at least one atom is missing and the
# entire residue is filtered out
if residue_length >= ref_length:
include_mask[start : stop] = True
atoms = atoms[include_mask]
return atoms
### Example code
atoms = mmtf.get_structure(
mmtf.MMTFFile.read(rcsb.fetch("1l2y", "mmtf")),
model=1
)
atoms = atoms[atoms.element != "H"]
print(len(atoms))
# All residues are complete -> no atom is filtered out
atoms = remove_incomplete_residues(atoms)
print(len(atoms))
# Remove a single atom...
atoms = atoms[1:]
# ... -> the whole residue is filtered out
atoms = remove_incomplete_residues(atoms)
print(len(atoms))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment