Skip to content

Instantly share code, notes, and snippets.

@evanfeinberg
Last active August 29, 2015 14:20
Show Gist options
  • Save evanfeinberg/b6f9c8bc6c54f98d09dc to your computer and use it in GitHub Desktop.
Save evanfeinberg/b6f9c8bc6c54f98d09dc to your computer and use it in GitHub Desktop.
custom dihedral featurizer for naughty pdb files
'''I should formalize this with a class, etc., but here's how one can use this:
with a file corresponding to a trajectory you want to featurize, call:
featurize_custom(traj_file)
'''
def fix_topology(topology):
new_top = topology.copy()
residues = {}
for chain in new_top.chains:
#print chain
for residue in chain.residues:
resname = str(residue)
if resname in residues.keys():
residues[resname].append(residue)
else:
residues[resname] = [residue]
for resname in residues.keys():
fragments = residues[resname]
if len(fragments) > 1:
main_fragment = fragments[0]
new_atom_list = []
new_atom_list += main_fragment._atoms
for i in range(1,len(fragments)):
fragment = fragments[i]
for atom in fragment.atoms:
atom.residue = main_fragment
new_atom_list += fragment._atoms
fragment._atoms = []
fragment.chain = main_fragment.chain
main_fragment._atoms = new_atom_list
return new_top
def phi_indices(top):
graph = top.to_bondgraph()
c_atoms = [(a, a.residue.resSeq) for a in top.atoms if a.name == "C"]
c_atoms.sort(key=operator.itemgetter(1))
c_atoms = [c_atom[0] for c_atom in c_atoms]
#print("%d C atoms" %len(c_atoms))
phi_tuples = []
for c in c_atoms:
n = None
ca = None
next_c = None
c_index = c.index
c_neighbors = graph.edge[c].keys()
for c_neighbor in c_neighbors:
if c_neighbor.name == "N":
n = c_neighbor
break
if n != None:
n_neighbors = graph.edge[n].keys()
for n_neighbor in n_neighbors:
if n_neighbor.name == "CA":
ca = n_neighbor
break
if ca != None:
ca_neighbors = graph.edge[ca].keys()
for ca_neighbor in ca_neighbors:
if ca_neighbor.name == "C":
next_c = ca_neighbor
break
if n != None and ca != None and next_c != None:
phi_tuples.append((c.index, n.index, ca.index, next_c.index))
#print("phi angles = %d" %len(phi_tuples))
return phi_tuples
def psi_indices(top):
graph = top.to_bondgraph()
n_atoms = [(a, a.residue.resSeq) for a in top.atoms if a.name == "N"]
n_atoms.sort(key=operator.itemgetter(1))
n_atoms = [n_atom[0] for n_atom in n_atoms]
psi_tuples = []
for n in n_atoms:
c = None
ca = None
next_n = None
n_index = n.index
n_neighbors = graph.edge[n].keys()
for n_neighbor in n_neighbors:
if n_neighbor.name == "CA":
ca = n_neighbor
break
if ca != None:
ca_neighbors = graph.edge[ca].keys()
for ca_neighbor in ca_neighbors:
if ca_neighbor.name == "C":
c = ca_neighbor
break
if c != None:
c_neighbors = graph.edge[c].keys()
for c_neighbor in c_neighbors:
if c_neighbor.name == "N":
next_n = c_neighbor
break
if c != None and ca != None and next_n != None:
psi_tuples.append((n.index, c.index, ca.index, next_n.index))
#print("psi angles = %d " %len(psi_tuples))
return psi_tuples
def chi2_indices(top):
seq1 = ('CA', 'CB', 'CG', 'CD')
seq2 = ('CA', 'CB', 'CG', 'OD1')
seq3 = ('CA', 'CB', 'CG', 'ND1')
seq4 = ('CA', 'CB', 'CG1', 'CD1')
seq5 = ('CA', 'CB', 'CG,' 'SD')
term_4 = ('CD', 'OD1', 'ND1', 'CD1', 'SD')
top = fix_topology(top)
residues = [(res, res.resSeq) for res in top.residues]
residues.sort(key=operator.itemgetter(1))
residues = [res[0] for res in residues]
chi2_tuples = []
for residue in residues:
dihedral = [None, None, None, None]
for atom in residue.atoms:
if atom.name == 'CA': dihedral[0] = atom.index
if atom.name == 'CB': dihedral[1] = atom.index
if atom.name == 'CG' or atom.name == 'CG1': dihedral[2] = atom.index
if atom.name in term_4: dihedral[3] = atom.index
if None not in dihedral:
dihedral = tuple(dihedral)
chi2_tuples.append(dihedral)
return chi2_tuples
def read_and_featurize_custom(traj_file, condition=None, location=None):
top = md.load_frame(traj_file,index = 0).topology
atom_indices = [a.index for a in top.atoms]
traj = md.load(traj_file, atom_indices=atom_indices)
phi_tuples = phi_indices(traj.topology)
psi_tuples = psi_indices(traj.topology)
chi2_tuples = chi2_indices(traj.topology)
phi_angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=phi_tuples))
psi_angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=psi_tuples))
chi2_angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=chi2_tuples))
manual_features = np.concatenate([np.sin(phi_angles), np.cos(phi_angles), np.sin(psi_angles), np.cos(psi_angles), np.sin(chi2_angles), np.cos(chi2_angles)])
if condition is None:
condition = get_condition(traj_file)
if location is None:
location = "/scratch/users/enf/b2ar_analysis/features_allprot"
verbosedump(manual_features, "%s/%s.h5" %(location, condition))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment