Last active
August 29, 2015 14:20
-
-
Save evanfeinberg/b6f9c8bc6c54f98d09dc to your computer and use it in GitHub Desktop.
custom dihedral featurizer for naughty pdb files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''I should formalize this with a class, etc., but here's how one can use this: | |
with a file corresponding to a trajectory you want to featurize, call: | |
featurize_custom(traj_file) | |
''' | |
def fix_topology(topology): | |
new_top = topology.copy() | |
residues = {} | |
for chain in new_top.chains: | |
#print chain | |
for residue in chain.residues: | |
resname = str(residue) | |
if resname in residues.keys(): | |
residues[resname].append(residue) | |
else: | |
residues[resname] = [residue] | |
for resname in residues.keys(): | |
fragments = residues[resname] | |
if len(fragments) > 1: | |
main_fragment = fragments[0] | |
new_atom_list = [] | |
new_atom_list += main_fragment._atoms | |
for i in range(1,len(fragments)): | |
fragment = fragments[i] | |
for atom in fragment.atoms: | |
atom.residue = main_fragment | |
new_atom_list += fragment._atoms | |
fragment._atoms = [] | |
fragment.chain = main_fragment.chain | |
main_fragment._atoms = new_atom_list | |
return new_top | |
def phi_indices(top): | |
graph = top.to_bondgraph() | |
c_atoms = [(a, a.residue.resSeq) for a in top.atoms if a.name == "C"] | |
c_atoms.sort(key=operator.itemgetter(1)) | |
c_atoms = [c_atom[0] for c_atom in c_atoms] | |
#print("%d C atoms" %len(c_atoms)) | |
phi_tuples = [] | |
for c in c_atoms: | |
n = None | |
ca = None | |
next_c = None | |
c_index = c.index | |
c_neighbors = graph.edge[c].keys() | |
for c_neighbor in c_neighbors: | |
if c_neighbor.name == "N": | |
n = c_neighbor | |
break | |
if n != None: | |
n_neighbors = graph.edge[n].keys() | |
for n_neighbor in n_neighbors: | |
if n_neighbor.name == "CA": | |
ca = n_neighbor | |
break | |
if ca != None: | |
ca_neighbors = graph.edge[ca].keys() | |
for ca_neighbor in ca_neighbors: | |
if ca_neighbor.name == "C": | |
next_c = ca_neighbor | |
break | |
if n != None and ca != None and next_c != None: | |
phi_tuples.append((c.index, n.index, ca.index, next_c.index)) | |
#print("phi angles = %d" %len(phi_tuples)) | |
return phi_tuples | |
def psi_indices(top): | |
graph = top.to_bondgraph() | |
n_atoms = [(a, a.residue.resSeq) for a in top.atoms if a.name == "N"] | |
n_atoms.sort(key=operator.itemgetter(1)) | |
n_atoms = [n_atom[0] for n_atom in n_atoms] | |
psi_tuples = [] | |
for n in n_atoms: | |
c = None | |
ca = None | |
next_n = None | |
n_index = n.index | |
n_neighbors = graph.edge[n].keys() | |
for n_neighbor in n_neighbors: | |
if n_neighbor.name == "CA": | |
ca = n_neighbor | |
break | |
if ca != None: | |
ca_neighbors = graph.edge[ca].keys() | |
for ca_neighbor in ca_neighbors: | |
if ca_neighbor.name == "C": | |
c = ca_neighbor | |
break | |
if c != None: | |
c_neighbors = graph.edge[c].keys() | |
for c_neighbor in c_neighbors: | |
if c_neighbor.name == "N": | |
next_n = c_neighbor | |
break | |
if c != None and ca != None and next_n != None: | |
psi_tuples.append((n.index, c.index, ca.index, next_n.index)) | |
#print("psi angles = %d " %len(psi_tuples)) | |
return psi_tuples | |
def chi2_indices(top): | |
seq1 = ('CA', 'CB', 'CG', 'CD') | |
seq2 = ('CA', 'CB', 'CG', 'OD1') | |
seq3 = ('CA', 'CB', 'CG', 'ND1') | |
seq4 = ('CA', 'CB', 'CG1', 'CD1') | |
seq5 = ('CA', 'CB', 'CG,' 'SD') | |
term_4 = ('CD', 'OD1', 'ND1', 'CD1', 'SD') | |
top = fix_topology(top) | |
residues = [(res, res.resSeq) for res in top.residues] | |
residues.sort(key=operator.itemgetter(1)) | |
residues = [res[0] for res in residues] | |
chi2_tuples = [] | |
for residue in residues: | |
dihedral = [None, None, None, None] | |
for atom in residue.atoms: | |
if atom.name == 'CA': dihedral[0] = atom.index | |
if atom.name == 'CB': dihedral[1] = atom.index | |
if atom.name == 'CG' or atom.name == 'CG1': dihedral[2] = atom.index | |
if atom.name in term_4: dihedral[3] = atom.index | |
if None not in dihedral: | |
dihedral = tuple(dihedral) | |
chi2_tuples.append(dihedral) | |
return chi2_tuples | |
def read_and_featurize_custom(traj_file, condition=None, location=None): | |
top = md.load_frame(traj_file,index = 0).topology | |
atom_indices = [a.index for a in top.atoms] | |
traj = md.load(traj_file, atom_indices=atom_indices) | |
phi_tuples = phi_indices(traj.topology) | |
psi_tuples = psi_indices(traj.topology) | |
chi2_tuples = chi2_indices(traj.topology) | |
phi_angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=phi_tuples)) | |
psi_angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=psi_tuples)) | |
chi2_angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=chi2_tuples)) | |
manual_features = np.concatenate([np.sin(phi_angles), np.cos(phi_angles), np.sin(psi_angles), np.cos(psi_angles), np.sin(chi2_angles), np.cos(chi2_angles)]) | |
if condition is None: | |
condition = get_condition(traj_file) | |
if location is None: | |
location = "/scratch/users/enf/b2ar_analysis/features_allprot" | |
verbosedump(manual_features, "%s/%s.h5" %(location, condition)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment