Last active
April 5, 2020 08:18
-
-
Save fabian-paul/1cac21668db608d4954fdbf81521bf1f to your computer and use it in GitHub Desktop.
Generate trajectories for computational alanine scan (mass generation of mutant structures)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import mdtraj | |
import os | |
import os.path | |
import argparse | |
def prepare_maps(mut_pdb='mutant.pdb', wt_pdb=None): | |
if wt_pdb is None: | |
wt_pdb = os.path.expanduser('~/system/crystal.pdb') | |
wt = mdtraj.load(wt_pdb) | |
mut = mdtraj.load(mut_pdb) | |
wt = wt.atom_slice(wt.top.select('protein')) | |
mut = mut.atom_slice(mut.top.select('protein')) | |
dest = [] | |
source = [] | |
# go throught list of unmodified residues and map atoms | |
for rw, rm in zip(wt.top.residues, mut.top.residues): | |
if rw.name == rm.name and rw.index == rm.index: | |
for aw,am in zip(rw.atoms, rm.atoms): | |
assert aw.name == am.name | |
i = aw.index | |
source.append(i) | |
j = am.index | |
dest.append(j) | |
# add mutated backbone to the map | |
away = None | |
for rw, rm in zip(wt.top.residues, mut.top.residues): | |
if rw.name != rm.name and rw.index == rm.index: | |
if away is None: | |
away = rm.index | |
else: | |
assert rm.index == away # allow only one mutation at the same time | |
for aw in rw.atoms: | |
i = aw.index | |
if aw.name in ['C','O','N','H','CA','HA','H1','H2','H3','OXT']: | |
j = next(am.index for am in rm.atoms if am.name == aw.name) | |
source.append(i) | |
dest.append(j) | |
assert away is not None # should have least one mutation | |
source = np.array(source) | |
dest = np.array(dest) | |
# find the indices of ALA-HB1 to ALA-HB3 in mutant topology | |
r_mut_ala = next(rm for rm in mut.top.residues if rm.index == away) | |
h_and_cb_indices = [] | |
for n in ['HB1', 'HB2', 'HB3', 'CB']: | |
h_and_cb_indices.append(next(a.index for a in r_mut_ala.atoms if a.name == n)) | |
h_and_cb_indices = np.array(h_and_cb_indices) | |
# find the indices of mutated-away backbone in wt topology | |
r_wt_away = next(rw for rw in wt.top.residues if rw.index == away) | |
bb_frame = [] | |
for n in ['C', 'CA', 'N', 'O', 'CB']: | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == n)) | |
# CG is kind of tough... | |
for n in ['CG', 'CG2', 'OG']: | |
if any(a.name == n for a in r_wt_away.atoms): | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == n)) | |
# find HB2 and HB3 positions | |
if r_wt_away.name=='THR': # THR is an exception | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'OG1')) | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB')) | |
if r_wt_away.name=='ILE' or r_wt_away.name=='VAL': | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'CG1')) | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB')) | |
else: | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB2')) | |
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB3')) | |
bb_frame = np.array(bb_frame) | |
# if we are mutating a proline, we have to place the bb hydrogen too ... | |
if wt.top.residue(away).name == 'PRO': | |
proline = True | |
PRO_H_index = next(am.index for am in mut.top.residue(away).atoms if am.name=='H') | |
PRO_N_CD = np.array([ | |
next(aw.index for aw in wt.top.residue(away).atoms if aw.name=='N'), | |
next(aw.index for aw in wt.top.residue(away).atoms if aw.name=='CD') | |
]) | |
else: | |
proline = False | |
PRO_H_index = None | |
PRO_N_CD = None | |
return source, dest, h_and_cb_indices, bb_frame, proline, PRO_H_index, PRO_N_CD, wt, mut | |
# positions of hydrogens in the coordiante system attached to the CA of alanine | |
# * z-axis is oriented along the CA-CB bond | |
# * x-axis is oriented along the direction from CA to GG orthogonalized w.r.t the z-axis (with Gautam method) | |
# * x-axis is oriented along along the bisectrix of the angle (N-CA-C) orthogonalized w.r.t the z-axis (with my method) | |
t = 1.91113553093 # the diamond angle 2*arctan(sqrt(2)) +/- Amber stupidity | |
c = 0.1526 # bond length CA-CB (Amber) | |
b = 0.109 # bond length CB-H (Amber) | |
sqrt_3 = np.sqrt(3.0) | |
H1 = np.array([b*np.sin(t), 0.0, -b*np.cos(t)]) | |
H2 = np.array([-0.5*b*np.sin(t), 0.5*sqrt_3*b*np.sin(t), -b*np.cos(t)]) | |
H3 = np.array([-0.5*b*np.sin(t), -0.5*sqrt_3*b*np.sin(t), -b*np.cos(t)]) | |
ceZ = np.array([0.0, 0.0, c]) | |
def normalized(v): | |
return v/np.linalg.norm(v) | |
def orthogonalized(v, w): | |
return v - v.dot(w)/w.dot(w)* w | |
def orthonormalized(v, w): | |
return normalized(orthogonalized(v, w)) | |
def compute(frame, massova=False, constant_angles=False, fixCB=False): | |
C, CA, N, O, CB, CG, HB2, HB3 = frame | |
O = np.zeros((3,3)) | |
vZ = normalized(CB-CA) | |
if constant_angles: | |
assert not fixCB, 'not implemented' | |
H1p = b*normalized(CG-CB) + CB | |
H2p = b*normalized(HB2-CB) + CB | |
H3p = b*normalized(HB3-CB) + CB | |
CBp = CB | |
return np.vstack((H1p, H2p, H3p, CBp)) | |
if massova: | |
vX = orthonormalized(CG-CB, vZ) | |
else: | |
cc = orthonormalized(C-CA, vZ) | |
n = orthonormalized(N-CA, vZ) | |
vX = normalized(cc+n) | |
vY = np.cross(vZ, vX) | |
assert np.cross(vX, vY).dot(vZ) > 0 | |
# O = map from the coordiante system attached to ALA-CA to the laboratory frame | |
O[:,0] = vX # remember: columns of a matrix are the images of the canonical unit vectors | |
O[:,1] = vY | |
O[:,2] = vZ | |
if fixCB: # everything relative to CA | |
H1p = O.dot(H1 + ceZ) + CA | |
H2p = O.dot(H2 + ceZ) + CA | |
H3p = O.dot(H3 + ceZ) + CA | |
CBp = O.dot(ceZ) + CA | |
else: # everything relative to CB | |
H1p = O.dot(H1) + CB | |
H2p = O.dot(H2) + CB | |
H3p = O.dot(H3) + CB | |
CBp = CB | |
return np.vstack((H1p, H2p, H3p, CBp)) | |
def place_PRO_hydrogen(frame): | |
k = 0.101 # bond length N-H in Amber | |
N,CD = frame | |
v = normalized(CD-N) | |
return N + k*v | |
if __name__=='__main__': | |
parser = argparse.ArgumentParser(description='mutate trajectory') | |
parser.add_argument('-i', '--trajectory', required=True, help='(input) trajectory', metavar='file') | |
parser.add_argument('-m', '--mutant_topology', required=True, help='(input) topology file for mutant', metavar='file') | |
parser.add_argument('-w', '--wild_type_topology', required=True, help='(input) topology file for wild type', metavar='file') | |
parser.add_argument('-o', '--outdir', default='.', help='directory where mutant trajectories are written to, default=.', metavar='path') | |
parser.add_argument('-g', '--massova', default=False, help='orient ALA methyl group like CG atom, default=false', action='store_true') | |
parser.add_argument('-b', '--fixcb', default=False, help='reset C-alpha-C-beta bond length to rest length, default=false', action='store_true') | |
parser.add_argument('-c', '--cangles', default=False, help='keep all angles and dihedrals, default=false', action='store_true') | |
parser.add_argument('-s', '--stride', default=1, help='apply a stride of n steps to the mutant trajectory default=1', metavar='steps') | |
# fixcb: place all hydrogens and CB to rest positions | |
# default: place all hydrogens to rest positions | |
# massova: like default but keep the rotation of the dihedral, reset angles and bond lengths | |
# cangles: keep all angles and dihedrals, only reset hydrogen bond lengths | |
args = parser.parse_args() | |
stride = int(args.stride) | |
source, dest, h_and_cb_indices, bb_frame, proline, PRO_H_index, PRO_N_CD, wt, mut = prepare_maps(mut_pdb=args.mutant_topology, wt_pdb=args.wild_type_topology) | |
# load trajectory | |
wt_traj = mdtraj.load(args.trajectory, top=wt.top).atom_slice(wt.top.select('protein')) | |
n_frames = len(wt_traj) | |
n_frames_out = ((n_frames - 1) // stride) + 1 | |
mut_traj = mdtraj.Trajectory(np.zeros((n_frames_out, mut.top.n_atoms, 3))*np.nan, mut.top, time=wt_traj.time[::stride], | |
unitcell_lengths=wt_traj.unitcell_lengths[::stride], unitcell_angles=wt_traj.unitcell_angles[::stride]) | |
if proline: | |
assert len(source)==len(dest) | |
assert len(source)+len(h_and_cb_indices)+1==mut.n_atoms | |
else: | |
assert len(source)==len(dest)==mut.n_atoms-len(h_and_cb_indices) | |
for t in range(n_frames_out): | |
mut_traj.xyz[t, dest] = wt_traj.xyz[t*stride, source] | |
mut_traj.xyz[t, h_and_cb_indices] = compute(wt_traj.xyz[t*stride, bb_frame], constant_angles=args.cangles, massova=args.massova, fixCB=args.fixcb) | |
if proline: | |
mut_traj.xyz[t, PRO_H_index] = place_PRO_hydrogen(wt_traj.xyz[t*stride, PRO_N_CD]) | |
assert not np.any(np.isnan(mut_traj.xyz)) | |
#mut_traj.save_xtc(args.outdir + os.sep + os.path.basename(args.trajectory)) | |
#mut_traj.save_netcdf(args.outdir + os.sep + os.path.splitext(os.path.basename(args.trajectory))[0] + '.nc') | |
mut_traj.save_dcd(args.outdir + os.sep + os.path.splitext(os.path.basename(args.trajectory))[0] + '.dcd') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment