sunhwan/align.py

## align.py
#!/usr/bin/env python

import numpy as np

def usage():
    print """
    This script calculates C-alpha RMSD between 2 PDB structures,
    and gererates optimal superposition of the 2nd structure to the
    1st structure.

    Transmembrane proteins are often aligned along the Z-axis, because,
    conventionally, the bilayer normal is parallel to the Z-axis. To
    preserve the orientation of protein with respect to the bilayer,
    the optimal rotation matrix will be constructed in a way that the
    rotation will be carried along the Z-axis only.

    Usage: python align.py pdb-file1 pdb-file2 selection

    selection: string of residue number separated by colon, e.g. 10:40.
    C-alpha atoms of selected residues will be used for alignment. Currently,
    only a PDB that contains single chain can be used.
    """

class Atom:
    def __init__(self, line):
        self.id = int(line[6:11].strip())
        self.name = line[11:16].strip().upper()
        self.resname = line[17:21].strip()
        self.chain = line[21:22].strip()
        self.x = float(line[30:38])
        self.y = float(line[38:46])
        self.z = float(line[46:54])
        self.alt = line[16]
        resid = line[22:27].strip()
        type = line[77:80].strip()
        try:
            self.resid = int(resid)
        except:
            self.resid = int(resid[:-1])
        if not type: type = self.name[0]
        self.type = type

    def __getitem__(self, key):
        return self.__dict__[key]

class PDB:

    def __init__(self, pdbfile):
        self.residues = {}
        self.atoms = []
        self._parse(pdbfile)

    def _parse(self, pdbfile):
        for line in open(pdbfile, 'r').readlines():
            if line.startswith('ATOM'):
                atom = Atom(line)
                self.atoms.append(atom)
                if not self.residues.has_key(atom.resid):
                    self.residues[atom.resid] = {}
                self.residues[atom.resid][atom.name] = atom

    def get_com(self, selection):
        atoms = self.get_selection(selection)
        com = np.sum([(atom.x, atom.y, atom.z) for atom in atoms], axis=0)/len(atoms)
        return com

    def orient(self, selection):
        atoms = self.get_selection(selection)
        com = np.sum([(atom.x, atom.y, atom.z) for atom in atoms], axis=0)/len(atoms)

        for i in self.residues.keys():
            for j in self.residues[i].keys():
                self.residues[i][j].x -= com[0]
                self.residues[i][j].y -= com[1]
                self.residues[i][j].z -= com[2]

    def translate(self, dist):
        for atom in self.atoms:
            atom.x += dist[0]
            atom.y += dist[1]
            atom.z += dist[2]

    def get_selection(self, selection):
        selections = []
        for rid in range(selection[0], selection[1]+1):
            selections.append(self.residues[rid]['CA'])
            selections.append(self.residues[rid]['N'])
            selections.append(self.residues[rid]['O'])
            selections.append(self.residues[rid]['C'])
        return selections

    def get_crds(self, selection):
        atoms = self.get_selection(selection)
        crds = np.zeros((len(atoms), 3), float)
        for i, a in enumerate(atoms):
            crds[i,0] = a.x
            crds[i,1] = a.y
            crds[i,2] = a.z
        return crds

    def transform(self, matrix):
        for atom in self.atoms:
            pos = (atom.x, atom.y, atom.z)
            atom.x = matrix[0,0] * pos[0] + matrix[0,1] * pos[1] + matrix[0,2] * pos[2]
            atom.y = matrix[1,0] * pos[0] + matrix[1,1] * pos[1] + matrix[1,2] * pos[2]
            atom.z = matrix[2,0] * pos[0] + matrix[2,1] * pos[1] + matrix[2,2] * pos[2]

    def write_pdb(self, filename):
        fp = open(filename, 'w')
        for atom in self.atoms:
            atomdata = (atom.id, atom.name, atom.resname, atom.resid, atom.x, atom.y, atom.z, 0.0, 0.0)
            if len(atom.name) == 4:
                fp.write("ATOM %6i %4s%5s%5i    %8.3f%8.3f%8.3f%6.2f%6.2f\n" % atomdata)
            else:
                fp.write("ATOM %6i  %3s%5s%5i    %8.3f%8.3f%8.3f%6.2f%6.2f\n" % atomdata)
        fp.close()

if __name__ == '__main__':
    import sys
    try:
        pdbfile1 = sys.argv[1]
        pdbfile2 = sys.argv[2]
        selection = map(int, sys.argv[3].split(':'))
    except:
        usage()
        sys.exit(0)

    pdb1 = PDB(pdbfile1)
    com1 = pdb1.get_com(selection)
    pdb1.translate((-com1[0], -com1[1], 0))
    crds1 = pdb1.get_crds(selection)

    pdb2 = PDB(pdbfile2)
    com2 = pdb2.get_com(selection)
    pdb2.translate((-com2[0], -com2[1], 0))
    crds2 = pdb2.get_crds(selection)

    # calculate rotation matrix
    correlation_matrix = np.dot(np.transpose(crds1), crds2)
    v, s, w = np.linalg.svd(correlation_matrix)
    is_reflection = (np.linalg.det(np.transpose(v)) * np.linalg.det(np.transpose(w))) < 0.0
    if is_reflection:
        v[-1,:] = -v[-1,:]
    u = np.dot(v, w)

    psi = -np.arctan2(u[0,2], u[1,2])
    z = np.zeros(9).reshape(3,3)
    z[2,2] = 1
    z[0,0] = z[1,1] = np.cos(psi)
    z[0,1] = -np.sin(psi)
    z[1,0] = np.sin(psi)

    if u[0,1] > 0 and z[0,1] < 0:
        z[0,1] = -z[0,1]
        z[1,0] = -z[1,0]

    pdb2.transform(u)
    pdb2.translate((com1[0], com1[1], 0))
    pdb2.write_pdb('xxx.pdb')
	#!/usr/bin/env python

	import numpy as np

	def usage():
	print """
	This script calculates C-alpha RMSD between 2 PDB structures,
	and gererates optimal superposition of the 2nd structure to the
	1st structure.

	Transmembrane proteins are often aligned along the Z-axis, because,
	conventionally, the bilayer normal is parallel to the Z-axis. To
	preserve the orientation of protein with respect to the bilayer,
	the optimal rotation matrix will be constructed in a way that the
	rotation will be carried along the Z-axis only.

	Usage: python align.py pdb-file1 pdb-file2 selection

	selection: string of residue number separated by colon, e.g. 10:40.
	C-alpha atoms of selected residues will be used for alignment. Currently,
	only a PDB that contains single chain can be used.
	"""

	class Atom:
	def __init__(self, line):
	self.id = int(line[6:11].strip())
	self.name = line[11:16].strip().upper()
	self.resname = line[17:21].strip()
	self.chain = line[21:22].strip()
	self.x = float(line[30:38])
	self.y = float(line[38:46])
	self.z = float(line[46:54])
	self.alt = line[16]
	resid = line[22:27].strip()
	type = line[77:80].strip()
	try:
	self.resid = int(resid)
	except:
	self.resid = int(resid[:-1])
	if not type: type = self.name[0]
	self.type = type

	def __getitem__(self, key):
	return self.__dict__[key]

	class PDB:

	def __init__(self, pdbfile):
	self.residues = {}
	self.atoms = []
	self._parse(pdbfile)

	def _parse(self, pdbfile):
	for line in open(pdbfile, 'r').readlines():
	if line.startswith('ATOM'):
	atom = Atom(line)
	self.atoms.append(atom)
	if not self.residues.has_key(atom.resid):
	self.residues[atom.resid] = {}
	self.residues[atom.resid][atom.name] = atom

	def get_com(self, selection):
	atoms = self.get_selection(selection)
	com = np.sum([(atom.x, atom.y, atom.z) for atom in atoms], axis=0)/len(atoms)
	return com

	def orient(self, selection):
	atoms = self.get_selection(selection)
	com = np.sum([(atom.x, atom.y, atom.z) for atom in atoms], axis=0)/len(atoms)

	for i in self.residues.keys():
	for j in self.residues[i].keys():
	self.residues[i][j].x -= com[0]
	self.residues[i][j].y -= com[1]
	self.residues[i][j].z -= com[2]

	def translate(self, dist):
	for atom in self.atoms:
	atom.x += dist[0]
	atom.y += dist[1]
	atom.z += dist[2]

	def get_selection(self, selection):
	selections = []
	for rid in range(selection[0], selection[1]+1):
	selections.append(self.residues[rid]['CA'])
	selections.append(self.residues[rid]['N'])
	selections.append(self.residues[rid]['O'])
	selections.append(self.residues[rid]['C'])
	return selections

	def get_crds(self, selection):
	atoms = self.get_selection(selection)
	crds = np.zeros((len(atoms), 3), float)
	for i, a in enumerate(atoms):
	crds[i,0] = a.x
	crds[i,1] = a.y
	crds[i,2] = a.z
	return crds

	def transform(self, matrix):
	for atom in self.atoms:
	pos = (atom.x, atom.y, atom.z)
	atom.x = matrix[0,0] * pos[0] + matrix[0,1] * pos[1] + matrix[0,2] * pos[2]
	atom.y = matrix[1,0] * pos[0] + matrix[1,1] * pos[1] + matrix[1,2] * pos[2]
	atom.z = matrix[2,0] * pos[0] + matrix[2,1] * pos[1] + matrix[2,2] * pos[2]

	def write_pdb(self, filename):
	fp = open(filename, 'w')
	for atom in self.atoms:
	atomdata = (atom.id, atom.name, atom.resname, atom.resid, atom.x, atom.y, atom.z, 0.0, 0.0)
	if len(atom.name) == 4:
	fp.write("ATOM %6i %4s%5s%5i %8.3f%8.3f%8.3f%6.2f%6.2f\n" % atomdata)
	else:
	fp.write("ATOM %6i %3s%5s%5i %8.3f%8.3f%8.3f%6.2f%6.2f\n" % atomdata)
	fp.close()

	if __name__ == '__main__':
	import sys
	try:
	pdbfile1 = sys.argv[1]
	pdbfile2 = sys.argv[2]
	selection = map(int, sys.argv[3].split(':'))
	except:
	usage()
	sys.exit(0)

	pdb1 = PDB(pdbfile1)
	com1 = pdb1.get_com(selection)
	pdb1.translate((-com1[0], -com1[1], 0))
	crds1 = pdb1.get_crds(selection)

	pdb2 = PDB(pdbfile2)
	com2 = pdb2.get_com(selection)
	pdb2.translate((-com2[0], -com2[1], 0))
	crds2 = pdb2.get_crds(selection)

	# calculate rotation matrix
	correlation_matrix = np.dot(np.transpose(crds1), crds2)
	v, s, w = np.linalg.svd(correlation_matrix)
	is_reflection = (np.linalg.det(np.transpose(v)) * np.linalg.det(np.transpose(w))) < 0.0
	if is_reflection:
	v[-1,:] = -v[-1,:]
	u = np.dot(v, w)

	psi = -np.arctan2(u[0,2], u[1,2])
	z = np.zeros(9).reshape(3,3)
	z[2,2] = 1
	z[0,0] = z[1,1] = np.cos(psi)
	z[0,1] = -np.sin(psi)
	z[1,0] = np.sin(psi)

	if u[0,1] > 0 and z[0,1] < 0:
	z[0,1] = -z[0,1]
	z[1,0] = -z[1,0]

	pdb2.transform(u)
	pdb2.translate((com1[0], com1[1], 0))
	pdb2.write_pdb('xxx.pdb')