Created
May 7, 2018 10:12
-
-
Save bennof/83f935d51be3fa9c9b3ed8fdd4ba5ee4 to your computer and use it in GitHub Desktop.
pdbAlign
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f | |
#written by Benjamin Falkner (2012) | |
#RMSD Calculation of two PDB files | |
#usage pdbalign.awk [file1] [file2] | |
#uses TER to terminate input | |
#using Quaternion Rotations, Householder Transform, QL Decomposition | |
#Absolute Value | |
function abs(value){ | |
return (value<0.0?-value:value); | |
} | |
function Tridiagonal4(mat){ | |
# input: mat = 4x4 real, symmetric lower Matrix | |
# output: mat = diagonal [0-3] and subdiagonal [4-6] | |
# save matrix M | |
a = mat[0]; b = mat[1]; c = mat[2]; d = mat[3]; | |
e = mat[4]; f = mat[5]; g = mat[6]; | |
h = mat[7]; i = mat[8]; | |
j = mat[9]; | |
diag[0] = a; | |
subd[3] = 0; | |
if ( c != 0 || d != 0 ) { | |
# build column Q1 | |
len = sqrt(b*b+c*c+d*d); | |
q11 = b/len; | |
q21 = c/len; | |
q31 = d/len; | |
subd[0] = len; | |
# compute S*Q1 | |
v0 = e*q11+f*q21+g*q31; | |
v1 = f*q11+h*q21+i*q31; | |
v2 = g*q11+i*q21+j*q31; | |
diag[1] = q11*v0+q21*v1+q31*v2; | |
# build column Q3 = Q1x(S*Q1) | |
q13 = q21*v2-q31*v1; | |
q23 = q31*v0-q11*v2; | |
q33 = q11*v1-q21*v0; | |
len = sqrt(q13*q13+q23*q23+q33*q33); | |
if ( len > 0 ) { | |
q13 /= len; | |
q23 /= len; | |
q33 /= len; | |
# build column Q2 = Q3xQ1 | |
q12 = q23*q31-q33*q21; | |
q22 = q33*q11-q13*q31; | |
q32 = q13*q21-q23*q11; | |
v0 = q12*e+q22*f+q32*g; | |
v1 = q12*f+q22*h+q32*i; | |
v2 = q12*g+q22*i+q32*j; | |
subd[1] = q11*v0+q21*v1+q31*v2; | |
diag[2] = q12*v0+q22*v1+q32*v2; | |
subd[2] = q13*v0+q23*v1+q33*v2; | |
v0 = q13*e+q23*f+q33*g; | |
v1 = q13*f+q23*h+q33*i; | |
v2 = q13*g+q23*i+q33*j; | |
diag[3] = q13*v0+q23*v1+q33*v2; | |
} | |
else { # S*Q1 parallel to Q1, choose any valid Q2 and Q3 | |
subd[1] = 0; | |
len = q21*q21+q31*q31; | |
if ( len > 0 ) { | |
tmp = q11-1; | |
q12 = -q21; | |
q22 = 1+tmp*q21*q21/len; | |
q32 = tmp*q21*q31/len; | |
q13 = -q31; | |
q23 = q32; | |
q33 = 1+tmp*q31*q31/len; | |
v0 = q12*e+q22*f+q32*g; | |
v1 = q12*f+q22*h+q32*i; | |
v2 = q12*g+q22*i+q32*j; | |
diag[2] = q12*v0+q22*v1+q32*v2; | |
subd[2] = q13*v0+q23*v1+q33*v2; | |
v0 = q13*e+q23*f+q33*g; | |
v1 = q13*f+q23*h+q33*i; | |
v2 = q13*g+q23*i+q33*j; | |
diag[3] = q13*v0+q23*v1+q33*v2; | |
} | |
else { # Q1 = (+-1,0,0) | |
q12 = 0; q22 = 1; q32 = 0; | |
q13 = 0; q23 = 0; q33 = 1; | |
diag[2] = h; | |
diag[3] = j; | |
subd[2] = i; | |
} | |
} | |
} | |
else { | |
diag[1] = e; | |
subd[0] = b; | |
if ( g != 0 ) { | |
ell = sqrt(f*f+g*g); | |
f /= ell; | |
g /= ell; | |
Q = 2*f*i+g*(j-h); | |
diag[2] = h+g*Q; | |
diag[3] = j-g*Q; | |
subd[1] = ell; | |
subd[2] = i-f*Q; | |
} | |
else { | |
diag[2] = h; | |
diag[3] = j; | |
subd[1] = f; | |
subd[2] = i; | |
} | |
} | |
mat[0]=diag[0] | |
mat[1]=diag[1] | |
mat[2]=diag[2] | |
mat[3]=diag[3] | |
mat[4]=subd[0] | |
mat[5]=subd[1] | |
mat[6]=subd[2] | |
} | |
function getShift(d1,d2,dO){ | |
_getShift_h=(d1-d2)/(dO+dO) | |
_getShift_H =sqrt(_getShift_h*_getShift_h+1.0) | |
return (_getShift_h<0.0)? d2-_getShift_H/(_getShift_h-_getShift_H):d2-_getShift_H/(_getShift_h+_getShift_H) | |
} | |
function QLdecomposition(x){ | |
#Maximal iterations | |
_QLdecomposition_maxiter=10 | |
#Significance | |
_QLdecomposition_prec=0.000000001 | |
for(i=0;i<4;i++){ #iterate rows | |
for (iter=0; iter< _QLdecomposition_maxiter;iter++) { #iteration loop | |
for (k = i; k < 3; k++) { ## test if still significant | |
e = _QLdecomposition_prec*(abs(x[k])+abs(x[k+1])) | |
if ( abs(x[k+4]) <= e) break; | |
} | |
if ( k == i ) break #not significant | |
shift = getShift(x[i+1],x[i],x[i+4]) #calc shift | |
q = x[k] - shift; | |
shift=0.0 | |
c=1.0 | |
s=1.0 | |
for (j=k-1;j>=i;j--) { #apply shift | |
h = c * x[j+4]; | |
g = s * x[j+4]; | |
if (abs(g)>= abs(q)){ | |
c = q / g; | |
dum = sqrt( c * c + 1.0 ); | |
x[j+5] = g * dum; | |
s = 1.0 / dum; | |
c *= s; | |
} | |
else { | |
s = g / q; | |
dum = sqrt( s * s + 1.0 ); | |
x[j+5] = q * dum; | |
c = 1.0 / dum; | |
s *= c; | |
} | |
q = x[j + 1] - shift; | |
dum = s * (x[j] - q) + 2.0 * c * h; | |
shift = s * dum; | |
x[j+1] = q + shift; | |
q = c * dum - h; | |
#Transform_Matrix(U, s, c, j, n); | |
}# end apply shift | |
x[i] -= shift; | |
x[i+4] = q; | |
x[k+4] = 0.0; | |
} #end iteration loop | |
} | |
if (iter>=_QLdecomposition_maxiter) return -1; | |
return 0; | |
} | |
function rmsd(X,Y,from,till){ | |
#build quaternion covariance matrix A | |
n=till-from | |
for(i=from;i<till;i++){ | |
xx += X[i,0]*Y[i,0]; | |
xy += X[i,0]*Y[i,1]; | |
xz += X[i,0]*Y[i,2]; | |
yx += X[i,1]*Y[i,0]; | |
yy += X[i,1]*Y[i,1]; | |
yz += X[i,1]*Y[i,2]; | |
zx += X[i,2]*Y[i,0]; | |
zy += X[i,2]*Y[i,1]; | |
zz += X[i,2]*Y[i,2]; | |
GA += X[i,0]*X[i,0]+X[i,1]*X[i,1]+X[i,2]*X[i,2]; | |
GB += Y[i,0]*Y[i,0]+Y[i,1]*Y[i,1]+Y[i,2]*Y[i,2]; | |
} | |
A[0] = xx+yy+zz; | |
A[4] = xx-yy-zz; | |
A[7] = -xx+yy-zz; | |
A[9] = -xx-yy+zz; | |
A[1] = yz-zy; | |
A[2] = zx-xz; | |
A[3] = xy-yx; | |
A[5] = xy+yx; | |
A[6] = xz+zx; | |
A[8] = yz+zy; | |
# eigenvalues | |
Tridiagonal4(A) | |
if(0!=QLdecomposition(A)) print "QL Decomposition failed" | |
#finding max | |
A[0]=(A[0]>A[1])?A[0]:A[1] | |
A[2]=(A[2]>A[3])?A[2]:A[3] | |
A[0]=(A[0]>A[2])?A[0]:A[2] | |
A[0]=((GA+GB-2.0*A[0])/n) | |
return (A[0]<0.0)?"ERROR negativ sqrt":sqrt(A[0]); | |
} | |
function readPDB_CA(filename,mat){ | |
_readPDB_CA_n=0; | |
while(getline < filename){ | |
if($1=="ATOM" && $3=="CA" && substr($0,22,1)!=" "){ | |
mat[_readPDB_CA_n,0]=$7;mat[_readPDB_CA_n,1]=$8;mat[_readPDB_CA_n,2]=$9;_readPDB_CA_n++ | |
} | |
if($1=="ATOM" && $3=="CA" && substr($0,22,1)==" "){ | |
mat[_readPDB_CA_n,0]=$6;mat[_readPDB_CA_n,1]=$7;mat[_readPDB_CA_n,2]=$8;_readPDB_CA_n++ | |
} | |
if($1=="TER" || $1=="END")break | |
} | |
close(filename) | |
_readPDB_CA_n-- | |
return _readPDB_CA_n | |
} | |
function center(mat,from,till){ | |
_center_n=till-from | |
_center_x=_center_y=_center_z=0.0 | |
for(_center_i=from;_center_i<till;_center_i++){ | |
_center_x+=mat[_center_i,0] | |
_center_y+=mat[_center_i,1] | |
_center_z+=mat[_center_i,2] | |
} | |
_center_x/=_center_n | |
_center_y/=_center_n | |
_center_z/=_center_n | |
for(_center_i=from;_center_i<till;_center_i++){ | |
mat[_center_i,0]-=_center_x | |
mat[_center_i,1]-=_center_y | |
mat[_center_i,2]-=_center_z | |
} | |
} | |
BEGIN{ | |
if(ARGV[1]==NULL || ARGV[2]==NULL ) {print "Missing Files: pdbalign.awk [PDB File 1] [PDB File 2]";exit(1) } | |
#init Listes | |
seqA[0,0]=0.0 | |
seqB[0,0]=0.0 | |
#read PDB | |
nA = readPDB_CA(ARGV[1],seqA) | |
nB = readPDB_CA(ARGV[2],seqB) | |
#center | |
center(seqA,0,nA) | |
center(seqB,0,nB) | |
#RMSD | |
print "RMSD:",rmsd(seqA,seqB,0,(nA<nB)?nA:nB) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment