Skip to content

Instantly share code, notes, and snippets.

@3ki5tj
Created December 5, 2014 19:50
Show Gist options
  • Save 3ki5tj/02b4916ee1cd8410c91c to your computer and use it in GitHub Desktop.
Save 3ki5tj/02b4916ee1cd8410c91c to your computer and use it in GitHub Desktop.
Wrap coordinates to make atoms in a cluster stay together
#!/usr/bin/env python
''' wrapping coordinates '''
import os, sys
from math import *
fnin = "test.pdb" # input file
fnout = None
box = [0,0,0]
cutoff = 12.0 # angstroms
verbose = 0
allatom = False
quicknb = 0
fnclus = None
def usage():
''' print usage and die '''
print '''pdbwrap.py [OPTIONS] input.pdb [output.pdb]
Smartly wrap coordinates of a PDB file
Options:
-s, --size=: followed by the box size in angstroms (for a cubic box)
-X, --bx=: followed by the X dimension in angstroms
-Y, --by=: followed by the Y dimension in angstroms
-Z, --bz=: followed by the Z dimension in angstroms
-c: followed by the cutoff length for neighbors
-a, --all: include all atoms, not just alpha-carbon (CA)
-q: quick search (for all atoms): search only local neighbors for non-CA atoms
-Q: quick search (for heavy atoms): search only local neighbors for heavy atoms
--fclus=: file name for cluster information
-v: be verbose
-vv: be more verbose
-h, --help: print this message
Examples:
pdbwrap.py input.pdb
pdbwrap.py -X143.748 -Y143.758 -Z143.759 -c10 -q 2000.pdb
'''
exit(1)
def doargs():
''' handle command line options '''
import getopt, glob
try:
opts, args = getopt.gnu_getopt(sys.argv[1:], "s:X:Y:Z:c:aqQv:",
["bx=", "by=", "bz=", "size=", "all", "fclus=",
"verbose=", "help"])
except getopt.GetoptError, err:
print str(err)
usage()
global fnin, fnout, box, cutoff, allatom, quicknb, fnclus
for o, a in opts:
if o in ("-s", "--size"):
box = [float(a),]*3
elif o in ("-X", "--bx",):
box[0] = float(a)
elif o in ("-Y", "--by",):
box[1] = float(a)
elif o in ("-Z", "--bz",):
box[2] = float(a)
elif o in ("-c",):
cutoff = float(a)
elif o in ("-a", "--all"):
allatom = True
elif o in ("-q",):
quicknb = "CA"
allatom = True
elif o in ("-Q",):
quicknb = "HEAVY"
allatom = True
elif o in ("--fclus",):
fnclus = a
elif o in ("-v",):
verbose += 1
elif o in ("-h", "--help",):
usage()
ls = args
if len(ls) > 0:
fnin = ls[0]
if len(ls) > 1:
fnout = ls[1]
else:
ls = glob.glob("*.pdb")
def add(a, b):
return [a[0] + b[0], a[1] + b[1], a[2] + b[2]]
def diff(a, b):
return [a[0] - b[0], a[1] - b[1], a[2] - b[2]]
def norm(a):
return sqrt(a[0]*a[0] + a[1]*a[1] + a[2]*a[2])
def dist(a, b):
return norm(diff(a, b))
def pbc(a, b, box):
dx = a - b
while dx > box/2: dx -= box
while dx < -box/2: dx += box
return dx
def pbcdist(a, b, dr, box):
for i in range(3):
dr[i] = pbc(a[i], b[i], box[i])
return norm(dr)
class PDBAtom:
''' a record for an ATOM line in a PDB file '''
def __init__(self, s):
self.raw = s
x = float( s[30:38] )
y = float( s[38:46] )
z = float( s[46:54] )
self.r = [x, y, z]
self.nb = [] # neighbor list
self.atom = s[12:17].strip()
self.res = s[17:20].strip()
self.elem = s[77]
self.chain = int( s[72:77].strip()[:-1] )
def write(self):
s = self.raw
return s[:30] + "%8.3f%8.3f%8.3f" % (self.r[0], self.r[1], self.r[2]) + s[54:]
def findbox(atomls, iscubic = False):
''' guess the box dimension '''
rmin, rmax, box = [0,0,0], [0,0,0], [0,0,0]
for d in range(3):
rmin[d] = min(a.r[d] for a in atomls)
rmax[d] = max(a.r[d] for a in atomls)
box[d] = rmax[d] - rmin[d]
# if the box is cubic
# make sure the three sides are the same
if iscubic:
span = max(box)
box = [span,]*3
rmax = [rmin[0] + span, rmin[1] + span, rmin[2] + span]
return rmin, rmax, box
def makenblist(atomls, box, cutoff):
''' build a neighbor list '''
n = len(atomls)
print "building the neigbhor list of %d atoms..." % n
dr = [0]*3
for i in range(n):
j0 = 0
j1 = i
quit = False
# use local search for nonessential atoms
if ( ( quicknb == "CA" and atomls[i].atom != "CA" )
or ( quicknb == "HEAVY" and atomls[i].elem == "H" ) ):
j0 = max(i - 50, 0)
j1 = min(i + 50, n)
quit = True
ai = atomls[i]
for j in range(j0, j1):
aj = atomls[j]
if ( (quicknb == "CA" and aj.atom != "CA")
or (quicknb == "HEAVY" and aj.elem == "H") ): continue
dis = pbcdist(ai.r, aj.r, dr, box)
if dis < cutoff:
ai.nb += [j,]
aj.nb += [i,]
if quit: break
print "building nblist %5d/%5d quick: %s \r" % (i+1, n, quicknb),
print "the neigbhor list is completed. "
def dock(atomls, j, i, box):
''' dock j on to i '''
a = atomls[j].r
b = atomls[i].r
for d in range(3):
dr = pbc(a[d], b[d], box[d])
a[d] = b[d] + dr
def wrap(atomls):
''' find clusters and wrap their coordinates '''
global box
# 1. compute the box
rmin, rmax, mybox = findbox(atomls)
if box[0] == 0 or box[1] == 0 or box[2] == 0:
box = mybox # override the input box
print "box: %s, rmin %s, rmax %s" % (box, rmin, rmax)
# 2. build a neighbor list of the graph
makenblist(atomls, box, cutoff)
# 3. find connected components
n = len(atomls)
visited = [False]*n
que = [0]
visited[0] = True
ic = 0
clusls = [] # list of clusters
while 1:
ls = [] # atom list of this cluster
while len(que):
i = que.pop(0)
ls += [i]
# add neighbors of i in the queue
for j in atomls[i].nb:
if visited[j]: continue
que += [j,]
visited[j] = True
rj0 = atomls[j].r[:]
dock(atomls, j, i, box) # dock j on to i
if verbose >= 2:
print "dock %d on to %d, %s --> %s" % (j, i, rj0, atomls[j].r)
ic += 1
clusls += [ls,]
if verbose:
print "Cluter %d has %d atoms" % (ic, len(ls))
# add the first unique atom into the queue
for i in range(n):
if not visited[i]:
que = [i,]
visited[i] = True
break
else:
break
# dump cluster information
# and shift the clusters
clusls = sorted(clusls, key=lambda x: -len(x)) # sort clusters by size
nclus = len(clusls)
print "%d clusters" % (nclus)
strclus = "# %d\n" % nclus
for i in range(nclus):
ls = clusls[i]
chains = set(atomls[j].chain for j in ls)
arr2str = lambda arr: " ".join([str(x) for x in arr])
strclus += "%3d %4d %6d | %s\n" % (
i+1, len(chains), len(ls), arr2str(sorted(list(chains))))
# uncomment this line (and comment the above line)
# if you want atom information as well
#strclus += "%3d %4d %6d | %s | %s\n" % (
# i+1, len(chains), len(ls), arr2str(sorted(list(chains))), arr2str(sorted(ls)))
# compute the center of mass
# and shift the cluster as a whole
# such that the center of mass lies in the box
com = [0,0,0]
shift = [0,0,0]
for d in range(3):
com[d] = sum(atomls[j].r[d] for j in ls)/len(ls)
# compute the shift
shift[d] = 0
while com[d] + shift[d] > box[d]: shift[d] -= box[d]
while com[d] + shift[d] < 0: shift[d] += box[d]
# apply the shift to all atoms in this cluster
for j in ls: atomls[j].r[d] += shift[d]
v2s = lambda v: "(" + ", ".join("%8.3f" % x for x in v) + ")" # print a vector nicely
print " cluster %3d/%3d, %6d atoms, %4d chains, com %s -> %s" % (
i+1, nclus, len(ls), len(chains), v2s(com), v2s(add(com, shift)) )
if fnclus:
open(fnclus, "w").write(strclus)
def pdbwrap(fnin, fnout = None):
''' wrap the coordinates in fnin '''
# 1. read in coordinates
atomls = []
for s in open(fnin).readlines():
if s.strip() == "": continue # skip empty lines
a = PDBAtom(s)
if allatom or a.atom == "CA":
atomls += [ a ]
# 2. wrap coordinates
wrap(atomls)
# 3. output
out = [a.write() for a in atomls]
if not fnout: fnout = "out." + fnin
open(fnout, "w").writelines(out)
print "%s --> %s" % (fnin, fnout)
if __name__ == "__main__":
doargs()
pdbwrap(fnin, fnout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment