Skip to content

Instantly share code, notes, and snippets.

@diffracteD
Last active November 19, 2019 00:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save diffracteD/38f766c6b06fe919f2ab1aa212221678 to your computer and use it in GitHub Desktop.
Save diffracteD/38f766c6b06fe919f2ab1aa212221678 to your computer and use it in GitHub Desktop.
Hydrogen bond extraction from .pdb files after H addition from -reduce program. Takes .atom file extension. Performs quantum mechanical analysis of each bond. Please contact in case of complicated error messages.
#! /usr/bin/env python
'''
################################################################################################################
PHASE 3: Generating all H-bond info.
PROGRAM NAME: HB_radar_walk_m062x.v.3.1 (27.05.2015)
_______________________________________________________________________________________________________________
1. calculate the DHA angle...
2. calculate HAAA angle...
3. calculate DAAj angle...
4. calculate DA and HA distances. . .
5. all distance & angle outfile generated as .hbinfo
6. dft energy + .hbinfo is outfiled as .HBradar
##########################################################################
New Func v.3.1: Multiple Exception Handler...
'''
from __future__ import with_statement #in some python version "with" statement does not work, so this line helps to overcome,
# but must be written at VERY FIRST of CODE
def show_me_hbs(path):
from time import sleep
import sys
import time
import timeit
start = timeit.default_timer()
import math
import itertools
import os
from os.path import basename
import glob
#EMPLOYING THE WALKING MODULE...
for root, dirs, files in os.walk(path):
list_of_file = glob.iglob(os.path.join(root,'*.atom'))
print " ENTERING DIRECTORY: " + root
for atom_file_name in list_of_file:
#count += 1
if os.stat(atom_file_name)[6] != 0:
filename = basename(atom_file_name)
#Now opening the .atom file as input...
inp = open(atom_file_name,'r').read().strip().split('\n')
print "> PROCESSING FILE: " + filename #shows current file under attack...
loop_clock_start = timeit.default_timer() #Strarting loop timer $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
#-----------------------------------------------------------------------------------
#Opening oufiles to be written in later as data comes...[special walking_module format]
#Opening DA_cal file while on walk...
da_cal = filename.replace('atom','DA_cal')
da_cal_out = open(os.path.join(root,da_cal),'w')
#Opening DA file while on walk...
da = filename.replace('atom','DA')
da_out = open(os.path.join(root, da),'w')
#--------------------------------------------------------------------------
#inp = open("/home/saumen/abhisek/dft_cal/xpt_zone_3_fine-opt/4g78FH.atom",'r').read().strip().split('\n')
#out = open("/home/saumen/abhisek/dft_cal/xpt_zone_3_fine-opt/4g78FH.DA_cal",'w')
#out2 = open("/home/saumen/abhisek/dft_cal/xpt_zone_3_fine-opt/4g78FH.DA",'w')
#hb_out = open("/home/saumen/abhisek/dft_cal/xpt_zone_3_fine-opt/4g78FH.hbinfo",'w') #for writing all finally calculated distance & angle values...
donor = []
H = []
acceptor = []
adjacent = []
donor_list = []
H_atom_list = []
for line in map(str.split,inp):
atomName = line[2]
residueName = line[3]
chainName = line[4]
residueNumber = line[5]
xAxis = line[6]
yAxis = line[7]
zAxis = line[8]
#making-out donor n hydrogen atoms...
if atomName =='N' and residueName[-3] != 'PRO':
a = residueNumber
x = line
elif atomName == 'H':
b = residueNumber
y = line
if a == b: #making sure we are taking pair of same residue i.e. N3-H3 not N3-H4
donor.append(x)
H.append(y)
#making-out acceptor n adjacent atoms...
#if atomName == 'O':
# acceptor.append(line)
#elif atomName == 'C':
# adjacent.append(line)
#appending donor-adjacent atoms...
for line in map(str.split,inp):
if line[2]=='O':
acceptor_search_str_a = 'C'+line[3]+line[4]+line[5]
for item in map(str.split,inp):
if item[2]=='C':
adjacent_search_str_b = item[2]+item[3]+item[4]+item[5]
if acceptor_search_str_a == adjacent_search_str_b:
#print line, item
acceptor.append(line)
adjacent.append(item)
'''
==================================================================================================================================================
MODULE NAME: RAT_h[rESONATING aTOM tRACKER for HISTIDINE] v.15.01.2014....
.........completed on 15th Jan 2014.................
PROGRAM TO CATEGORIZE DONOR AND ACCEPTOR ATOM INFORMATION FROM RESONATING IMMIDAZOLE RING OF HISTIDINE.
NB: Histidine is the only amino acid which can act as both donor and acceptor depending upon the resonating double bond character.
i.e. ND1 & NE2 nitrogens while have correspondong H-atom then they act as donor, but if no H-atom is attached then they act as acceptor atom.
==================================================================================================================================================
'''
#initiating array for accumulation of GRAND DATA cominf after each module. . .
side_donor = []
side_H = []
side_acceptor = []
side_adjacent = []
result = {"N" : [], "H" : [], "ace" : []} #initiating listed dictionary.
h_his = []
d_h_his = [] #d_ lists contains duplicate list items, created to avoid calculation easier.
donor_his = []
d_donor_his = []
acceptor_his = []
d_acceptor_his = []
his_inp_file = []
adjacent_his = [] #adjacent atoms of acceptor for histidine.
with open(atom_file_name,'r') as f:
categories = {} #initiating a dictionary.
for line in f:
splitline = line.strip().split()
#considering only HIS residue and some specified atom of interest.
if splitline[3] == 'HIS' or splitline[3] == 'AHIS':
if splitline[2] == "ND1" or splitline[2] == "NE2" or splitline[2] == "HE2" or splitline[2] == "HD1":
new_line = splitline[2]+splitline[5] #joining selected columns for future use as a key
identifier = "".join(new_line)[1:] #making identifier...
if identifier not in categories:
categories[identifier] = 1 #starting the counter as 1...
else:
categories[identifier] = 2 #if the identifier is already there, the value becomes 2. i.e. if pair is present it becomes 2.
#to go for second passs to create the lists.
f.seek(0)
for line in f:
splitline = line.strip().split()
if splitline[3] == 'HIS' or splitline[3] == 'AHIS':
if splitline[2] == "ND1" or splitline[2] == "NE2" or splitline[2] == "HE2" or splitline[2] == "HD1" :
#print splitline[-1]
new_line = splitline[2]+splitline[5]
identifier = "".join(new_line)[1:]
if categories[identifier] == 1: #means the stem just occured once.
if splitline[-1] == 'N':
result["ace"].append(line.strip())
else:
#splitline[-1] indicates H or N and add it to the lists as result[H] or result[N].
result[splitline[-1]].append(line.strip())
#for type in result:
# print (type + ' list:')+'\n'+( '\n'.join(result[type]))+'\n'
#-----------------------------------------------------------------------------------------
#actual RAT_h program ends here.. what follows represent the procedings
#for H-bond calculations while "HIS" as side chain molecule......
#*******************************************************************<<<<<<<<
#-----------------------------------------------------------------------------------------
for type in result:
if type == 'H':
for line in (result[type]):
h_his.append(line.split())
elif type == 'N':
for line in (result[type]):
donor_his.append(line.split())
elif type == 'ace':
for line in (result[type]):
acceptor_his.append(line.split())
from itertools import repeat
#doubling acceptor atom list entry now...
#acceptor_new = [i for item in acceptor_his for i in repeat(item,2)]
#Gathering adjacent atoms now... !!
inp = open(atom_file_name,'r').read().strip().split('\n')
for line in map(str.split,inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AHIS' instead of 'HIS'...
if residueName == 'HIS':
his_inp_file.append(line)
#---------------------------------------------------------------------
#creating a search string...
#---------------------------------------------------------------------
for item in his_inp_file:
if item[2] == 'ND1':
search_string_a = 'CG'+item[3]+item[4]+item[5] #combining all columns to make searching easier in the in_file.
search_string_b = 'CE1'+item[3]+item[4]+item[5]
#print search_string_a
for case in his_inp_file:
target_string_x = case[2]+case[3]+case[4]+case[5]
#print target_string
if search_string_a == target_string_x:
adjacent_his.append(case)
d_acceptor_his.append(item)
elif search_string_b == target_string_x:
adjacent_his.append(case)
d_acceptor_his.append(item)
elif item[2] == 'NE2':
search_string_c = 'CD2'+item[3]+item[4]+item[5]
search_string_d = 'CE1'+item[3]+item[4]+item[5]
for case in his_inp_file:
target_string_y = case[2]+case[3]+case[4]+case[5]
#print target_string
if search_string_c == target_string_y:
adjacent_his.append(case)
d_acceptor_his.append(item)
elif search_string_d == target_string_y:
adjacent_his.append(case)
d_acceptor_his.append(item)
#for don, hy, ace, adj in zip(d_donor_his, d_h_his, d_acceptor_his, adjacent_his):
# print don, hy, ace, adj
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_his)
side_H.extend(h_his)
side_acceptor.extend(d_acceptor_his)
side_adjacent.extend(adjacent_his)
#------------------------------------------------------------------------------------
#including LYSINE residue...[DONOR]
# it has 1 NZ and 3 HZ atoms. So gonna triple NZ to get zippable array structure...
#------------------------------------------------------------------------------------
donor_lysine = []
donor_lysine_tripled = []
H_lysine = []
lysine_inp_file = []
inp = open(atom_file_name,'r').read().strip().split('\n')
for line in map(str.split,inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AHIS' instead of 'HIS'...
if residueName == 'LYS':
lysine_inp_file.append(line)
for line in lysine_inp_file:
if line[2] == 'NZ' :
donor_lysine.append(line)
elif line [2] == 'HZ1' or line[2] == 'HZ2' or line[2] == 'HZ3':
H_lysine.append(line)
#tripling the donor_lysine entry to get hold via *zip...
donor_lysine_new = [i for item in donor_lysine for i in repeat(item,3)]
for items in donor_lysine_new:
donor_lysine_tripled.append(items)
#for don, h in zip(donor_lysine_tripled, H_lysine):
# print don,h
#categorization for LYSINE is completed. . .>>>>>put it in a list for calculations. . .
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_lysine_tripled)
side_H.extend(H_lysine)
#---------------------------------------------------------------------------------------
#including ARGININE residue . . .[DONOR]
#it has 1 NE & 1 HE atom and NH1 with HH11 & HH12 and NH2 with HH21 & HH22. . .
#So, lets put NE & HE in a list, and double NH1,NH2 to match with H-atom counterparts...
#---------------------------------------------------------------------------------------
donor_arginine = []
donor_arginine_doubled = []
H_arginine = []
arginine_inp_file = []
for line in map(str.split,inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AARG' instead of 'ARG'...
if residueName == 'ARG':
arginine_inp_file.append(line)
for line in arginine_inp_file:
residueNumber = line[5] #has to redefine the residue number as reopening...
if line[2] == 'NH1' or line[2] == 'NH2':
p = residueNumber
#print line, p
arg_x = line
donor_arginine.append(arg_x)
elif line[2] == 'HH11' or line[2] == 'HH12' or line[2] == 'HH21' or line[2] == 'HH22' or line[2] == 'HH1' or line[2] =='HH2':
q = residueNumber
arg_y = line
H_arginine.append(arg_y)
#doubling donor_arginine so far added...
donor_arginine_new = [i for item in donor_arginine for i in repeat(item,2)]
for items in donor_arginine_new:
donor_arginine_doubled.append(items)
#now including NE and HE atoms to corresponding lists. . .
for line in arginine_inp_file:
if line[2] == 'NE':
donor_arginine_doubled.append(line)
elif line[2] == 'HE':
H_arginine.append(line)
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_arginine_doubled)
side_H.extend(H_arginine)
#for don, h in zip(side_donor, side_H): print don, h
#---------------------------------------------------------------------------------------------
#including ASPARTATE residue . . .[ACCEPTOR]
#it has 1OD1, 1OD2 and 1CG . . .
#So, putting OD1 & OD2 in a acceptor and CG in adjacent follwed by doubling adjacent list. . .
#----------------------------------------------------------------------------------------------
acceptor_asp = []
adjacent_asp = []
adjacent_asp_doubled = []
asp_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AASP' instead of 'ASP'...
if residueName == 'ASP':
asp_inp_file.append(line)
for line in asp_inp_file:
if line[2] == 'OD1':
asp_search_string_a = 'CG'+line[3]+line[4]+line[5]
for item in asp_inp_file:
if item[2] == 'CG':
asp_target_string = item[2]+item[3]+item[4]+item[5]
if asp_search_string_a == asp_target_string:
adjacent_asp.append(item)
acceptor_asp.append(line)
elif line[2] == 'OD2':
asp_search_string_b = 'CG'+line[3]+line[4]+line[5]
for item in asp_inp_file:
if item[2] == 'CG':
asp_target_string = item[2]+item[3]+item[4]+item[5]
if asp_search_string_b == asp_target_string:
adjacent_asp.append(item)
acceptor_asp.append(line)
#resNum = line[5]
#if line[2] == 'OD1' or line[2] == 'OD2':
# asp_x = line[5] #residue number
# asp_ace_line = line
# acceptor_asp.append(asp_ace_line)
#elif line[2] == 'CG':
# asp_y = line[5] #residue number
# asp_adj_line = line
# adjacent_asp.append(asp_adj_line)
#doubling adjacent CG atoms from adjacent_asp list. . .
#adjacent_asp_new = [i for item in adjacent_asp for i in repeat(item,2)]
#for items in adjacent_asp_new:
#adjacent_asp_doubled.append(items)
#for ace, adj in zip(acceptor_asp, adjacent_asp_doubled):
# print ace, adj
#categorization of ASPARTATE completed . . . >>[NOTE]>>>put it in a list for calculations. . .
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_acceptor.extend(acceptor_asp)
side_adjacent.extend(adjacent_asp)
#------------------------------------------------------------------------------------------------
#including GLUTAMATE residue. . .[ACCEPTOR]
#it has 1OE1, 1OE2 and 1 CD. . .
#So, putting OE1 & OE2 in a acceptor and CD in adjacent followed by doubling adjacent list. . .
#-------------------------------------------------------------------------------------------------
acceptor_glu = []
adjacent_glu= []
adjacent_glu_doubled = []
glu_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AGLU' instead of 'GLU...
if residueName == 'GLU':
glu_inp_file.append(line)
for line in glu_inp_file:
#print line
if line[2] == 'OE1':
glu_search_string_a = 'CD'+line[3]+line[4]+line[5]
for item in glu_inp_file:
if item[2] == 'CD':
glu_target_string_b = item[2]+item[3]+item[4]+item[5]
if glu_search_string_a == glu_target_string_b:
adjacent_glu.append(item)
acceptor_glu.append(line)
elif line[2] == 'OE2':
glu_search_string_c = 'CD'+line[3]+line[4]+line[5]
for item in glu_inp_file:
if item[2] == 'CD':
glu_target_string_d = item[2]+item[3]+item[4]+item[5]
if glu_search_string_c == glu_target_string_d:
adjacent_glu.append(item)
acceptor_glu.append(line)
#for ace, adj in zip(acceptor_glu, adjacent_glu_doubled):
# print ace, adj
#categorization of GLUTAMATE completed. . . >>[NOTE]>>>put it in a list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_acceptor.extend(acceptor_glu)
side_adjacent.extend(adjacent_glu)
#print acceptor_glu,adjacent_glu
#--------------------------------------------------------------------------------------------------------
#including METHIONINE residue. . .[ACCEPTOR]
#it has 1SD(as an acceptor) and adjacent CG & CE. . .
#So, putting CE & CG in adjacent list and SD in acceptor followed by doubling the acceptor list. . .
#---------------------------------------------------------------------------------------------------------
acceptor_met = []
#acceptor_met_doubled = []
adjacent_met = []
met_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AMET instead of 'MET'...
if residueName == 'MET':
met_inp_file.append(line)
for line in met_inp_file:
#a new type of loop is used to efficiently treat 2 ADJACENT atoms per 1 ACCEPTOR atom...
if line[2] == 'SD':
search_string_a = 'CG'+line[3]+line[4]+line[5]
search_string_b = 'CE'+line[3]+line[4]+line[5]
for case in met_inp_file:
target_string_x = case[2]+case[3]+case[4]+case[5]
if search_string_a == target_string_x:
acceptor_met.append(line)
adjacent_met.append(case)
for case in met_inp_file:
target_string_y == case[2]+case[3]+case[4]+case[5]
if search_string_b == target_string_y:
acceptor_met.append(line)
adjacent_met.append(case)
#for ace, adj in zip(acceptor_met_doubled, adjacent_met):
# print ace, adj
#categorization of METHIIONINE completed. . .>>[NOTE]>>>put it in a list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_acceptor.extend(acceptor_met)
side_adjacent.extend(adjacent_met)
#for ace,adj in zip(side_acceptor, side_adjacent): print ace, adj
#------------------------------------------------------------------------------------------------------------
#including ASPARAGINE(ASN) residue. . .[ACCEPTOR & DONOR]
#it's OD1 act as an Acceptor with adjacent CG atom.. and it's ND2 act as donor with HD21 & HD22 H-atoms. . .
#So, storing OD1 & CG in required lists and doubling ND2 to get zipping enabled with HD21 & HD22. . .
#-------------------------------------------------------------------------------------------------------------
acceptor_asn = []
adjacent_asn = []
donor_asn = []
donor_asn_doubled = []
H_asn = []
asn_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AASN' instead of 'ASN'...
if residueName == 'ASN':
asn_inp_file.append(line)
for line in asn_inp_file:
if line[2] == 'OD1':
#print line
asn_search_string_a = 'CG'+line[3]+line[4]+line[5]
for item in asn_inp_file:
if item[2] == 'CG':
#print item
asn_target_string = item[2]+item[3]+item[4]+item[5]
if asn_search_string_a == asn_target_string:
adjacent_asn.append(item)
acceptor_asn.append(line)
#print asn_ace, item
#if line[2] == 'OD1':
# acceptor_asn.append(line)
#elif line[2] == 'CG':
# adjacent_asn.append(line)
elif line[2] == 'ND2':
donor_asn.append(line)
elif line[2] == 'HD21' or line[2] == 'HD22':
H_asn.append(line)
#doubling donor 'ND2' atoms from donor_asn list. . .
donor_asn_new = [i for item in donor_asn for i in repeat(item, 2)]
for items in donor_asn_new:
donor_asn_doubled.append(items)
#for ace, adj in zip(acceptor_asn, adjacent_asn):
# print ace, adj
#categorization of ASPARAGINE as Acceptor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#for don,h in zip(donor_asn_doubled, H_asn):
# print don,h
#categorization of ASPARAGINE as Donor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_asn_doubled)
side_H.extend(H_asn)
side_acceptor.extend(acceptor_asn)
side_adjacent.extend(adjacent_asn)
#for don, h in zip(side_donor, side_H): print don, h
#for ace, adj in zip(side_acceptor, side_adjacent): print ace, adj
#------------------------------------------------------------------------------------------------------------
#including GLUTAMINE(GLN) residue . . .[DONOR & ACCEPTOR]
#it's OD1 act as an Acceptor with adjacent CG atom.. and it's ND2 act as donor with HD21 & HD22 H-atoms. . .
#So, storing OD1 & CG in required lists and doubling ND2 to get zipping enabled with HD21 & HD22. . .
#------------------------------------------------------------------------------------------------------------
acceptor_gln = []
adjacent_gln = []
donor_gln = []
donor_gln_doubled = []
H_gln = []
gln_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] #just taking last 3 alphabets so to avoid 'AASN' instead of 'ASN'...
if residueName == 'GLN':
gln_inp_file.append(line)
for line in gln_inp_file:
if line[2] == 'OE1':
#gln_ace = line
gln_search_string_a = 'CD'+line[3]+line[4]+line[5]
for item in gln_inp_file:
if item[2] == 'CD':
gln_target_string = item[2]+item[3]+item[4]+item[5]
if gln_search_string_a == gln_target_string:
adjacent_gln.append(item)
acceptor_gln.append(line)
#***********
#if line[2] == 'OE1':
#acceptor_gln.append(line)
#elif line[2] == 'CD':
#adjacent_gln.append(line)
#**********
elif line[2] == 'NE2':
donor_gln.append(line)
elif line[2] == 'HE21' or line[2] == 'HE22' or line[2] == 'HE2':
H_gln.append(line)
#doubling donor 'NE2' atoms from donor_gln list. . .
donor_gln_new = [i for item in donor_gln for i in repeat(item, 2)]
for items in donor_gln_new:
donor_gln_doubled.append(items)
#for ace, adj in zip(acceptor_gln, adjacent_gln):
# print ace, adj
#categorization of ASPARAGINE as Acceptor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#for don,h in zip(donor_gln_doubled, H_gln):
# print don,h
#categorization of ASPARAGINE as Donor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_gln_doubled)
side_H.extend(H_gln)
side_acceptor.extend(acceptor_gln)
side_adjacent.extend(adjacent_gln)
#for don, h in zip(side_donor, side_H): print don, h
#for ace, adj in zip(side_acceptor, side_adjacent): print ace, adj
#----------------------------------------------------------------------------------------------
#including SERINE(SER) residue . . . [DONOR & ACCEPTOR]
#it's OG acts as both acceptor and donor atom. . .
#So, alloting OG as donor & HG as H-atom and OG as acceptor & CB as adjacent atom. . .
#-----------------------------------------------------------------------------------------------
acceptor_ser = []
adjacent_ser = []
donor_ser = []
H_ser = []
ser_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] ##just taking last 3 alphabets so to avoid 'ASER' instead of 'SER'...
if residueName == 'SER':
ser_inp_file.append(line)
for line in ser_inp_file:
if line[2] == 'OG':
donor_ser.append(line)
elif line[2] == 'HG':
H_ser.append(line)
#Sometimes adjacent atom goes missing, so creating such loops helps maintain the whole data string hole-free.
for line in ser_inp_file:
if line[2] == 'OG':
ser_ace = line
ser_search_str_a = 'CB'+line[3]+line[4]+line[5]
for item in ser_inp_file:
if item[2] == 'CB':
ser_search_str_b = item[2]+item[3]+item[4]+item[5]
if ser_search_str_a == ser_search_str_b:
adjacent_ser.append(item)
acceptor_ser.append(ser_ace)
#for ace, adj in zip(acceptor_ser, adjacent_ser):
# print ace, adj
#categorization of SERINE as Acceptor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#for don,h in zip(donor_ser, H_ser):
# print don,h
#categorization of SERINE as Donor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_ser)
side_H.extend(H_ser)
side_acceptor.extend(acceptor_ser)
side_adjacent.extend(adjacent_ser)
#for don, h in zip(side_donor, side_H): print don, h
#for ace, adj in zip(side_acceptor, side_adjacent): print ace, adj
#---------------------------------------------------------------------------------------------------
#including THREONINE(THR) residue. . . [DONOR & ACCEPTOR]
#it's OG1 atom acts as both donor and acceptor. . .
#So, alocating OG1 in donor & HG1 in H-atom list and OG1 in acceptor & CB in adjacent list. . .
#----------------------------------------------------------------------------------------------------
acceptor_thr = []
adjacent_thr = []
donor_thr = []
H_thr = []
thr_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] ##just taking last 3 alphabets so to avoid 'ATHR' instead of 'THR'...
if residueName == 'THR':
thr_inp_file.append(line)
for line in thr_inp_file:
if line[2] == 'OG1':
donor_thr.append(line)
elif line[2] == 'HG1':
H_thr.append(line)
#Sometimes adjacent atom goes missing, so creating such loops helps maintain the whole data string hole-free.
for line in thr_inp_file:
if line[2] == 'OG1':
thr_ace = line
thr_search_str_a = 'CB'+line[3]+line[4]+line[5]
for item in thr_inp_file:
if item[2] == 'CB':
thr_search_str_b = item[2]+item[3]+item[4]+item[5]
if thr_search_str_a == thr_search_str_b:
adjacent_thr.append(item)
acceptor_thr.append(thr_ace)
#for ace, adj in zip(acceptor_thr, adjacent_thr):
# print ace, adj
#categorization of THREONINE as Acceptor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#for don,h in zip(donor_thr, H_thr):
# print don,h
#categorization of THREONINE as Donor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_thr)
side_H.extend(H_thr)
side_acceptor.extend(acceptor_thr)
side_adjacent.extend(adjacent_thr)
#for don, h in zip(side_donor, side_H): print don, h
#for ace, adj in zip(side_acceptor, side_adjacent): print ace, adj
#---------------------------------------------------------------------------------------------------
#including TYROSINE(TYR) residue. . . [DONOR & ACCEPTOR]
#its OH atom acts as both donor and acceptor. . .
#So, allocating OH in donor, HH in H-atom and OH in acceptor, CZ in adjacent atom list. . .
#---------------------------------------------------------------------------------------------------
acceptor_tyr = []
adjacent_tyr = []
donor_tyr = []
H_tyr = []
tyr_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] ##just taking last 3 alphabets so to avoid 'ATYR' instead of 'TYR'...
if residueName == 'TYR':
tyr_inp_file.append(line)
for line in tyr_inp_file:
if line[2] == 'OH':
donor_tyr.append(line)
elif line[2] == 'HH':
H_tyr.append(line)
#Sometimes adjacent atom goes missing, so creating such loops helps maintain the whole data string hole-free.
for line in tyr_inp_file:
if line[2] == 'OH':
tyr_ace = line
tyr_search_str_a = 'CZ'+line[3]+line[4]+line[5]
for item in tyr_inp_file:
if item[2] == 'CZ':
tyr_search_str_b = item[2]+item[3]+item[4]+item[5]
if tyr_search_str_a == tyr_search_str_b:
adjacent_tyr.append(item)
acceptor_tyr.append(tyr_ace)
#for ace, adj in zip(acceptor_tyr, adjacent_tyr):
# print ace, adj
#categorization of TYROSINE as Acceptor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#for don,h in zip(donor_tyr, H_tyr):
# print don,h
#categorization of TYROSINE as Donor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
side_donor.extend(donor_tyr)
side_H.extend(H_tyr)
side_acceptor.extend(acceptor_tyr)
side_adjacent.extend(adjacent_tyr)
#for don, h in zip(side_donor, side_H): print don, h
#for ace, adj in zip(side_acceptor, side_adjacent): print ace, adj
#----------------------------------------------------------------------------------------------------
#including TRYPTOPHAN(TRP) residue . . . [DONOR]
#it's NE1 atom as donor and HE1 is corresponding H-atom. . .
#So, allocating NE1 in donor & HE1 in H-atom list. . .
#----------------------------------------------------------------------------------------------------
donor_trp = []
H_trp = []
trp_inp_file = []
for line in map(str.split, inp):
residueName = line[3][-3:] ##just taking last 3 alphabets so to avoid 'ATRP' instead of 'TRP'...
if residueName == 'TRP':
trp_inp_file.append(line)
for line in trp_inp_file:
if line[2] == 'NE1':
donor_trp.append(line)
elif line[2] == 'HE1':
H_trp.append(line)
#for don,h in zip(donor_trp, H_trp):
# print don,h
#categorization of TYPTOPHAN as Donor completed . . . >>[NOTE]>>> put it in subsequent list for calculations
#INCLUDING IN MOTHER LIST . . . > > > > > >
#print donor_trp, H_trp
#print "----" *20
side_donor.extend(donor_trp)
side_H.extend(H_trp)
#for don, h,ace,adj in zip(side_donor, side_H,side_acceptor, side_adjacent): print don, h, ace, adj
#--------------------------------------------------------------------------
#>>>> EXTENDING MAIN_CHAIN_COORDINATES WITH SIDE_CHAIN_COORDINATES... >>>>
donor.extend(side_donor)
H.extend(side_H)
acceptor.extend(side_acceptor)
adjacent.extend(side_adjacent)
#================================================================
#PREPARING FOR CALCULATION NOW. . .
#......... :) :) :) ..................... >>>>>
#================================================================
for don,h in zip(donor,H):
#print don, h
#print "---"*20
'''
#--------------------------------------------------------------------------
#Constructing a Progressbar to minor loop progress...
toolbar_width = len(donor)
sys.stdout.write("[%s]" % (" "*toolbar_width))
sys.stdout.flush()
sys.stdout.write("\b"*(toolbar_width+1)) #return to the startline after '['
#----------------------------------------------------------------------------
'''
#print don
don_xAxis = don[6]
don_yAxis = don[7]
don_zAxis = don[8]
h_xAxis = h[6]
h_yAxis = h[7]
h_zAxis = h[8]
da_cal_out.write('\n')
#dx = 'DONOR', ' '*20 , 'ACCEPTOR', ' '*15, 'D-A'
#header = ''.join(map(''.join,dx))
donorAtom = ('donor: ' + don[2]),don[3],don[4],don[5],don[6],don[7],don[8]
final_donorAtom = ' '.join(map(''.join,donorAtom))
hAtom = ('H-atom: ' + h[2]),h[3],h[4],h[5],h[6],h[7],h[8]
final_hAtom = ' '.join(map(''.join,hAtom))
#print header
#out.write(header)
#print final_donorAtom
for ace,adj in zip(acceptor, adjacent):
#print ace, adj
ace_xAxis = ace[6]
ace_yAxis = ace[7]
ace_zAxis = ace[8]
xy = ('acceptor: '+ace[2]),ace[3],ace[4],ace[5],ace[6],ace[7],ace[8]
DA = ' '.join(map(''.join,xy))
adj = ('adjacent : '+adj[2]),adj[3],adj[4],adj[5],adj[6],adj[7],adj[8]
adjFinal = ' '.join(map(''.join,adj))
#-----------------------------------------------------------------
#Generating file in a format to ease angle calculation(*.DA_cal)...
#-----------------------------------------------------------------
da_cal_out.write(final_donorAtom) #getting donor atom
da_cal_out.write(' ')
da_cal_out.write(final_hAtom) #getting H-atom
da_cal_out.write(' ')
da_cal_out.write(DA) #getting acceptor atom
da_cal_out.write(' ')
da_cal_out.write(adjFinal) #getting acceptor-adjacent atom
da_cal_out.write('\n')
#---------------------------------------------------------------
#now generating same file but in READABLE format(.DA)...
#---------------------------------------------------------------
da_out.write('\n')
da_out.write(final_donorAtom) #getting donor atom
da_out.write('\n')
da_out.write(final_hAtom) #getting H-atom
da_out.write('\n')
da_out.write(DA) #getting acceptor atom
da_out.write('\n')
da_out.write(adjFinal) #getting acceptor-adjacent atom
da_out.write('\n')
loop_clock_stop = timeit.default_timer() #Terminating loop timer $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
loop_time = str(loop_clock_stop - loop_clock_start)
da_cal_out.close()
da_out.close()
print " - Primary Scan Complete, .DA_cal and .DA outfile created for " + filename
print " -- .DA_cal & .DA file generated for all .atom files..."
print " -- Searching dir for .DA_cal files to generate .hbinfo file..."
#------------------------------------------------------------------------------
#Now as the calculation ready format .DA_cal has been created.
#We gonna search th directory for .DA-cal and process each onw by one...
#-------------------------------------------------------------------------------
#Now have to open .DA_cal files one by one to make further processing...
#inp2 = open("/home/saumen/abhisek/dft_cal/xpt_zone_3_fine-opt/4g78FH.DA_cal",'r').read().strip().split('\n')
#OPENING A NEW SEARCH STRING...
for root, dirs, files in os.walk(path):
f_count = 0 #initializing count, want to count total no of files...
list_of_DA_cal_files = glob.iglob(os.path.join(root,'*.DA_cal'))
for DA_cal_file in list_of_DA_cal_files:
#distance list for storage . . .
DA = []
HA = []
#angle list for storage...
DHA_list = [] #contain all calculated DHA angle values.
HAAj_list = [] #contain all calculated HAAj angle values.
DAAj_list = [] #contain all calculated DAAj angle values.
inp2_list = []
inp2_list_new = [] #list made after removing trailing spaces. . .
#Putting a file marker for logical-checkpoint...
DA_filename = basename(DA_cal_file)
inp2 = open(DA_cal_file,'r').read().strip().split('\n')
print " "
print " >> Now Processing file: " + DA_filename
#----------------------------------------------------
#Opening .hbinfo file for each opened .DA_cal file..
hbinfo = DA_filename.replace('DA_cal','hbinfo')
hbinfo_out = open(os.path.join(root,hbinfo),'w')
#Opening .Hbradar file for each open .DA-cal file...
hbradar = DA_filename.replace('DA_cal','HBradar')
hbradar_out = open(os.path.join(root,hbradar),'w')
#-----------------------------------------------------
#print "[debug-step] blank files created..."
for lines in map(str.split,inp2):
#print lines
inp2_list.append(lines)
inp2_list_new = filter(None, inp2_list) #removing extra new_line characters to avoid index error. . .
for lines in inp2_list_new:
donor_atom = lines[1]
donor_res_name = lines[2]
donor_chain = lines[3]
donor_res_no = lines[4]
donor_xAxis = float(lines[5])
donor_yAxis = float(lines[6])
donor_zAxis = float(lines[7])
hy_atom = lines[9]
hy_res_name = lines[10]
hy_chain = lines[11]
hy_res_no = lines[12]
hy_xAxis = float(lines[13])
hy_yAxis = float(lines[14])
hy_zAxis = float(lines[15])
ace_atom = lines[17]
ace_res_name = lines[18]
ace_chain = lines[19]
ace_res_no = lines[20]
ace_xAxis = float(lines[21])
ace_yAxis = float(lines[22])
ace_zAxis = float(lines[23])
adj_atom = lines[26]
adj_res_name = lines[27]
adj_chain = lines[28]
adj_res_no = lines[29]
adj_xAxis = float(lines[30])
adj_yAxis = float(lines[31])
adj_zAxis = float(lines[32])
#print donor_xAxis,hy_xAxis,ace_xAxis,adj_xAxis
if donor_atom == ace_atom and donor_res_name == ace_res_name and donor_chain == ace_chain and donor_res_no == ace_res_no and donor_xAxis == ace_xAxis:
break
else:
#print lines
#--------------------------------------------------------------------------
#>>> STARTING DISTANCE CALCULATIONS. . .
#Distance_1: DA . . .
#--------------------------------------------------------------------------
xD = (ace_xAxis) - (donor_xAxis)
yD = (ace_yAxis) - (donor_yAxis)
zD = (ace_zAxis) - (donor_zAxis)
DA_distance = math.sqrt(xD*xD + yD*yD + zD*zD) #distance b/w donor-acceptor.
DA.append(DA_distance)
#-------------------------------------------------------------------------
#Distance_2: HA. . .
#-------------------------------------------------------------------------
xD1 = (ace_xAxis) - (hy_xAxis)
yD1 = (ace_yAxis) - (hy_yAxis)
zD1 = (ace_zAxis) - (hy_zAxis)
HA_distance = math.sqrt(xD1*xD1 +yD1*yD1 + zD1*zD1)
HA.append(HA_distance)
#--------------------------------------------------------------------------
#>>> STARTING ANGLE CALCULATIONS. . .
#Angle_1: DHA...
#Calculating magnitude of the two vectors HD & HA i.e. DHA ANGLE . . .!!!
#-------------------------------------------------------------------------
#Calculating vector HD . . .(NB: in vector HD & DH is not same. Draw picture to explain to urself. HERE calculating vectors keeping 'H' as central point.)
x1 = (donor_xAxis - hy_xAxis)
y1 = (donor_yAxis - hy_yAxis)
z1 = (donor_zAxis - hy_zAxis)
#Calculating vector HA . . .
x2 = (ace_xAxis - hy_xAxis)
y2 = (ace_yAxis - hy_yAxis)
z2 = (ace_zAxis - hy_zAxis)
#Calculating cosine and rev-cosine function. . .
numerator = ((x1*x2)+(y1*y2)+(z1*z2))
denominator = ((math.sqrt(math.pow(x1,2)+math.pow(y1,2)+math.pow(z1,2)))*(math.sqrt(math.pow(x2,2)+math.pow(y2,2)+math.pow(z2,2))))
cosine = numerator/denominator
theta_rad = math.acos(cosine) #inverse cos function gives angle measure in radian...
DHA_angle = math.degrees(theta_rad) #converted to degree...
DHA_list.append(DHA_angle)
#print 'DHA = '+ str(DHA_angle)
#--------------------------------------------------------------------------
#Angle_2: HAAj...(Aj = adjacent atom to Acceptor atom)
#Calculating magnitude of the two vectors AH & Aj` i.e. HAAj ANGLE . . .!!!
#--------------------------------------------------------------------------
#Calculating vector AH. . .
x3 = (hy_xAxis - ace_xAxis)
y3 = (hy_yAxis - ace_yAxis)
z3 = (hy_zAxis - ace_zAxis)
#Calculating vector AA'. . .
x4 = (adj_xAxis - ace_xAxis)
y4 = (adj_yAxis - ace_yAxis)
z4 = (adj_zAxis - ace_zAxis)
#Calculating cosine and rev-cosine function. . .
numerator2 = ((x3*x4)+(y3*y4)+(z3*z4))
denominator2 = ((math.sqrt(math.pow(x3,2)+math.pow(y3,2)+math.pow(z3,2)))*(math.sqrt(math.pow(x4,2)+math.pow(y4,2)+math.pow(z4,2))))
cosine2 = numerator2/denominator2
theta_rad2 = math.acos(cosine2)
HAAj_angle = math.degrees(theta_rad2)
HAAj_list.append(HAAj_angle)
#print 'HAAj = ' + str(HAAj_angle)
#---------------------------------------------------------------------------
#Angle_3: DAAj...(Aj = adjacent atom to Acceptor atom)
#Calculating magnitude of the two vectors AD & Aj` i.e. DAAj ANGLE . . .!!!
#---------------------------------------------------------------------------
#Calculating vector AD. . .
x5 = (donor_xAxis - ace_xAxis)
y5 = (donor_yAxis - ace_yAxis)
z5 = (donor_zAxis - ace_zAxis)
#Calculating vector AA'. . .
x6 = (adj_xAxis - ace_xAxis)
y6 = (adj_yAxis - ace_yAxis)
z6 = (adj_zAxis - ace_zAxis)
#Calculating cosine and rev-cosine function. . .
numerator3 = ((x5*x6)+(y5*y6)+(z5*z6))
#print numerator3
denominator3 = ((math.sqrt(math.pow(x5,2)+math.pow(y5,2)+math.pow(z5,2)))*(math.sqrt(math.pow(x6,2)+math.pow(y6,2)+math.pow(z6,2))))
#print denominator3
cosine3 = numerator3/denominator3
theta_rad3 = math.acos(cosine3)
DAAj_angle = math.degrees(theta_rad3)
DAAj_list.append(DAAj_angle)
#print 'DAAj = ' + str(DAAj_angle)
#Now Preparing for DFT calculations...
#.HBradar file already been opened above as 'hbradar_out'...
#--------------------------------------------------------------------
print " Generating_outfile: "+ hbinfo
final_list = []
dft_input_list = [] #CONTAINS DATA FOR NWCHEM INPUT.. >>>>>>
h1 = str("_"*180) #adding a header line for viewing pleasure...
hbinfo_out.write(h1)
hbinfo_out.write('\n')
header_line = str( "DONOR"+" "*20+"HYDROGEN-ATOM"+" "*12+"ACCEPTOR ATOM"+" "*12+"ADJACENT ATOM"+" "*10+"DA"+" "*6+"HA"+" "*7+"DHA"+" "*7+"HAAj"+" "*9+"DAAj"+" "*9+"m06-2x"+" "*14+"m05-2x"+" "*10+"m06-HF") #headers line. . .
hbinfo_out.write(header_line)
hbradar_out.write(header_line)
hbinfo_out.write('\n')
hbradar_out.write('\n')
hbinfo_out.write(h1)
hbradar_out.write(h1)
hbinfo_out.write('\n')
hbradar_out.write('\n')
#print "[debug-step]header written..."
radial_distance_list=[]
midpoint_list = []
HB_coord_list=[] #Contains all d coordiantes of HB partners.
HB_output_list=[] #to store output items forming hydrogen bond, in a particular format, for later ease of usage...
for lines,da,ha,items,case,entry in zip(inp2_list_new,DA,HA,DHA_list,HAAj_list,DAAj_list):
#print lines,da,ha,items,case,entry
if da <= 3.9 and ha <= 2.5 and items > 90 and case >90 and entry > 90:
#print lines
HB_coord_list.append(lines)
#print lines, ('DA: '+ str(da)), ('HA: '+ str(ha)), ('DHA: ' + str(items)), ('HAAj: ' + str(case)), ('DAAj: ' + str(entry))
#output = lines[1], lines[2], lines[3],lines[4],' ', lines[9],lines[10],lines[11],lines[12],' ',lines[17],lines[18],lines[19],lines[20],' ',lines[26],lines[27],lines[28],lines[29],' ', ('DA: %.4f ' % da),' ',('HA: %.4f' % ha),' ', ('DHA: %.4f' % items),' ', ('HAAj: %.4f' % case),' ', ('DAAj: %.4f' % entry)
donor_lines =str(str(lines[1])+" "*(6-len(lines[1]))+str(lines[2])+" "*(6-len(lines[2]))+
str(lines[3])+" "*(4-len(lines[3]))+str(lines[4]))
dft_donor_lines = str(str(lines[1][0])+' '+str(lines[5])+' '+str(lines[6])+' '+
str(lines[7])) #to be used as DFT input data for DONOR_ATOM...
#donor_lines_list.append(donor_lines)
hydrogen_lines = str(" "*(8-len(lines[4]))+str(lines[9])+" "*(6-len(lines[9]))+str(lines[10])+
" "*(6-len(lines[10]))+str(lines[11])+" "*(4-len(lines[11]))+lines[12])
dft_hydrogen_lines = str(str(lines[9][0])+' '+str(lines[13])+' '+str(lines[14])+' '+
str(lines[15])) #to be used as DFT input data for HYDROGEN_ATOM...
#hydrogen_lines_list.append(hydrogen_lines)
acceptor_lines = str(" "*(8-len(lines[12]))+str(lines[17])+" "*(6-len(lines[17]))+
str(lines[18])+" "*(6-len(lines[18]))+str(lines[19])+" "*(4-len(lines[19]))+str(lines[20]))
dft_acceptor_lines = str(str(lines[17][0])+' '+str(lines[21])+' '+str(lines[22])+' '+
str(lines[23])) #to be used as DFT input data for ACCEPTOR_ATOM...
#acceptor_lines_list.append(acceptor_lines)
adjacent_lines = str(" "*(8-len(lines[20]))+str(lines[26])+" "*(6-len(lines[26]))+
str(lines[27])+" "*(6-len(lines[27]))+str(lines[28])+" "*(4-len(lines[28]))+str(lines[29]))
dft_adjacent_lines = str(str(lines[26][0])+' '+str(lines[30])+ ' '+str(lines[31])+' '+
str(lines[32])) #to be used as DFT input data for ADJACENT_ATOMS...
total_dft_input = dft_donor_lines+ ';' +dft_hydrogen_lines+';'+dft_acceptor_lines+';'+dft_adjacent_lines
dft_input_list.append(total_dft_input) #storing in the list for mass usage later on...
#adjacent_lines_list.append(adjacent_lines)
da_distance = str(" "*(6-len(lines[29]))+'DA:%.2f' % (da))
ha_distance = str(" "*(6-len(da_distance))+'HA:%.2f' % (ha))
dha_angle = str(" "*(6-len(ha_distance))+'DHA:%.2f' % (items))
haaj_angle = str(" "*(6-len(dha_angle)) +'HAAj:%.2f' % (case))
daaj_angle = str(" "*(11-len(haaj_angle))+'DAAj:%.2f' % (entry))
zz = donor_lines, hydrogen_lines, acceptor_lines, adjacent_lines,da_distance,ha_distance,dha_angle,haaj_angle,daaj_angle
#string_zz = str(zz) #converting zz-tuple to string
zzz = ' '.join(zz)
HB_output_list.append(zzz)
hbinfo_out.write(zzz)
hbinfo_out.write('\n')
#print zzz
#formatted_output = (' '.join((output)))
#z = str(formatted_output)
#final_list.append(z)
for line in HB_coord_list:
midpoint = (float(line[13])+float(line[21]))/2,(float(line[14])+float(line[22]))/2,\
(float(line[15])+float(line[23]))/2
midpoint_list.append(midpoint)
final_list=[] #CONTAINS ALL THE COORDINATES FOR DFT/MP2 CALCULATIONs...
for midpoint,hb_coord in zip(midpoint_list,HB_coord_list):
#print hb_coord
#x = [hb_coord]
x = [[hb_coord[1][0],hb_coord[5],hb_coord[6],hb_coord[7]],';',[hb_coord[9][0],hb_coord[13],hb_coord[14],\
hb_coord[15]],';',[hb_coord[17][0],hb_coord[21],hb_coord[22],hb_coord[23]],';',\
[hb_coord[26][0],hb_coord[30],hb_coord[31],hb_coord[32],';']]
#x = hb_coord[1][0]+' '+hb_coord[5]+' '+hb_coord[6]+' '+hb_coord[7]+';'+hb_coord[9][0]+' '+hb_coord[13]+' '+\
# hb_coord[14]+' '+hb_coord[15]+';'+hb_coord[17][0]+' '+hb_coord[21]+' '+hb_coord[22]+' '+hb_coord[23]+';'+\
# hb_coord[26][0]+' '+hb_coord[30]+' '+hb_coord[31]+' '+hb_coord[32]
#x.append(hb_coord)
for lines in map(str.split,inp):
#print lines
#print midpoint[0], lines[6]
x_dist = float(midpoint[0]) - float(lines[6])
y_dist = float(midpoint[1]) - float(lines[7])
z_dist = float(midpoint[2]) - float(lines[8])
radial_distance = math.sqrt((x_dist**2)+(y_dist**2)+(z_dist**2))
if radial_distance <= 2.0:
#print hb_coord[4], lines
f = [[lines[2][0],lines[6], lines[7],lines[8],';']]
if lines[6] != hb_coord[5] and lines[6] != hb_coord[13] and lines[6] != hb_coord[21] and lines[6] != hb_coord[30]:
x.extend(f)
print lines
#x.extend(lines)
final_list.append(x)
#for line in final_list: print (line)
hbinfo_out.close()
print " --.hbinfo file generated successfully "
#""" Following part is active in HPC version, laptops wont be able to run this below part, so inactivated[no bu]
#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
#---------------------------------------------------------------------------------------------------------------
# ...............PREPARING FOR DFT INPUT CONFIGURATION...............................
# <<<data are stored in list named "dft_input_list">>>>
#================================================================================================================
print " -- Starting the DFT calculations now"
print " please wait, this may take a while. . ."
import sys
import subprocess
m06_2x = [] #list to store dft energy coming from m06-2x function...
m05_2x = []
m06_HF = []
#--------------------------------------------------------------------------------------------------------------
#CREATING FUNCTION TO ANALYZE DFT_code i.e. <analyze_dft_code>
#_________________________________________________________________________________________________________________
def analyze_dft_code(dft_code_exe,xc,func_name):
dft_out = open(path + "e_hydroden_bond.data",'w')
p =subprocess.Popen(['mpirun -np 16 nwchem ' + dft_code_exe], stdout=subprocess.PIPE, shell=True)
output, err = p.communicate()
#print "communication successful with nwchem"
for line in output:
dft_out.write(line)
dft_out.close()
dft_file = open(path + "e_hydroden_bond.data",'r').read().strip().split('\n')
total_lines = [] #storing all nwchem generated DFT & BSSE energies...
useful_energies = [] #filtering from list "total_lines" and storing only 'isolated' and 'BSSE corrected energies'...
#Loop to point out energy terms in DFT_script...
for lines in map(str.split,dft_file):
if lines != []:
if lines[0] == 'Total' and lines[1] == 'DFT' and lines[2] == 'energy':
total_lines.append(lines)
else:
if lines[0] == 'Corrected' and lines[1] == 'energy':
total_lines.append(lines)
#print total_lines
try:
isolated_energy1 = float(total_lines[1][-1])
isolated_energy2 = float(total_lines[3][-1])
BSSE_energy = float(total_lines[5][-1])
#print isolated_energy1, isolated_energy2, BSSE_energy
net_energy = (BSSE_energy - (isolated_energy1 + isolated_energy2)) # generates energy in Hartree...
net_energy_kcal = net_energy * 627.509 #Converting energy into kcal/mol unit...
print (' DFT Energy(%s) = %f' %(func_name, net_energy_kcal) +' kcal/mol')
xc.append(net_energy_kcal)
#print "COMMUNICATION SUCCESSFULL WITH NWCHEM >>>>>>>>>>>>>>>>>>>>>>"
except IndexError:
print (' DFT Energy = ') + 'Failed to calculate for this coordinate...'
xc.append('00.00000')
return
##############################################################################################################
#_________________________________________________________________________________________________________________
import itertools
#print final_list[-1][-1][-1] = ; #for info
for items in final_list:
#print items
#print items
del items[-1][-1]
electron_num = []
bsse_mon_2 = []
#print items
for case in items:
#print case[0]
if case[0] =='N':
electron_num.append(7)
elif case[0]=='O':
electron_num.append(8)
elif case[0]=='C':
electron_num.append(6)
elif case[0] == 'H':
electron_num.append(1)
total_electron_number = sum(electron_num)
for i in range (3, len(electron_num)+1): bsse_mon_2.append(str(i))
print items
print bsse_mon_2
dft_string = list(itertools.chain.from_iterable(items))
final_dft_string = ' '.join(dft_string) #to be appended in dft calculations
final_bsse_mon_2 = ' '.join(bsse_mon_2) #to be appended as bsse mon 2
#print final_dft_string
#print final_bsse_mon_2
'''
- calculating electron number based on atoms involved...
- As because, in case of ODD number of electrons of participating atoms, the calculation must go in ODFT-format
- But for EVEN no. of electrons, preferred is simple DFT-protocol.
-- Now First checking if the no. of electron is ODD/EVEN, then approaching further likewise...
'''
if (total_electron_number % 2) == 0: #i.e. number of electron is even, calculation will go on with 'dft protocol'...
#print items
'''
** New to v.3.1
________________
Writing an EXCEPTION handler (in case the convergence fails in 6-31g(medium grid), DFT func gonna try
another shot of calculation in 6-311g(fine-grid) and it it fails too then DFT gonna switch to
6-311g(coarse grid) template)
> So, the exception handler order is:
6-31g(medium grid) > 6-311g (fine grid) > 6-311g (coarse grid)
'''
try:
f_m06_2x = open(path + "dft_m06-2x_631g_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
newdata = filedata.replace(" N##",(" "+final_dft_string)).replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code.write(newdata)
dft_code.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe=path + "e_hydrogen_bond_exe.nw", xc=m06_2x,func_name='m06-2x')
except IndexError: #could not converge in xfine so going for xfine now...
#Going for 6-311g(fine-grid) exception ...
try:
print "Seems problem in Convergence, switching to 6-311g basis-set(grid: fine)..."
f_m06_2x = open(path + "dft_m06-2x_6311g-fine_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
newdata = filedata.replace(" N##",(" "+final_dft_string)).replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code.write(newdata)
dft_code.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe= path + "e_hydrogen_bond_exe.nw", xc=m06_2x,func_name='m06-2x')
#Going for 6-311g(Coarse-grid) exception ...
except IndexError:
try:
print "Seems a problem with convergence, switching to 6-311g basis set(grid: coarse)..."
f_m06_2x = open(path + "dft_m06-2x_6311g-coarse_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
newdata = filedata.replace(" N##",(" "+final_dft_string)).replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code.write(newdata)
dft_code.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe=path + "e_hydrogen_bond_exe.nw", xc=m06_2x,func_name='m06-2x')
except IndexError:
print "Seems it failed again ! Switching to 6-311g (grid: xcoarse)..."
f_m06_2x = open(path + "dft_m06-2x_6311g-xcoarse_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
newdata = filedata.replace(" N##",(" "+final_dft_string)).replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code.write(newdata)
dft_code.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe=path + "e_hydrogen_bond_exe.nw", xc=m06_2x,func_name='m06-2x')
'''
#Killing extra calculation for faster data generation...
#________________________________________________________
#NOW trying m05-2x FUNCTION...
f_m05_2x = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_dft_m05-2x_template.nw",'r')
filedata=f_m05_2x.read()
f_m05_2x.close()
newdata = filedata.replace(" N##",(" "+items))
dft_code = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw",'w')
dft_code.write(newdata)
dft_code.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe="/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw", xc=m05_2x, func_name='m05-2x')
#NOW trying m06-HF FUNCTION...
f_m06_HF = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_dft_m06-HF_template.nw",'r')
filedata=f_m06_HF.read()
f_m06_HF.close()
newdata = filedata.replace(" N##",(" "+items))
dft_code = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw",'w')
dft_code.write(newdata)
dft_code.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe="/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw", xc=m06_HF,func_name='m06-HF')
'''
elif (total_electron_number % 2) > 0: #i.e. no. of electron is ODD, Switching to ODFT-protocols...
#print items
#Going into the exception looping...
try:
f_m06_2x = open(path + "odft_m06-2x_631g_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
#> Now if atom 3 is N them we might get multiplicity error, so if-else loop required...
if items[0][0] != 'N':
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult@@"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
elif items[0][0] == 'N' and (sum(electron_num[2:]) % 2) > 0:
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult$$"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code_exe = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code_exe.write(newdata)
dft_code_exe.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe=path + "e_hydrogen_bond_exe.nw", xc=m06_2x, func_name='m06-2x')
except IndexError:
#Going for 6-311g(fine-grid) exception ...
try:
print "Seems a problem with convergence, switching to 6-311g (grid: fine)..."
f_m06_2x = open(path + "odft_m06-2x_6311g-fine_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
#> Now if atom 3 is N them we might get multiplicity error, so if-else loop required...
if items[0][0] != 'N':
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult@@"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
elif items[4][0] == 'N' and (sum(electron_num[2:]) % 2) > 0:
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult$$"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code_exe = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code_exe.write(newdata)
dft_code_exe.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe=path + "e_hydrogen_bond_exe.nw", xc=m06_2x, func_name='m06-2x')
#Going for 6-311g(coarse-grid) exception ...
except IndexError:
try:
print "Seems the convergence persists, switching to 6-311g (grid: coarse)..."
f_m06_2x = open(path + "odft_m06-2x_6311g-coarse_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
#> Now if atom 3 is N them we might get multiplicity error, so if-else loop required...
if items[0][0] != 'N':
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult@@"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
elif items[4][0] == 'N' and (sum(electron_num[2:]) % 2) > 0:
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult$$"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code_exe = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code_exe.write(newdata)
dft_code_exe.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe=path + "e_hydrogen_bond_exe.nw", xc=m06_2x, func_name='m06-2x')
except IndexError:
print "oh faied again! switching to 6-311g (grid: xcoarse)..."
f_m06_2x = open(path + "odft_m06-2x_6311g-xcoarse_template.nw",'r')
filedata=f_m06_2x.read()
f_m06_2x.close()
#> Now if atom 3 is N them we might get multiplicity error, so if-else loop required...
if split_items[0][0] != 'N':
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult@@"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
elif split_items[4][0] == 'N' and (sum(electron_num[2:]) % 2) > 0:
newdata = filedata.replace(" O##",(" "+final_dft_string)).replace(" #mult$$"," mult 2").replace(" #mon#"," mon second "+final_bsse_mon_2)
dft_code_exe = open(path + "e_hydrogen_bond_exe.nw",'w')
dft_code_exe.write(newdata)
dft_code_exe.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe=path + "e_hydrogen_bond_exe.nw", xc=m06_2x, func_name='m06-2x')
'''
#NOW trying m05-2x FUNCTION...
f_m05_2x = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_odft_m05-2x_template.nw",'r')
filedata=f_m05_2x.read()
f_m05_2x.close()
if split_items[6][-1] != 'N':
newdata = filedata.replace(" O##",(" "+items)).replace(" #mult@@"," mult 2")
elif split_items[6][-1] == 'N':
newdata = filedata.replace(" O##",(" "+items)).replace(" #mult$$"," mult 2")
dft_code_exe = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw",'w')
dft_code_exe.write(newdata)
dft_code_exe.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe="/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw", xc=m05_2x,func_name='m05-2x')
#NOW trying m06-HF FUNCTION...
f_m06_HF = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_odft_m06-HF_template.nw",'r')
filedata=f_m06_HF.read()
f_m06_HF.close()
if split_items[6][-1] != 'N':
newdata = filedata.replace(" O##",(" "+items)).replace(" #mult@@"," mult 2")
elif split_items[6][-1] == 'N':
newdata = filedata.replace(" O##",(" "+items)).replace(" #mult$$"," mult 2")
dft_code_exe = open("/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw",'w')
dft_code_exe.write(newdata)
dft_code_exe.close()
#Now calling dft_analyzer function created earlier...
analyze_dft_code(dft_code_exe="/home/saumen/abhisek/project_HBf/e_hydrogen_bond_exe.nw", xc=m06_HF, func_name='m06-HF')
'''
#CHANGING THE STORED ENERGY VALUES INTO string...
m06_2x_string_dft_energy=[] #converting the energy value in string and storing here for extending the HB-output_list later...
#m05_2x_string_dft_energy=[]
#m06_HF_string_dft_energy=[]
for m06_2x_energy in m06_2x:
m06_2x_string_value=str(m06_2x_energy)
m06_2x_string_dft_energy.append(m06_2x_string_value)
#--------------------------------------------------------------------------------------
#Generating the .HBradar outfile with all info. and hydrogen bond_energy...
#including xc= m06-2x, m05-2x, m06-HF
#--------------------------------------------------------------------------------------
for items, m06_2x_value in zip(HB_output_list, m06_2x_string_dft_energy):
#print items, m06_2x_value,m06_2x_value, m06_HF_value
new_line=items +" "+ m06_2x_value
hbradar_out.write(new_line)
hbradar_out.write('\n')
#print new_line
hbradar_out.close()
#"""
#print "COMPLETED SUCCESSFULLY. . ."
#print (str(h_count) +" probable hydrogen bond calculated")
################################################################
#stop = timeit.default_timer()
#print ("Total time taken to analyze this file: %.3f sec" % (stop - start))
#print "OUTPUT FILE GENERATED SUCCESSFULLY AS: " + hbradar
print ">>> ALL DIRECTORIES SCANNED, TOTAL FILE PROCESSED: " + str(f_count)
print " Thanks for using me !"
print " Have a nice day !"
return #'coz func-loop has to run till the path has files to be processed...
#Opening the file...
import time
#person_name = raw_input("Hello sir !, Please enter your name to proceed: ")
print "*********************************************************************************************************"
print " Hello sir! You are executing HB_radar algorithm on "+ time.asctime()+" IST "
print " PROGRAM NAME: HB_radar_walk_m062x.v.4.0 (uses neighbouring atoms) "
print " designed by => Abhisek Mondal"
print " Please cite: doi: 10.1002/prot.25271"
print " CSIR-IICB, KOLKATA, INDIA"
print "**********************************************************************************************************"
file_path = raw_input("ENTER THE PATH FOR THE FILE (viz. /home/usr/): ")
show_me_hbs(path = file_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment