-
-
Save sjl/a675c449a5452cb96a9fd8ce49741888 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################# | |
# # | |
# nmr-data_compilation.py # | |
# # | |
# Copyright Patrick H. Willoughby August 2013 # | |
# # | |
# Does Bolzmann-analysis and searches for imaginary # | |
# frequencies, and compiles the NMR shielding tensors # | |
# then prints the data into several .csv files for # | |
# viewing in a spreadsheet application # | |
# # | |
################################################################# | |
import re | |
import math | |
import glob | |
HARTREE_TO_KCAL = 627.509391 | |
TEMPERATURE = 298.0 | |
GAS_CONSTANT = 0.001986 | |
MENU_SELECTION = input("""Choose from the following options: | |
A. Enter reference and scaling factor data from regression analysis of a | |
test set of molecules. | |
B. Enter reference data from computation of a reference standard (e.g., | |
TMS) NMR shielding tensors. | |
C. Do not reference or scale NMR shielding tensor data. | |
""") | |
if MENU_SELECTION.upper() == "A": | |
PROTON_INTERCEPT = eval(input(""" | |
Enter the 1H scaling factor INTERCEPT: | |
""")) | |
PROTON_SCALING_SLOPE = eval(input(""" | |
Enter the 1H scaling factor SLOPE: | |
""")) | |
CARBON_INTERCEPT = eval(input(""" | |
Enter the 13C scaling factor INTERCEPT: | |
""")) | |
CARBON_SCALING_SLOPE = eval(input(""" | |
Enter the 13C scaling factor SLOPE: | |
""")) | |
OUTPUT_PREFIX = input(""" | |
Enter the name of the candidate structure: | |
""") + "-nmr_data_compilation" | |
if MENU_SELECTION.upper() == "B": | |
PROTON_REF_STANDARD = eval(input(""" | |
Enter the computed 1H shielding tensor for the reference standard: | |
""")) | |
PROTON_REL_TMS = eval(input(""" | |
Enter the experimental 1H chemical shift of the reference standard: | |
""")) | |
PROTON_INTERCEPT = PROTON_REF_STANDARD - PROTON_REL_TMS | |
PROTON_SCALING_SLOPE = -1 | |
CARBON_REF_STANDARD = eval(input(""" | |
Enter the computed 13C shielding tensor for the reference standard: | |
""")) | |
CARBON_REL_TMS = eval(input(""" | |
Enter the experimental 13C chemical shift of the reference standard: | |
""")) | |
CARBON_INTERCEPT = CARBON_REF_STANDARD - CARBON_REL_TMS | |
CARBON_SCALING_SLOPE = -1 | |
OUTPUT_PREFIX = input(""" | |
Enter the name of the candidate structure: | |
""") + "-nmr_data_compilation" | |
if MENU_SELECTION.upper() == "C": | |
PROTON_INTERCEPT = 0 | |
PROTON_SCALING_SLOPE = -1 | |
CARBON_INTERCEPT = 0 | |
CARBON_SCALING_SLOPE = -1 | |
OUTPUT_PREFIX = input(""" | |
Enter the name of the candidate structure: | |
""") + "-nmr_data_compilation" | |
#Below are the index values for the master data structure. | |
NAME = 0; CONF_NUM = 1; ENERGY = 2; KCAL_E = 3; REL_E = 4; BOLTZMANN_FACTOR = 5; MOL_X = 6; CARBON_CS = 7; PROTON_CS = 8; IMAG_FREQUENCIES = 9 | |
#Below are the index values for the proton and carbon chemical shift data substructures. | |
ATOM_NUMBER = 0 ; ISOTROPIC_VALUE = 1; REF_SHIFT = 2; WEIGHTED_SHIFT = 3; | |
def main(): | |
lofc = read_gaussian_outputfiles() | |
lofc_freq = read_gaussian_freq_outfiles(lofc) | |
lofc_nmr = read_gaussian_nmr_outfiles(lofc) | |
locs = prepare_list_of_chemical_shifts(lofc_nmr) | |
lofe = get_list_of_free_energies(lofc_freq) | |
lofe = boltzmann_analysis(lofe) | |
lofe = report_chemical_shifts(lofc_nmr, lofe) | |
summed_proton_shifts = final_proton_chemical_shifts(lofe) | |
summed_carbon_shifts = final_carbon_chemical_shifts(lofe) | |
lofe = count_imaginary_freq(lofc_freq, lofe) | |
write_final_shift_csv(summed_proton_shifts,summed_carbon_shifts) | |
write_master_csv(lofe) | |
def boltzmann_analysis(lofe): | |
lofe = kcal_convert(lofe) | |
minE = find_minimum_E(lofe) | |
lofe = calc_rel_E(lofe, minE) | |
lofe = calc_boltzmann_weights(lofe) | |
denom = calc_boltzmann_denomenator(lofe) | |
lofe = calc_mol_fraction(lofe,denom) | |
return lofe | |
def report_chemical_shifts(lofc_nmr,lofe): | |
get_chemical_shifts(lofc_nmr, lofe) | |
ref_chemical_shift(lofe) | |
boltzmann_chemical_shifts(lofe) | |
return lofe | |
def write_master_csv(lofe): | |
masterpwriter = open(OUTPUT_PREFIX+"-master_proton.csv",'w') | |
mastercwriter = open(OUTPUT_PREFIX+"-master_carbon.csv",'w') | |
print("filename, energy (a.u.), energy (kcal), rel energy (kcal), boltzmann factor, eq mole fraction, imaginary freqs", file=masterpwriter) | |
for conformation in lofe: | |
print(conformation[NAME],",", conformation[ENERGY],",", conformation[KCAL_E],",", conformation[REL_E],",",conformation[BOLTZMANN_FACTOR],",", conformation[MOL_X],",", conformation[IMAG_FREQUENCIES], file=masterpwriter) | |
print(" ", file=masterpwriter) | |
for conformation in lofe: | |
print("conformation",",", conformation[NAME],",", "mole fraction",",", conformation[MOL_X], file=masterpwriter) | |
print("Atom Number, Isotropic Value, Ref Chemical Shift, Avg Chemical Shift", file=masterpwriter) | |
for proton in conformation[PROTON_CS]: | |
print(proton[ATOM_NUMBER],",", proton[ISOTROPIC_VALUE],",", proton[REF_SHIFT],",",proton[WEIGHTED_SHIFT], file=masterpwriter) | |
print(" ", file=masterpwriter) | |
print("filename, energy (a.u.), energy (kcal), rel energy (kcal), boltzmann factor, eq mole fraction", file=mastercwriter) | |
for conformation in lofe: | |
print(conformation[NAME],",", conformation[ENERGY],",", conformation[KCAL_E],",", conformation[REL_E],",",conformation[BOLTZMANN_FACTOR],",", conformation[MOL_X], file=mastercwriter) | |
print(" ", file=mastercwriter) | |
for conformation in lofe: | |
print("conformation",",", conformation[NAME],",", "mole fraction",",", conformation[MOL_X], file=mastercwriter) | |
print("Atom Number, Isotropic Value, Ref Chemical Shift, Avg Chemical Shift", file=mastercwriter) | |
for carbon in conformation[CARBON_CS]: | |
print(carbon[ATOM_NUMBER],",", carbon[ISOTROPIC_VALUE],",", carbon[REF_SHIFT],",",carbon[WEIGHTED_SHIFT], file=mastercwriter) | |
print(" ", file=mastercwriter) | |
def write_final_shift_csv(summed_proton_shifts,summed_carbon_shifts): | |
ATOM_NUMBER = 0; SHIFT = 1 | |
pwriter = open(OUTPUT_PREFIX+"-avg_proton.csv",'w') | |
cwriter = open(OUTPUT_PREFIX+"-avg_carbon.csv",'w') | |
print("Gaussian atom numbers, logical atom numbers, chemical shift", file=pwriter) | |
for item in summed_proton_shifts: | |
print(item[ATOM_NUMBER],",,",item[SHIFT], file=pwriter) | |
print("Gaussian atom numbers, logical atom numbers, chemical shift", file=cwriter) | |
for item in summed_carbon_shifts: | |
print(item[ATOM_NUMBER],",,",item[SHIFT], file=cwriter) | |
return 0 | |
def final_proton_chemical_shifts(lofe): | |
ATOM_NUMBER = 0; SHIFT = 1 | |
final_proton_cshift = [] | |
for proton in lofe[0][PROTON_CS]: | |
final_proton_cshift.append([proton[ATOM_NUMBER],proton[WEIGHTED_SHIFT]]) | |
for conformation in lofe[1:]: | |
counter = 0 | |
for proton in conformation[PROTON_CS]: | |
final_proton_cshift[counter][SHIFT] += proton[WEIGHTED_SHIFT] | |
counter += 1 | |
return final_proton_cshift | |
def final_carbon_chemical_shifts(lofe): | |
ATOM_NUMBER = 0; SHIFT = 1 | |
final_carbon_cshift = [] | |
for carbon in lofe[0][CARBON_CS]: | |
final_carbon_cshift.append([carbon[ATOM_NUMBER],carbon[WEIGHTED_SHIFT]]) | |
for conformation in lofe[1:]: | |
counter = 0 | |
for carbon in conformation[CARBON_CS]: | |
final_carbon_cshift[counter][SHIFT] += carbon[WEIGHTED_SHIFT] | |
counter += 1 | |
return final_carbon_cshift | |
def kcal_convert(lofe): | |
NAME = 0; CONF_NUM = 1; ENERGY = 2 | |
for entry in lofe: | |
entry.append(entry[ENERGY] * HARTREE_TO_KCAL) | |
return lofe | |
def find_minimum_E(lofe): | |
minE = 0 | |
for entry in lofe: #This finds the minimum energy | |
if entry[KCAL_E] < minE: | |
minE = entry[KCAL_E] | |
return minE | |
def calc_rel_E(lofe, minE): | |
for entry in lofe: | |
entry.append(entry[KCAL_E] - minE) | |
return lofe | |
def calc_boltzmann_weights(lofe): | |
for entry in lofe: | |
entry.append(math.exp( (-1 * entry[REL_E]) / (TEMPERATURE * GAS_CONSTANT))) | |
return lofe | |
def calc_boltzmann_denomenator(lofe): | |
Boltzmann_denomenator = 0 | |
for entry in lofe: | |
Boltzmann_denomenator = Boltzmann_denomenator + entry[BOLTZMANN_FACTOR] | |
return Boltzmann_denomenator | |
def calc_mol_fraction(lofe,Boltzmann_denomenator): | |
for entry in lofe: | |
entry.append(entry[BOLTZMANN_FACTOR]/Boltzmann_denomenator) | |
return lofe | |
def ref_chemical_shift(lofe): | |
for conformation in lofe: | |
for proton in conformation[PROTON_CS]: | |
proton.append(abs((PROTON_INTERCEPT - float(proton[ISOTROPIC_VALUE])) / PROTON_SCALING_SLOPE)) | |
for carbon in conformation[CARBON_CS]: | |
carbon.append(abs((CARBON_INTERCEPT - float(carbon[ISOTROPIC_VALUE])) / CARBON_SCALING_SLOPE)) | |
return lofe | |
def boltzmann_chemical_shifts(lofe): | |
for conformation in lofe: | |
for proton in conformation[PROTON_CS]: | |
proton.append(proton[REF_SHIFT] * conformation[MOL_X]) | |
for carbon in conformation[CARBON_CS]: | |
carbon.append(carbon[REF_SHIFT] * conformation[MOL_X]) | |
return lofe | |
def get_chemical_shifts(lofc_nmr, lofe): | |
ATOM_NUMBER = 0; ATOM_SYMBOL = 1; ISOTROPIC_VALUE = 4 | |
counter = 0 | |
for file in lofc_nmr: | |
proton_chemicalshift_table = [] | |
carbon_chemicalshift_table = [] | |
for line in file[2]: | |
if "Isotropic" in line: | |
linesplit = line.split() | |
if linesplit[ATOM_SYMBOL] == "C": | |
carbon_chemicalshift_table.append([linesplit[ATOM_NUMBER],linesplit[ISOTROPIC_VALUE]]) | |
if linesplit[ATOM_SYMBOL] == "H": | |
proton_chemicalshift_table.append([linesplit[ATOM_NUMBER],linesplit[ISOTROPIC_VALUE]]) | |
lofe[counter].append(carbon_chemicalshift_table) | |
lofe[counter].append(proton_chemicalshift_table) | |
counter += 1 | |
return lofe | |
def prepare_list_of_chemical_shifts(lofc_nmr): | |
list_of_chemical_shifts = [] | |
for file in lofc_nmr: | |
list_of_chemical_shifts.append([file[0],file[1]]) | |
return list_of_chemical_shifts | |
def count_imaginary_freq(lofc_freq, lofe): | |
LINE_POS_OF_FREQUENCY_A = 2; LINE_POS_OF_FREQUENCY_B = 3; LINE_POS_OF_FREQUENCY_C = 4; | |
counter = 0 | |
for file in lofc_freq: | |
IMAG_FREQUENCIES = 0 | |
for line in file[2]: | |
if "Frequencies -- " in line: | |
freq_linesplit = line.split() | |
if float(freq_linesplit[LINE_POS_OF_FREQUENCY_A]) < 0: | |
IMAG_FREQUENCIES = IMAG_FREQUENCIES + 1 | |
if float(freq_linesplit[LINE_POS_OF_FREQUENCY_B]) < 0: | |
IMAG_FREQUENCIES = IMAG_FREQUENCIES + 1 | |
if float(freq_linesplit[LINE_POS_OF_FREQUENCY_C]) < 0: | |
IMAG_FREQUENCIES = IMAG_FREQUENCIES + 1 | |
lofe[counter].append(IMAG_FREQUENCIES) | |
counter += 1 | |
return lofe | |
def get_list_of_free_energies(lofc_freq): | |
LINE_POS_OF_FREE_ENERGY = 7 | |
list_of_free_energies = [] | |
for file in lofc_freq: | |
for line in file[2]: | |
if "Free Energies=" in line: | |
free_linesplit = line.split() | |
free_energy = float(free_linesplit[LINE_POS_OF_FREE_ENERGY]) | |
list_of_free_energies.append([file[0],file[1],free_energy]) | |
return list_of_free_energies | |
def get_conf_number(filename): | |
split_filename = re.findall(r'\w+', filename) | |
rev_filename = split_filename[::-1] | |
conf_number = rev_filename[1] | |
return conf_number | |
def read_gaussian_nmr_outfiles(list_of_files): | |
list_of_nmr_outfiles = [] | |
for file in list_of_files: | |
if file.find('nmr-') !=-1: | |
list_of_nmr_outfiles.append([file,int(get_conf_number(file)),open(file,"r").readlines()]) | |
return list_of_nmr_outfiles | |
def read_gaussian_freq_outfiles(list_of_files): | |
list_of_freq_outfiles = [] | |
for file in list_of_files: | |
if file.find('freq-') !=-1: | |
list_of_freq_outfiles.append([file,int(get_conf_number(file)),open(file,"r").readlines()]) | |
return list_of_freq_outfiles | |
def read_gaussian_outputfiles(): | |
list_of_files = [] | |
for file in glob.glob('*.out'): | |
list_of_files.append(file) | |
return list_of_files | |
if __name__ == "__main__": | |
main() | |
print(""" | |
The script successfully performed the Boltzmann weighting, | |
compiled the results of the NMR computation, assembled, scaled, | |
and/or referenced these data in the following ".csv" files: | |
%s-master_proton.csv | |
%s-avg_proton.csv | |
%s-master_carbon.csv | |
%s-avg_carbon.csv | |
""" % (OUTPUT_PREFIX,OUTPUT_PREFIX,OUTPUT_PREFIX,OUTPUT_PREFIX)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment