Created
March 12, 2018 04:45
-
-
Save zhantongz/ea2c894b3983f9b1f963820ec31992a2 to your computer and use it in GitHub Desktop.
Basic Gaussian 16 log file parser (no job flags etc.; basic HF/DFT/TDDFT jobs only)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import re
import sys
from datetime import datetime

import numpy as np
def isfloat(value):
    """Return True when ``float(value)`` succeeds, False otherwise.

    Both ``ValueError`` (e.g. a non-numeric string) and ``TypeError`` (e.g.
    ``None`` or a list) count as "not a float", so the check never raises
    for those inputs.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True
# Section triggers, tried in this order against every stripped line; the first
# pattern that matches decides how the following lines are consumed.  Column
# headers are matched with \s+ so they do not depend on the exact padding
# between the header words.
_SECTION_PATTERNS = (
    ('version', re.compile(r'Cite this work as:')),
    ('chk', re.compile(r'^%chk=(\S+)')),
    ('route', re.compile(r'^#')),
    ('input', re.compile(r'^Symbolic Z-matrix:')),
    ('hfenergy', re.compile(r'^SCF Done:')),
    ('opt', re.compile(r'Stationary point found.')),
    ('freq', re.compile(r'^Harmonic frequencies \(cm\*\*-1\)')),
    ('hessian', re.compile(r'^(The second derivative matrix:)|^(ITU=\s+0)')),
    ('tddft', re.compile(r'Convergence achieved on expansion vectors.')),
    ('force', re.compile(r'Center\s+Atomic\s+Forces')),
    ('cpu_time', re.compile(r'^Job cpu time:')),
    ('end', re.compile(r'(?:Normal termination of Gaussian \d+ at )(.*)\.')),
)

# Lines that may follow the contribution list of one excited state.
_TDDFT_ROOT_INFO = re.compile(r'^(This state)|^(Total Energy)|^(Copying)')

# Header line that precedes an optimized-geometry coordinate table.
_COORD_HEADER = re.compile(
    r'Center\s+Atomic\s+Atomic\s+Coordinates\s+\(Angstroms\)')


def _read_route(line, logfile):
    """Return the route lines, from the '#' line up to the next '---' rule."""
    route = [line[1:].strip()]
    line = next(logfile).strip()
    while line[0:3] != '---':
        route.append(line)
        line = next(logfile).strip()
    return route


def _read_input(logfile):
    """Return charge, multiplicity and the geometry rows that follow them."""
    header = next(logfile).split()
    geom = []
    fields = next(logfile).split()
    while fields:  # the geometry ends at the first blank line
        geom.append(fields)
        fields = next(logfile).split()
    return {'charge': header[2], 'multi': header[5], 'geom': geom}


def _read_opt_geometry(logfile):
    """Return the coordinate table after a stationary point ([] if none)."""
    geom = []
    line = next(logfile).strip()
    if not line or line[0:4] == 'Grad':
        return geom
    while not _COORD_HEADER.search(line):
        line = next(logfile).strip()
    # Skip the second header line and the rule below it.
    next(logfile)
    next(logfile)
    line = next(logfile).strip()
    while line[0:3] != '---':
        fields = line.split()
        geom.append([int(f) for f in fields[0:3]] +
                    [float(f) for f in fields[3:]])
        line = next(logfile).strip()
    return geom


def _read_frequencies(line, logfile):
    """Return ``(modes, atom_order)`` or None when no mode table follows."""
    # Skip the descriptive header up to the first row of mode numbers.
    while line.split()[0] != '1':
        line = next(logfile).strip()
        if not line:
            return None

    modes_out, atom_order = None, None
    while line.split():
        labels = line.split()
        symm, freqs, red_mass, force_const, ir_inten, nmodes = \
            [], [], [], [], [], []
        line = next(logfile).strip()
        # Each pass consumes one block: symmetry labels, four property rows,
        # a column header and one displacement row per atom.
        while len(line.split()) > 3 or not line.replace(' ', '').isdigit():
            symm += line.split()
            freqs += next(logfile).split()[2:]
            red_mass += next(logfile).split()[3:]
            force_const += next(logfile).split()[3:]
            ir_inten += next(logfile).split()[3:]
            next(logfile)  # column header of the displacement table
            rows = []
            line = next(logfile).strip()
            while len(line.split()) > 3:
                rows.append(line.split())
                line = next(logfile).strip()
            # Builtin float/int: the np.float / np.int aliases no longer
            # exist in current NumPy releases.
            table = np.array(rows).astype(float)
            atom_order = table[:, 1].astype(int).tolist()
            displacements = table[:, 2:]
            # Three columns (x, y, z) per mode; a block may hold fewer than
            # three modes, so derive the section count from the width.
            sections = displacements.shape[1] // 3
            nmodes += [part.tolist()
                       for part in np.hsplit(displacements, sections)]
            labels += line.split()  # mode numbers of the next block, if any
            if not line:
                break
            line = next(logfile).strip()
        # Indexing (not zip) so that inconsistent row lengths still raise.
        modes_out = [{
            'symmetry': symm[i],
            'freq': float(freqs[i]),
            'reduced_mass': float(red_mass[i]),
            'force_constant': float(force_const[i]),
            'ir_intensity': float(ir_inten[i]),
            'mode': nmodes[i],
        } for i in range(len(labels))]
    return modes_out, atom_order


def _read_hessian(has_matrix, logfile):
    """Return ``(matrix, eigenvalues)``; matrix is None unless has_matrix."""
    matrix = None
    if has_matrix:
        line = next(logfile).strip()
        blocks = []
        while not line.startswith('ITU='):
            line = next(logfile).strip()
            block = []
            # Data rows end in a number; a new column header does not.
            while (isfloat(line.split()[-1]) and
                   not line.startswith('ITU=')):
                block.append([float(v) for v in line.split()[1:]])
                line = next(logfile).strip()
            blocks.append(block)
        matrix = blocks[0]
        # Later column blocks are aligned with the bottom rows of the first.
        for block in blocks[1:]:
            for offset, row in enumerate(reversed(block)):
                matrix[-offset - 1] += row
    eigenvalues = []
    line = next(logfile).strip()
    while line.startswith('Eigenvalues ---'):
        eigenvalues += [float(v) for v in line.split()[2:]]
        line = next(logfile).strip()
    return matrix, eigenvalues


def _read_tddft(logfile):
    """Return transition dipole rows and the list of excited states."""
    for _ in range(4):
        next(logfile)
    line = next(logfile).strip()
    while line.split() and 'electric dipole moments' not in line:
        line = next(logfile).strip()
    next(logfile)  # column header of the dipole table
    dipoles = []
    line = next(logfile).strip()
    while line.split() and line.split()[0].isdigit():
        dipoles.append(line.split()[1:])
        line = next(logfile).strip()
    while (line[0:3] != '***' and
           'Excitation energies and oscillator strengths:' not in line):
        line = next(logfile).strip()
    next(logfile)
    line = next(logfile).strip()
    states = []
    while line.startswith('Excited State'):
        fields = line.split()
        venergy = float(fields[4])
        osc_strength = float(fields[8][2:])  # drop the leading "f="
        total_energy = None
        contribs = []
        line = next(logfile).strip()
        while line.split() and line.split()[0].isdigit():
            contrib = line.split()
            del contrib[1]  # the arrow between the two orbital indices
            contribs.append([int(i) for i in contrib[:2]] +
                            [float(contrib[2])])
            line = next(logfile).strip()
        root_match = _TDDFT_ROOT_INFO.search(line)
        while root_match:
            if root_match.group(2):
                total_energy = float(line.split()[4])
            line = next(logfile).strip()
            root_match = _TDDFT_ROOT_INFO.search(line)
        states.append({
            'excitation_energy': venergy,
            'oscillator_strength': osc_strength,
            'contributions': contribs,
            'total_energy': total_energy,
        })
        line = next(logfile).strip()
    return {'electric_dipole_moments': dipoles, 'excited_states': states}


def _read_forces(logfile):
    """Return the rows of the force table that follows its header line."""
    next(logfile)
    next(logfile)
    forces = []
    line = next(logfile).strip()
    while line[0:3] != '---':
        fields = line.split()
        forces.append([int(f) for f in fields[0:2]] +
                      [float(f) for f in fields[2:]])
        line = next(logfile).strip()
    return forces


def _cpu_seconds(line):
    """Convert a 'Job cpu time:' line to seconds.

    Expects ``Job cpu time: <d> days <h> hours <m> minutes <s> seconds.``;
    after dropping the three leading words the values sit at the even
    indices 0, 2, 4 and 6.
    """
    fields = line.split()[3:]
    return (float(fields[0]) * 24 * 60 * 60 +
            float(fields[2]) * 60 * 60 +
            float(fields[4]) * 60 +
            float(fields[6]))


def _store_section(parsed, section, match, line, logfile):
    """Parse the section announced by *line* and record it in *parsed*."""
    if section == 'version':
        parsed['version'] = next(logfile).strip().replace(',', '')
    elif section == 'chk':
        name = match.group(1)
        # Only add the extension when it is missing.
        parsed['chk'] = name if name.endswith('.chk') else name + '.chk'
    elif section == 'route':
        parsed['route'] = _read_route(line, logfile)
    elif section == 'input':
        parsed['input'] = _read_input(logfile)
    elif section == 'hfenergy':
        parsed['energy'] = float(line.split()[4])
    elif section == 'opt':
        parsed['opt'] = {'success': True,
                         'geom': _read_opt_geometry(logfile)}
    elif section == 'freq':
        result = _read_frequencies(line, logfile)
        if result is not None:
            parsed['freq'], parsed['nm_atom_order'] = result
    elif section == 'hessian':
        matrix, eigenvalues = _read_hessian(bool(match.group(1)), logfile)
        hessian = parsed.setdefault('hessian', {})
        if matrix is not None:
            hessian['matrix'] = matrix
        hessian['eigenvalues'] = eigenvalues
    elif section == 'tddft':
        parsed['tddft'] = _read_tddft(logfile)
    elif section == 'force':
        parsed['force'] = _read_forces(logfile)
    elif section == 'cpu_time':
        parsed['cpu_time'] = _cpu_seconds(line)
    elif section == 'end':
        parsed['end'] = {
            'normal': True,
            'time': datetime.strptime(' '.join(match.group(1).split()),
                                      '%a %b %d %H:%M:%S %Y'),
        }


def parse(infile):
    """Parse a Gaussian 16 log file into a dictionary.

    Args:
        infile: an open text file (or any iterator of lines usable as a
            context manager); it is closed when parsing finishes.

    Returns:
        A dict that always contains ``'type': 'g16'`` and ``'end'`` (with
        ``'normal'`` set to False when no normal-termination line was seen).
        Depending on what the log contains it may also hold ``'version'``,
        ``'chk'``, ``'route'``, ``'input'``, ``'energy'``, ``'opt'``,
        ``'freq'``, ``'nm_atom_order'``, ``'hessian'``, ``'tddft'``,
        ``'force'`` and ``'cpu_time'`` (seconds).

    Each line is tested against the section patterns in a fixed order; the
    first match consumes the lines belonging to that section.  If the file
    ends in the middle of a section, that section is dropped and whatever
    was parsed before it is returned.
    """
    parsed = {'type': 'g16'}
    with infile as logfile:
        try:
            for raw_line in logfile:
                line = raw_line.strip()
                for section, pattern in _SECTION_PATTERNS:
                    match = pattern.search(line)
                    if match:
                        _store_section(parsed, section, match, line, logfile)
                        break
        except StopIteration:
            # A section reader ran off the end of a truncated log.
            pass

    # A stationary point without a geometry table carries no information.
    opt = parsed.get('opt')
    if opt is not None and opt['success'] and not opt['geom']:
        del parsed['opt']
    parsed.setdefault('end', {'normal': False})
    return parsed
def output(outputfile, parsed):
    """Write the parsed result to *outputfile*.

    Args:
        outputfile: a writable text stream, or None to use standard output
            (``print`` falls back to ``sys.stdout`` when ``file`` is None).
        parsed: the dictionary returned by ``parse``.
    """
    print(parsed, file=outputfile)
def main():
    """Command-line entry point: parse a log file and emit the result.

    ``--file`` is mandatory, because ``parse`` needs an open file to read.
    ``--output`` is optional and is passed to ``output`` as-is (None when it
    is not given).
    """
    args_parser = argparse.ArgumentParser(
        description='Process Gaussian 16 output files.')
    args_parser.add_argument('--file', '-f', type=argparse.FileType('r'),
                             required=True,
                             help='Gaussian 16 log file to parse')
    args_parser.add_argument('--output', '-o', type=argparse.FileType('w'),
                             help='file to write the parsed result to')
    args = args_parser.parse_args()
    try:
        parsed = parse(args.file)
        output(args.output, parsed)
    finally:
        # argparse opened the output file; close it here, but leave the
        # process-wide stdout alone when "-o -" was used.
        if args.output is not None and args.output is not sys.stdout:
            args.output.close()
# Run the command-line interface only when executed as a script.
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment