amjames/g09_parsers.py

## g09_parsers.py
def _array_from_fchk(fchk_text, array_name):
    # matches a line that looks like:
    # {array_name}          R        N=     {number of elements in array}
    matcher = re.compile(r'\A(?P<title>{})\s+R\s+N=\s+(?P<nele>\d+)\Z'.format(array_name))
    fchk_lines = fchk_text.split('\n')
    start_line = 0
    nline = 0
    found_match = False
    for i, line in enumerate(fchk_lines):
        # see if the line matches the regex
        match = matcher.match(line)
        # if it does, we can start processing
        if match is not None:
            found_match = True
            # The first line with data on it is the one after the line where the regex matches
            start_line = i+1
            # The number of lines is the number of elements /5 (throwing awway remainder) + one more line if the remainder was not zero
            nline = int(match.group('nele'))//5 + (1 * bool(int(match.group('nele'))%5))
            # we can stop searching now
            break
        else:
            # if the regex does not match we keep searching
            continue
     if found_match:
        # we join together the lines were the data is stored.
        # Then Split them so each element in the list is the string rep of one number in the array
        # The we loop over those strings, convert them to floats, and convert that whole list of floats to a numpy array.
        data = np.array([float(x) for x in " ".join(fchk_lines[start_line:start_line+nline]).split()])
        return data
    else:
        return None

def parse_hessian(fchk_text, natom):
    "If the matrix is symmetric only the upper(lower) triangle is stored, the hessian is an example"
    # you will have to google/pour over the gaussian docs to find out what the array is called in the fchk file
    # (NOTE: It can change between versions/revisions)
    hessian_dat = _array_from_fchk(fchk_text, "Cartesian Force Constants") # Works for v09.E_01
    if hessian_dat is None:
        raise Exception("This job should output a hessian, but something went wrong")
    hess = np.zeros((3*natom, 3*natom))
    ut_index = np.triu_indices_from(hess)
    lt_index = np.tril_indices_from(hess)
    hess[ut_index] = hessian_dat
    hess[lt_index] = hessian_dat
    return hess

def parse_gradient(fchk_text):
    """
    This is example of how how I handle output that may not be there.

    If the gradient is in the checkpoint file it is returned.
    If not, I return an empty array
    """
    grad_data = _array_from_fchk(fchk_text, array_name="Cartesian Gradient")
    if grad_data is not None:
        return grad_data
    else:
        return np.array() # return empty array


def example_main():
    "An example of how I have python drive a whole g09 job from input --> output"
    # run gaussain, the env kwarg is required in my experience so that g09 can "see" the envars set by sourcing the $GAUSSIAN_DIR/bsd/profile.sh script
    subprocess.run(['g09', 'input.com','output.log'], env=os.environ)
    # run formchk to generate the fchk file
    subprocess.run(['formchk', '-c', 'checkpoint.chk', 'checkpoint.fchk'], env=os.environ)
    # you would have the know the natom in this example somehow. Usually I write the input file in this function,
    # and know the natom from the data used to generate the geometry section in the input
    fchk_text = Path('checkpoint.fchk').read_text()
    # If the hessian can't be found there is an error
    hessian = parse_hessian(fchk_text, natom)
    # If the gradient can't be found I will have an empty array
    gradient = parse_gradient(fchk_text)

    # Then I can dump both to a numpy "archive file" or json, or whatever I want to do with them
    # NOTE: np.savez will append '.npz' to the file name passed here, so the datat will live in a file called `g09_output_matrix_data.npz`
    np.savez('g09_output_matrix_data', hessian, gradient)
	def _array_from_fchk(fchk_text, array_name):
	# matches a line that looks like:
	# {array_name} R N= {number of elements in array}
	matcher = re.compile(r'\A(?P<title>{})\s+R\s+N=\s+(?P<nele>\d+)\Z'.format(array_name))
	fchk_lines = fchk_text.split('\n')
	start_line = 0
	nline = 0
	found_match = False
	for i, line in enumerate(fchk_lines):
	# see if the line matches the regex
	match = matcher.match(line)
	# if it does, we can start processing
	if match is not None:
	found_match = True
	# The first line with data on it is the one after the line where the regex matches
	start_line = i+1
	# The number of lines is the number of elements /5 (throwing awway remainder) + one more line if the remainder was not zero
	nline = int(match.group('nele'))//5 + (1 * bool(int(match.group('nele'))%5))
	# we can stop searching now
	break
	else:
	# if the regex does not match we keep searching
	continue
	if found_match:
	# we join together the lines were the data is stored.
	# Then Split them so each element in the list is the string rep of one number in the array
	# The we loop over those strings, convert them to floats, and convert that whole list of floats to a numpy array.
	data = np.array([float(x) for x in " ".join(fchk_lines[start_line:start_line+nline]).split()])
	return data
	else:
	return None

	def parse_hessian(fchk_text, natom):
	"If the matrix is symmetric only the upper(lower) triangle is stored, the hessian is an example"
	# you will have to google/pour over the gaussian docs to find out what the array is called in the fchk file
	# (NOTE: It can change between versions/revisions)
	hessian_dat = _array_from_fchk(fchk_text, "Cartesian Force Constants") # Works for v09.E_01
	if hessian_dat is None:
	raise Exception("This job should output a hessian, but something went wrong")
	hess = np.zeros((3natom, 3natom))
	ut_index = np.triu_indices_from(hess)
	lt_index = np.tril_indices_from(hess)
	hess[ut_index] = hessian_dat
	hess[lt_index] = hessian_dat
	return hess

	def parse_gradient(fchk_text):
	"""
	This is example of how how I handle output that may not be there.

	If the gradient is in the checkpoint file it is returned.
	If not, I return an empty array
	"""
	grad_data = _array_from_fchk(fchk_text, array_name="Cartesian Gradient")
	if grad_data is not None:
	return grad_data
	else:
	return np.array() # return empty array


	def example_main():
	"An example of how I have python drive a whole g09 job from input --> output"
	# run gaussain, the env kwarg is required in my experience so that g09 can "see" the envars set by sourcing the $GAUSSIAN_DIR/bsd/profile.sh script
	subprocess.run(['g09', 'input.com','output.log'], env=os.environ)
	# run formchk to generate the fchk file
	subprocess.run(['formchk', '-c', 'checkpoint.chk', 'checkpoint.fchk'], env=os.environ)
	# you would have the know the natom in this example somehow. Usually I write the input file in this function,
	# and know the natom from the data used to generate the geometry section in the input
	fchk_text = Path('checkpoint.fchk').read_text()
	# If the hessian can't be found there is an error
	hessian = parse_hessian(fchk_text, natom)
	# If the gradient can't be found I will have an empty array
	gradient = parse_gradient(fchk_text)

	# Then I can dump both to a numpy "archive file" or json, or whatever I want to do with them
	# NOTE: np.savez will append '.npz' to the file name passed here, so the datat will live in a file called `g09_output_matrix_data.npz`
	np.savez('g09_output_matrix_data', hessian, gradient)