Skip to content

Instantly share code, notes, and snippets.

@louity
Last active April 19, 2022 15:35
Show Gist options
  • Save louity/c7bb42fb577a0827aebed1c984e9098c to your computer and use it in GitHub Desktop.
Save louity/c7bb42fb577a0827aebed1c984e9098c to your computer and use it in GitHub Desktop.
Angular Fourier Series in python
"""Python implementation of the Angular Fourier Series descriptors defined in the paper
'On representing chemical environments', DOI: https://doi.org/10.1103/PhysRevB.87.184115
"""
import argparse
import os
import numpy as np
import scipy
import scipy.spatial as spatial
from mpl_toolkits.mplot3d import axes3d # noqa: f401 unused import
import matplotlib.pyplot as plt
try:
from tqdm import tqdm
except ImportError: tqdm = lambda x: x
def gaussian(x, mu, sig):
return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))
def compute_W_matrix(n_max, reg=0.):
""" W matrix involved in formula (25) section III.D (page 7) concerning
orthonomalization of the radial functions \phi.
W is defined as the square root of the scalar product matrix S(i,j) = <phi_i, phi_j>.
Beacause phi functions are correlated, large n_max may cause negative
eigenvalues that yield nans in sqrt matrix, so we added a reg term on the diagonal.
Note that scipy.linalg.sqrtm can also be used, but similarly, complex value appear for large
n_max.
"""
overlapp_matrix = np.zeros((n_max, n_max))
for alpha in range(1, n_max+1):
for beta in range(1, n_max+1):
overlapp_matrix[alpha-1, beta-1] = np.sqrt((5 + 2*alpha)*(5 + 2*beta)) / (5 + alpha + beta)
if alpha == beta and reg > 0:
overlapp_matrix[alpha-1, beta-1] += reg
eigvals, eigvecs = scipy.linalg.eigh(overlapp_matrix)
if (eigvals < 0).sum():
print('Negative eigenvalues in matrix W: {}'.format(eigvals))
W_matrix = np.dot(np.dot(eigvecs, np.diag(1. / np.sqrt(eigvals))), eigvecs.transpose())
return W_matrix
def periodize_configuration(configuration, r_cut, dimensions):
"""Periodically replicate the atoms in a rectangular box that are distance <= r_cut of the faces.
Parameters
configuration: np.array of shape (n_atoms, 3)
coordinates of the atoms to be periodized
r_cut: float
cutoff radius
dimensions: np.array of shape (3,) (or list length 3)
dimensions of the periodic rectangle
Returns
periodized_configuration: np.array of shape (n_atoms_periodized, 3)
initial_atom_ids: np.array of shape (n_atoms_periodized, )
ids of the periodized atoms in the initial configuration
"""
periodized_configuration = []
initial_atom_ids = []
x_translation = np.array([[dimensions[0], 0, 0]], dtype=configuration.dtype)
y_translation = np.array([[0, dimensions[1], 0]], dtype=configuration.dtype)
z_translation = np.array([[0, 0, dimensions[2]]], dtype=configuration.dtype)
mask_true = np.ones(configuration.shape[0], dtype=bool)
for i_x, mask_x in [(-1., configuration[:, 0] > (dimensions[0] - r_cut)), (0., mask_true), (1., configuration[:, 0] < r_cut)]:
for i_y, mask_y in [(-1., configuration[:, 1] > (dimensions[1] - r_cut)), (0., mask_true), (1., configuration[:, 1] < r_cut)]:
for i_z, mask_z in [(-1., configuration[:, 2] > (dimensions[2] - r_cut)), (0., mask_true), (1., configuration[:, 2] < r_cut)]:
mask = mask_x * mask_y * mask_z
initial_atom_ids.append(np.nonzero(mask)[0])
periodized_configuration.append(configuration[mask] + i_x*x_translation + i_y*y_translation + i_z*z_translation)
periodized_configuration = np.concatenate(periodized_configuration, axis=0)
initial_atom_ids = np.concatenate(initial_atom_ids, axis=0)
return periodized_configuration, initial_atom_ids
def compute_AFS_descriptors(configurations, n_max, l_max, r_cut, dimensions,
radial_function_type='g_functions', reg_eigenvalues=0.,
neighbors_in_r_cut=False, radial_tensor_product=False):
"""Implementation of the formula given in section III.G (page 9).
The indices i and i' in the sum are interpreted as the neighbor indices of a central atom.
If neighbors_in_r_cut=True, we add the constraint that the neighbors i and i' must be at a distance <= r_cut
"""
assert radial_function_type in ['g_function', 'gaussian'], f'invalid radial function type {radial_function_type}'
l_values = np.arange(l_max+1).reshape(1, 1, -1)
if radial_function_type == 'g_function':
W_matrix = compute_W_matrix(n_max, reg=reg_eigenvalues)
alphas = np.arange(1, n_max+1).astype('float64').reshape((1, -1))
exponents = alphas + 2
normalizing_constants = np.sqrt(2*alphas+5) / np.power(r_cut, alphas+2.5)
elif radial_function_type == 'gaussian':
centers = np.linspace(0, r_cut, n_max, endpoint=False).reshape((1, -1))
sigma = 0.5 * centers[0, 1]
if radial_tensor_product:
AFS_descriptors = np.zeros((configurations.shape[0], configurations.shape[1], n_max**2, l_max+1))
else:
AFS_descriptors = np.zeros((configurations.shape[0], configurations.shape[1], n_max, l_max+1))
for i_config in tqdm(range(configurations.shape[0])):
configuration = configurations[i_config]
periodized_configuration, initial_atom_ids = periodize_configuration(configuration, r_cut, dimensions)
point_tree = spatial.cKDTree(periodized_configuration)
for i_atom in range(configuration.shape[0]):
atom = configuration[i_atom:i_atom+1]
neighbors_indices = [n_id for n_id in point_tree.query_ball_point(configuration[i_atom], r_cut) if initial_atom_ids[n_id] != i_atom]
neighbors = periodized_configuration[neighbors_indices]
r_vectors = neighbors - atom
r_norms = np.linalg.norm(r_vectors, axis=1, keepdims=True)
if radial_function_type == 'g_function':
phi_functions = normalizing_constants * (r_cut - r_norms)**exponents
radial_functions = np.dot(phi_functions, W_matrix)
elif radial_function_type == 'gaussian':
radial_functions = gaussian(r_norms, centers, sigma)
r_normalized = r_vectors / r_norms
cos_angles = np.dot(r_normalized, r_normalized.transpose())
# triangular-upper mask corresponding to pairs (i,j) with i<j, i.e. pair of different atoms
n_neighbors = neighbors.shape[0]
triu_mask = np.arange(n_neighbors)[:,None] < np.arange(n_neighbors)
if neighbors_in_r_cut:
neighbors_indices_pdist_matrix = spatial.distance.squareform(spatial.distance.pdist(neighbors))
neighbors_in_r_cut_mask = neighbors_indices_pdist_matrix < r_cut
triu_mask *= neighbors_in_r_cut_mask
# triangular-upper indices correspond to pairs (i,j) with i<j, i.e. pair of different atoms
cos_angles = np.clip(cos_angles, -1, 1)
angles_triu = np.arccos(cos_angles[triu_mask]).reshape(-1, 1, 1)
cos_l_angles_triu = np.cos(l_values * angles_triu)
if radial_tensor_product:
radial_functions_product_triu = np.tensordot(radial_functions.transpose(), radial_functions, axes=0).transpose(1, 2, 0, 3)[triu_mask, :, :].reshape(-1, n_max**2, 1)
else:
radial_functions_product_triu = np.stack([np.dot(radial_functions[:, n:n+1], radial_functions[:, n:n+1].transpose())[triu_mask] for n in range(n_max)], axis=-1)[:, :, np.newaxis]
AFS_descriptors[i_config, i_atom] += (radial_functions_product_triu * cos_l_angles_triu).sum(axis=0)
return AFS_descriptors
def compute_AFS_descriptors_method_2(configurations, n_max, l_max, r_cut, dimensions, radial_function_type='g_functions', reg_eigenvalues=0.):
"""Implementation of the formula given in section III.G (page 9).
The index i in the sum is interpreted as the neighbor indices of a central atom and the index i' is interpreted as the neighbors of i.
"""
assert radial_function_type in ['g_function', 'gaussian'], 'invalid radial function type {}'.format(radial_function_type)
l_values = np.arange(l_max+1)[np.newaxis, np.newaxis, :]
if radial_function_type == 'g_function':
W_matrix = compute_W_matrix(n_max, reg=reg_eigenvalues)
alphas = np.arange(1, n_max+1).astype('float64').reshape((1, -1))
exponents = alphas + 2
normalizing_constants = np.sqrt(2*alphas+5) / np.power(r_cut, alphas+2.5)
elif radial_function_type == 'gaussian':
centers = np.linspace(0, r_cut, n_max, endpoint=False).reshape((1, -1))
sigma = 0.5 * centers[0, 1]
AFS_descriptors = np.zeros((configurations.shape[0], configurations.shape[1], n_max, l_max+1))
for i_config in tqdm(range(configurations.shape[0])):
configuration = configurations[i_config]
periodized_configuration, initial_atom_ids = periodize_configuration(configuration, r_cut, dimensions)
point_tree = spatial.cKDTree(periodized_configuration)
neighbors_indices_periodized = []
neighbors_indices_initial = []
neighbors_radius_vectors = []
neighbors_radial_functions = []
for i_atom in range(configuration.shape[0]):
indices = point_tree.query_ball_point(configuration[i_atom], r_cut)
indices = np.array([ind for ind in indices if initial_atom_ids[ind] != i_atom])
neighbors_indices_periodized.append(indices)
neighbors_indices_initial.append(initial_atom_ids[indices])
neighbors = periodized_configuration[indices]
atom = configuration[i_atom:i_atom+1]
radius_vectors = neighbors - atom
r_norms = np.linalg.norm(radius_vectors, axis=1, keepdims=True)
neighbors_radius_vectors.append(radius_vectors / r_norms)
if radial_function_type == 'g_function':
phi_functions = normalizing_constants * (r_cut - r_norms)**exponents
neighbors_radial_functions.append(np.dot(phi_functions, W_matrix))
elif radial_function_type == 'gaussian':
neighbors_radial_functions.append(gaussian(r_norms, centers, sigma))
for i_atom in range(configuration.shape[0]):
for j_, j_atom in enumerate(neighbors_indices_initial[i_atom]):
r_ij_vector = neighbors_radius_vectors[i_atom][j_][:, np.newaxis]
cos_angles_jk = np.dot(neighbors_radius_vectors[j_atom], r_ij_vector)
cos_angles_jk = np.clip(cos_angles_jk, -1, 1)
angles_jk = np.arccos(cos_angles_jk)[:, :, np.newaxis]
g_r_ij = neighbors_radial_functions[i_atom][j_][:, np.newaxis]
g_r_jk = neighbors_radial_functions[j_atom][:, :, np.newaxis]
cos_l_angles_jk = np.cos(l_values * angles_jk)
AFS_descriptors[i_config, i_atom] += 0.5 * (g_r_ij * (g_r_jk * cos_l_angles_jk).sum(axis=0) - g_r_ij**2)
return AFS_descriptors
def read_poscar_files(path):
poscar_filepathes = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)) and os.path.splitext(f)[1] == '.poscar']
poscar_filepathes.sort()
configurations = []
for file_path in poscar_filepathes:
f = open(os.path.join(path, file_path))
f.readline() # ignore first line " 111 1 Fe 26 0 0 0"
f.readline() # ignore second line " 1.0000000 "
lattice = np.array([[float(x) for x in f.readline().split()] for _ in range(3)])
n_atoms = int(f.readline())
f.readline() # ignore coordinate type line " Cartesian "
atom_positions = np.zeros((n_atoms, 3))
for i_atom in range(n_atoms):
atom_positions[i_atom] = np.array([float(x) for x in f.readline().split()])[:3]
f.close()
configurations.append(atom_positions)
return np.stack(configurations, axis=0), lattice
def plot_config(configuration):
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
x, y, z = configuration[:, 0].copy(), configuration[:, 1].copy(), configuration[:, 2].copy()
ax.scatter(x, y, z, marker='o')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
return None
if __name__ == '__main__':
parser = argparse.ArgumentParser('Compute AFS descriptor')
parser.add_argument('--poscar_paths', nargs='+', help="paths of poscar files", required=True)
parser.add_argument('--r_cut', type=float, help="cutoff radius", required=True)
parser.add_argument('--n_max', type=int, help="n radial functions", required=True)
parser.add_argument('--l_max', type=int, help="n angular functions", required=True)
parser.add_argument('--output_paths', nargs='+', help="path of output np array", default=[])
parser.add_argument('--radial_function_type', choices=['gaussian', 'g_function'], help='radial function type', required=True)
parser.add_argument('--reg_eigenvalues', type=float, default=1e-15, help="reg for eigvalues calculations")
parser.add_argument('--neighbors_in_r_cut', action='store_true', help="constraint ")
parser.add_argument('--radial_tensor_product', action='store_true', help="tensor product of radial")
args = parser.parse_args()
for i, poscar_path in enumerate(args.poscar_paths):
print(f'reading poscar files in {poscar_path}...')
configurations, lattice = read_poscar_files(poscar_path)
dimensions = np.linalg.norm(lattice, axis=1)
print('...done, configurations {} , lattice dimensions {}'.format(configurations.shape, dimensions))
n_max, l_max, r_cut = args.n_max, args.l_max, args.r_cut
AFS_desc = compute_AFS_descriptors(configurations, n_max, l_max, r_cut, dimensions,
args.radial_function_type, args.reg_eigenvalues,
neighbors_in_r_cut=args.neighbors_in_r_cut,
radial_tensor_product=args.radial_tensor_product)
print(f'AFS descriptor of shape {AFS_desc.shape} computed.')
if i < len(args.output_paths):
print(f'saving to file {args.output_paths[i]}')
np.save(args.output_paths[i], AFS_desc.sum(axis=1))
# commande pour lancer le calcul des descripteurs
python AFS.py --poscar_paths data/bulk/ data/DB_92_002/ data/DB_92_003/ data/DB_92_004/ data/DB_92_400/ --output_paths AFS_20_10_5_gfunc_bulk.npy AFS_20_10_5_gfunc_92_002.npy AFS_20_10_5_gfunc_92_003.npy AFS_20_10_5_gfunc_92_004.npy AFS_20_10_5_gfunc_92_400.npy --n_max 20 --l_max 10 --r_cut 5.0 --reg_eigenvalues 1e-15 --radial_function_type g_function
# commande pour lancer la regression lineaire
python linear_regression.py --bulk_desc_file AFS_20_10_5_gfunc_bulk.npy --thermo_files data/92_002_normalized.data data/92_003_normalized.data data/92_004_normalized.data data/92_400_normalized.data --desc_files AFS_20_10_5_gfunc_92_002.npy AFS_20_10_5_gfunc_92_003.npy AFS_20_10_5_gfunc_92_004.npy AFS_20_10_5_gfunc_92_400.npy
import argparse
import numpy as np
from sklearn import linear_model, model_selection
from sklearn.metrics import mean_squared_error, mean_absolute_error
parser = argparse.ArgumentParser('Linear regression with descriptors')
parser.add_argument('--thermo_files', nargs='+', help="paths of thermo files", required=True)
parser.add_argument('--desc_files', nargs='+', help="path of descriptors files", required=True)
parser.add_argument('--bulk_desc_file', help="path of bulk desc files", required=True)
args = parser.parse_args()
assert len(args.thermo_files) == len(args.desc_files), f'number of themo files must be equal to number of desc files'
bulk_descriptor = np.load(args.bulk_desc_file).reshape((1, -1))
X, y = [], []
for i_file, thermo_file in enumerate(args.thermo_files):
thermo_data = np.loadtxt(thermo_file)
sorted_ids = thermo_data[:,0].argsort()
thermo_data = thermo_data[sorted_ids]
config_ids = (thermo_data[:, 0] - 10001).astype('int')
entropies = thermo_data[:, 3]
y.append(entropies)
missing_entropy_ids = [i for i in np.arange(1, config_ids[-1]+1) if i not in config_ids]
print('missing_entropy_ids {}'.format(missing_entropy_ids))
descriptors = np.load(args.desc_files[i_file])
descriptors = descriptors.reshape((descriptors.shape[0], -1))[config_ids] - bulk_descriptor
X.append(descriptors)
X = np.concatenate(X)
y = np.concatenate(y)
print('X shape {}, y shape {} y min {} y max {} '.format(X.shape,y.shape, y.min(), y.max()))
clf = linear_model.BayesianRidge(compute_score=True, fit_intercept=False)
clf.fit(X, y)
y_predict_bayes = clf.predict(X)
train_rmse = np.sqrt(mean_squared_error(y, y_predict_bayes))
train_mae = mean_absolute_error(y, y_predict_bayes)
print(f'BayesianRidge train mae {train_mae:.4f} kB, train rmse {train_rmse:.4f}kB')
n_crossval_folds = 5
y_prediction = model_selection.cross_val_predict(clf, X=X, y=y, cv=n_crossval_folds)
cv_mae = mean_absolute_error(y_prediction, y)
cv_rmse = np.sqrt(mean_squared_error(y_prediction, y))
print(f'BayesianRidge {n_crossval_folds} folds cross val mae {cv_mae:.4f} kB, rmse {cv_rmse:.4f} kB')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment