Last active
May 26, 2022 09:33
-
-
Save jthorton/ae7f3945dc35ecb91d3119a167b557f3 to your computer and use it in GitHub Desktop.
Plot the results of a ForceBalance optimize.out file, supports density and hvap.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: plot_opt_out | |
channels: | |
- simonboothroyd | |
- conda-forge | |
dependencies: | |
- black=22.3.0=pyhd8ed1ab_0 | |
- bokeh=2.4.2=py39h6e9494a_0 | |
- boost=1.74.0=py39ha1f3e3e_4 | |
- boost-cpp=1.74.0=hf3dc895_5 | |
- brotli=1.0.9=h0d85af4_6 | |
- brotli-bin=1.0.9=h0d85af4_6 | |
- bzip2=1.0.8=h0d85af4_4 | |
- ca-certificates=2022.5.18.1=h033912b_0 | |
- cairo=1.16.0=he01c77b_1009 | |
- certifi=2022.5.18.1=py39h6e9494a_0 | |
- click=8.0.3=py39h6e9494a_1 | |
- cycler=0.11.0=pyhd8ed1ab_0 | |
- dataclasses=0.8=pyhc8e2a94_3 | |
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0 | |
- font-ttf-inconsolata=3.000=h77eed37_0 | |
- font-ttf-source-code-pro=2.038=h77eed37_0 | |
- font-ttf-ubuntu=0.83=hab24e00_0 | |
- fontconfig=2.13.1=h10f422b_1005 | |
- fonts-conda-ecosystem=1=0 | |
- fonts-conda-forge=1=0 | |
- fonttools=4.28.5=py39h89e85a6_0 | |
- freetype=2.10.4=h4cff582_1 | |
- gettext=0.19.8.1=hd1a6beb_1008 | |
- greenlet=1.1.2=py39h9fcab8e_1 | |
- icu=69.1=he49afe7_0 | |
- isort=5.10.1=pyhd8ed1ab_0 | |
- jbig=2.1=h0d85af4_2003 | |
- jinja2=3.0.3=pyhd8ed1ab_0 | |
- jpeg=9d=hbcb3906_0 | |
- kiwisolver=1.3.2=py39hf018cea_1 | |
- lcms2=2.12=h577c468_0 | |
- lerc=3.0=he49afe7_0 | |
- libblas=3.9.0=12_osx64_openblas | |
- libbrotlicommon=1.0.9=h0d85af4_6 | |
- libbrotlidec=1.0.9=h0d85af4_6 | |
- libbrotlienc=1.0.9=h0d85af4_6 | |
- libcblas=3.9.0=12_osx64_openblas | |
- libcxx=12.0.1=habf9029_1 | |
- libdeflate=1.8=h0d85af4_0 | |
- libffi=3.4.2=h0d85af4_5 | |
- libgfortran=5.0.0=9_3_0_h6c81a4c_23 | |
- libgfortran5=9.3.0=h6c81a4c_23 | |
- libglib=2.70.2=hf1fb8c0_1 | |
- libiconv=1.16=haf1e3a3_0 | |
- liblapack=3.9.0=12_osx64_openblas | |
- libopenblas=0.3.18=openmp_h3351f45_0 | |
- libpng=1.6.37=h7cec526_2 | |
- libtiff=4.3.0=hd146c10_2 | |
- libwebp-base=1.2.1=h0d85af4_0 | |
- libxml2=2.9.12=h7e28ab6_1 | |
- libzlib=1.2.11=h9173be1_1013 | |
- llvm-openmp=12.0.1=hda6cdc1_1 | |
- lz4-c=1.9.3=he49afe7_1 | |
- markupsafe=2.0.1=py39h89e85a6_1 | |
- matplotlib=3.5.1=py39h6e9494a_0 | |
- matplotlib-base=3.5.1=py39hb07454d_0 | |
- munkres=1.1.4=pyh9f0ad1d_0 | |
- mypy_extensions=0.4.3=py39h6e9494a_5 | |
- ncurses=6.2=h2e338ed_4 | |
- numpy=1.22.0=py39h9d9ce41_0 | |
- olefile=0.46=pyh9f0ad1d_1 | |
- openjpeg=2.4.0=h6e7aa92_1 | |
- openssl=3.0.3=hfe4f2af_0 | |
- packaging=21.3=pyhd8ed1ab_0 | |
- pandas=1.3.5=py39h4d6be9b_0 | |
- pathspec=0.9.0=pyhd8ed1ab_0 | |
- pcre=8.45=he49afe7_0 | |
- pillow=8.4.0=py39he9bb72f_0 | |
- pip=21.3.1=pyhd8ed1ab_0 | |
- pixman=0.40.0=hbcb3906_0 | |
- platformdirs=2.5.1=pyhd8ed1ab_0 | |
- plotmol=0.0.3=pyhcce984b_0 | |
- pycairo=1.20.1=py39hbe14034_1 | |
- pydantic=1.9.0=py39h89e85a6_0 | |
- pyparsing=3.0.6=pyhd8ed1ab_0 | |
- python=3.9.9=h38b4d05_0_cpython | |
- python-dateutil=2.8.2=pyhd8ed1ab_0 | |
- python_abi=3.9=2_cp39 | |
- pytz=2021.3=pyhd8ed1ab_0 | |
- pyyaml=6.0=py39h89e85a6_3 | |
- rdkit=2021.09.4=py39h88273a1_0 | |
- readline=8.1=h05e3726_0 | |
- reportlab=3.5.68=py39hf37cc50_1 | |
- setuptools=60.5.0=py39h6e9494a_0 | |
- six=1.16.0=pyh6c4a22f_0 | |
- sqlalchemy=1.4.29=py39h89e85a6_0 | |
- sqlite=3.37.0=h23a322b_0 | |
- tk=8.6.11=h5dbffcc_1 | |
- tomli=2.0.1=pyhd8ed1ab_0 | |
- tornado=6.1=py39h89e85a6_2 | |
- typed-ast=1.5.4=py39h701faf5_0 | |
- typing-extensions=4.0.1=hd8ed1ab_0 | |
- typing_extensions=4.0.1=pyha770c72_0 | |
- tzdata=2021e=he74cb21_0 | |
- wheel=0.37.1=pyhd8ed1ab_0 | |
- xz=5.2.5=haf1e3a3_1 | |
- yaml=0.2.5=h0d85af4_2 | |
- zlib=1.2.11=h9173be1_1013 | |
- zstd=1.5.1=h582d3a0_0 | |
prefix: /Users/joshua/mambaforge/envs/plot_opt_out |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# script to plot the results of a density and hvap qubekit fitting in force balance | |
import os | |
import re | |
from collections import defaultdict | |
from typing import Dict, List, Tuple | |
import click | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from bokeh.palettes import Spectral4 | |
from bokeh.plotting import Figure, save | |
from plotmol.plotmol import scatter | |
from pydantic import BaseModel, Field | |
custom_tooltip_template = """ | |
<div> | |
<div> | |
<span>@title</span> | |
<span><br>reference:@ref<br>calculated:@prediction</span> | |
<img src="@image" ></img> | |
</div> | |
</div> | |
""" | |
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") | |
class Prediction(BaseModel): | |
""" | |
A class to hold the individual predictions of a property. | |
""" | |
prediction: float = Field(..., description="The value for this prediction.") | |
std: float = Field(..., description="The std for this prediction.") | |
class Property(BaseModel): | |
""" | |
A general class to hold predictions. | |
""" | |
id: str = Field(..., description="The id for the property prediction.") | |
smiles: str = Field( | |
..., description="The smiles for the target molecule used in ploting." | |
) | |
property_name: str = Field(..., description="The property to be predicted.") | |
reference: float = Field( | |
..., description="The reference value we are trying to describe." | |
) | |
predictions: List[Prediction] = Field( | |
[], description="The list of predictions for this property." | |
) | |
def parse_block(lines: List[str]) -> Tuple[str, Dict[str, List[float]]]: | |
""" | |
Parse the block of lines into a dictionary of results. | |
Args: | |
lines: The list of lines which should be parsed. | |
Returns: | |
A dictionary of the properties extracted from the results. | |
""" | |
data = dict() | |
prop_name = lines[0].strip() | |
# ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') | |
prop_name = ansi_escape.sub("", prop_name) | |
prop_name = prop_name.split()[1:-1] | |
prop_name = " ".join(prop_name) | |
for line in lines[3:]: | |
ls = line.split() | |
# data["Temperature K"].append(float(ls[0])) | |
# data["Pressure"].append(float(ls[1])) | |
# data["Pressure Unit"].append(ls[2]) | |
data["reference"] = float(ls[3]) | |
data["prediction"] = float(ls[4]) | |
data["std"] = float(ls[6]) | |
return prop_name, data | |
def parse_targets(optimise_lines) -> List[str]: | |
""" | |
Parse the names of the targets for the FB fitting, we can use this to keep track of how many results we should have per iteration. | |
""" | |
targets = [] | |
for line in optimise_lines: | |
if "Setup for target " in line: | |
target = line.split()[5] | |
target = target.split("_")[0] | |
targets.append(target) | |
return targets | |
def parse_smiles(smiles_file: str) -> Dict[str, str]: | |
""" | |
Parse a smiles file for molecules and labels these must match the labels in the optimise out file. | |
""" | |
smiles_and_labels = dict() | |
with open(smiles_file) as smiles: | |
for line in smiles.readlines(): | |
data = line.split() | |
if data: | |
smiles_and_labels[data[1]] = data[0] | |
return smiles_and_labels | |
def plot_property( | |
property_name: str, iteration: int, plot_name: str, data: Dict[str, Property] | |
): | |
""" | |
Plot the requested property. | |
""" | |
figure = Figure( | |
tooltips=custom_tooltip_template, | |
title=property_name, | |
x_axis_label="Calculated", | |
y_axis_label="Reference", | |
plot_width=500, | |
plot_height=500, | |
) | |
calculated = [] | |
reference = [] | |
smiles = [] | |
titles = [] | |
for target_name, predictions in data.items(): | |
titles.append(target_name) | |
reference.append(predictions[property_name].reference) | |
smiles.append(predictions[property_name].smiles) | |
calculated.append(predictions[property_name].predictions[iteration].prediction) | |
scatter( | |
figure=figure, | |
x=calculated, | |
y=reference, | |
smiles=smiles, | |
legend_label="x", | |
marker_size=15, | |
custom_column_data={ | |
"title": titles, | |
"prediction": [str(c) for c in calculated], | |
}, | |
) | |
figure.legend.location = "top_left" | |
figure.legend.click_policy = "hide" | |
figure.line([min(calculated), max(calculated)], [min(calculated), max(calculated)]) | |
save(figure, title=plot_name, filename=plot_name + ".html") | |
def plot_initial_vs_final_property( | |
property_name: str, plot_name: str, data: Dict[str, Property] | |
): | |
""" | |
Plot the intial and final values on the same plot. | |
""" | |
palette = Spectral4 | |
figure = Figure( | |
tooltips=custom_tooltip_template, | |
title=property_name, | |
x_axis_label="Calculated", | |
y_axis_label="Reference", | |
plot_width=500, | |
plot_height=500, | |
) | |
plot_labels = [(0, "Initial force field"), (-1, "Optimised force field")] | |
for i, (iteration, label) in enumerate(plot_labels): | |
calculated = [] | |
reference = [] | |
smiles = [] | |
titles = [] | |
for target_name, predictions in data.items(): | |
titles.append(target_name) | |
reference.append(predictions[property_name].reference) | |
smiles.append(predictions[property_name].smiles) | |
calculated.append( | |
predictions[property_name].predictions[iteration].prediction | |
) | |
scatter( | |
figure=figure, | |
x=calculated, | |
y=reference, | |
smiles=smiles, | |
legend_label=label, | |
marker_size=15, | |
marker_color=palette[i], | |
custom_column_data={ | |
"title": titles, | |
"prediction": calculated, | |
"ref": reference, | |
}, | |
) | |
figure.legend.location = "top_left" | |
figure.legend.click_policy = "hide" | |
figure.line( | |
[min(calculated), max(calculated)], | |
[min(calculated), max(calculated)], | |
line_dash="dashed", | |
line_color="black", | |
legend_label="x=y", | |
) | |
save(figure, title=plot_name, filename=plot_name + ".html") | |
def plot_objective(objective_func: List[float], plot_name: str): | |
""" | |
simple plot of the objective function. | |
""" | |
iterations = [i for i in range(len(objective_func))] | |
plt.plot(iterations, objective_func, marker="o") | |
plt.xlabel("Iteration") | |
plt.ylabel("Objective function") | |
plt.savefig(plot_name) | |
plt.close() | |
def plot_rfree(rfree_dict: Dict[str, List[float]], plot_name: str): | |
""" | |
Make a simple plot to show the progression of the Rfree values during the optimisation. | |
""" | |
element_to_colour = { | |
"CElement/cfree": "black", | |
"NElement/nfree": "royalblue", | |
"OElement/ofree": "tomato", | |
"HElement/hfree": "silver", | |
"XElement/xfree": "darkorange", | |
"xalpha/alpha": "brown", | |
"xbeta/beta": "olivedrab", | |
"FElement/ffree": "deepskyblue", | |
"SElement/sfree": "gold", | |
"BrElement/brfree": "firebrick", | |
"ClElement/clfree": "lime", | |
"IElement/ifree": "darkviolet", | |
"BElement/bfree": "hotpink", | |
"SiElement/sifree": "lightgray", | |
} | |
iterations = [i for i in range(len(list(rfree_dict.values())[0]))] | |
for param_name, values in rfree_dict.items(): | |
plt.plot( | |
iterations, | |
values, | |
marker="o", | |
color=element_to_colour[param_name], | |
label=param_name, | |
) | |
plt.xlabel("Iteration") | |
plt.ylabel("Rfree A") | |
plt.legend() | |
plt.savefig(plot_name) | |
plt.close() | |
def parse_rfree(rfree_lines: List[str]) -> Dict[str, float]: | |
""" | |
Parse the rfree parameters from a forcebalance parameter block and return the values. | |
""" | |
parameters = {} | |
for line in rfree_lines: | |
if "----------------------------------------------------------" in line: | |
return parameters | |
else: | |
parameter = float(line.split()[2]) | |
p_name = line.split()[-1] | |
parameters[p_name] = parameter | |
def MUE(experiment_values: List[float], prediction_values: List[float]) -> float: | |
"""Calculate the MUE for a set of experimental values and corresponding predictions.""" | |
return np.mean(np.absolute(experiment_values - prediction_values)) | |
def compute_bootstraped_statistics( | |
properties: List[Property], | |
optimisation_iteration: int, | |
bootstrap_iterations: int = 1000, | |
interval: float = 0.95, | |
) -> Dict[str, float]: | |
""" | |
Compute bootstraped statistics for the optimisation. | |
""" | |
# make a list of experimental and predicted values | |
experimental_values = np.array([property.reference for property in properties]) | |
prediction_values = np.array( | |
[ | |
property.predictions[optimisation_iteration].prediction | |
for property in properties | |
] | |
) | |
mue = MUE( | |
experiment_values=experimental_values, prediction_values=prediction_values | |
) | |
samples = len(experimental_values) | |
sample_statistics = [] | |
for _ in range(bootstrap_iterations): | |
sample_indicies = np.random.randint(low=0, high=samples, size=samples) | |
sample_experiment_values = experimental_values[sample_indicies] | |
sample_prediction_values = prediction_values[sample_indicies] | |
sample_statistics.append( | |
MUE( | |
experiment_values=sample_experiment_values, | |
prediction_values=sample_prediction_values, | |
) | |
) | |
# Compute the confidence intervals. | |
lower_percentile_index = int(bootstrap_iterations * (1 - interval) / 2) | |
upper_percentile_index = int(bootstrap_iterations * (1 + interval) / 2) | |
sorted_samples = np.sort(sample_statistics) | |
return { | |
"lower": sorted_samples[lower_percentile_index], | |
"higher": sorted_samples[upper_percentile_index], | |
"mue": mue, | |
} | |
@click.command() | |
@click.argument("optimise_file", type=click.Path(exists=True)) | |
@click.argument("model_name", type=click.STRING) | |
@click.argument( | |
"smiles_file", type=click.Path(exists=True, file_okay=True, dir_okay=False) | |
) | |
def parse_optimise_out( | |
optimise_file: str, model_name: str, smiles_file: str | |
) -> Dict[str, Dict[str, List[float]]]: | |
""" | |
Parse the optimise outfile for all relavent data to the optimisation. | |
Returns: | |
A dict of results by molecule target name. Also includes the values of the optimised parameters per iteration under parameters. | |
""" | |
available_properties = ["Density (kg m^-3)", "Enthalpy of Vaporization (kJ mol^-1)"] | |
all_results = {} | |
molecules_and_labels = parse_smiles(smiles_file=smiles_file) | |
# the place to write the data to | |
os.makedirs(model_name, exist_ok=True) | |
with open(optimise_file) as f: | |
lines = f.readlines() | |
targets = parse_targets(optimise_lines=lines) | |
all_results = dict((target, dict()) for target in targets) | |
objective_func = [] | |
rfree_opt_params = defaultdict(list) | |
for name in molecules_and_labels.keys(): | |
assert name in all_results | |
start_line = 0 | |
reading = False | |
iteration = -1 | |
for i, line in enumerate(lines): | |
# sometimes the object rises and so it is re-evaluated at the last point. | |
if "Iteration" in line or "Objective function rises!" in line: | |
iteration += 1 | |
# reset | |
print("reading data for iteration", line.split()[3]) | |
# os.makedirs(f"iteration_{iteration}", exist_ok=True) | |
for prop in available_properties: | |
if prop in line: | |
start_line = i | |
reading = True | |
if ( | |
"--------------------------------------------------------------------------------------------" | |
in line | |
and reading | |
): | |
property_name, data = parse_block(lines[start_line:i]) | |
# handle the output and save | |
target_string = property_name.split() | |
target_name = target_string[0] | |
target_name = target_name.split("_")[0] | |
property_string = " ".join(target_string[1:]) | |
if property_string not in all_results[target_name]: | |
property_data = Property( | |
id=target_name, | |
smiles=molecules_and_labels[target_name], | |
property_name=property_string, | |
reference=data["reference"], | |
predictions=[ | |
Prediction(prediction=data["prediction"], std=data["std"]) | |
], | |
) | |
all_results[target_name][property_string] = property_data | |
else: | |
all_results[target_name][property_string].predictions.append( | |
Prediction(prediction=data["prediction"], std=data["std"]) | |
) | |
elif ( | |
"Step |k| |dk| |grad| -=X2=- Delta(X2) StepQual" | |
in line | |
): | |
objective = ansi_escape.sub("", lines[i + 1]) | |
objective = float(objective.split()[4]) | |
objective_func.append(objective) | |
elif "Physical Parameters (Current + Step = Next)" in line: | |
rfree_data = parse_rfree(lines[i + 2 :]) | |
for key, value in rfree_data.items(): | |
rfree_opt_params[key].append(value) | |
# now we have all the data plot the initial values | |
plot_initial_vs_final_property( | |
property_name="Density (kg m^-3)", | |
plot_name=os.path.join(model_name, "density"), | |
data=all_results, | |
) | |
plot_initial_vs_final_property( | |
property_name="Enthalpy of Vaporization (kJ mol^-1)", | |
plot_name=os.path.join(model_name, "hvap"), | |
data=all_results, | |
) | |
plot_objective( | |
objective_func, os.path.join(model_name, "objective_function.pdf") | |
) | |
plot_rfree(rfree_opt_params, os.path.join(model_name, "rfree.pdf")) | |
density_predictions = [ | |
value["Density (kg m^-3)"] for value in all_results.values() | |
] | |
hvap_predictions = [ | |
value["Enthalpy of Vaporization (kJ mol^-1)"] | |
for value in all_results.values() | |
] | |
density_data_initial = compute_bootstraped_statistics( | |
properties=density_predictions, optimisation_iteration=0 | |
) | |
density_data_final = compute_bootstraped_statistics( | |
properties=density_predictions, optimisation_iteration=-1 | |
) | |
hvap_data_initial = compute_bootstraped_statistics( | |
properties=hvap_predictions, optimisation_iteration=0 | |
) | |
hvap_data_final = compute_bootstraped_statistics( | |
properties=hvap_predictions, optimisation_iteration=-1 | |
) | |
with open(os.path.join(model_name, "results.txt"), "w") as output: | |
output.write("Results\n") | |
output.write( | |
f"Initial Density MUE: {density_data_initial['mue']} (kg m^-3) upper: {density_data_initial['higher']} lower: {density_data_initial['lower']}\n" | |
) | |
output.write( | |
f"Final Density MUE: {density_data_final['mue']} (kg m^-3) upper: {density_data_final['higher']} lower: {density_data_final['lower']}\n" | |
) | |
output.write( | |
f"Initial HVAP MUE: {hvap_data_initial['mue']} (kJ mol^-1) upper: {hvap_data_initial['higher']} lower: {hvap_data_initial['lower']}\n" | |
) | |
output.write( | |
f"Final HVAP MUE: {hvap_data_final['mue']} (kJ mol^-1) upper: {hvap_data_final['higher']} lower: {hvap_data_final['lower']}\n" | |
) | |
if __name__ == "__main__": | |
parse_optimise_out() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment