Skip to content

Instantly share code, notes, and snippets.

@jthorton
Last active May 26, 2022 09:33
Show Gist options
  • Save jthorton/ae7f3945dc35ecb91d3119a167b557f3 to your computer and use it in GitHub Desktop.
Save jthorton/ae7f3945dc35ecb91d3119a167b557f3 to your computer and use it in GitHub Desktop.
Plot the results of a ForceBalance optimize.out file, supports density and hvap.
name: plot_opt_out
channels:
- simonboothroyd
- conda-forge
dependencies:
- black=22.3.0=pyhd8ed1ab_0
- bokeh=2.4.2=py39h6e9494a_0
- boost=1.74.0=py39ha1f3e3e_4
- boost-cpp=1.74.0=hf3dc895_5
- brotli=1.0.9=h0d85af4_6
- brotli-bin=1.0.9=h0d85af4_6
- bzip2=1.0.8=h0d85af4_4
- ca-certificates=2022.5.18.1=h033912b_0
- cairo=1.16.0=he01c77b_1009
- certifi=2022.5.18.1=py39h6e9494a_0
- click=8.0.3=py39h6e9494a_1
- cycler=0.11.0=pyhd8ed1ab_0
- dataclasses=0.8=pyhc8e2a94_3
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=hab24e00_0
- fontconfig=2.13.1=h10f422b_1005
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- fonttools=4.28.5=py39h89e85a6_0
- freetype=2.10.4=h4cff582_1
- gettext=0.19.8.1=hd1a6beb_1008
- greenlet=1.1.2=py39h9fcab8e_1
- icu=69.1=he49afe7_0
- isort=5.10.1=pyhd8ed1ab_0
- jbig=2.1=h0d85af4_2003
- jinja2=3.0.3=pyhd8ed1ab_0
- jpeg=9d=hbcb3906_0
- kiwisolver=1.3.2=py39hf018cea_1
- lcms2=2.12=h577c468_0
- lerc=3.0=he49afe7_0
- libblas=3.9.0=12_osx64_openblas
- libbrotlicommon=1.0.9=h0d85af4_6
- libbrotlidec=1.0.9=h0d85af4_6
- libbrotlienc=1.0.9=h0d85af4_6
- libcblas=3.9.0=12_osx64_openblas
- libcxx=12.0.1=habf9029_1
- libdeflate=1.8=h0d85af4_0
- libffi=3.4.2=h0d85af4_5
- libgfortran=5.0.0=9_3_0_h6c81a4c_23
- libgfortran5=9.3.0=h6c81a4c_23
- libglib=2.70.2=hf1fb8c0_1
- libiconv=1.16=haf1e3a3_0
- liblapack=3.9.0=12_osx64_openblas
- libopenblas=0.3.18=openmp_h3351f45_0
- libpng=1.6.37=h7cec526_2
- libtiff=4.3.0=hd146c10_2
- libwebp-base=1.2.1=h0d85af4_0
- libxml2=2.9.12=h7e28ab6_1
- libzlib=1.2.11=h9173be1_1013
- llvm-openmp=12.0.1=hda6cdc1_1
- lz4-c=1.9.3=he49afe7_1
- markupsafe=2.0.1=py39h89e85a6_1
- matplotlib=3.5.1=py39h6e9494a_0
- matplotlib-base=3.5.1=py39hb07454d_0
- munkres=1.1.4=pyh9f0ad1d_0
- mypy_extensions=0.4.3=py39h6e9494a_5
- ncurses=6.2=h2e338ed_4
- numpy=1.22.0=py39h9d9ce41_0
- olefile=0.46=pyh9f0ad1d_1
- openjpeg=2.4.0=h6e7aa92_1
- openssl=3.0.3=hfe4f2af_0
- packaging=21.3=pyhd8ed1ab_0
- pandas=1.3.5=py39h4d6be9b_0
- pathspec=0.9.0=pyhd8ed1ab_0
- pcre=8.45=he49afe7_0
- pillow=8.4.0=py39he9bb72f_0
- pip=21.3.1=pyhd8ed1ab_0
- pixman=0.40.0=hbcb3906_0
- platformdirs=2.5.1=pyhd8ed1ab_0
- plotmol=0.0.3=pyhcce984b_0
- pycairo=1.20.1=py39hbe14034_1
- pydantic=1.9.0=py39h89e85a6_0
- pyparsing=3.0.6=pyhd8ed1ab_0
- python=3.9.9=h38b4d05_0_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python_abi=3.9=2_cp39
- pytz=2021.3=pyhd8ed1ab_0
- pyyaml=6.0=py39h89e85a6_3
- rdkit=2021.09.4=py39h88273a1_0
- readline=8.1=h05e3726_0
- reportlab=3.5.68=py39hf37cc50_1
- setuptools=60.5.0=py39h6e9494a_0
- six=1.16.0=pyh6c4a22f_0
- sqlalchemy=1.4.29=py39h89e85a6_0
- sqlite=3.37.0=h23a322b_0
- tk=8.6.11=h5dbffcc_1
- tomli=2.0.1=pyhd8ed1ab_0
- tornado=6.1=py39h89e85a6_2
- typed-ast=1.5.4=py39h701faf5_0
- typing-extensions=4.0.1=hd8ed1ab_0
- typing_extensions=4.0.1=pyha770c72_0
- tzdata=2021e=he74cb21_0
- wheel=0.37.1=pyhd8ed1ab_0
- xz=5.2.5=haf1e3a3_1
- yaml=0.2.5=h0d85af4_2
- zlib=1.2.11=h9173be1_1013
- zstd=1.5.1=h582d3a0_0
prefix: /Users/joshua/mambaforge/envs/plot_opt_out
# script to plot the results of a density and hvap qubekit fitting in force balance
import os
import re
from collections import defaultdict
from typing import Dict, List, Tuple
import click
import matplotlib.pyplot as plt
import numpy as np
from bokeh.palettes import Spectral4
from bokeh.plotting import Figure, save
from plotmol.plotmol import scatter
from pydantic import BaseModel, Field
custom_tooltip_template = """
<div>
<div>
<span>@title</span>
<span><br>reference:@ref<br>calculated:@prediction</span>
<img src="@image" ></img>
</div>
</div>
"""
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
class Prediction(BaseModel):
"""
A class to hold the individual predictions of a property.
"""
prediction: float = Field(..., description="The value for this prediction.")
std: float = Field(..., description="The std for this prediction.")
class Property(BaseModel):
"""
A general class to hold predictions.
"""
id: str = Field(..., description="The id for the property prediction.")
smiles: str = Field(
..., description="The smiles for the target molecule used in ploting."
)
property_name: str = Field(..., description="The property to be predicted.")
reference: float = Field(
..., description="The reference value we are trying to describe."
)
predictions: List[Prediction] = Field(
[], description="The list of predictions for this property."
)
def parse_block(lines: List[str]) -> Tuple[str, Dict[str, List[float]]]:
"""
Parse the block of lines into a dictionary of results.
Args:
lines: The list of lines which should be parsed.
Returns:
A dictionary of the properties extracted from the results.
"""
data = dict()
prop_name = lines[0].strip()
# ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
prop_name = ansi_escape.sub("", prop_name)
prop_name = prop_name.split()[1:-1]
prop_name = " ".join(prop_name)
for line in lines[3:]:
ls = line.split()
# data["Temperature K"].append(float(ls[0]))
# data["Pressure"].append(float(ls[1]))
# data["Pressure Unit"].append(ls[2])
data["reference"] = float(ls[3])
data["prediction"] = float(ls[4])
data["std"] = float(ls[6])
return prop_name, data
def parse_targets(optimise_lines) -> List[str]:
"""
Parse the names of the targets for the FB fitting, we can use this to keep track of how many results we should have per iteration.
"""
targets = []
for line in optimise_lines:
if "Setup for target " in line:
target = line.split()[5]
target = target.split("_")[0]
targets.append(target)
return targets
def parse_smiles(smiles_file: str) -> Dict[str, str]:
"""
Parse a smiles file for molecules and labels these must match the labels in the optimise out file.
"""
smiles_and_labels = dict()
with open(smiles_file) as smiles:
for line in smiles.readlines():
data = line.split()
if data:
smiles_and_labels[data[1]] = data[0]
return smiles_and_labels
def plot_property(
property_name: str, iteration: int, plot_name: str, data: Dict[str, Property]
):
"""
Plot the requested property.
"""
figure = Figure(
tooltips=custom_tooltip_template,
title=property_name,
x_axis_label="Calculated",
y_axis_label="Reference",
plot_width=500,
plot_height=500,
)
calculated = []
reference = []
smiles = []
titles = []
for target_name, predictions in data.items():
titles.append(target_name)
reference.append(predictions[property_name].reference)
smiles.append(predictions[property_name].smiles)
calculated.append(predictions[property_name].predictions[iteration].prediction)
scatter(
figure=figure,
x=calculated,
y=reference,
smiles=smiles,
legend_label="x",
marker_size=15,
custom_column_data={
"title": titles,
"prediction": [str(c) for c in calculated],
},
)
figure.legend.location = "top_left"
figure.legend.click_policy = "hide"
figure.line([min(calculated), max(calculated)], [min(calculated), max(calculated)])
save(figure, title=plot_name, filename=plot_name + ".html")
def plot_initial_vs_final_property(
property_name: str, plot_name: str, data: Dict[str, Property]
):
"""
Plot the intial and final values on the same plot.
"""
palette = Spectral4
figure = Figure(
tooltips=custom_tooltip_template,
title=property_name,
x_axis_label="Calculated",
y_axis_label="Reference",
plot_width=500,
plot_height=500,
)
plot_labels = [(0, "Initial force field"), (-1, "Optimised force field")]
for i, (iteration, label) in enumerate(plot_labels):
calculated = []
reference = []
smiles = []
titles = []
for target_name, predictions in data.items():
titles.append(target_name)
reference.append(predictions[property_name].reference)
smiles.append(predictions[property_name].smiles)
calculated.append(
predictions[property_name].predictions[iteration].prediction
)
scatter(
figure=figure,
x=calculated,
y=reference,
smiles=smiles,
legend_label=label,
marker_size=15,
marker_color=palette[i],
custom_column_data={
"title": titles,
"prediction": calculated,
"ref": reference,
},
)
figure.legend.location = "top_left"
figure.legend.click_policy = "hide"
figure.line(
[min(calculated), max(calculated)],
[min(calculated), max(calculated)],
line_dash="dashed",
line_color="black",
legend_label="x=y",
)
save(figure, title=plot_name, filename=plot_name + ".html")
def plot_objective(objective_func: List[float], plot_name: str):
"""
simple plot of the objective function.
"""
iterations = [i for i in range(len(objective_func))]
plt.plot(iterations, objective_func, marker="o")
plt.xlabel("Iteration")
plt.ylabel("Objective function")
plt.savefig(plot_name)
plt.close()
def plot_rfree(rfree_dict: Dict[str, List[float]], plot_name: str):
"""
Make a simple plot to show the progression of the Rfree values during the optimisation.
"""
element_to_colour = {
"CElement/cfree": "black",
"NElement/nfree": "royalblue",
"OElement/ofree": "tomato",
"HElement/hfree": "silver",
"XElement/xfree": "darkorange",
"xalpha/alpha": "brown",
"xbeta/beta": "olivedrab",
"FElement/ffree": "deepskyblue",
"SElement/sfree": "gold",
"BrElement/brfree": "firebrick",
"ClElement/clfree": "lime",
"IElement/ifree": "darkviolet",
"BElement/bfree": "hotpink",
"SiElement/sifree": "lightgray",
}
iterations = [i for i in range(len(list(rfree_dict.values())[0]))]
for param_name, values in rfree_dict.items():
plt.plot(
iterations,
values,
marker="o",
color=element_to_colour[param_name],
label=param_name,
)
plt.xlabel("Iteration")
plt.ylabel("Rfree A")
plt.legend()
plt.savefig(plot_name)
plt.close()
def parse_rfree(rfree_lines: List[str]) -> Dict[str, float]:
"""
Parse the rfree parameters from a forcebalance parameter block and return the values.
"""
parameters = {}
for line in rfree_lines:
if "----------------------------------------------------------" in line:
return parameters
else:
parameter = float(line.split()[2])
p_name = line.split()[-1]
parameters[p_name] = parameter
def MUE(experiment_values: List[float], prediction_values: List[float]) -> float:
"""Calculate the MUE for a set of experimental values and corresponding predictions."""
return np.mean(np.absolute(experiment_values - prediction_values))
def compute_bootstraped_statistics(
properties: List[Property],
optimisation_iteration: int,
bootstrap_iterations: int = 1000,
interval: float = 0.95,
) -> Dict[str, float]:
"""
Compute bootstraped statistics for the optimisation.
"""
# make a list of experimental and predicted values
experimental_values = np.array([property.reference for property in properties])
prediction_values = np.array(
[
property.predictions[optimisation_iteration].prediction
for property in properties
]
)
mue = MUE(
experiment_values=experimental_values, prediction_values=prediction_values
)
samples = len(experimental_values)
sample_statistics = []
for _ in range(bootstrap_iterations):
sample_indicies = np.random.randint(low=0, high=samples, size=samples)
sample_experiment_values = experimental_values[sample_indicies]
sample_prediction_values = prediction_values[sample_indicies]
sample_statistics.append(
MUE(
experiment_values=sample_experiment_values,
prediction_values=sample_prediction_values,
)
)
# Compute the confidence intervals.
lower_percentile_index = int(bootstrap_iterations * (1 - interval) / 2)
upper_percentile_index = int(bootstrap_iterations * (1 + interval) / 2)
sorted_samples = np.sort(sample_statistics)
return {
"lower": sorted_samples[lower_percentile_index],
"higher": sorted_samples[upper_percentile_index],
"mue": mue,
}
@click.command()
@click.argument("optimise_file", type=click.Path(exists=True))
@click.argument("model_name", type=click.STRING)
@click.argument(
"smiles_file", type=click.Path(exists=True, file_okay=True, dir_okay=False)
)
def parse_optimise_out(
optimise_file: str, model_name: str, smiles_file: str
) -> Dict[str, Dict[str, List[float]]]:
"""
Parse the optimise outfile for all relavent data to the optimisation.
Returns:
A dict of results by molecule target name. Also includes the values of the optimised parameters per iteration under parameters.
"""
available_properties = ["Density (kg m^-3)", "Enthalpy of Vaporization (kJ mol^-1)"]
all_results = {}
molecules_and_labels = parse_smiles(smiles_file=smiles_file)
# the place to write the data to
os.makedirs(model_name, exist_ok=True)
with open(optimise_file) as f:
lines = f.readlines()
targets = parse_targets(optimise_lines=lines)
all_results = dict((target, dict()) for target in targets)
objective_func = []
rfree_opt_params = defaultdict(list)
for name in molecules_and_labels.keys():
assert name in all_results
start_line = 0
reading = False
iteration = -1
for i, line in enumerate(lines):
# sometimes the object rises and so it is re-evaluated at the last point.
if "Iteration" in line or "Objective function rises!" in line:
iteration += 1
# reset
print("reading data for iteration", line.split()[3])
# os.makedirs(f"iteration_{iteration}", exist_ok=True)
for prop in available_properties:
if prop in line:
start_line = i
reading = True
if (
"--------------------------------------------------------------------------------------------"
in line
and reading
):
property_name, data = parse_block(lines[start_line:i])
# handle the output and save
target_string = property_name.split()
target_name = target_string[0]
target_name = target_name.split("_")[0]
property_string = " ".join(target_string[1:])
if property_string not in all_results[target_name]:
property_data = Property(
id=target_name,
smiles=molecules_and_labels[target_name],
property_name=property_string,
reference=data["reference"],
predictions=[
Prediction(prediction=data["prediction"], std=data["std"])
],
)
all_results[target_name][property_string] = property_data
else:
all_results[target_name][property_string].predictions.append(
Prediction(prediction=data["prediction"], std=data["std"])
)
elif (
"Step |k| |dk| |grad| -=X2=- Delta(X2) StepQual"
in line
):
objective = ansi_escape.sub("", lines[i + 1])
objective = float(objective.split()[4])
objective_func.append(objective)
elif "Physical Parameters (Current + Step = Next)" in line:
rfree_data = parse_rfree(lines[i + 2 :])
for key, value in rfree_data.items():
rfree_opt_params[key].append(value)
# now we have all the data plot the initial values
plot_initial_vs_final_property(
property_name="Density (kg m^-3)",
plot_name=os.path.join(model_name, "density"),
data=all_results,
)
plot_initial_vs_final_property(
property_name="Enthalpy of Vaporization (kJ mol^-1)",
plot_name=os.path.join(model_name, "hvap"),
data=all_results,
)
plot_objective(
objective_func, os.path.join(model_name, "objective_function.pdf")
)
plot_rfree(rfree_opt_params, os.path.join(model_name, "rfree.pdf"))
density_predictions = [
value["Density (kg m^-3)"] for value in all_results.values()
]
hvap_predictions = [
value["Enthalpy of Vaporization (kJ mol^-1)"]
for value in all_results.values()
]
density_data_initial = compute_bootstraped_statistics(
properties=density_predictions, optimisation_iteration=0
)
density_data_final = compute_bootstraped_statistics(
properties=density_predictions, optimisation_iteration=-1
)
hvap_data_initial = compute_bootstraped_statistics(
properties=hvap_predictions, optimisation_iteration=0
)
hvap_data_final = compute_bootstraped_statistics(
properties=hvap_predictions, optimisation_iteration=-1
)
with open(os.path.join(model_name, "results.txt"), "w") as output:
output.write("Results\n")
output.write(
f"Initial Density MUE: {density_data_initial['mue']} (kg m^-3) upper: {density_data_initial['higher']} lower: {density_data_initial['lower']}\n"
)
output.write(
f"Final Density MUE: {density_data_final['mue']} (kg m^-3) upper: {density_data_final['higher']} lower: {density_data_final['lower']}\n"
)
output.write(
f"Initial HVAP MUE: {hvap_data_initial['mue']} (kJ mol^-1) upper: {hvap_data_initial['higher']} lower: {hvap_data_initial['lower']}\n"
)
output.write(
f"Final HVAP MUE: {hvap_data_final['mue']} (kJ mol^-1) upper: {hvap_data_final['higher']} lower: {hvap_data_final['lower']}\n"
)
if __name__ == "__main__":
parse_optimise_out()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment