Skip to content

Instantly share code, notes, and snippets.

@jonas-eschle
Created March 31, 2021 23:03
Show Gist options
  • Save jonas-eschle/f6db265941f8444774dbed745503d019 to your computer and use it in GitHub Desktop.
Demo of tensorwaves using zfit for inference
#!/usr/bin/env python
import expertsystem as es
import graphviz
import matplotlib.pyplot as plt
import pandas as pd
import zfit # import here to suppress TF warnings :)
from expertsystem.amplitude.dynamics.builder import (
create_relativistic_breit_wigner_with_ff,
)
from tensorwaves.data import generate_data, generate_phsp
from tensorwaves.data.transform import HelicityTransformer
from tensorwaves.estimator import UnbinnedNLL
from tensorwaves.model import LambdifiedFunction, SympyModel
from tensorwaves.optimizer.callbacks import CSVSummary
from tensorwaves.optimizer.minuit import Minuit2
# Build all allowed decay transitions J/psi(1S) -> gamma pi0 pi0 with f(0)
# resonances as intermediate states, in the canonical-helicity formalism.
result = es.generate_transitions(
    initial_state=("J/psi(1S)", [-1, +1]),  # both J/psi spin projections
    final_state=["gamma", "pi0", "pi0"],
    allowed_intermediate_particles=["f(0)"],
    allowed_interaction_types=["strong", "EM"],
    formalism_type="canonical-helicity",
)
# Render the decay topology (equivalent graphs collapsed into one).
dot = es.io.asdot(result, collapse_graphs=True)
graphviz.Source(dot)
# Attach relativistic Breit-Wigner dynamics (with form factor) to every
# intermediate resonance, then generate the amplitude model.
model_builder = es.amplitude.get_builder(result)
for name in result.get_intermediate_particles().names:
    model_builder.set_dynamics(name, create_relativistic_breit_wigner_with_ff)
model = model_builder.generate()
# Evaluate (doit) the first amplitude component -- mainly to inspect it.
next(iter(model.components.values())).doit()
# ### Generate data sample
# :::{seealso}
#
# {doc}`usage/step2`
#
# :::
# Wrap the sympy expression with its default parameter values so tensorwaves
# can lambdify it into a numerical intensity function.
sympy_model = SympyModel(
    expression=model.expression,
    parameters=model.parameter_defaults,
)
intensity = LambdifiedFunction(sympy_model, backend="numpy")
# Converts four-momenta into the helicity kinematic variables of the model.
data_converter = HelicityTransformer(model.adapter)
# Flat phase-space sample (used later for normalization) and a data sample
# generated according to the intensity (hit-and-miss).
phsp_sample = generate_phsp(300_000, model.adapter.reaction_info)
data_sample = generate_data(
    30_000, model.adapter.reaction_info, data_converter, intensity
)
import numpy as np
from matplotlib import cm
reaction_info = model.adapter.reaction_info
# Intermediate resonances = model particles that are neither in the initial
# nor in the final state; sorted by mass for a stable plotting order.
intermediate_states = sorted(
    (
        p
        for p in model.particles
        if p not in reaction_info.final_state.values()
        and p not in reaction_info.initial_state.values()
    ),
    key=lambda p: p.mass,
)
# One rainbow color per resonance, evenly spaced over the colormap.
evenly_spaced_interval = np.linspace(0, 1, len(intermediate_states))
colors = [cm.rainbow(x) for x in evenly_spaced_interval]
def indicate_masses():
    """Mark each intermediate-state mass with a dotted vertical line.

    Labels the x-axis and draws one labeled line per resonance, using the
    module-level ``intermediate_states`` and ``colors`` (built in parallel,
    so they are zipped here).
    """
    plt.xlabel("$m$ [GeV]")
    axes = plt.gca()
    for particle, line_color in zip(intermediate_states, colors):
        axes.axvline(
            x=particle.mass,
            linestyle="dotted",
            label=particle.name,
            color=line_color,
        )
# Transform both four-momentum samples into kinematic variables and histogram
# "m_12" (invariant mass of final-state particles 1 and 2 -- presumably the
# two pi0s given the final-state ordering; verify against the model adapter).
phsp_set = data_converter.transform(phsp_sample)
data_set = data_converter.transform(data_sample)
data_frame = pd.DataFrame(data_set.to_pandas())
data_frame["m_12"].hist(bins=100, alpha=0.5, density=True)
indicate_masses()
plt.legend();
# ### Optimize the model
# :::{seealso}
#
# {doc}`usage/step3`
#
# :::
import matplotlib.pyplot as plt
import numpy as np
def compare_model(
    variable_name,
    data_set,
    phsp_set,
    intensity_model,
    bins=150,
):
    """Overlay the data distribution with the current model prediction.

    The model curve is the phase-space histogram of ``variable_name``,
    weighted event-by-event with the intensity evaluated on the full
    phase-space set.  Both histograms are density-normalized so they are
    directly comparable.
    """
    weights = intensity_model(phsp_set)
    plt.hist(
        data_set[variable_name],
        bins=bins,
        alpha=0.5,
        label="data",
        density=True,
    )
    plt.hist(
        phsp_set[variable_name],
        weights=weights,
        bins=bins,
        histtype="step",
        color="red",
        label="initial fit model",
        density=True,
    )
    indicate_masses()
    plt.legend()
# Tensorwaves' own negative log-likelihood estimator (JAX backend), with the
# phase-space sample providing the normalization integral.
# NOTE(review): only used by the commented-out Minuit2 workflow at the bottom
# of this file; the active fit below goes through zfit instead.
estimator = UnbinnedNLL(
    sympy_model,
    data_set,
    phsp_set,
    backend="jax",
)
# Starting values for the floated parameters: one complex production coupling
# plus masses/widths of the f(0) resonances.
initial_parameters = {
    "C[J/\\psi(1S) \\to f_{0}(1500)_{0} \\gamma_{+1};f_{0}(1500) \\to \\pi^{0}_{0} \\pi^{0}_{0}]": 1.0
    + 0.0j,
    "Gamma_f(0)(500)": 0.3,
    "Gamma_f(0)(980)": 0.1,
    "m_f(0)(1710)": 1.75,
    "Gamma_f(0)(1710)": 0.2,
}
# Visualize how (badly) the model matches the data before the fit.
intensity.update_parameters(initial_parameters)
compare_model("m_12", data_set, phsp_set, intensity)
print("Number of free parameters:", len(initial_parameters))
import tensorflow.experimental.numpy as znp
import zfit # also at top to suppress tf warnings
from zfit import z
# Run zfit eagerly and use numerical gradients: the tensorwaves intensity is
# updated via Python-side parameter setting (see TensorWavePDF._pdf below),
# which presumably cannot be traced/autodifferentiated through TF graphs.
zfit.run.set_graph_mode(False)
zfit.run.set_autograd_mode(False)
class TensorWavePDF(zfit.pdf.BasePDF):
    """zfit PDF wrapping a tensorwaves intensity, normalized over *norm*."""

    def __init__(self, intensity, norm, obs, params=None, name="tensorwaves"):
        """tensorwave intensity normalized over the *norm* dataset."""
        super().__init__(obs, params, name)
        self.intensity = intensity
        # Convert the normalization sample once, up front, into the
        # dict-of-arrays layout that the tensorwaves intensity expects.
        self.norm_sample = self._to_intensity_input(norm)

    def _to_intensity_input(self, dataset):
        # Map each observable name to its unstacked column as a TF array.
        columns = z.unstack_x(dataset)
        return {ob: znp.array(col) for ob, col in zip(self.obs, columns)}

    def _pdf(self, x, norm_range):
        # We take full control of the normalization here instead of using the
        # automatic mechanics (which would also be possible).
        # Push the current zfit parameter values into the tensorwaves model.
        # This Python-side update seems not very TF compatible.
        current_values = {p.name: float(p) for p in self.params.values()}
        self.intensity.update_parameters(current_values)
        unnormalized = self.intensity(self._to_intensity_input(x))
        if norm_range is not False:
            # Monte-Carlo estimate of the integral over the norm sample.
            return unnormalized / znp.mean(self.intensity(self.norm_sample))
        # norm_range=False: raw intensity -- not strictly needed, but useful
        # for e.g. sampling with `pdf(..., norm_range=False)`.
        return unnormalized
# Mirror every model parameter as a zfit Parameter.  This full conversion is
# not strictly needed -- converting only the entries of ``initial_parameters``
# would suffice.
params = [
    zfit.param.convert_to_parameter(val, name, prefer_constant=False)
    for name, val in sympy_model.parameters.items()
]
# zfit observables (usually) carry limits, as they often come in handy; this
# is the "ugliest" part of the glue: derive them from the data with a +-1
# margin per variable.
obs = [
    zfit.Space(ob, limits=(np.min(data_frame[ob]) - 1, np.max(data_frame[ob]) + 1))
    for ob in phsp_set.to_pandas()
]
obsall = zfit.dimension.combine_spaces(*obs)
# Convert both samples into zfit datasets over the combined space.
phsp_zfit = zfit.Data.from_pandas(pd.DataFrame(phsp_set.to_pandas()), obs=obsall)
data_zfit = zfit.Data.from_pandas(pd.DataFrame(data_set.to_pandas()), obs=obsall)
# Keep only the independent parameters we want to float (drops the complex
# coupling's composed parameter).  We could also use more parameters and only
# minimize with respect to a subset.
params_fit = [p for p in params if p.name in initial_parameters if p.independent]
# Seed the floated parameters with the chosen starting values.  Complex
# parameters are tricky: zfit composes them from two free real parameters, so
# a complex parameter cannot be set directly in the minimizer -- it is (AFAIK)
# always decomposed.
# (Was a list comprehension used purely for its side effect; a plain loop is
# the idiomatic form and allocates no throwaway list.)
for p in params_fit:
    if p.name in initial_parameters:
        p.set_value(initial_parameters[p.name])
pdf = TensorWavePDF(
    obs=obsall,
    intensity=intensity,
    norm=phsp_zfit,
    params={f'param{i}': p for i, p in enumerate(params_fit)},
)
# Standard zfit unbinned negative log-likelihood over the data sample.
loss = zfit.loss.UnbinnedNLL(pdf, data_zfit)
# ok, here I was caught up playing around :) Minuit seems to perform clearly
# the best though; the alternatives tried are kept below for reference.
minimizer = zfit.minimize.Minuit(verbosity=8, gradient=True)
# minimizer = zfit.minimize.ScipyLBFGSBV1(verbosity=8)
# minimizer = zfit.minimize.ScipyTrustKrylovV1(verbosity=8)
# minimizer = zfit.minimize.NLoptMMAV1(verbosity=8)
# minimizer = zfit.minimize.IpyoptV1(verbosity=8)
# NOTE(review): this rebinding shadows the expertsystem `result` from the top
# of the script -- consider a distinct name like `fit_result`.
result = minimizer.minimize(loss)
print(result)
# Hesse (parabolic) error estimation; augments the fit result.
result.hesse()
print(result)
# commented out below
#
# callback = CSVSummary("fit_traceback.csv")
# minuit2 = Minuit2(callback)
# fit_result = minuit2.optimize(estimator, initial_parameters)
# fit_result
#
#
#
#
# optimized_parameters = fit_result["parameter_values"]
# intensity.update_parameters(optimized_parameters)
# compare_model("m_12", data_set, phsp_set, intensity)
#
#
#
#
# import pandas as pd
# import sympy as sp
#
# converters = {p: lambda s: complex(s).real for p in initial_parameters}
# fit_traceback = pd.read_csv("fit_traceback.csv", converters=converters)
# fig, (ax1, ax2) = plt.subplots(
# 2, figsize=(7, 8), gridspec_kw={"height_ratios": [1, 1.8]}
# )
# fit_traceback.plot("function_call", "estimator_value", ax=ax1)
# fit_traceback.plot("function_call", sorted(initial_parameters), ax=ax2)
# ax1.set_title("Negative log likelihood")
# ax2.set_title("Parameter values")
# ax1.set_xlabel("function call")
# ax2.set_xlabel("function call")
# fig.tight_layout()
# ax1.legend().remove()
# legend_texts = ax2.legend().get_texts()
# for text in legend_texts:
# latex = f"${sp.latex(sp.Symbol(text.get_text()))}$"
# latex = latex.replace("\\\\", "\\")
# if latex[2] == "C":
# latex = fR"\left|{latex}\right|"
# text.set_text(latex)
# for line in ax2.get_lines():
# label = line.get_label()
# color = line.get_color()
# ax2.axhline(
# y=complex(sympy_model.parameters[label]).real,
# color=color,
# alpha=0.5,
# linestyle="dotted",
# )
# ## Step-by-step workflow
# The following pages go through the usual workflow when using {mod}`tensorwaves`. The output in each of these steps is written to disk, so that each notebook can be run separately.
# ```{toctree}
# ---
# maxdepth: 2
# ---
# usage/step1
# usage/step2
# usage/step3
# usage/basics
# ```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment