-
-
Save SchattenGenie/b0e5e2e5633f944024d9b801fb64a68f to your computer and use it in GitHub Desktop.
MLHEP2019 2 stage metric: generation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def get_assymetry(imgs, ps, points, orthog=False): | |
# асимметрия ливня вдоль и поперек направнения наклона | |
assym_res = [] | |
zoff = 25 | |
x = np.linspace(-14.5, 14.5, 30) | |
y = np.linspace(-14.5, 14.5, 30) | |
xx, yy = np.meshgrid(x, y) | |
xx = np.repeat(xx[np.newaxis, ...], len(imgs), axis=0) | |
yy = np.repeat(yy[np.newaxis, ...], len(imgs), axis=0) | |
points_0 = points[:, 0] + zoff * ps[:, 0] / ps[:, 2] | |
points_1 = points[:, 1] + zoff * ps[:, 1] / ps[:, 2] | |
if orthog: | |
line_func = lambda x: (x - points_0[..., np.newaxis, np.newaxis]) / (ps[:, 0] / ps[:, 1])[..., np.newaxis, np.newaxis] + points_1[..., np.newaxis, np.newaxis] | |
else: | |
line_func = lambda x: -(x - points_0[..., np.newaxis, np.newaxis]) / (ps[:, 1] / ps[:, 0])[..., np.newaxis, np.newaxis] + points_1[..., np.newaxis, np.newaxis] | |
sign = np.ones(len(ps)) | |
if not orthog: | |
sign = (ps[:, 1] > 0).astype(int) | |
sign = 2 * (sign - 0.5) | |
idx = np.where((yy - line_func(xx)) * sign[..., np.newaxis, np.newaxis] < 0) | |
zz = np.ones((len(imgs), 30, 30)) | |
zz[idx] = 0 | |
assym = (np.sum(imgs * zz, axis=(1, 2)) - | |
np.sum(imgs * (1 - zz), axis=(1, 2))) / np.sum(imgs, axis=(1, 2)) | |
return assym | |
def zz_to_line(zz): | |
res = ( | |
np.concatenate([np.abs(np.diff(zz, axis=2)), np.zeros((len(zz), 30, 1))], axis=2) + | |
np.concatenate([np.abs(np.diff(zz, axis=1)), np.zeros((len(zz), 1, 30))], axis=1) | |
) | |
return np.clip(res, 0, 1) | |
def get_shower_width(data, ps, points, orthog=False): | |
# ширина ливня вдоль и поперек направления | |
res = [] | |
spreads = [] | |
assym_res = [] | |
zoff = 25 | |
x = np.linspace(-14.5, 14.5, 30) | |
y = np.linspace(-14.5, 14.5, 30) | |
xx, yy = np.meshgrid(x, y) | |
xx = np.repeat(xx[np.newaxis, ...], len(data), axis=0) | |
yy = np.repeat(yy[np.newaxis, ...], len(data), axis=0) | |
points_0 = points[:, 0] + zoff * ps[:, 0] / ps[:, 2] | |
points_1 = points[:, 1] + zoff * ps[:, 1] / ps[:, 2] | |
if orthog: | |
line_func = lambda x: -(x - points_0[..., np.newaxis, np.newaxis]) / (ps[:, 0] / ps[:, 1])[..., np.newaxis, np.newaxis] + points_1[..., np.newaxis, np.newaxis] | |
else: | |
line_func = lambda x: (x - points_0[..., np.newaxis, np.newaxis]) / (ps[:, 1] / ps[:, 0])[..., np.newaxis, np.newaxis] + points_1[..., np.newaxis, np.newaxis] | |
rescale = np.sqrt(1 + (ps[:, 1] / ps[:, 0])**2) | |
sign = np.ones(len(ps)) | |
if not orthog: | |
sign = (ps[:, 1] < 0).astype(int) | |
sign = 2 * (sign - 0.5) | |
idx = np.where((yy - line_func(xx)) * sign[..., np.newaxis, np.newaxis] < 0) | |
zz = np.ones((len(data), 30, 30)) | |
zz[idx] = 0 | |
line = zz_to_line(zz) | |
ww = (line * data) # * rescale[..., np.newaxis, np.newaxis] | |
sum_0 = ww.sum(axis=(1, 2)) | |
sum_1 = (ww * rescale[..., np.newaxis, np.newaxis] * xx).sum(axis=(1, 2)) | |
sum_2 = (ww * (rescale[..., np.newaxis, np.newaxis] * xx)**2).sum(axis=(1, 2)) | |
sum_1 = sum_1 / sum_0 | |
sum_2 = sum_2 / sum_0 | |
sigma = np.sqrt(sum_2 - sum_1 * sum_1) | |
return sigma | |
def get_ms_ratio2(data, alpha=0.1): | |
ms = np.sum(data, axis=(1, 2)) | |
num = np.sum((data >= (ms * alpha)[:, np.newaxis, np.newaxis]), axis=(1, 2)) | |
return num / 900. | |
def get_sparsity_level(data): | |
alphas = np.logspace(-5, 0, 50) | |
sparsity = [] | |
for alpha in alphas: | |
sparsity.append(get_ms_ratio2(data, alpha)) | |
return np.array(sparsity) | |
def get_physical_stats(EnergyDeposit, ParticleMomentum, ParticlePoint): | |
assym = get_assymetry(EnergyDeposit, ParticleMomentum, ParticlePoint, orthog=False) | |
assym_ortho = get_assymetry(EnergyDeposit, ParticleMomentum, ParticlePoint, orthog=True) | |
sh_width = get_shower_width(EnergyDeposit, ParticleMomentum, ParticlePoint, orthog=False) | |
sh_width_ortho = get_shower_width(EnergyDeposit, ParticleMomentum, ParticlePoint, orthog=True) | |
sparsity_level = get_sparsity_level(EnergyDeposit) | |
stats = np.c_[ | |
assym, | |
assym_ortho, | |
sh_width, | |
sh_width_ortho, | |
sparsity_level.T | |
] | |
return stats |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from prd_score import compute_prd_from_embedding | |
from calogan_metrics import get_assymetry, get_shower_width, get_sparsity_level | |
from calogan_metrics import get_physical_stats | |
from sklearn.metrics import auc | |
import numpy as np | |
def scoring_function(solution_file, predict_file): | |
np.random.seed(1337) | |
score = 0. | |
solution = np.load(solution_file, allow_pickle=True) | |
predict = np.load(predict_file, allow_pickle=True) | |
EnergyDeposit_sol = solution['EnergyDeposit'] | |
ParticleMomentum_sol = solution['ParticleMomentum'] | |
ParticlePoint_sol = solution['ParticlePoint'] | |
EnergyDeposit_pred = predict['EnergyDeposit'] | |
precision, recall = compute_prd_from_embedding( | |
EnergyDeposit_sol.reshape(len(EnergyDeposit_sol), -1), | |
EnergyDeposit_pred.reshape(len(EnergyDeposit_sol), -1),, | |
num_clusters=100, | |
num_runs=100) | |
auc_img = auc(precision, recall) | |
physical_metrics_sol = get_physical_stats( | |
EnergyDeposit_sol, | |
ParticleMomentum_sol, | |
ParticlePoint_sol) | |
physical_metrics_pred = get_physical_stats( | |
EnergyDeposit_pred, | |
ParticleMomentum_sol, | |
ParticlePoint_sol) | |
precision, recall = compute_prd_from_embedding( | |
physical_metrics_sol, | |
physical_metrics_pred, | |
num_clusters=100, | |
num_runs=100) | |
auc_physical_metrics = auc(precision, recall) | |
return min(auc_img, auc_physical_metrics) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
# Taken from: | |
# https://github.com/google/compare_gan/blob/master/compare_gan/src/prd_score.py | |
# | |
# Changes: | |
# - default dpi changed from 150 to 300 | |
# - added handling of cases where P = Q, where precision/recall may be | |
# just above 1, leading to errors for the f_beta computation | |
# | |
# Copyright 2018 Google LLC & Hwalsuk Lee. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""Precision and recall computation based on samples from two distributions. | |
Given a sample from the true and the fake distribution embedded in some feature | |
space (say, Inception), it computes the precision and recall via the algorithm | |
presented in [arxiv.org/abs/1806.00035]. Finally, one can plot the resulting | |
curves for different models. | |
Typical usage example: | |
import prd | |
prd_data_1 = prd.compute_prd_from_embedding(eval_feats_1, ref_feats_1) | |
prd_data_2 = prd.compute_prd_from_embedding(eval_feats_2, ref_feats_2) | |
prd.plot([prd_data_1, prd_data_2], ['GAN_1', 'GAN_2']) | |
""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
from matplotlib import pyplot as plt | |
import numpy as np | |
import sklearn.cluster | |
def compute_prd(eval_dist, ref_dist, num_angles=1001, epsilon=1e-10): | |
"""Computes the PRD curve for discrete distributions. | |
This function computes the PRD curve for the discrete distribution eval_dist | |
with respect to the reference distribution ref_dist. This implements the | |
algorithm in [arxiv.org/abs/1806.2281349]. The PRD will be computed for an | |
equiangular grid of num_angles values between [0, pi/2]. | |
Args: | |
eval_dist: 1D NumPy array or list of floats with the probabilities of the | |
different states under the distribution to be evaluated. | |
ref_dist: 1D NumPy array or list of floats with the probabilities of the | |
different states under the reference distribution. | |
num_angles: Number of angles for which to compute PRD. Must be in [3, 1e6]. | |
The default value is 1001. | |
epsilon: Angle for PRD computation in the edge cases 0 and pi/2. The PRD | |
will be computes for epsilon and pi/2-epsilon, respectively. | |
The default value is 1e-10. | |
Returns: | |
precision: NumPy array of shape [num_angles] with the precision for the | |
different ratios. | |
recall: NumPy array of shape [num_angles] with the recall for the different | |
ratios. | |
Raises: | |
ValueError: If not 0 < epsilon <= 0.1. | |
ValueError: If num_angles < 3. | |
""" | |
if not (epsilon > 0 and epsilon < 0.1): | |
raise ValueError('epsilon must be in (0, 0.1] but is %s.' % str(epsilon)) | |
if not (num_angles >= 3 and num_angles <= 1e6): | |
raise ValueError('num_angles must be in [3, 1e6] but is %d.' % num_angles) | |
# Compute slopes for linearly spaced angles between [0, pi/2] | |
angles = np.linspace(epsilon, np.pi/2 - epsilon, num=num_angles) | |
slopes = np.tan(angles) | |
# Broadcast slopes so that second dimension will be states of the distribution | |
slopes_2d = np.expand_dims(slopes, 1) | |
# Broadcast distributions so that first dimension represents the angles | |
ref_dist_2d = np.expand_dims(ref_dist, 0) | |
eval_dist_2d = np.expand_dims(eval_dist, 0) | |
# Compute precision and recall for all angles in one step via broadcasting | |
precision = np.minimum(ref_dist_2d*slopes_2d, eval_dist_2d).sum(axis=1) | |
recall = precision / slopes | |
# handle numerical instabilities leaing to precision/recall just above 1 | |
max_val = max(np.max(precision), np.max(recall)) | |
if max_val > 1.001: | |
raise ValueError('Detected value > 1.001, this should not happen.') | |
precision = np.clip(precision, 0, 1) | |
recall = np.clip(recall, 0, 1) | |
return precision, recall | |
def _cluster_into_bins(eval_data, ref_data, num_clusters): | |
"""Clusters the union of the data points and returns the cluster distribution. | |
Clusters the union of eval_data and ref_data into num_clusters using minibatch | |
k-means. Then, for each cluster, it computes the number of points from | |
eval_data and ref_data. | |
Args: | |
eval_data: NumPy array of data points from the distribution to be evaluated. | |
ref_data: NumPy array of data points from the reference distribution. | |
num_clusters: Number of cluster centers to fit. | |
Returns: | |
Two NumPy arrays, each of size num_clusters, where i-th entry represents the | |
number of points assigned to the i-th cluster. | |
""" | |
cluster_data = np.vstack([eval_data, ref_data]) | |
kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=num_clusters, n_init=10) | |
labels = kmeans.fit(cluster_data).labels_ | |
eval_labels = labels[:len(eval_data)] | |
ref_labels = labels[len(eval_data):] | |
eval_bins = np.histogram(eval_labels, bins=num_clusters, | |
range=[0, num_clusters], density=True)[0] | |
ref_bins = np.histogram(ref_labels, bins=num_clusters, | |
range=[0, num_clusters], density=True)[0] | |
return eval_bins, ref_bins | |
def compute_prd_from_embedding(eval_data, ref_data, num_clusters=20, | |
num_angles=1001, num_runs=10, | |
enforce_balance=True): | |
"""Computes PRD data from sample embeddings. | |
The points from both distributions are mixed and then clustered. This leads | |
to a pair of histograms of discrete distributions over the cluster centers | |
on which the PRD algorithm is executed. | |
The number of points in eval_data and ref_data must be equal since | |
unbalanced distributions bias the clustering towards the larger dataset. The | |
check can be disabled by setting the enforce_balance flag to False (not | |
recommended). | |
Args: | |
eval_data: NumPy array of data points from the distribution to be evaluated. | |
ref_data: NumPy array of data points from the reference distribution. | |
num_clusters: Number of cluster centers to fit. The default value is 20. | |
num_angles: Number of angles for which to compute PRD. Must be in [3, 1e6]. | |
The default value is 1001. | |
num_runs: Number of independent runs over which to average the PRD data. | |
enforce_balance: If enabled, throws exception if eval_data and ref_data do | |
not have the same length. The default value is True. | |
Returns: | |
precision: NumPy array of shape [num_angles] with the precision for the | |
different ratios. | |
recall: NumPy array of shape [num_angles] with the recall for the different | |
ratios. | |
Raises: | |
ValueError: If len(eval_data) != len(ref_data) and enforce_balance is set to | |
True. | |
""" | |
if enforce_balance and len(eval_data) != len(ref_data): | |
raise ValueError( | |
'The number of points in eval_data %d is not equal to the number of ' | |
'points in ref_data %d. To disable this exception, set enforce_balance ' | |
'to False (not recommended).' % (len(eval_data), len(ref_data))) | |
eval_data = np.array(eval_data, dtype=np.float64) | |
ref_data = np.array(ref_data, dtype=np.float64) | |
precisions = [] | |
recalls = [] | |
for _ in range(num_runs): | |
eval_dist, ref_dist = _cluster_into_bins(eval_data, ref_data, num_clusters) | |
precision, recall = compute_prd(eval_dist, ref_dist, num_angles) | |
precisions.append(precision) | |
recalls.append(recall) | |
precision = np.mean(precisions, axis=0) | |
recall = np.mean(recalls, axis=0) | |
return precision, recall | |
def _prd_to_f_beta(precision, recall, beta=1, epsilon=1e-10): | |
"""Computes F_beta scores for the given precision/recall values. | |
The F_beta scores for all precision/recall pairs will be computed and | |
returned. | |
For precision p and recall r, the F_beta score is defined as: | |
F_beta = (1 + beta^2) * (p * r) / ((beta^2 * p) + r) | |
Args: | |
precision: 1D NumPy array of precision values in [0, 1]. | |
recall: 1D NumPy array of precision values in [0, 1]. | |
beta: Beta parameter. Must be positive. The default value is 1. | |
epsilon: Small constant to avoid numerical instability caused by division | |
by 0 when precision and recall are close to zero. | |
Returns: | |
NumPy array of same shape as precision and recall with the F_beta scores for | |
each pair of precision/recall. | |
Raises: | |
ValueError: If any value in precision or recall is outside of [0, 1]. | |
ValueError: If beta is not positive. | |
""" | |
if not ((precision >= 0).all() and (precision <= 1).all()): | |
raise ValueError('All values in precision must be in [0, 1].') | |
if not ((recall >= 0).all() and (recall <= 1).all()): | |
raise ValueError('All values in recall must be in [0, 1].') | |
if beta <= 0: | |
raise ValueError('Given parameter beta %s must be positive.' % str(beta)) | |
return (1 + beta**2) * (precision * recall) / ( | |
(beta**2 * precision) + recall + epsilon) | |
def prd_to_max_f_beta_pair(precision, recall, beta=8): | |
"""Computes max. F_beta and max. F_{1/beta} for precision/recall pairs. | |
Computes the maximum F_beta and maximum F_{1/beta} score over all pairs of | |
precision/recall values. This is useful to compress a PRD plot into a single | |
pair of values which correlate with precision and recall. | |
For precision p and recall r, the F_beta score is defined as: | |
F_beta = (1 + beta^2) * (p * r) / ((beta^2 * p) + r) | |
Args: | |
precision: 1D NumPy array or list of precision values in [0, 1]. | |
recall: 1D NumPy array or list of precision values in [0, 1]. | |
beta: Beta parameter. Must be positive. The default value is 8. | |
Returns: | |
f_beta: Maximum F_beta score. | |
f_beta_inv: Maximum F_{1/beta} score. | |
Raises: | |
ValueError: If beta is not positive. | |
""" | |
if not ((precision >= 0).all() and (precision <= 1).all()): | |
raise ValueError('All values in precision must be in [0, 1].') | |
if not ((recall >= 0).all() and (recall <= 1).all()): | |
raise ValueError('All values in recall must be in [0, 1].') | |
if beta <= 0: | |
raise ValueError('Given parameter beta %s must be positive.' % str(beta)) | |
f_beta = np.max(_prd_to_f_beta(precision, recall, beta)) | |
f_beta_inv = np.max(_prd_to_f_beta(precision, recall, 1/beta)) | |
return f_beta, f_beta_inv | |
def plot(precision_recall_pairs, labels=None, out_path=None, | |
legend_loc='lower left', dpi=300): | |
"""Plots precision recall curves for distributions. | |
Creates the PRD plot for the given data and stores the plot in a given path. | |
Args: | |
precision_recall_pairs: List of prd_data to plot. Each item in this list is | |
a 2D array of precision and recall values for the | |
same number of ratios. | |
labels: Optional list of labels of same length as list_of_prd_data. The | |
default value is None. | |
out_path: Output path for the resulting plot. If None, the plot will be | |
opened via plt.show(). The default value is None. | |
legend_loc: Location of the legend. The default value is 'lower left'. | |
dpi: Dots per inch (DPI) for the figure. The default value is 150. | |
Raises: | |
ValueError: If labels is a list of different length than list_of_prd_data. | |
""" | |
if labels is not None and len(labels) != len(precision_recall_pairs): | |
raise ValueError( | |
'Length of labels %d must be identical to length of ' | |
'precision_recall_pairs %d.' | |
% (len(labels), len(precision_recall_pairs))) | |
fig = plt.figure(figsize=(3.5, 3.5), dpi=dpi) | |
plot_handle = fig.add_subplot(111) | |
plot_handle.tick_params(axis='both', which='major', labelsize=12) | |
for i in range(len(precision_recall_pairs)): | |
precision, recall = precision_recall_pairs[i] | |
label = labels[i] if labels is not None else None | |
plt.plot(recall, precision, label=label, alpha=0.5, linewidth=3) | |
if labels is not None: | |
plt.legend(loc=legend_loc) | |
plt.xlim([0, 1]) | |
plt.ylim([0, 1]) | |
plt.xlabel('Recall', fontsize=12) | |
plt.ylabel('Precision', fontsize=12) | |
plt.tight_layout() | |
if out_path is None: | |
plt.show() | |
else: | |
plt.savefig(out_path, bbox_inches='tight', dpi=dpi) | |
plt.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment