Last active
April 17, 2024 21:19
-
-
Save yrahul3910/553f255e4305a82b32da14bf23db805c to your computer and use it in GitHub Desktop.
Synthetic data metrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from tqdm import tqdm | |
from sklearn.neighbors import KNeighborsClassifier | |
from scipy.special import gamma | |
def smape(A, F):
    """Return the mean pairwise symmetric mean absolute percentage error.

    Every sample in ``A`` is scored against every sample in ``F``.  For one
    pair ``(x, y)`` the score is::

        1 / len(x) * sum(2 * |y - x| / (|x| + |y| + eps))

    where ``eps`` is the float machine epsilon, which keeps the ratio finite
    when both entries are zero.  The result is the average of that score over
    all ``len(A) * len(F)`` pairs.  Progress is reported through a tqdm bar
    whose total is the number of pairs.

    Args:
        A: Sequence of samples; presumably an array of shape
            ``(n_a, n_features)`` -- confirm against callers.
        F: Sequence of samples that can be stacked into one regular array
            and broadcast against each sample of ``A``.

    Returns:
        The mean pairwise SMAPE as a float.

    Raises:
        ValueError: If ``A`` or ``F`` is empty (no pair to average over).
    """
    F_arr = np.asarray(F)
    n_pairs = len(A) * len(F_arr)
    if n_pairs == 0:
        raise ValueError("smape requires A and F to be non-empty")

    eps = np.finfo(float).eps
    abs_F = np.abs(F_arr)  # loop-invariant, computed once
    total = 0.
    # The context manager closes the bar even if an iteration raises.
    with tqdm(total=n_pairs) as bar:
        for x in A:
            x = np.asarray(x)
            # One broadcast expression scores x against every row of F.
            ratios = 2 * np.abs(F_arr - x) / (np.abs(x) + abs_F + eps)
            total += np.sum(ratios) / len(x)
            bar.update(len(F_arr))
    return total / n_pairs
def _knn_density(samples, query_points, k):
    """Evaluate the k-nearest-neighbour density estimate at ``query_points``.

    Implements ``p(x) = k / (n * V_d * R_k(x) ** d)`` where ``n`` and ``d``
    are the number of rows and columns of ``samples``, ``V_d`` is the volume
    of the unit ball in ``d`` dimensions and ``R_k(x)`` is the Euclidean
    distance from ``x`` to its k-th nearest row of ``samples``.
    """
    # Local import: scipy is already a dependency of this file, but
    # ``scipy.spatial`` is not imported at module level.
    from scipy.spatial import cKDTree

    n, d = samples.shape
    if not 1 <= k <= n:
        raise ValueError(
            "k must satisfy 1 <= k <= n_samples, got k=%d, n_samples=%d" % (k, n)
        )
    unit_ball_volume = np.pi ** (d / 2) / gamma(d / 2 + 1)
    dists, _ = cKDTree(samples).query(query_points, k=k)
    # ``query`` returns shape (m,) for k == 1 and (m, k) otherwise; the last
    # column is always the distance to the k-th neighbour.
    radius = np.asarray(dists).reshape(len(query_points), -1)[:, -1]
    # Floor the ball volume so a query point that coincides with a sample
    # (radius 0) yields a large finite density instead of inf / nan.
    ball_volume = np.maximum(unit_ball_volume * radius ** d, np.finfo(float).eps)
    return k / (n * ball_volume)


def js_knn(A, F, grid_size=100, k=3):
    """Estimate a Jensen-Shannon divergence between two samples via kNN densities.

    Based on https://faculty.washington.edu/yenchic/18W_425/Lec7_knn_basis.pdf

    Both samples are turned into k-nearest-neighbour density estimates that
    are evaluated on ``grid_size`` points spaced evenly along the straight
    line from the per-feature minimum to the per-feature maximum of the
    pooled data (i.e. the diagonal of the joint bounding box, not a full
    d-dimensional lattice).  The divergence is then computed from those
    values with base-2 logarithms.

    NOTE(review): the density values are used as-is, without being
    normalised to sum to one over the grid, so the result is not bounded
    by 1 the way a divergence between probability vectors would be.

    Args:
        A: Array-like of shape ``(n_a, d)``.
        F: Array-like of shape ``(n_f, d)``.
        grid_size: Number of evaluation points.
        k: Which nearest neighbour defines the ball radius.

    Returns:
        The estimated divergence as a float; 0 when both density estimates
        coincide on every evaluation point.

    Raises:
        ValueError: If the inputs are not 2-D with the same number of
            columns, or if ``k`` is not in ``[1, n_samples]`` for either one.
    """
    A = np.asarray(A, dtype=float)
    F = np.asarray(F, dtype=float)
    if A.ndim != 2 or F.ndim != 2 or A.shape[1] != F.shape[1]:
        raise ValueError("A and F must be 2-D arrays with the same number of columns")

    # Evaluation points: grid_size steps from the joint minimum corner to the
    # joint maximum corner, shape (grid_size, d).
    lows = np.minimum(A.min(axis=0), F.min(axis=0))
    highs = np.maximum(A.max(axis=0), F.max(axis=0))
    grid = np.linspace(lows, highs, grid_size)

    # Each density is normalised by its OWN sample size.
    kde_A = _knn_density(A, grid, k)
    kde_F = _knn_density(F, grid, k)

    # Mixture of the two estimates, then the mean of the two KL terms.
    M = (kde_A + kde_F) / 2
    kl_A = np.sum(kde_A * np.log2(kde_A / M))
    kl_F = np.sum(kde_F * np.log2(kde_F / M))
    return 0.5 * (kl_A + kl_F)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment