Last active
April 21, 2020 06:59
-
-
Save ImScientist/764484ef4a04cd40e6512c078e869d0e to your computer and use it in GitHub Desktop.
metrics_federated_learning_p1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.metrics import roc_auc_score | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
sns.set(color_codes=True) | |
def split_members_into_n_groups(
        members,
        similarity_ratio: float = 1.,
        n: int = 4
):
    """Split the data points into ``n`` groups.

    The first ``int(len(members) * similarity_ratio)`` elements are cut into
    ``n`` consecutive chunks in their original order. Provided ``members`` is
    in random order, these chunks follow the same distribution.
    The remaining elements are sorted in ascending order and then cut into
    ``n`` consecutive chunks, so each group receives a different value range.
    Group ``i`` is the concatenation of the i-th sorted chunk followed by the
    i-th unsorted chunk.

    Args:
        members: 1-D array-like of values to distribute.
        similarity_ratio: Fraction (between 0 and 1) of the elements that is
            split without sorting. 1 means nothing is sorted; 0 means
            everything is sorted before splitting.
        n: Number of groups to create.

    Returns:
        A list of ``n`` 1-D numpy arrays. When a slice is not evenly divisible
        by ``n`` the leading groups receive one extra element.

    Raises:
        ValueError: If ``similarity_ratio`` lies outside ``[0, 1]``.
    """
    if not 0. <= similarity_ratio <= 1.:
        raise ValueError(
            f"similarity_ratio must be in [0, 1], got {similarity_ratio}"
        )

    members = np.asarray(members)
    n_el = members.shape[0]
    n_identical = int(n_el * similarity_ratio)

    # np.array_split (unlike np.split) accepts lengths that are not a
    # multiple of n, so any combination of size, ratio and n works.
    identical_parts = np.array_split(members[:n_identical], n)

    # Sorting before splitting gives every group its own value range.
    sorted_parts = np.array_split(np.sort(members[n_identical:]), n)

    return [
        np.concatenate((sorted_part, identical_part))
        for sorted_part, identical_part in zip(sorted_parts, identical_parts)
    ]
def avg_roc_auc_fed(fm_members_fed, fp_members_fed):
    """ Compute one roc-auc score per data set plus their mean and std.

    The two arguments are iterated pairwise. Within each pair the ``fp``
    scores are labelled 1 and the ``fm`` scores are labelled 0.

    Returns a tuple ``(scores, mean, std)`` where ``scores`` is a list with
    one roc-auc value per pair.
    """

    def _pair_score(negatives, positives):
        # Positives first, then negatives - labels and scores share the order.
        labels = np.concatenate(
            (np.ones_like(positives), np.zeros_like(negatives))
        )
        scores = np.concatenate((positives, negatives))
        return roc_auc_score(y_true=labels, y_score=scores)

    per_set_scores = [
        _pair_score(negatives, positives)
        for negatives, positives in zip(fm_members_fed, fp_members_fed)
    ]
    return per_set_scores, np.mean(per_set_scores), np.std(per_set_scores)
def plot_fed_distributions(fm_members_fed, fp_members_fed, n=4, fig_size=(7, 4)):
    """ Draw the per-pod score distributions, one figure per class.

    The first figure overlays every element of ``fm_members_fed`` (negative
    class), the second every element of ``fp_members_fed`` (positive class).
    ``n`` is the number of colors requested from the seaborn palette and
    ``fig_size`` is forwarded to ``plt.figure``. Each figure is shown as soon
    as it is drawn.

    Returns the two figures as ``(fig1, fig2)``.
    """

    def _draw_class(pods, palette_name, title):
        figure = plt.figure(figsize=fig_size)
        # The palette is active only while the curves of this class are drawn.
        with sns.color_palette(palette_name, n):
            for pod_scores in pods:
                sns.distplot(pod_scores)
        plt.title(title)
        plt.show()
        return figure

    fig1 = _draw_class(
        fm_members_fed,
        "GnBu_d",
        "Model scores of the members belonging to the negative class for each one of the pods",
    )
    fig2 = _draw_class(
        fp_members_fed,
        "YlOrRd",
        "Model scores of the members belonging to the positive class for each one of the pods",
    )
    return fig1, fig2
#
# Generate scores for the positive and negative class
#
fig_size = (7, 4)
seed = 15
size = 4000  # number of data points
n = 4  # number of devices

# Mean and standard deviation of the normally distributed scores:
# fp_* describes the positive class, fm_* the negative class.
fp_mean, fp_std = 16, 10
fm_mean, fm_std = -16, 20

# Fixed seed so that every run draws the same scores.
np.random.seed(seed)
fp_members = fp_mean + fp_std * np.random.randn(size)
fm_members = fm_mean + fm_std * np.random.randn(size)

# Overlay both score distributions in a single figure.
fig = plt.figure(figsize=fig_size)
sns.distplot(fp_members)
sns.distplot(fm_members)
plt.title('Scores distribution for members of the positive and the negative class.')
plt.show()

# roc-auc over the complete data set: positives are labelled 1, negatives 0.
labels = np.concatenate((np.ones_like(fp_members), np.zeros_like(fm_members)))
scores = np.concatenate((fp_members, fm_members))
score = roc_auc_score(y_true=labels, y_score=scores)

print(f'roc_auc score = {np.round(score,3)}')
#
# Split the data into n identically distributed parts
#
similarity_ratio_p = 1  # ratio for the positive class (fp_members)
similarity_ratio_n = 1  # ratio for the negative class (fm_members)

# Each class receives the ratio that carries its name:
# fm_* is the negative class, fp_* is the positive class.
fm_members_fed = split_members_into_n_groups(fm_members, similarity_ratio_n, n)
fp_members_fed = split_members_into_n_groups(fp_members, similarity_ratio_p, n)

fig1, fig2 = plot_fed_distributions(fm_members_fed, fp_members_fed, n=n, fig_size=fig_size)

roc_auc_scores, avg_score, std_score = avg_roc_auc_fed(fm_members_fed, fp_members_fed)

print('Scores:\t', np.round(roc_auc_scores,3))
print('Avg score:\t', np.round(avg_score,3))
#
# Split the data into n non-identically distributed parts
# (only the negative class is split non-identically)
#
similarity_ratio_p = 1  # ratio for the positive class (fp_members)
similarity_ratio_n = 0.2  # ratio for the negative class (fm_members)

# Each class receives the ratio that carries its name:
# fm_* is the negative class, fp_* is the positive class.
# The values handed to each class are the same as before the names were fixed.
fm_members_fed = split_members_into_n_groups(fm_members, similarity_ratio_n, n)
fp_members_fed = split_members_into_n_groups(fp_members, similarity_ratio_p, n)

fig1, fig2 = plot_fed_distributions(fm_members_fed, fp_members_fed, n=n, fig_size=fig_size)

roc_auc_scores, avg_score, std_score = avg_roc_auc_fed(fm_members_fed, fp_members_fed)

print('Scores:\t', np.round(roc_auc_scores,3))
print('Avg score:\t', np.round(avg_score,3))
#
# Split the data into n non-identically distributed parts
# (only the positive class is split non-identically)
#
similarity_ratio_p = 0.02  # ratio for the positive class (fp_members)
similarity_ratio_n = 1  # ratio for the negative class (fm_members)

# Each class receives the ratio that carries its name:
# fm_* is the negative class, fp_* is the positive class.
# The values handed to each class are the same as before the names were fixed.
fm_members_fed = split_members_into_n_groups(fm_members, similarity_ratio_n, n)
fp_members_fed = split_members_into_n_groups(fp_members, similarity_ratio_p, n)

fig1, fig2 = plot_fed_distributions(fm_members_fed, fp_members_fed, n=n, fig_size=fig_size)

roc_auc_scores, avg_score, std_score = avg_roc_auc_fed(fm_members_fed, fp_members_fed)

print('Scores:\t', np.round(roc_auc_scores,3))
print('Avg score:\t', np.round(avg_score,3))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment