Skip to content

Instantly share code, notes, and snippets.

@zachbe
Created June 28, 2024 19:28
Show Gist options
  • Select an option

  • Save zachbe/92b43aca3756db16194d20a29a40baf1 to your computer and use it in GitHub Desktop.

Select an option

Save zachbe/92b43aca3756db16194d20a29a40baf1 to your computer and use it in GitHub Desktop.
import warnings
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import numpy as np
# Fetch MNIST (70,000 28x28 digit images) from https://www.openml.org/d/554.
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
# Rescale pixel intensities from [0, 255] into [0, 1].
X = X / 255.0

# Hold out 70% of the data as the test partition; the remaining 30% trains.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.7
)

# Small single-hidden-layer network trained with plain SGD. max_iter=8 is
# deliberately tiny, so training stops well before convergence.
mlp = MLPClassifier(
    hidden_layer_sizes=(40,),
    max_iter=8,
    alpha=1e-4,
    solver="sgd",
    verbose=10,
    random_state=1,
    learning_rate_init=0.2,
)

# Fitting with so few iterations triggers a ConvergenceWarning; it is
# expected here, so suppress it for the duration of the fit.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))
# Evaluate classification accuracy at increasing levels of uniform input noise.
#   Baseline: classify one noisy copy of each test image directly.
#   Averaged ("obfuscated query"): average predicted class probabilities over
#   several independently-noised copies of the image, then classify.
noises = [0, 0.2, 0.4, 0.6, 0.8, 1]
scores = []     # averaged-probability accuracy, one entry per noise level
baselines = []  # single-noisy-query accuracy, one entry per noise level
num_tests = 1000  # test images evaluated per noise level
num_nudges = 15   # noisy copies averaged per image

for noise_level in noises:
    correct = 0
    baseline = 0
    for i in range(num_tests):
        # --- Baseline: a single noisy copy, classified directly. ---
        noisy_data = X_test[i] + np.random.uniform(
            -noise_level, noise_level, (28 * 28,)
        )
        # BUG FIX: np.clip returns a new array; the original discarded the
        # result, so pixel values were never actually clipped to [0, 1].
        noisy_data = np.clip(noisy_data, 0, 1)
        bs_probs = mlp.predict_proba([noisy_data])
        if int(np.argmax(bs_probs)) == int(y_test[i]):
            baseline += 1

        # --- Average probabilities over num_nudges noisy copies. ---
        # BUG FIX: the original accumulated into a Python list, where `+=`
        # EXTENDS the list with the (1, 10) probability row instead of adding
        # element-wise, producing a ragged 25-element list. Accumulate into an
        # ndarray instead, and take row [0] so in-place addition broadcasts.
        probs = np.zeros(10)
        for _ in range(num_nudges):
            noisy_data = X_test[i] + np.random.uniform(
                -noise_level, noise_level, (28 * 28,)
            )
            noisy_data = np.clip(noisy_data, 0, 1)
            probs += mlp.predict_proba([noisy_data])[0] / num_nudges
        if int(np.argmax(probs)) == int(y_test[i]):
            correct += 1

    b_score = baseline / num_tests
    baselines.append(b_score)
    score = correct / num_tests
    scores.append(score)
    print("Noisy set score: " + str(score))
# Three-panel summary: an example noisy digit plus both accuracy curves.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3)

# Display one test digit corrupted with Gaussian noise.
# NOTE(review): the evaluation above used *uniform* noise; this panel uses
# normal(0, 1), so it illustrates a different corruption — confirm intended.
noisy_example = (X_test[0] + np.random.normal(0, 1, (28 * 28,))).reshape(28, 28)
ax1.matshow(noisy_example, cmap=plt.cm.gray, vmin=0, vmax=1)

# Accuracy when a single noisy query is classified directly.
ax2.plot(noises, baselines, color="red")
ax2.set_title("Baseline Noisy Data Accuracy")
ax2.set_xlabel("Noise Level")
ax2.set_ylabel("Classification Accuracy")
ax2.set_ylim([0.7, 1])

# Accuracy when probabilities are averaged over several noisy copies.
ax3.plot(noises, scores, color="green")
ax3.set_title("Obfuscated Query Accuracy")
ax3.set_xlabel("Noise Level")
ax3.set_ylabel("Classification Accuracy")
ax3.set_ylim([0.8, 1])

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment