Created
March 9, 2017 01:26
-
-
Save john-bradshaw/e63d2a20537beda035b32224a1be8831 to your computer and use it in GitHub Desktop.
Simple demo for a section in Hall, R., Rinaldo, A. and Wasserman, L., 2013. Differential privacy for functions and functional data. Journal of Machine Learning Research, 14(Feb), pp.703-727.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Code for section 4.1 of: | |
Hall, R., Rinaldo, A. and Wasserman, L., 2013. Differential privacy for | |
functions and functional data. Journal of Machine Learning Research, 14(Feb), pp.703-727. | |
Coded up in a bit of a rush so apologies if there are bugs. | |
""" | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy.spatial import distance | |
from matplotlib.widgets import Slider, Button | |
plt.style.use('fivethirtyeight') | |
class DiffFunctions(object): | |
def __init__(self, rng): | |
""" | |
:param rng: | |
:type rng: np.random.RandomState | |
""" | |
self.rng = rng | |
self.n_points = None | |
self.d = 1. | |
self.h = 0.1 | |
self.x_points = None | |
self.sames = None | |
self.kde = None | |
self._gp = None | |
def sample_for_x(self): | |
sizes = np.ceil(self.rng.uniform(high=80, size=3)).astype(np.int) | |
print "Total sizes is {}".format(np.sum(sizes)) | |
self.n_points = np.sum(sizes) | |
locs = [0.3, 0.7, 1.4] | |
scales = [0.3, 0.2, 0.1] | |
for l, s, si in zip(locs, scales, sizes): | |
print "Sampling {} from a Gaussian of mean of {} and std dev of {}".format(si, l, s) | |
self.samples = np.vstack([self.rng.normal(loc=l, scale=s, size=(si, 1)) for l, s, si in zip(locs, scales, sizes)]) | |
def compute_func_and_noisy_one_inner(self, x_points): | |
self.x_points = x_points | |
self.kde = self._compute_kde(x_points) | |
self._gp = self._calc_gaussian_sample(x_points) | |
def calc_noisy_func(self, alpha, beta): | |
sf = self._scaling_factor(beta, alpha) | |
noisy_func = self.kde + sf * self._gp | |
return noisy_func | |
def _calc_gaussian_sample(self, x_points): | |
kernel = self._compute_kernel(x_points, x_points) | |
gp = self.rng.multivariate_normal(np.zeros(x_points.shape[0], dtype=np.float), kernel)[:, np.newaxis] | |
return gp | |
def _compute_kde(self, x_points): | |
factor = 1. / (self.n_points * (2*np.pi*self.h**2)**(self.d/2.)) | |
return factor * np.sum(self._compute_kernel(x_points, self.samples), axis=1, keepdims=True) | |
def _compute_kernel(self, x1, x2): | |
distance_matrix_sq = distance.cdist(x1, x2, 'sqeuclidean') | |
kernel = np.exp(-distance_matrix_sq/(2.*self.h**2)) | |
return kernel | |
def _scaling_factor(self, beta, alpha): | |
return self._c_beta_above(beta) * self._compute_delta() / float(alpha) | |
def _compute_delta(self): | |
return np.sqrt(2.) / (self.n_points * (2 * np.pi * self.h**2)**(self.d/2.)) | |
def _c_beta_above(self, beta): | |
return np.sqrt(2* np.log(2. / beta)) | |
def simple(): | |
plt.figure(figsize=(20,10)) | |
x_indices = np.linspace(-0.5, 1.8, 500)[:, np.newaxis] | |
y_lim = [0, 5] | |
rng = np.random.RandomState(100) | |
alpha = 1. | |
beta = 1.5 | |
diff_function = DiffFunctions(rng) | |
diff_function.sample_for_x() | |
diff_function.compute_func_and_noisy_one_inner(x_indices) | |
plt.scatter(diff_function.samples, np.zeros_like(diff_function.samples), 30, label="true points") | |
plt.plot(x_indices, diff_function.kde, label="kde") | |
plt.plot(x_indices, diff_function.calc_noisy_func(alpha, beta), '--', label="noisy function") | |
plt.legend() | |
plt.title("Alpha is {} and beta is {}".format(alpha, beta)) | |
plt.show() | |
def interactive(): | |
fig, ax = plt.subplots(figsize=(20,10)) | |
x_indices = np.linspace(-0.5, 1.8, 500)[:, np.newaxis] | |
rng = np.random.RandomState(100) | |
plt.subplots_adjust(bottom=0.25) | |
alpha = 1. | |
beta = 0.1 | |
diff_function = DiffFunctions(rng) | |
diff_function.sample_for_x() | |
diff_function.compute_func_and_noisy_one_inner(x_indices) | |
kde_plt, = plt.plot(x_indices, diff_function.kde, label="kde") | |
f_noisy_plt, = plt.plot(x_indices, diff_function.calc_noisy_func(alpha, beta), '--', label="noisy function") | |
scatter_plt, = plt.plot(diff_function.samples, np.zeros_like(diff_function.samples), 'o', markersize=5, | |
label="true points") | |
plt.axis([-0.6, 2, -0.1, 1.5]) | |
axalpha = plt.axes([0.25, 0.1, 0.65, 0.03]) | |
axbeta = plt.axes([0.25, 0.15, 0.65, 0.03]) | |
salpha = Slider(axalpha, 'Alpha', 0.01, 10.0, valinit=alpha) | |
sbeta = Slider(axbeta, 'Beta', -4.60, 0.641, valinit=np.log(beta)) | |
def update(val): | |
alpha_val = salpha.val | |
beta_val = np.exp(sbeta.val) | |
sbeta.valtext.set_text(beta_val) | |
f_noisy_plt.set_ydata(diff_function.calc_noisy_func(alpha_val, beta_val)) | |
fig.canvas.draw_idle() | |
salpha.on_changed(update) | |
sbeta.on_changed(update) | |
resetax = plt.axes([0.8, 0.025, 0.1, 0.04]) | |
button = Button(resetax, 'New draw') | |
def reset(event): | |
diff_function.sample_for_x() | |
diff_function.compute_func_and_noisy_one_inner(x_indices) | |
kde_plt.set_ydata(diff_function.kde) | |
scatter_plt.set_data(diff_function.samples, np.zeros_like(diff_function.samples)) | |
salpha.reset() | |
sbeta.reset() | |
update(None) | |
button.on_clicked(reset) | |
plt.show() | |
def main(): | |
interactive() | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment