Skip to content

Instantly share code, notes, and snippets.

@raprasad
Last active December 21, 2023 18:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raprasad/79265f84e94424fe3409c7022a99b47c to your computer and use it in GitHub Desktop.
Save raprasad/79265f84e94424fe3409c7022a99b47c to your computer and use it in GitHub Desktop.
PCA snippet
"""
Install the following libraries before running this snippet:

(1) OpenDP library with DP-PCA
    reference: https://test.pypi.org/project/opendp/0.9.0.dev20231221001/

    pip install -i https://test.pypi.org/simple/ opendp==0.9.0.dev20231221001

(2) scikit-learn (also installs numpy as a dependency)

    pip install scikit-learn
"""
import numpy as np
import opendp.prelude as dp
# Opt in to three OpenDP feature flags before dp.PCA is constructed below.
# NOTE(review): presumably "contrib" and "floating-point" unlock components
# that are not fully vetted / rely on floating-point arithmetic, and
# "honest-but-curious" unlocks components whose guarantees depend on
# conditions the library cannot verify -- confirm against the OpenDP docs
# for the installed version.
dp.enable_features("honest-but-curious", "contrib", "floating-point")
# DATASET CREATION
def sample_microdata(*, num_columns=None, num_rows=None, cov=None):
    """Draw zero-mean multivariate-normal rows and center them column-wise.

    Args:
        num_columns: Number of features. Only used to build a random
            covariance via ``sample_covariance`` when ``cov`` is not given.
        num_rows: Number of rows to draw. Falsy values (``None`` or ``0``)
            fall back to 100,000 rows.
        cov: Optional square covariance matrix (ndarray or array-like).
            When given, its first dimension sets the number of columns.

    Returns:
        ``numpy.ndarray`` of shape ``(num_rows, cov.shape[0])`` from which
        the per-column sample mean has been subtracted.
    """
    # `cov or ...` raises ValueError for any multi-element ndarray (its
    # truth value is ambiguous), so compare against None explicitly.
    if cov is None:
        cov = sample_covariance(num_columns)
    # Accept array-likes (e.g. nested lists), which have no `.shape`.
    cov = np.asarray(cov)
    microdata = np.random.multivariate_normal(
        np.zeros(cov.shape[0]), cov, size=num_rows or 100_000
    )
    # Center in place so every column has (numerically) zero sample mean.
    microdata -= microdata.mean(axis=0)
    return microdata
def sample_covariance(num_features):
    """Build a random square matrix usable as a covariance.

    A ``num_features x num_features`` factor is drawn uniformly from
    ``[0, num_features)`` and multiplied by its own transpose, so the
    product is symmetric positive semi-definite by construction.

    Args:
        num_features: Side length of the returned matrix; also the upper
            bound of the uniform draw.

    Returns:
        ``numpy.ndarray`` of shape ``(num_features, num_features)``.
    """
    shape = (num_features, num_features)
    factor = np.random.uniform(low=0, high=num_features, size=shape)
    return np.matmul(factor.T, factor)
# USAGE EXAMPLE
# Dimensions of the synthetic dataset; the model is configured from these
# same two values so that data and model always agree.
num_columns = 4
num_rows = 10_000
model = dp.PCA(
    # NOTE(review): presumably the privacy-loss budget and the bound on each
    # row's norm -- confirm against the OpenDP PCA documentation.
    epsilon=1.,
    row_norm=1.,
    n_samples=num_rows,
    # Use num_columns instead of repeating the literal 4, so changing the
    # dataset width cannot silently desynchronize the model.
    n_features=num_columns,
)
model.fit(sample_microdata(num_columns=num_columns, num_rows=num_rows))
# EXTRACT RELEASES
# Print the quantities released by the fitted model.
print("mean", model.mean_)
print("singular values", model.singular_values_)
print("components", model.components_)
# Scale each principal component (a row of components_) by its own singular
# value. A plain `singular_values_ * components_` broadcasts along the last
# axis and would scale feature columns instead; it only avoids a shape error
# when n_components == n_features.
# NOTE(review): assumes the scikit-learn layout, components_ of shape
# (n_components, n_features) and singular_values_ of shape (n_components,)
# -- confirm for dp.PCA.
loadings = model.singular_values_[:, None] * model.components_
print("loadings", loadings)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment