Skip to content

Instantly share code, notes, and snippets.

Last active March 31, 2020 11:42
Show Gist options
  • Save sn1p3r46/aaa127996404378c10fdaf0632a07632 to your computer and use it in GitHub Desktop.
Save sn1p3r46/aaa127996404378c10fdaf0632a07632 to your computer and use it in GitHub Desktop.
# Python implementation of "Fast Generation of Accurate Synthetic Microdata".
import numpy as np
def compute_mean(v):
    """Return the arithmetic mean of the values in ``v``.

    Args:
        v: A sized iterable of numbers (anything accepted by both ``sum``
            and ``len``).

    Returns:
        ``sum(v) / len(v)``.

    Raises:
        ZeroDivisionError: If ``v`` is empty.
    """
    return sum(v) / len(v)
def subtract_mean_to_col(M, idx):
    """Center column ``idx`` of ``M`` in place by subtracting its mean.

    Args:
        M: Two-dimensional NumPy array; it is modified in place. The
            centered values are cast to ``M``'s dtype on assignment, so a
            floating dtype is needed to keep the fractional part.
        idx: Index of the column to center.

    Returns:
        None. The result is written back into ``M[:, idx]``.
    """
    column = M[:, idx]
    # The right-hand side is a fresh array, so reading ``column`` (a view of
    # M) while assigning into the same slice is safe.
    M[:, idx] = column - np.mean(column)
def alg_two(n=None, m=None, A=None):
    """Build an ``n x m`` matrix whose columns have zero mean and identity
    covariance (population covariance, ``ddof=0``).

    Either the dimensions or a starting matrix can be supplied. Column 0 is
    centered; every later column ``i`` keeps its leading entries and has its
    last ``i + 1`` entries replaced by the solution of a linear system that
    makes the column orthogonal to the all-ones vector (zero mean) and to
    columns ``0 .. i-1``. Finally each column is divided by its standard
    deviation.

    Args:
        n: Number of rows of the random matrix to generate. Ignored when
            ``A`` is given.
        m: Number of columns of the random matrix. Defaults to ``n`` when
            omitted. Ignored when ``A`` is given.
        A: Optional two-dimensional starting matrix. It is copied to a float
            array, so the caller's object is left untouched.

    Returns:
        A new ``n x m`` float array with zero column means and identity
        covariance.

    Raises:
        ValueError: If neither ``n`` nor ``A`` is given, or if the matrix
            does not have strictly more rows than columns (``m`` centered,
            mutually orthogonal, non-zero columns need ``n > m``).
        numpy.linalg.LinAlgError: If one of the linear systems is singular.
    """
    # we can provide dimensions or a random matrix
    if n is None and A is None:
        raise ValueError("At least one parameter among n and A should be passed")
    # if a random matrix is not provided then generate one!
    if A is None:
        if m is None:
            m = n
        A = np.random.rand(n, m)
    else:
        # Work on a private float copy: avoids mutating the caller's array
        # and avoids truncation when an integer matrix is passed.
        A = np.array(A, dtype=float)
    n, m = A.shape
    if n <= m:
        raise ValueError(
            "The matrix needs more rows than columns (got n=%d, m=%d)" % (n, m)
        )

    A[:, 0] = A[:, 0] - np.mean(A[:, 0])
    ones = np.ones(n)
    for i in range(1, m):
        k = i + 1  # unknowns: one per constraint (ones vector + i columns)
        constraints = np.column_stack((ones, A[:, :i]))
        # For every constraint vector c: c_head . a_head + c_tail . x = 0,
        # where x are the last k entries of column i.
        rhs = -np.dot(constraints[:-k].T, A[:-k, i])
        A[-k:, i] = np.linalg.solve(constraints[-k:].T, rhs)
    # Columns are centered, so dividing by the std gives unit variance.
    return A / np.std(A, 0)
def alg_one(X):
    """Generate a synthetic data set with the same column means and
    covariance matrix (``ddof=0``) as ``X``.

    Relies on ``alg_two`` returning a matrix with zero column means and
    identity covariance; multiplying it by the Cholesky factor of the
    covariance of ``X`` then reproduces that covariance.

    Args:
        X: Two-dimensional array-like, one record per row and one
            attribute per column.

    Returns:
        Float array with the same shape as ``X``.

    Raises:
        numpy.linalg.LinAlgError: If the covariance matrix of ``X`` is not
            positive definite (Cholesky factorization fails).
    """
    X = np.asarray(X, dtype=float)
    # 1. random matrix with identity covariance, same shape as X
    A = alg_two(*X.shape)
    # 2. covariance of the original data (kept 2-D even for one column)
    C_x = np.atleast_2d(np.cov(X, ddof=0, rowvar=False))
    # 3. upper-triangular factor U with U.T @ U == C_x
    U = np.linalg.cholesky(C_x).T
    # 4. cov(A U) = U.T cov(A) U = C_x
    X_I = np.dot(A, U)
    # 5. restore the original column means
    X_col_means = np.mean(X, 0)
    return X_I + X_col_means
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment