Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Create a synthetic, labelled 2D dataset of Gaussian clusters around fixed centers, for testing clustering algorithms
import numpy as np
import matplotlib.pyplot as plt
# Build a labelled synthetic 2D dataset for testing clustering algorithms:
# one Gaussian blob per entry in `centers`, stacked into a single
# (len(centers) * n, 3) array whose columns are x, y and the cluster index
# (stored as a float). The result is printed, plotted and written to CSV.
n = 200  # samples per center
centers = [[10, 5], [-2, 4], [13, -25], [11, 20], [15, -30], [3, -2]]
# Alternative, smaller configuration:
# centers = [[10, 5], [-2, 4], [11, 20], [15, -34]]

# NOTE: despite the name, `sigma` is placed on the diagonal of the covariance
# matrix, i.e. it is the per-axis *variance*, not the standard deviation.
sigma = 2

clusters = []
for i, center in enumerate(centers):
    # Off-diagonal covariance term, drawn uniformly from [0, 1). It is a
    # covariance rather than a correlation coefficient; because it is always
    # smaller than `sigma`, the matrix stays positive definite.
    correlation = np.random.rand()
    covariance = [[sigma, correlation], [correlation, sigma]]
    cluster = np.random.multivariate_normal(center, covariance, n)
    # Third column carries the index of the generating center as the label.
    label = np.full((n, 1), i, dtype=float)
    clusters.append(np.hstack([cluster, label]))

# Stack once at the end instead of re-copying the growing array on every
# iteration; fall back to an empty (0, 3) array when there are no centers.
dataset = np.vstack(clusters) if clusters else np.zeros((0, 3))
print(dataset.shape)

plt.scatter(dataset[:, 0], dataset[:, 1])

filename = "2d_complex.csv"
np.savetxt(filename, dataset, delimiter=",", fmt='%f')

# Render the scatter plot; called last so the CSV is already on disk before
# an interactive backend blocks waiting for the window to be closed.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.