Last active
August 3, 2021 06:40
-
-
Save narphorium/d06b7ed234287e319f18 to your computer and use it in GitHub Desktop.
Hello,
when I input values of shape (1000,1), I'm getting a lot of NaNs in the centroid list.
array([[-0.0615779 ],
[ 0. ],
[-0.01855482],
[ nan],
[ nan],
[ nan],
[ nan],
[-0.03768255],
[ 0.01288017],
[ 0.01535422],
[ 0.04958867],
[ nan],
[-0.01960552],
[ 0.09472825],
[-0.09461572],
[ nan]]
Basically I want to do the same as this MATLAB code does:
>> load fisheriris
>> X = meas(:,3);
>> [idx,C] = kmeans(X,3);
>> size(X) => [150,1]
>> size(idx) => [150,1]
>> size(C) => [3,1]
I think there's a problem with the calculation of the means, because this is where the assignment for the centroids is coming from, but I'm not sure where the NaN is coming from. Can somebody please give me a hint on how to fix it? :)
tf.sub
needs to be changed to tf.subtract
and
means = tf.concat(0, [
tf.reduce_mean(
tf.gather(vectors,
tf.reshape(
tf.where(
tf.equal(assignments, c)
),[1,-1])
),reduction_indices=[1])
for c in xrange(num_clusters)])
to
means = tf.concat([
tf.reduce_mean(
tf.gather(vectors,
tf.reshape(
tf.where(
tf.equal(assignments, c)
),[1,-1])
),reduction_indices=[1])
for c in xrange(num_clusters)], 0)
Thank you!!
In Python 3, I think it works!
# k-means clustering demo (TensorFlow 1.x graph API).
# Samples 2000 points from a mixture of two 2-D Gaussians, then alternates
# nearest-centroid assignment and centroid recomputation for 100 steps.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

num_points = 2000
vectors_set = []
for i in range(num_points):
    if np.random.random() > 0.5:
        vectors_set.append([np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)])
    else:
        vectors_set.append([np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)])

df = pd.DataFrame({"x": [v[0] for v in vectors_set], "y": [v[1] for v in vectors_set]})
# seaborn renamed lmplot's `size` parameter to `height` (seaborn >= 0.9).
sns.lmplot("x", "y", data=df, fit_reg=False, height=6)
plt.show()

# k-means algorithm
vectors = tf.constant(vectors_set)
num_clusters = 4
# BUG FIX: the original sliced with an undefined name `k` (NameError);
# the slice size must be num_clusters rows of the shuffled data.
centroides = tf.Variable(tf.slice(tf.random_shuffle(vectors), [0, 0], [num_clusters, -1]))
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)
# Squared Euclidean distance between every point and every centroid;
# argmin over the centroid axis yields each point's cluster id.
assignments = tf.argmin(tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2), 0)
# Updated centroid = mean of the points currently assigned to each cluster.
# NOTE(review): a cluster that receives no points produces a NaN mean here.
means = tf.concat(axis=0, values=[
    tf.reduce_mean(
        tf.gather(vectors,
                  tf.reshape(
                      tf.where(
                          tf.equal(assignments, c)
                      ), [1, -1])
                  ), axis=[1])
    for c in range(num_clusters)])
update_centroides = tf.assign(centroides, means)
# tf.initialize_all_variables() is deprecated; use the modern initializer.
init_op = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init_op)
for step in range(100):
    _, centroid_values, assignment_values = sess.run([update_centroides, centroides, assignments])

# Re-plot the data, coloured by the final cluster assignment.
data = {"x": [], "y": [], "cluster": []}
for i in range(len(assignment_values)):
    data["x"].append(vectors_set[i][0])
    data["y"].append(vectors_set[i][1])
    data["cluster"].append(assignment_values[i])
df = pd.DataFrame(data)
sns.lmplot("x", "y", data=df, fit_reg=False, height=6, hue="cluster", legend=False)
plt.show()
In Python 3.6, it still works well. Thank you! :D
# TensorFlow 1.x k-means walkthrough: sample a two-Gaussian mixture in 2-D,
# then alternate assignment and centroid-update steps for 100 iterations.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

num_points = 2000


def _sample_point():
    """Draw one 2-D point from one of two Gaussian blobs, chosen at random."""
    if np.random.random() > 0.5:
        return [np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)]
    return [np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)]


vectors_set = [_sample_point() for _ in range(num_points)]

df = pd.DataFrame({"x": [p[0] for p in vectors_set], "y": [p[1] for p in vectors_set]})
sns.lmplot("x", "y", data=df, fit_reg=False, height=6)
plt.show()

# k-means algorithm
vectors = tf.constant(vectors_set)
num_clusters = 4

# Initial centroids: the first num_clusters rows of a shuffled copy of the data.
centroides = tf.Variable(tf.slice(tf.random_shuffle(vectors), [0, 0], [num_clusters, -1]))

# Broadcast every point against every centroid; argmin over the centroid
# axis gives each point the id of its nearest centroid.
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)
squared_distances = tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2)
assignments = tf.argmin(squared_distances, 0)

# The mean of each cluster's assigned points becomes its updated centroid.
per_cluster_means = [
    tf.reduce_mean(
        tf.gather(vectors, tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1])),
        axis=[1])
    for c in range(num_clusters)
]
means = tf.concat(axis=0, values=per_cluster_means)
update_centroides = tf.assign(centroides, means)

init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
for step in range(100):
    _, centroid_values, assignment_values = sess.run([update_centroides, centroides, assignments])

# Re-plot the data, coloured by the final cluster assignment.
data = {
    "x": [p[0] for p in vectors_set],
    "y": [p[1] for p in vectors_set],
    "cluster": list(assignment_values),
}
df = pd.DataFrame(data)
sns.lmplot("x", "y", data=df, fit_reg=False, height=6, hue="cluster", legend=False)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good tutorial,
We can simplify the code of calculating the means by using
tf.boolean_mask
instead of tf.reshape(tf.where(..))
— I think it's more intuitive.