Skip to content

Instantly share code, notes, and snippets.

@quinsulon
Forked from kaustubhn/crp.py
Created Oct 4, 2017
Embed
What would you like to do?
Chinese Restaurant Process
# Implementation of a Chinese Restaurant Process (CRP) clustering function
# for a given list of word vectors.
def _cosine_similarity(a, b):
    """Return the cosine similarity of two equal-length numeric vectors.

    Returns 0.0 when either vector has zero norm, so an all-zero vector is
    treated as "not similar" instead of raising ZeroDivisionError.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = sum(x * x for x in a) ** 0.5
    norm_b = sum(y * y for y in b) ** 0.5
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)


def crp(vecs):
    """Cluster vectors with a similarity-driven Chinese Restaurant Process.

    Each vector ("customer") is compared, by cosine similarity, against the
    running element-wise sum of every existing cluster ("table").  When the
    best similarity is below ``pnew = 1 / (1 + number_of_clusters)`` the
    vector opens a new cluster with probability ``pnew``; otherwise it joins
    the most similar cluster.  Every vector ends up in exactly one cluster.

    Args:
        vecs: Sized iterable of equal-length numeric vectors (lists, tuples
            or 1-D arrays).  The input is not modified.

    Returns:
        A list of clusters, each a list of indices into ``vecs`` in
        insertion order, e.g. ``[[0, 2, 5], [1, 3, 4]]``.  An empty input
        yields ``[]``.

    Note:
        The assignment is randomized through the global ``random`` module;
        call ``random.seed(...)`` beforehand for reproducible clusters.
    """
    # Function-scope import: this snippet has no module-level import block.
    import random

    cluster_sums = []     # element-wise sum of the vectors in each cluster
    cluster_members = []  # index lists, parallel to cluster_sums
    # Probability of creating a new table when the new customer is not
    # strongly "similar" to any existing table; shrinks as tables are added.
    p_new = 1.0
    # One U(0, 1) draw per vector, sampled up front.
    draws = [random.random() for _ in range(len(vecs))]

    for i, vec in enumerate(vecs):
        best_sim = -1.0
        best_idx = 0
        for j, total in enumerate(cluster_sums):
            sim = _cosine_similarity(vec, total)
            if sim > best_sim:
                best_idx, best_sim = j, sim

        # The first customer always opens the first table; later customers
        # open one only when dissimilar to every table AND the draw allows.
        if not cluster_sums or (best_sim < p_new and draws[i] < p_new):
            cluster_sums.append(list(vec))  # copy so the input is never aliased
            cluster_members.append([i])
            p_new = 1.0 / (1 + len(cluster_sums))
            continue

        # Element-wise sum (a plain `+` would concatenate Python lists).
        cluster_sums[best_idx] = [
            s + x for s, x in zip(cluster_sums[best_idx], vec)
        ]
        cluster_members[best_idx].append(i)

    return cluster_members
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment