Skip to content

Instantly share code, notes, and snippets.

@uiur
Created January 18, 2012 13:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save uiur/1632915 to your computer and use it in GitHub Desktop.
Save uiur/1632915 to your computer and use it in GitHub Desktop.
カニバリズム山脈クラスタリング
class Cluster
attr_accessor :left, :right, :vec, :id, :distance
def initialize(args)
@vec = args[:vec]
@left = args[:left]
@right = args[:right]
@id = args[:id]
@distance = args[:distance]
end
def leaf?
!self.left && !self.right
end
def self.tanimoto(c1,c2)
n = c1.vec.size
n1, n2 = 0, 0
cross = 0
v1, v2 = c1.vec, c2.vec
n.times do |i|
n1 += 1 unless v1[i] == 0
n2 += 1 unless v2[i] == 0
cross += 1 if v1[i] != 0 && v2[i] != 0
end
1.0 - cross.to_f / (n1 + n2 - cross)
end
end
def hclusters(rows)
currentid = -1
distances = {}
clust = rows.map {|k,v| Cluster.new(vec: v, id: k) }
n = clust[0].vec.size
while clust.size > 1
p clust.size
lowestpair = [0,1]
closest = Cluster.tanimoto(clust[0], clust[1])
clust.size.times do |i|
(i+1...clust.size).each do |j|
distances[[clust[i].id,clust[j].id]] ||= Cluster.tanimoto(clust[i], clust[j])
d = distances[[clust[i].id, clust[j].id]]
if d < closest
closest = d
lowestpair = [i,j]
end
end
end
mergevec = []
n.times do |i|
mergevec << (clust[lowestpair[0]].vec[i] + clust[lowestpair[1]].vec[i]) / 2.0
end
newclust = Cluster.new(vec: mergevec, id: currentid, left: clust[lowestpair[0]], right: clust[lowestpair[1]],distance: closest)
clust.delete_at(lowestpair[0])
clust.delete_at(lowestpair[1]-1)
clust << newclust
currentid -= 1
end
return clust[0]
end
require 'cairo'
def tanimoto(user1, user2)
ids1 = user1[:subject_ids]
ids2 = user2[:subject_ids]
1.0 - ((ids1 & ids2).size.to_f / (ids1 | ids2).size)
end
def scaledown(data,m=1000,rate=0.001)
n = data.size
dist = Array.new(n) { Array.new(n, nil) }
fakedist = Array.new(n) { Array.new(n, nil) }
n.times do |i|
n.times do |j|
if i == j
dist[i][j] = 0.0
else
dist[i][j] = tanimoto(data[i], data[j])
end
end
end
lasterror = nil
pos = Array.new(n) { [rand, rand] }
m.times do |m|
n.times do |i|
n.times do |j|
fakedist[i][j] = Math.sqrt((pos[i][0] - pos[j][0]) ** 2 + (pos[i][1] - pos[j][1]) ** 2)
end
end
grad = Array.new(n) { [0.0,0.0] }
n.times do |i|
n.times do |j|
next if i == j
errorterm = (fakedist[i][j] - dist[i][j])
grad[i][0] += ((pos[i][0] - pos[j][0]) / fakedist[i][j])*errorterm
grad[i][1] += ((pos[i][1] - pos[j][1]) / fakedist[i][j])*errorterm
end
end
n.times do |i|
pos[i][0] -= rate*grad[i][0]
pos[i][1] -= rate*grad[i][1]
end
p grad[0]
p pos[0]
end
pos
end
def draw(data, labels=$labels, png='kanisan_cluster.png')
w = 2000
h = 2000
surface = Cairo::ImageSurface.new(w, h)
context = Cairo::Context.new(surface)
context.set_source_rgb(255, 255, 255)
context.rectangle(0, 0, w, h)
context.fill
context.set_source_rgb(0, 0, 0)
context.font_size = 25
data.size.times do |i|
x = (data[i][0] + 0.5)*1000
y = (data[i][1] + 0.5)*1000
context.move_to(x,y)
context.show_text(labels[i])
end
surface.write_to_png(png)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment