Skip to content

Instantly share code, notes, and snippets.

@ybenjo
Created April 24, 2010 19:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ybenjo/377859 to your computer and use it in GitHub Desktop.
Save ybenjo/377859 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
#/opt/lcoal/bin/ruby
#Query Clustering using Click-Through Graph
#http://portal.acm.org/citation.cfm?id=1526853
class BiGraph
def initialize
@nodes = Hash.new{|h,k|h[k] = 0}
@out_degree = Hash.new{|h,k|h[k] = 0}
@in_degree = Hash.new{|h,k|h[k] = 0}
@out_list = Hash.new{|h,k|h[k] = Array.new()}
@in_list = Hash.new{|h,k|h[k] = Array.new()}
@bi = []
end
def get_sub_edges(sub_q, sub_p)
return sub_q.product(sub_p)
end
def set_sub_graph(sub_q, sub_p)
sub_g = BiGraph.new
get_sub_edges(sub_q, sub_p).map{|i, j| sub_g.set(i, j, @nodes[i => j])}
@bi.push sub_g if sub_g.weak_valid?
end
def delete_edge(i, j)
@out_degree[i] -= @nodes[i => j]
@in_degree[j] -= @nodes[i => j]
@nodes.delete(i => j)
@out_list[i].delete(j)
@in_list[j].delete(i)
end
def delete(i, j)
@out_list[i].map do |v|
@out_degree[i] -= @nodes[i => v]
@in_degree[v] -= @nodes[i => v] unless v == j
end
@in_list[j].map do |v|
@in_degree[j] -= @nodes[v => j]
@out_degree[v] -= @nodes[v => j] unless v == i
end
@out_list[i].map{|v| @nodes.delete(i => v)}
@in_list[j].map{|v| @nodes.delete(v => j)}
@out_list.delete(i)
@in_list.map{|v| v[1].delete(i)}
@in_list.delete(j)
@out_list.map{|v| v[1].delete(j)}
end
def weak_valid?
return @in_list.size != 0 && @out_list.size != 0
end
def bulk_delete(sub_q, sub_p)
get_sub_edges(sub_q, sub_p).each do |i, j|
delete(i, j)
end
end
def biclique?
q_list = @out_degree.keys
p_list = @in_degree.keys
return q_list.uniq.product(p_list.uniq).inject(true){|r, v|
r &= @nodes[v[0] => v[1]] == 0 ? false : true
}
end
def split
#step 1
#find query q, with out-degree is max
sub_q = @out_degree.max{|a,b|a[1] <=> b[1]}[0]
#list the neighbors of each q_k
sub_p = @out_list[sub_q]
#step 2
large_q = []
sub_p.each do |page|
large_q.push @in_list[page]
end
#step 3
q_in = []
q_in = large_q.inject([]){|a, b|a.empty? ? b : a & b}
#step 4
set_sub_graph(q_in, sub_p)
bulk_delete(q_in, sub_p)
#step4.1と4.2の判定がよく分からないのでスルー
end
def sort
@in_list.map!{|k,v|v.sort!}
@out_list.map!{|k,v|v.sort!}
end
def find(query)
return @out_degree.include?(query) || @in_degree.include?(query)
end
def set(u, v, w = 1)
#u -- w --> v
@nodes[u=>v] = w
@out_list[u].push v
@in_list[v].push u
@out_degree[u] += w
@in_degree[v] += w
end
def clustering
sort
loop do
sum = @nodes.values.inject(0){|s,v|s+=v}
if sum != 0 && !biclique?
p sum
split
else
break
end
end
end
def check_answer
#答えとして得られたbicliqueが妥当かどうかのチェック
t = true
@bi.each do |sub_g|
if sub_g.biclique?
else
puts "Failed"
t = false
end
end
puts "All ok" if t
end
def get_cluster(query)
@bi.each do |sub_g|
if sub_g.find(query)
return sub_g
break
end
end
end
end
if __FILE__ == $0
g = BiGraph.new
g.set(:x,:a,1)
g.set(:x,:b,2)
g.set(:y,:b,2)
g.set(:y,:c,1)
g.clustering
g.check_answer
puts "#{:y}'s subgraph is #{g.get_cluster(:y)}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment