Created
April 24, 2010 19:07
-
-
Save ybenjo/377859 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
#/opt/lcoal/bin/ruby | |
#Query Clustering using Click-Through Graph | |
#http://portal.acm.org/citation.cfm?id=1526853 | |
class BiGraph | |
def initialize | |
@nodes = Hash.new{|h,k|h[k] = 0} | |
@out_degree = Hash.new{|h,k|h[k] = 0} | |
@in_degree = Hash.new{|h,k|h[k] = 0} | |
@out_list = Hash.new{|h,k|h[k] = Array.new()} | |
@in_list = Hash.new{|h,k|h[k] = Array.new()} | |
@bi = [] | |
end | |
def get_sub_edges(sub_q, sub_p) | |
return sub_q.product(sub_p) | |
end | |
def set_sub_graph(sub_q, sub_p) | |
sub_g = BiGraph.new | |
get_sub_edges(sub_q, sub_p).map{|i, j| sub_g.set(i, j, @nodes[i => j])} | |
@bi.push sub_g if sub_g.weak_valid? | |
end | |
def delete_edge(i, j) | |
@out_degree[i] -= @nodes[i => j] | |
@in_degree[j] -= @nodes[i => j] | |
@nodes.delete(i => j) | |
@out_list[i].delete(j) | |
@in_list[j].delete(i) | |
end | |
def delete(i, j) | |
@out_list[i].map do |v| | |
@out_degree[i] -= @nodes[i => v] | |
@in_degree[v] -= @nodes[i => v] unless v == j | |
end | |
@in_list[j].map do |v| | |
@in_degree[j] -= @nodes[v => j] | |
@out_degree[v] -= @nodes[v => j] unless v == i | |
end | |
@out_list[i].map{|v| @nodes.delete(i => v)} | |
@in_list[j].map{|v| @nodes.delete(v => j)} | |
@out_list.delete(i) | |
@in_list.map{|v| v[1].delete(i)} | |
@in_list.delete(j) | |
@out_list.map{|v| v[1].delete(j)} | |
end | |
def weak_valid? | |
return @in_list.size != 0 && @out_list.size != 0 | |
end | |
def bulk_delete(sub_q, sub_p) | |
get_sub_edges(sub_q, sub_p).each do |i, j| | |
delete(i, j) | |
end | |
end | |
def biclique? | |
q_list = @out_degree.keys | |
p_list = @in_degree.keys | |
return q_list.uniq.product(p_list.uniq).inject(true){|r, v| | |
r &= @nodes[v[0] => v[1]] == 0 ? false : true | |
} | |
end | |
def split | |
#step 1 | |
#find query q, with out-degree is max | |
sub_q = @out_degree.max{|a,b|a[1] <=> b[1]}[0] | |
#list the neighbors of each q_k | |
sub_p = @out_list[sub_q] | |
#step 2 | |
large_q = [] | |
sub_p.each do |page| | |
large_q.push @in_list[page] | |
end | |
#step 3 | |
q_in = [] | |
q_in = large_q.inject([]){|a, b|a.empty? ? b : a & b} | |
#step 4 | |
set_sub_graph(q_in, sub_p) | |
bulk_delete(q_in, sub_p) | |
#step4.1と4.2の判定がよく分からないのでスルー | |
end | |
def sort | |
@in_list.map!{|k,v|v.sort!} | |
@out_list.map!{|k,v|v.sort!} | |
end | |
def find(query) | |
return @out_degree.include?(query) || @in_degree.include?(query) | |
end | |
def set(u, v, w = 1) | |
#u -- w --> v | |
@nodes[u=>v] = w | |
@out_list[u].push v | |
@in_list[v].push u | |
@out_degree[u] += w | |
@in_degree[v] += w | |
end | |
def clustering | |
sort | |
loop do | |
sum = @nodes.values.inject(0){|s,v|s+=v} | |
if sum != 0 && !biclique? | |
p sum | |
split | |
else | |
break | |
end | |
end | |
end | |
def check_answer | |
#答えとして得られたbicliqueが妥当かどうかのチェック | |
t = true | |
@bi.each do |sub_g| | |
if sub_g.biclique? | |
else | |
puts "Failed" | |
t = false | |
end | |
end | |
puts "All ok" if t | |
end | |
def get_cluster(query) | |
@bi.each do |sub_g| | |
if sub_g.find(query) | |
return sub_g | |
break | |
end | |
end | |
end | |
end | |
if __FILE__ == $0 | |
g = BiGraph.new | |
g.set(:x,:a,1) | |
g.set(:x,:b,2) | |
g.set(:y,:b,2) | |
g.set(:y,:c,1) | |
g.clustering | |
g.check_answer | |
puts "#{:y}'s subgraph is #{g.get_cluster(:y)}" | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment