Created
August 16, 2012 17:25
-
-
Save SirVer/3371833 to your computer and use it in GitHub Desktop.
Actor centrality with RBTrees as containers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
load("examples/rbtree.jl") | |
type RBTreeSet{K} | |
tree::RBTree{K,Bool} | |
RBTreeSet() = new(RBTree{K, Bool}()) | |
end | |
add(set::RBTreeSet, key) = insert_or_modify(set.tree, true, key) | |
length(set::RBTreeSet) = length(set.tree) | |
start(set::RBTreeSet) = start(set.tree) | |
done(set::RBTreeSet, a) = done(set.tree, a) | |
isempty(set::RBTreeSet) = length(set.tree)== 0 | |
function next(set::RBTreeSet, state) | |
value, nstate = next(set.tree, state) | |
return value[1], nstate | |
end | |
type Node | |
name::UTF8String | |
n::RBTreeSet{Node} | |
Node(name) = new(name, RBTreeSet{Node}()) | |
end | |
function isless(n1::Node, n2::Node) | |
n1.name < n2.name | |
end | |
typealias Graph RBTree{UTF8String, Node} | |
function get(G::Graph, name) | |
name = convert(UTF8String, name) | |
if has(G, name) | |
return G[name] | |
end | |
G[name] = Node(name) | |
end | |
function centrality_mean(G::Graph, start_node) | |
dists = RBTree{Node,Int64}() | |
next = RBTreeSet{Node}(); add(next, G[start_node]) | |
cdist = 0 | |
while !isempty(next) | |
nnext = RBTreeSet{Node}() | |
for n in next | |
if !has(dists, n) | |
dists[n] = cdist | |
for neigh in n.n | |
add(nnext, neigh) | |
end | |
end | |
end | |
cdist += 1 | |
next = nnext | |
end | |
# rv = 0 | |
# nitems = 0 | |
# for (k,v) in dists | |
# rv += v | |
# nitems += 1 | |
# end | |
# return rv/nitems | |
mean([ v for (k,v) in dists ]) | |
end | |
function read_graph() | |
G = Graph() | |
actors = RBTreeSet{UTF8String}() | |
open("/Users/sirver/Desktop/Programming/Python/learning/udacity/cs215/ps4/imdb-1.tsv", "r") do io | |
while !eof(io) | |
k = split(strip(readline(io)), "\t") | |
actor, movie = k[1], join(k[2:3], "_") | |
ac, mn = get(G, actor), get(G, movie) | |
add(actors, convert(UTF8String, actor)) | |
add(ac.n, mn) | |
add(mn.n, ac) | |
end | |
end | |
G, sort!([ a for a in actors]) | |
end | |
function main() | |
G, actors = read_graph() | |
d = RBTree{UTF8String, Float64}() | |
for a in actors[1:100] | |
a = convert(UTF8String, a) | |
d[a] = centrality_mean(G, a) | |
print("$a: ", d[a], "\n") | |
end | |
for (k,v) in d | |
print("$k:$v\n") | |
end | |
vals = sort!([(v,k) for (k,v) in d]) | |
for i=1:20 | |
print("$i: ", vals[i], "\n") | |
end | |
end | |
print("Elapsed: ", @elapsed main(), "\n") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment