Skip to content

Instantly share code, notes, and snippets.

@SirVer
Created August 16, 2012 17:25
Show Gist options
  • Save SirVer/3371833 to your computer and use it in GitHub Desktop.
Save SirVer/3371833 to your computer and use it in GitHub Desktop.
Actor centrality with RBTrees as containers
load("examples/rbtree.jl")
type RBTreeSet{K}
tree::RBTree{K,Bool}
RBTreeSet() = new(RBTree{K, Bool}())
end
add(set::RBTreeSet, key) = insert_or_modify(set.tree, true, key)
length(set::RBTreeSet) = length(set.tree)
start(set::RBTreeSet) = start(set.tree)
done(set::RBTreeSet, a) = done(set.tree, a)
isempty(set::RBTreeSet) = length(set.tree)== 0
function next(set::RBTreeSet, state)
value, nstate = next(set.tree, state)
return value[1], nstate
end
type Node
name::UTF8String
n::RBTreeSet{Node}
Node(name) = new(name, RBTreeSet{Node}())
end
function isless(n1::Node, n2::Node)
n1.name < n2.name
end
typealias Graph RBTree{UTF8String, Node}
function get(G::Graph, name)
name = convert(UTF8String, name)
if has(G, name)
return G[name]
end
G[name] = Node(name)
end
function centrality_mean(G::Graph, start_node)
dists = RBTree{Node,Int64}()
next = RBTreeSet{Node}(); add(next, G[start_node])
cdist = 0
while !isempty(next)
nnext = RBTreeSet{Node}()
for n in next
if !has(dists, n)
dists[n] = cdist
for neigh in n.n
add(nnext, neigh)
end
end
end
cdist += 1
next = nnext
end
# rv = 0
# nitems = 0
# for (k,v) in dists
# rv += v
# nitems += 1
# end
# return rv/nitems
mean([ v for (k,v) in dists ])
end
function read_graph()
G = Graph()
actors = RBTreeSet{UTF8String}()
open("/Users/sirver/Desktop/Programming/Python/learning/udacity/cs215/ps4/imdb-1.tsv", "r") do io
while !eof(io)
k = split(strip(readline(io)), "\t")
actor, movie = k[1], join(k[2:3], "_")
ac, mn = get(G, actor), get(G, movie)
add(actors, convert(UTF8String, actor))
add(ac.n, mn)
add(mn.n, ac)
end
end
G, sort!([ a for a in actors])
end
function main()
G, actors = read_graph()
d = RBTree{UTF8String, Float64}()
for a in actors[1:100]
a = convert(UTF8String, a)
d[a] = centrality_mean(G, a)
print("$a: ", d[a], "\n")
end
for (k,v) in d
print("$k:$v\n")
end
vals = sort!([(v,k) for (k,v) in d])
for i=1:20
print("$i: ", vals[i], "\n")
end
end
print("Elapsed: ", @elapsed main(), "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment