|
|
|
|
|
# LOAD |
|
|
|
from graph_tool.all import * |
|
|
|
print 'Loading graph...' |
|
g = load_graph('wnen30_noun_taxonomy.graphml.xml.gz') |
|
|
|
# read properties for synset nodes |
|
synsetid = g.vertex_properties['synsetid'] |
|
pos = g.vertex_properties['pos'] |
|
definition = g.vertex_properties['definition'] |
|
|
|
|
|
# COMPUTE |
|
|
|
print 'Computing topological sort...' |
|
topological_order = topological_sort(g) |
|
# print topological_order |
|
|
|
print 'Computing longest distances from root (entity synset is vertex 0)...' |
|
print 'Assigning initial distances...' |
|
longest_dist = g.new_vertex_property('int32_t') |
|
for v in g.vertices(): |
|
longest_dist[v] = -2147483648 # minimum int32_t number as -Inf ??? |
|
|
|
longest_dist[g.vertex(0)] = 0 # root is initialized to 0 |
|
|
|
from itertools import imap |
|
|
|
# unweighted topological sort (all edges have weight=1) |
|
# adapted from http://www.geeksforgeeks.org/find-longest-path-directed-acyclic-graph/ |
|
for u in imap(g.vertex, reversed(topological_order)): |
|
# print 'Considering node %d (current distance %d)' % (u, longest_dist[u]) |
|
for v in u.out_neighbours(): |
|
# print ' Comparing with node %d (current distance %d)' % (v, longest_dist[v]) |
|
if longest_dist[v] < longest_dist[u]+1: |
|
longest_dist[v] = longest_dist[u]+1 |
|
# print ' Updated distance to %d' % (longest_dist[v]) |
|
|
|
print 'Creating the longest path tree...' |
|
# all edges have to lead to a node with depth equal to longest_dist, otherwise they are part of a not-maximal path |
|
# in the case of a tie, an already seen node must not be taken twice |
|
from itertools import izip |
|
|
|
retain_longest = g.new_edge_property('bool') |
|
already_visited = set() |
|
|
|
def walk(node, depth): |
|
for edge, child in izip(node.out_edges(), node.out_neighbours()): |
|
if longest_dist[child] == depth and child not in already_visited: |
|
retain_longest[edge] = 1 |
|
already_visited.add(child) |
|
walk(child, depth+1) |
|
else: |
|
retain_longest[edge] = 0 |
|
|
|
tree = walk(g.vertex(0), 1) |
|
|
|
|
|
# EXPORT |
|
|
|
print 'Saving the graph (retained links are marked with "tree_link" = 1)...' |
|
g.edge_properties['tree_link'] = retain_longest |
|
g.save('wnen30_noun_tree_longest.graphml.xml.gz') |
Is there any program could I open the taxonomy?