-
-
Save mjbommar/a645be7a0c66265b98f8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
@date Nov 27, 2009
@author: Michael Bommarito
@contact michael.bommarito@gmail.com

This is provided for purely academic purposes.

Build a directed, weighted graph from emailList.txt, keep its largest weakly
connected component, lay it out and render it to emails.svg.
'''
import igraph
import cairo

# Set of nodes
nodes = set()

# List of edges - edges can have multiplicity > 1
edges = []

# Read in the data from emailList.txt, which stores the output of parseEmails.py.
# Each line is a comma-separated record; the context manager closes the file.
with open('emailList.txt') as emailFile:
    emailData = [line.split(',') for line in emailFile.read().strip().splitlines()]

# Iterate over all emails.
# Fields 0 and 1 are not used here. Field 2 is the source of every edge and
# fields 3+ are the targets (presumably sender and recipients - confirm
# against the output format of parseEmails.py).
for email in emailData:
    # Update the node set
    nodes.update(email[2:])

    # Now update the edge list
    for b in email[3:]:
        edges.append((email[2], b))

# Convert the node set into a sorted list so we can index the nodes to integer.
# This is necessary for igraph.
# Then build the node map which goes from (node->index) for fast indexing next step.
nodes = sorted(nodes)
nodeMap = dict((v, i) for i, v in enumerate(nodes))

# Build a dictionary with key = integer-indexed edge and value = edge
# multiplicity in a single pass over the edge list (the previous
# edges.count(e) per unique edge was quadratic in the number of edges).
edgeDict = {}
for source, target in edges:
    key = (nodeMap[source], nodeMap[target])
    edgeDict[key] = edgeDict.get(key, 0) + 1

# Materialise as lists: both are indexed/iterated in lockstep below, and
# dictionary views are not indexable on Python 3. keys() and values() of the
# same unmodified dict are in matching order, so weights[k] belongs to edges[k].
edges = list(edgeDict.keys())
weights = list(edgeDict.values())

# Create the graph!
g = igraph.Graph(edges=edges, directed=True)

# Now do some cosmetic stuff.
# Label the nodes and then resize them and the edges.
degree = g.degree()
for i, v in enumerate(g.vs):
    # Label with the part after the '@'; fall back to the whole string when
    # the node has no '@' instead of raising IndexError.
    parts = nodes[i].split('@')
    v['label'] = parts[1] if len(parts) > 1 else parts[0]
    v['label_color'] = 'red'
    v['label_size'] = 8
    v['label_dist'] = 0
    v['size'] = degree[i] / 10.0
    v['color'] = 'blue'

# Edge k of the graph is edges[k], so pair each edge with its own weight.
# (The old code indexed weights with the leftover vertex-loop variable, which
# gave every edge the same width.)
for e, weight in zip(g.es, weights):
    e['width'] = weight / 5.0
    e['color'] = 'grey'
    e['arrow_width'] = 0.4
    e['arrow_size'] = 0.4

# Drop the pieces outside of the largest weakly connected component.
# giant() selects the largest cluster; subgraph(0) would merely select the
# cluster with id 0, which is not guaranteed to be the largest one.
# On the original author's data this went from:
#   Directed graph (|V| = 2261, |E| = 4529)
# to:
#   Directed graph (|V| = 2239, |E| = 4513)
g = g.clusters(mode=igraph.WEAK).giant()

# Edge ids changed with the subgraph, so re-read the (scaled) weights from
# the surviving edges; they are used as layout weights below.
weights = [e['width'] for e in g.es]

# Layout and plot the graph.
# The layout is 3-step: randomize, apply grid-FR to get close to home, then
# apply real FR with weighting.
# Deal with bbox and margin size stuff too...
# Then output to SVG.
# I manually backlit this in Inkscape.
layout = g.layout_random()
layout = g.layout_grid_fruchterman_reingold(maxiter=100, seed=layout)
layout = g.layout_fruchterman_reingold(maxiter=250, weights=weights, seed=layout)

pixels = 8000
margin = 50
svg = cairo.SVGSurface("emails.svg", pixels, pixels)
p = igraph.drawing.plot(g, target=svg, layout=layout, bbox=(0, 0, pixels, pixels), margin=(0, 0, margin, margin))
p.save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment