Skip to content

Instantly share code, notes, and snippets.

@edsu

edsu/authorship.py

Created Aug 12, 2013
Embed
What would you like to do?
#!/usr/bin/env python
"""
Queries a populated rdflib store (created with crawl.py), generates a
networkx graph of author/title links, and outputs as a PNG.
"""
from rdflib.graph import ConjunctiveGraph
from rdflib.namespace import Namespace
import networkx as nx
import matplotlib.pyplot as plt
# RDF vocabularies whose prefixes are used by the SPARQL query below.
foaf = Namespace('http://xmlns.com/foaf/0.1/')
dct = Namespace('http://purl.org/dc/terms/')

# Open the 'Sleepycat'-backed rdflib store using the configuration 'store'.
graph = ConjunctiveGraph(store='Sleepycat')
graph.open('store')

# Undirected networkx graph that the rest of the script fills in.
G = nx.Graph()

# Select every resource that has a title and a creator, together with the
# creator's foaf:name.
q = '''
SELECT ?title_uri ?title ?author_uri ?author
WHERE {
?title_uri dct:title ?title .
?title_uri dct:creator ?author_uri .
?author_uri foaf:name ?author .
}
'''
results = graph.query(q, initNs=dict(dct=dct, foaf=foaf))
# Build the networkx graph of authors and titles: one node per author URI
# and one per title URI, joined by an edge for each creator relationship
# returned by the query.
for title_uri, title, author_uri, author in results:
    # NOTE(review): str() on a non-ASCII value may raise under Python 2 --
    # confirm whether the stored names/titles are ASCII-only.
    G.add_node(author_uri, label=str(author), type='author')
    G.add_node(title_uri, label=str(title), type='title')
    G.add_edge(author_uri, title_uri)
    # A parenthesised single argument prints identically with the Python 2
    # print statement and the Python 3 print function.
    print("%s -> %s" % (author_uri, title_uri))
# Remove smallish clusters (connected components of 4 or fewer nodes), which
# makes the graph less cluttered.
#
# The components are copied into a list before anything is removed so that G
# is never modified while it is still being traversed.  nx.connected_components
# yields plain collections of node ids and is available in both old and
# current networkx releases, unlike connected_component_subgraphs.
for component in list(nx.connected_components(G)):
    if len(component) <= 4:
        G.remove_nodes_from(component)
# Create one color per node ('r' for authors, 'g' for everything else, i.e.
# titles) and collect labels for the best-connected authors only.
#
# node_colors is built in G's node iteration order so that it lines up with
# the nodes when passed to nx.draw.
#
# NOTE(review): the threshold below labels authors whose degree is greater
# than 2 (three or more linked titles); this block was previously described
# as "more than 1 book" -- confirm which threshold is intended.
node_colors = []
node_labels = {}
# G.nodes(data=True) yields (node_id, attribute_dict) pairs on both old and
# current networkx releases.
for node_id, attrs in G.nodes(data=True):
    if attrs['type'] == 'author':
        node_colors.append('r')
        if G.degree(node_id) > 2:
            node_labels[node_id] = attrs['label']
    else:
        node_colors.append('g')
# Draw the graph and save it as authorship.png.  The rdflib store is closed
# in a ``finally`` clause so that it is released even when the Graphviz
# layout or the rendering step raises.
try:
    # graphviz_layout is a top-level networkx name only in older releases;
    # newer ones expose it from networkx.drawing.nx_agraph instead.
    try:
        graphviz_layout = nx.graphviz_layout
    except AttributeError:
        from networkx.drawing.nx_agraph import graphviz_layout
    pos = graphviz_layout(G)
    nx.draw(G,
            pos=pos,
            node_size=10,
            node_color=node_colors,
            labels=node_labels,
            font_size=6,
            vmin=80.0)
    plt.savefig('authorship.png', dpi=500)
finally:
    # finish up
    graph.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment