Last active
August 29, 2015 14:19
-
-
Save gapato/05bfa41d2ef75e9ce392 to your computer and use it in GitHub Desktop.
Plot collaboration graph from list of papers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import division | |
import sys | |
import csv | |
import numpy # MATLAB-like numerical library | |
import networkx as nx # To plot the graph | |
import matplotlib.pyplot as plt | |
from matplotlib.patches import FancyArrowPatch, Circle | |
# Example | |
# 1,A,B,C,-,-,-,- | |
# 2,D,E,F,A,C,-,- | |
# 3,B,C,F,-,-,-,- | |
# 4,A,B,C,D,E,F,- | |
# [[ 0. 2. 3. 2. 2. 2.] | |
# [ 2. 0. 3. 1. 1. 2.] | |
# [ 3. 3. 0. 2. 2. 3.] | |
# [ 2. 1. 2. 0. 2. 2.] | |
# [ 2. 1. 2. 2. 0. 2.] | |
# [ 2. 2. 3. 2. 2. 0.]] | |
# The CSV file to process is the first command-line argument
# (sys.argv[0] is the name of the script itself, sys.argv[1] the first
# parameter, like argv in the main function of a C program).
try:
    filename = sys.argv[1]
except IndexError:
    # Only a missing argument is expected here; any other error should
    # surface unchanged instead of being reported as a usage problem.
    raise ValueError("You must provide a CSV file to work on!")

# Author names, in order of first appearance. The position of a name in
# this list is the index used for that author in the arrays below.
authors = []

# We don't know the number of authors a priori, so we start with
# a matrix of size 10 that we will grow (by a factor of 10) when needed.
N = 10
def grow(A, papers):
    """Return enlarged, zero-padded copies of `A` and `papers`.

    The new arrays are ten times larger along every dimension. Existing
    values are copied into the leading corner; all new entries are zero.
    The module-level capacity `N` is updated to the new size.

    Parameters
    ----------
    A : square 2-D numpy array (the adjacency matrix).
    papers : 1-D numpy array with as many entries as `A` has rows.

    Returns
    -------
    (B, p) : the enlarged matrix and the enlarged 1-D array.
    """
    global N
    # Take the current size from the array itself rather than from the
    # global, so the copy below cannot go out of sync with the real shape.
    size = A.shape[0]
    next_N = size * 10
    # Create empty (all-zero) arrays of the new size. `p` keeps the dtype of
    # `papers` so the counts do not silently change type on first growth.
    B = numpy.zeros((next_N, next_N))
    p = numpy.zeros(next_N, dtype=papers.dtype)
    # And copy existing data
    B[:size, :size] = A
    p[:size] = papers
    N = next_N
    return B, p
def add_author(name, inc=False):
    """Return the index of `name` in the global `authors` list.

    The name is appended to the list first when it is not already there,
    so the returned index is always valid.

    Parameters
    ----------
    name : the author name to look up or register.
    inc : accepted for backward compatibility; it is not used.

    Returns
    -------
    int : position of `name` in `authors`.
    """
    try:
        # One scan for known authors (instead of `in` followed by `index`).
        return authors.index(name)
    except ValueError:
        # New author: it goes at the end, so its index is the last one.
        authors.append(name)
        return len(authors) - 1
def draw_network(G, pos, ax, sg=None):
    """Draw graph `G` on `ax` as circles joined by curved arrows.

    A `Circle` patch is added to `ax` for every node and stored in that
    node's attributes under the key 'patch'. A `FancyArrowPatch` is then
    added for every edge. When the same (u, v) pair shows up again, the
    curvature is increased and flipped so the arrows do not overlap.

    Parameters
    ----------
    G : graph to draw; it is iterated for nodes and `G.edges(data=True)`
        is used for edges.
    pos : mapping from node to its (x, y) position.
    ax : axes object the patches are added to.
    sg : unused.

    Returns
    -------
    The last arrow patch added, or None when `G` has no edges.
    """
    # Older networkx releases expose the node attribute dicts as `G.node`,
    # newer ones only as `G.nodes`; use whichever is available.
    node_attrs = G.node if hasattr(G, "node") else G.nodes

    for n in G:
        c = Circle(pos[n], radius=0.01, alpha=1)
        ax.add_patch(c)
        node_attrs[n]['patch'] = c

    # Last curvature used for each (u, v) pair already drawn
    seen = {}
    # Stays None when there is nothing to draw, so the return below is safe
    e = None
    for (u, v, d) in G.edges(data=True):
        n1 = node_attrs[u]['patch']
        n2 = node_attrs[v]['patch']
        rad = 0.1
        if (u, v) in seen:
            # Bend a little more than last time, on the opposite side
            rad = seen[(u, v)]
            rad = (rad + float(numpy.sign(rad)) * 0.1) * -1
        e = FancyArrowPatch(n1.center, n2.center, patchA=n1, patchB=n2,
                            arrowstyle='-|>',
                            connectionstyle='arc3,rad=%s' % rad,
                            mutation_scale=10.0,
                            lw=2,
                            alpha=1,
                            color='k')
        seen[(u, v)] = rad
        ax.add_patch(e)
    return e
# Build the co-authorship matrix from the CSV file, opened as f.
# The file is closed automatically when leaving this block.
with open(filename) as f:
    # Our adjacency matrix, and one counter per author
    A = numpy.zeros((N, N))
    papers = numpy.zeros(N)

    # The file is read line by line. Each row is a list of strings,
    # corresponding to the values between the commas in the file.
    for row in csv.reader(f):
        # Skip the first column and only keep the actual (non-empty) names.
        # This must be a real list: it is sliced below, which a lazy
        # `filter` object (Python 3) does not support.
        author_names = [x for x in row[1:] if x != ""]

        # When we need to know the index of the current value when looping,
        # use the `enumerate` function. Here k is the index.
        for k, author_name in enumerate(author_names):
            i = add_author(author_name, inc=True)
            if i > N-1:
                A, papers = grow(A, papers)
            papers[i] += 1

            # Pair this author with every name that follows on the same row
            for coauthors in author_names[k+1:]:
                j = add_author(coauthors)
                if j > N-1:
                    A, papers = grow(A, papers)
                # Fill 'er up! (not trying to be smart with symmetry)
                A[i,j] += 1
                A[j,i] += 1
# Clip the matrix to the actual number of authors
K = len(authors)
A = A[:K, :K]
papers = papers[:K]

# Write the table: a header row with the author names, then one row per
# author. The file is closed automatically when leaving this block.
with open("table_data.txt", "w") as table_file:
    # Spaces in names are replaced by dashes. This must be a real list: it
    # is both joined and indexed below, which a lazy `map` object (Python 3)
    # does not support.
    formatted_authors = [s.replace(" ", "-") for s in authors]
    table_file.write("-,{0}\n".format(",".join(formatted_authors)))
    for i in range(K):
        table_file.write(formatted_authors[i])
        for j in range(K):
            # Only entries strictly above the diagonal are written out;
            # every other cell is written as 0.
            table_file.write(",{0}".format(A[i,j] if j > i else 0))
        table_file.write("\n")
#idx = numpy.argsort(papers) | |
#papers = papers[idx] | |
#authors = [authors[idx[k]] for k in range(K)] | |
#P = numpy.zeros((K, K)) | |
#for k in range(K): | |
#P[idx[k], k] = 1 | |
#A = numpy.dot(P.T, numpy.dot(A, P)) | |
# Generate the graph and plot it | |
#g = nx.from_numpy_matrix(A) | |
#node_labels = dict(zip(range(K), authors)) | |
#g = nx.relabel_nodes(g, node_labels) | |
#pos = nx.graphviz_layout(g) | |
#pos = { k:(numpy.cos(2*numpy.pi*i/K), numpy.sin(2*numpy.pi*i/K)) for i,k in enumerate(authors) } | |
#edge_labels=dict([((u,v,),int(d['weight'])) for u,v,d in g.edges(data=True)]) | |
#nx.draw_networkx_edge_labels(g,pos, edge_labels=edge_labels) | |
#nx.draw_networkx(g, pos=pos, node_color="white", node_size=0, font_size=9) | |
#ax=plt.gca() | |
#draw_network(g,pos,ax) | |
#ax.autoscale() | |
#plt.axis('equal') | |
#plt.axis('off') | |
#plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment