Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@gapato
Last active August 29, 2015 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gapato/05bfa41d2ef75e9ce392 to your computer and use it in GitHub Desktop.
Save gapato/05bfa41d2ef75e9ce392 to your computer and use it in GitHub Desktop.
Plot collaboration graph from list of papers
#!/usr/bin/python
from __future__ import division
import sys
import csv
import numpy # MATLAB-like numerical library
import networkx as nx # To plot the graph
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch, Circle
# Example
# 1,A,B,C,-,-,-,-
# 2,D,E,F,A,C,-,-
# 3,B,C,F,-,-,-,-
# 4,A,B,C,D,E,F,-
# [[ 0. 2. 3. 2. 2. 2.]
# [ 2. 0. 3. 1. 1. 2.]
# [ 3. 3. 0. 2. 2. 3.]
# [ 2. 1. 2. 0. 2. 2.]
# [ 2. 1. 2. 2. 0. 2.]
# [ 2. 2. 3. 2. 2. 0.]]
# The CSV file to process is given as the first command-line argument.
# sys.argv is like argv in the main function of a C program:
# argv[0] is the name of the command and argv[1] is the first parameter.
try:
    filename = sys.argv[1]
except IndexError:
    # Only a missing argument is expected here; any other exception
    # (e.g. KeyboardInterrupt) must not be masked by this message.
    raise ValueError("You must provide a CSV file to work on!")

# Author names, in order of first appearance
authors = []

# We don't know the number of authors a priori, so we start with
# a matrix of size 10 that we will grow (by a factor of 10) when needed
N = 10
def grow(A, papers):
    """Return enlarged, zero-padded copies of ``A`` and ``papers``.

    The module-level capacity ``N`` is multiplied by 10. The existing
    ``N`` x ``N`` block of ``A`` and the first ``N`` entries of ``papers``
    are copied into the top-left corner / the head of the new arrays, and
    ``N`` is updated to the new capacity before returning.

    Returns the tuple ``(matrix, counts)``; ``counts`` is allocated with
    dtype ``numpy.intc``.
    """
    global N
    old_size = N
    new_size = old_size * 10

    # Fresh zero-filled storage, then copy the existing data over
    bigger_matrix = numpy.zeros((new_size, new_size))
    bigger_counts = numpy.zeros(new_size, dtype=numpy.intc)
    bigger_matrix[:old_size, :old_size] = A
    bigger_counts[:old_size] = papers

    N = new_size
    return bigger_matrix, bigger_counts
def add_author(name, inc=False):
    """ Return the index of `name` in the global `authors` list,
    appending the name first when it is not in the list yet.
    `inc` is accepted but not used.
    """
    try:
        return authors.index(name)
    except ValueError:
        # First time this name is seen: it takes the next free index
        authors.append(name)
        return len(authors) - 1
def draw_network(G, pos, ax, sg=None):
    """ Draw `G` on `ax`: one circle per node, one curved arrow per edge.

    G   -- graph; iterating it yields the nodes and `G.edges(data=True)`
           yields (u, v, data) triples (presumably a networkx graph --
           confirm against the caller)
    pos -- mapping from node to its (x, y) position
    ax  -- object whose `add_patch` receives every created patch
           (presumably a matplotlib Axes)
    sg  -- unused; kept so that existing calls keep working

    Each node's circle is stored in the node's attribute dict under the
    key 'patch'. Returns the last arrow patch created, or None when the
    graph has no edges.
    """
    # `G.node` is missing from recent networkx releases, where the same
    # per-node attribute mapping is reached through `G.nodes`.
    node_attrs = G.node if hasattr(G, "node") else G.nodes

    for n in G:
        c = Circle(pos[n], radius=0.01, alpha=1)
        ax.add_patch(c)
        node_attrs[n]['patch'] = c

    # Curvature last used for each (u, v) pair, so that repeated edges
    # between the same nodes do not overlap
    seen = {}
    e = None  # stays None when there is no edge at all
    for (u, v, _data) in G.edges(data=True):
        n1 = node_attrs[u]['patch']
        n2 = node_attrs[v]['patch']
        rad = 0.1
        if (u, v) in seen:
            # Bend 0.1 further than the previous edge, on the opposite side
            rad = seen[(u, v)]
            rad = float((rad + numpy.sign(rad) * 0.1) * -1)
        e = FancyArrowPatch(n1.center, n2.center, patchA=n1, patchB=n2,
                            arrowstyle='-|>',
                            connectionstyle='arc3,rad=%s' % rad,
                            mutation_scale=10.0,
                            lw=2,
                            alpha=1,
                            color='k')
        seen[(u, v)] = rad
        ax.add_patch(e)
    return e
# Open the CSV file as f. It will be closed automatically
# when leaving this block
with open(filename) as f:
    # Our adjacency matrix: A[i, j] is incremented once for every row
    # in which authors i and j both appear
    A = numpy.zeros((N, N))
    # papers[i] is incremented once for every row author i appears in
    papers = numpy.zeros(N)

    # The file is read line by line
    reader = csv.reader(f)
    for row in reader:  # Easy to do a loop
        # Each line is a list of strings, corresponding to the values
        # between the commas in the file. The first value is skipped.
        # Only keep the actual (non-empty) names. This must be a real list,
        # not a lazy `filter` object, because it is sliced below.
        # NOTE(review): the example at the top of the file pads rows with
        # "-", which this test does not discard -- confirm the real format.
        author_names = [name for name in row[1:] if name != ""]

        # When we need to know the index of the current value when looping,
        # use the `enumerate` function. Here k is the index.
        for k, author_name in enumerate(author_names):
            i = add_author(author_name, inc=True)
            if i > N-1:
                A, papers = grow(A, papers)
            papers[i] += 1

            # Pair the current author with every author listed after it,
            # so each pair of a row is visited exactly once
            for coauthors in author_names[k+1:]:
                j = add_author(coauthors)
                if j > N-1:
                    A, papers = grow(A, papers)
                # Fill 'er up! (not trying to be smart with symmetry)
                A[i,j] += 1
                A[j,i] += 1

# Clip the matrix to the actual number of authors
K = len(authors)
A = A[:K, :K]
papers = papers[:K]
# Dump the co-authorship counts to table_data.txt as a comma-separated
# table: a header line with the author names, then one line per author.
with open("table_data.txt", "w") as table_file:
    # Spaces in names are replaced by dashes. A real list is needed here
    # (not a lazy `map` object) because it is joined and then indexed.
    formatted_authors = [s.replace(" ", "-") for s in authors]
    table_file.write("-,{0}\n".format(",".join(formatted_authors)))
    for i in range(K):
        table_file.write(formatted_authors[i])
        for j in range(K):
            # Only the strict upper triangle carries counts; the diagonal
            # and the lower triangle are written as 0
            table_file.write(",{0}".format(A[i,j] if j > i else 0))
        table_file.write("\n")
#idx = numpy.argsort(papers)
#papers = papers[idx]
#authors = [authors[idx[k]] for k in range(K)]
#P = numpy.zeros((K, K))
#for k in range(K):
#P[idx[k], k] = 1
#A = numpy.dot(P.T, numpy.dot(A, P))
# Generate the graph and plot it
#g = nx.from_numpy_matrix(A)
#node_labels = dict(zip(range(K), authors))
#g = nx.relabel_nodes(g, node_labels)
#pos = nx.graphviz_layout(g)
#pos = { k:(numpy.cos(2*numpy.pi*i/K), numpy.sin(2*numpy.pi*i/K)) for i,k in enumerate(authors) }
#edge_labels=dict([((u,v,),int(d['weight'])) for u,v,d in g.edges(data=True)])
#nx.draw_networkx_edge_labels(g,pos, edge_labels=edge_labels)
#nx.draw_networkx(g, pos=pos, node_color="white", node_size=0, font_size=9)
#ax=plt.gca()
#draw_network(g,pos,ax)
#ax.autoscale()
#plt.axis('equal')
#plt.axis('off')
#plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment