# gapato/adjacence.py

## adjacence.py
#!/usr/bin/python
from __future__ import division

import sys
import csv
import numpy            # MATLAB-like numerical library
import networkx as nx   # To plot the graph
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch, Circle

# Example

# 1,A,B,C,-,-,-,-
# 2,D,E,F,A,C,-,-
# 3,B,C,F,-,-,-,-
# 4,A,B,C,D,E,F,-

# [[ 0.  2.  3.  2.  2.  2.]
#  [ 2.  0.  3.  1.  1.  2.]
#  [ 3.  3.  0.  2.  2.  3.]
#  [ 2.  1.  2.  0.  2.  2.]
#  [ 2.  1.  2.  2.  0.  2.]
#  [ 2.  2.  3.  2.  2.  0.]]

try:
    # sys.argv[0] is the name of the command and sys.argv[1] is the first
    # command-line parameter, which must be the path of the CSV file.
    filename = sys.argv[1]
except IndexError:
    # Only a missing argument is expected here. A bare ``except`` would also
    # intercept unrelated exceptions such as KeyboardInterrupt.
    raise ValueError("You must provide a CSV file to work on!")

# Names of every author seen so far. add_author() appends to this list, and
# an author's position in it is the row/column index used for that author
# in the adjacency matrix.
authors = []

# We don't know the number of authors a priori, so we start with
# a matrix of size 10 that grow() enlarges (by a factor of 10) when needed.
# grow() also rebinds N to the new size.
N = 10

def grow(A, papers):
    """Return enlarged copies of the adjacency matrix and the paper counts.

    Both arrays become ten times larger along every axis. The existing
    values are copied into the leading corner and every new entry is zero.
    The module-level capacity ``N`` is rebound to the new size.

    Parameters
    ----------
    A : square 2-D numpy array (current adjacency matrix).
    papers : 1-D numpy array with one entry per row of ``A``.

    Returns
    -------
    (B, p) : the enlarged matrix (float) and the enlarged counts
        (``numpy.intc``).
    """
    global N

    # Use the real size of A rather than trusting the global N, so the
    # copies below can never fail on a shape mismatch if the two disagree.
    size = A.shape[0]
    next_N = size * 10

    # Create empty (zero-filled) containers of the new size
    B = numpy.zeros((next_N, next_N))
    p = numpy.zeros(next_N, dtype=numpy.intc)

    # And copy existing data into the leading corner
    B[:size, :size] = A
    p[:size] = papers

    # Publish the new capacity for the callers' bounds checks
    N = next_N

    return B, p

def add_author(name, inc=False):
    """Return the position of ``name`` in the global ``authors`` list.

    A name that is not in the list yet is appended first, so the call
    always succeeds. ``inc`` is accepted but not used by this function.
    """
    try:
        # Known author: hand back the existing slot.
        return authors.index(name)
    except ValueError:
        # New author: it takes the next free slot at the end of the list.
        authors.append(name)
        return len(authors) - 1

def draw_network(G,pos,ax,sg=None):
    """Draw the nodes of ``G`` as circles and its edges as curved arrows.

    Parameters
    ----------
    G : graph object; iterated for its nodes and queried with
        ``G.edges(data=True)``.
    pos : mapping from node to the position passed to ``Circle``.
    ax : axes object; every patch is added with ``ax.add_patch``.
    sg : accepted for compatibility, not used.

    Each node's circle is stored in that node's attribute dict under the
    key ``'patch'``. When the same ``(u, v)`` pair is met again, the arrow
    is bent further and to the other side so the arrows do not overlap.

    Returns
    -------
    The last arrow patch drawn, or ``None`` when the graph has no edges.
    """
    # ``Graph.node`` was removed in networkx 2.4 in favour of ``Graph.nodes``;
    # use whichever attribute this version provides.
    node_data = G.node if hasattr(G, "node") else G.nodes

    for n in G:
        c = Circle(pos[n], radius=0.01, alpha=1)
        ax.add_patch(c)
        node_data[n]['patch'] = c

    seen = {}
    # Stays None when there is no edge, instead of raising on ``return e``.
    e = None
    for (u, v, _data) in G.edges(data=True):
        n1 = node_data[u]['patch']
        n2 = node_data[v]['patch']
        rad = 0.1
        if (u, v) in seen:
            # Grow the curvature by 0.1 and flip its sign. The module is
            # imported as ``numpy``; the name ``np`` does not exist here.
            rad = seen[(u, v)]
            rad = float((rad + numpy.sign(rad) * 0.1) * -1)

        e = FancyArrowPatch(n1.center, n2.center, patchA=n1, patchB=n2,
                            arrowstyle='-|>',
                            connectionstyle='arc3,rad=%s' % rad,
                            mutation_scale=10.0,
                            lw=2,
                            alpha=1,
                            color='k')
        seen[(u, v)] = rad
        ax.add_patch(e)
    return e

# Open the CSV file as f. It will be closed automatically
# when leaving this block
with open(filename) as f:

    # Our adjacency matrix: A[i, j] is incremented for every row of the
    # file in which authors i and j appear together
    A = numpy.zeros((N, N))
    # papers[i] is incremented for every occurrence of author i in a row.
    # Integer dtype, to match the array grow() hands back.
    papers = numpy.zeros(N, dtype=numpy.intc)

    # The file is read line by line
    reader = csv.reader(f)
    for row in reader:
        # Each line is a list of strings, corresponding to the values
        # between the commas in the file

        # Skip the first column and only keep the non-empty names.
        # A real list (not a lazy filter object) is needed because it is
        # sliced below.
        author_names = [name for name in row[1:] if name != ""]

        # When we need to know the index of the current value when looping,
        # use the `enumerate` function. Here k is the index.
        for k, author_name in enumerate(author_names):
            i = add_author(author_name, inc=True)
            if i > N-1:
                A, papers = grow(A, papers)
            papers[i] += 1
            # Pair the current author with everyone listed after it
            for coauthor in author_names[k+1:]:
                j = add_author(coauthor)
                if j > N-1:
                    A, papers = grow(A, papers)

                # Fill 'er up! (not trying to be smart with symmetry)
                A[i,j] += 1
                A[j,i] += 1

    # Clip the matrix to the actual number of authors
    K = len(authors)
    A = A[:K, :K]
    papers = papers[:K]

    with open("table_data.txt", "w") as table_file:
        # A real list is needed: it is joined for the header line and then
        # indexed for each row label.
        formatted_authors = [s.replace(" ", "-") for s in authors]
        table_file.write("-,{0}\n".format(",".join(formatted_authors)))
        for i in range(K):
            table_file.write(formatted_authors[i])
            for j in range(K):
                # Only the strict upper triangle is written; the rest is 0
                table_file.write(",{0}".format(A[i,j] if j > i else 0))
            table_file.write("\n")

    #idx = numpy.argsort(papers)
    #papers = papers[idx]
    #authors = [authors[idx[k]] for k in range(K)]
    #P = numpy.zeros((K, K))
    #for k in range(K):
        #P[idx[k], k] = 1

    #A = numpy.dot(P.T, numpy.dot(A, P))


    # Generate the graph and plot it
    #g = nx.from_numpy_matrix(A)

    #node_labels = dict(zip(range(K), authors))
    #g = nx.relabel_nodes(g, node_labels)

    #pos = nx.graphviz_layout(g)
    #pos = { k:(numpy.cos(2*numpy.pi*i/K), numpy.sin(2*numpy.pi*i/K)) for i,k in enumerate(authors) }

    #edge_labels=dict([((u,v,),int(d['weight'])) for u,v,d in g.edges(data=True)])

    #nx.draw_networkx_edge_labels(g,pos, edge_labels=edge_labels)
    #nx.draw_networkx(g, pos=pos, node_color="white", node_size=0, font_size=9)

    #ax=plt.gca()
    #draw_network(g,pos,ax)
    #ax.autoscale()

    #plt.axis('equal')
    #plt.axis('off')
    #plt.show()
	#!/usr/bin/python
	from __future__ import division

	import sys
	import csv
	import numpy # MATLAB-like numerical library
	import networkx as nx # To plot the graph
	import matplotlib.pyplot as plt
	from matplotlib.patches import FancyArrowPatch, Circle

	# Example

	# 1,A,B,C,-,-,-,-
	# 2,D,E,F,A,C,-,-
	# 3,B,C,F,-,-,-,-
	# 4,A,B,C,D,E,F,-

	# [[ 0. 2. 3. 2. 2. 2.]
	# [ 2. 0. 3. 1. 1. 2.]
	# [ 3. 3. 0. 2. 2. 3.]
	# [ 2. 1. 2. 0. 2. 2.]
	# [ 2. 1. 2. 2. 0. 2.]
	# [ 2. 2. 3. 2. 2. 0.]]

	try:
		# sys.argv[0] is the name of the command and sys.argv[1] is the first
		# command-line parameter, which must be the path of the CSV file.
		filename = sys.argv[1]
	except IndexError:
		# Only a missing argument is expected here. A bare ``except`` would
		# also intercept unrelated exceptions such as KeyboardInterrupt.
		raise ValueError("You must provide a CSV file to work on!")

	# Names of every author seen so far. add_author() appends to this list, and
	# an author's position in it is the row/column index used for that author
	# in the adjacency matrix.
	authors = []

	# We don't know the number of authors a priori, so we start with
	# a matrix of size 10 that grow() enlarges (by a factor of 10) when needed.
	# grow() also rebinds N to the new size.
	N = 10

	def grow(A, papers):
		"""Return enlarged copies of the adjacency matrix and the paper counts.

		Both arrays become ten times larger along every axis. The existing
		values are copied into the leading corner and every new entry is
		zero. The module-level capacity ``N`` is rebound to the new size.

		Parameters
		----------
		A : square 2-D numpy array (current adjacency matrix).
		papers : 1-D numpy array with one entry per row of ``A``.

		Returns
		-------
		(B, p) : the enlarged matrix (float) and the enlarged counts
			(``numpy.intc``).
		"""
		global N

		# Use the real size of A rather than trusting the global N, so the
		# copies below can never fail on a shape mismatch.
		size = A.shape[0]
		next_N = size * 10

		# Create empty (zero-filled) containers of the new size
		B = numpy.zeros((next_N, next_N))
		p = numpy.zeros(next_N, dtype=numpy.intc)

		# And copy existing data into the leading corner
		B[:size, :size] = A
		p[:size] = papers

		# Publish the new capacity for the callers' bounds checks
		N = next_N

		return B, p

	def add_author(name, inc=False):
		"""Return the position of ``name`` in the global ``authors`` list.

		A name that is not in the list yet is appended first, so the call
		always succeeds. ``inc`` is accepted but not used by this function.
		"""
		try:
			# Known author: hand back the existing slot.
			return authors.index(name)
		except ValueError:
			# New author: it takes the next free slot at the end of the list.
			authors.append(name)
			return len(authors) - 1

	def draw_network(G,pos,ax,sg=None):
		"""Draw the nodes of ``G`` as circles and its edges as curved arrows.

		Parameters
		----------
		G : graph object; iterated for its nodes and queried with
			``G.edges(data=True)``.
		pos : mapping from node to the position passed to ``Circle``.
		ax : axes object; every patch is added with ``ax.add_patch``.
		sg : accepted for compatibility, not used.

		Each node's circle is stored in that node's attribute dict under the
		key ``'patch'``. When the same ``(u, v)`` pair is met again, the
		arrow is bent further and to the other side so arrows do not overlap.

		Returns
		-------
		The last arrow patch drawn, or ``None`` when the graph has no edges.
		"""
		# ``Graph.node`` was removed in networkx 2.4 in favour of
		# ``Graph.nodes``; use whichever attribute this version provides.
		node_data = G.node if hasattr(G, "node") else G.nodes

		for n in G:
			c = Circle(pos[n], radius=0.01, alpha=1)
			ax.add_patch(c)
			node_data[n]['patch'] = c

		seen = {}
		# Stays None when there is no edge, instead of raising on ``return e``.
		e = None
		for (u, v, _data) in G.edges(data=True):
			n1 = node_data[u]['patch']
			n2 = node_data[v]['patch']
			rad = 0.1
			if (u, v) in seen:
				# Grow the curvature by 0.1 and flip its sign. The module is
				# imported as ``numpy``; the name ``np`` does not exist here.
				rad = seen[(u, v)]
				rad = float((rad + numpy.sign(rad) * 0.1) * -1)

			e = FancyArrowPatch(n1.center, n2.center, patchA=n1, patchB=n2,
				arrowstyle='-|>',
				connectionstyle='arc3,rad=%s' % rad,
				mutation_scale=10.0,
				lw=2,
				alpha=1,
				color='k')
			seen[(u, v)] = rad
			ax.add_patch(e)
		return e

	# Open the CSV file as f. It will be closed automatically
	# when leaving this block
	with open(filename) as f:

		# Our adjacency matrix: A[i, j] is incremented for every row of the
		# file in which authors i and j appear together
		A = numpy.zeros((N, N))
		# papers[i] is incremented for every occurrence of author i in a row.
		# Integer dtype, to match the array grow() hands back.
		papers = numpy.zeros(N, dtype=numpy.intc)

		# The file is read line by line
		reader = csv.reader(f)
		for row in reader:
			# Each line is a list of strings, corresponding to the values
			# between the commas in the file

			# Skip the first column and only keep the non-empty names.
			# A real list (not a lazy filter object) is needed because it
			# is sliced below.
			author_names = [name for name in row[1:] if name != ""]

			# When we need to know the index of the current value when
			# looping, use the `enumerate` function. Here k is the index.
			for k, author_name in enumerate(author_names):
				i = add_author(author_name, inc=True)
				if i > N-1:
					A, papers = grow(A, papers)
				papers[i] += 1
				# Pair the current author with everyone listed after it
				for coauthor in author_names[k+1:]:
					j = add_author(coauthor)
					if j > N-1:
						A, papers = grow(A, papers)

					# Fill 'er up! (not trying to be smart with symmetry)
					A[i,j] += 1
					A[j,i] += 1

		# Clip the matrix to the actual number of authors
		K = len(authors)
		A = A[:K, :K]
		papers = papers[:K]

		with open("table_data.txt", "w") as table_file:
			# A real list is needed: it is joined for the header line and
			# then indexed for each row label.
			formatted_authors = [s.replace(" ", "-") for s in authors]
			table_file.write("-,{0}\n".format(",".join(formatted_authors)))
			for i in range(K):
				table_file.write(formatted_authors[i])
				for j in range(K):
					# Only the strict upper triangle is written; the rest is 0
					table_file.write(",{0}".format(A[i,j] if j > i else 0))
				table_file.write("\n")

	#idx = numpy.argsort(papers)
	#papers = papers[idx]
	#authors = [authors[idx[k]] for k in range(K)]
	#P = numpy.zeros((K, K))
	#for k in range(K):
	#P[idx[k], k] = 1

	#A = numpy.dot(P.T, numpy.dot(A, P))



	# Generate the graph and plot it
	#g = nx.from_numpy_matrix(A)

	#node_labels = dict(zip(range(K), authors))
	#g = nx.relabel_nodes(g, node_labels)

	#pos = nx.graphviz_layout(g)
	#pos = { k:(numpy.cos(2*numpy.pi*i/K), numpy.sin(2*numpy.pi*i/K)) for i,k in enumerate(authors) }

	#edge_labels=dict([((u,v,),int(d['weight'])) for u,v,d in g.edges(data=True)])

	#nx.draw_networkx_edge_labels(g,pos, edge_labels=edge_labels)
	#nx.draw_networkx(g, pos=pos, node_color="white", node_size=0, font_size=9)

	#ax=plt.gca()
	#draw_network(g,pos,ax)
	#ax.autoscale()

	#plt.axis('equal')
	#plt.axis('off')
	#plt.show()