talesa/adjacency-list-to-adjacency-matrix.py

## adjacency-list-to-adjacency-matrix.py

# coding: utf-8

# Importing libraries

# In[1]:


import pandas as pd
import numpy as np


# Load the edge list (adjacency list) from the Excel workbook.

# In[2]:


adj_list = pd.read_excel('EXPERIMENTAL_ALL-gephi.xlsx')


# Show the column names (notebook display only; as a plain script this
# expression has no effect).

# In[3]:


adj_list.columns


# Collect every unique species. Targets are included as well as sources, so a
# species that appears only in the `Target` column still gets its own label.
# Species from `Source` keep their original order; target-only ones follow.

# In[4]:


species = pd.concat([adj_list.Source, adj_list.Target]).unique()


# Checking number of unique species

# In[5]:


species.size


# Create the adjacency matrix with rows (index) and columns labelled by species.
# It starts zero-filled with the dtype of the `Weight` column, so the frame is
# numeric from the outset instead of an object-dtype frame of NaN placeholders
# (which newer pandas no longer downcasts to numbers in `fillna`).

# In[6]:


adj_matrix = pd.DataFrame(0, index=species, columns=species,
                          dtype=adj_list.Weight.dtype)


# For each row in the adjacency list write its weight into both mirrored cells,
# (Source, Target) and (Target, Source), so the matrix stays symmetric.

# In[7]:


for row in adj_list.itertuples():
    adj_matrix.loc[row.Source, row.Target] = row.Weight
    adj_matrix.loc[row.Target, row.Source] = row.Weight


# Ensure no cell is left as NaN: a missing weight, or cells that `.loc` may have
# added for a label absent from `species`, are set to 0.

# In[8]:


adj_matrix = adj_matrix.fillna(0)


# Ensure the matrix is symmetric as it should be. `a.T` is the transpose of the
# matrix; a symmetric matrix is equal to its transpose, hence `a` should be
# equal to `a.T`.
# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.

# In[9]:


a = adj_matrix.values
assert np.allclose(a, a.T), 'adjacency matrix is not symmetric'


# Ensure that there are twice as many non-zero cells in the adjacency matrix as
# there are records in the adjacency list, because each row in the list should
# appear twice in the matrix.
# NOTE: this only holds when the list has no self-loops, no repeated edges and
# no zero weights; any of those makes the count smaller than 2 * len(adj_list).

# In[10]:


assert (adj_matrix != 0).sum().sum() == 2 * len(adj_list), \
    'non-zero cell count does not match twice the number of edges'


# Save to an Excel `xlsx` file.

# In[11]:


adj_matrix.to_excel('adj_matrix.xlsx')

	# coding: utf-8

	# Importing libraries

	# In[1]:


	import pandas as pd
	import numpy as np


	# Load the edge list (adjacency list) from the Excel workbook.

	# In[2]:


	adj_list = pd.read_excel('EXPERIMENTAL_ALL-gephi.xlsx')


	# Show the column names (notebook display only; as a plain script this
	# expression has no effect).

	# In[3]:


	adj_list.columns


	# Collect every unique species. Targets are included as well as sources, so a
	# species that appears only in the `Target` column still gets its own label.
	# Species from `Source` keep their original order; target-only ones follow.

	# In[4]:


	species = pd.concat([adj_list.Source, adj_list.Target]).unique()


	# Checking number of unique species

	# In[5]:


	species.size


	# Create the adjacency matrix with rows (index) and columns labelled by species.
	# It starts zero-filled with the dtype of the `Weight` column, so the frame is
	# numeric from the outset instead of an object-dtype frame of NaN placeholders
	# (which newer pandas no longer downcasts to numbers in `fillna`).

	# In[6]:


	adj_matrix = pd.DataFrame(0, index=species, columns=species,
	                          dtype=adj_list.Weight.dtype)


	# For each row in the adjacency list write its weight into both mirrored cells,
	# (Source, Target) and (Target, Source), so the matrix stays symmetric.
	# The loop body is indented under the `for` header, as Python requires.

	# In[7]:


	for row in adj_list.itertuples():
		adj_matrix.loc[row.Source, row.Target] = row.Weight
		adj_matrix.loc[row.Target, row.Source] = row.Weight


	# Ensure no cell is left as NaN: a missing weight, or cells that `.loc` may have
	# added for a label absent from `species`, are set to 0.

	# In[8]:


	adj_matrix = adj_matrix.fillna(0)


	# Ensure the matrix is symmetric as it should be. `a.T` is the transpose of the
	# matrix; a symmetric matrix is equal to its transpose, hence `a` should be
	# equal to `a.T`.
	# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.

	# In[9]:


	a = adj_matrix.values
	assert np.allclose(a, a.T), 'adjacency matrix is not symmetric'


	# Ensure that there are twice as many non-zero cells in the adjacency matrix as
	# there are records in the adjacency list, because each row in the list should
	# appear twice in the matrix.
	# NOTE: this only holds when the list has no self-loops, no repeated edges and
	# no zero weights; any of those makes the count smaller than 2 * len(adj_list).

	# In[10]:


	assert (adj_matrix != 0).sum().sum() == 2 * len(adj_list), \
		'non-zero cell count does not match twice the number of edges'


	# Save to an Excel `xlsx` file.

	# In[11]:


	adj_matrix.to_excel('adj_matrix.xlsx')