Last active
February 10, 2017 08:19
-
-
Save wjrl/54184da44de497591369f368ea17c691 to your computer and use it in GitHub Desktop.
Converts adjacency matrix text file to a .sif file and a .noa file for node layout
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Convert a tab-delimited 0/1 adjacency matrix into a .sif and a .noa file.

Usage: [python] adjToSif.py linkTag inFile

The first row of inFile holds the column node names (its first cell is the
corner cell) and the first cell of every following row holds the row node
name.  Every entry equal to '1' becomes one "<row> <linkTag> <column>" line
in the .sif file.  Nodes with no links at all are written to the .sif file
on a line of their own.  The .noa file assigns each node a row number:
linked nodes first (row names, then column names not seen yet), followed by
the unlinked nodes in the same order.

Both output files are created next to inFile, with its extension replaced.
The script runs under Python 2 and Python 3.
"""
import csv
import sys
import os


def _open_table(path, mode):
    """Open *path* in the mode ('r' or 'w') the csv module expects here."""
    if sys.version_info[0] >= 3:
        # Python 3: csv needs text streams; newline='' leaves line endings
        # under csv's control so the output bytes match the Python 2 output.
        return open(path, mode, newline='')
    # Python 2: csv works on byte streams.
    return open(path, mode + 'b')


def _ordered_unique(names):
    """Return *names* with duplicates removed, keeping first-seen order."""
    seen = set()
    ordered = []
    for name in names:
        if name not in seen:
            seen.add(name)
            ordered.append(name)
    return ordered


def convert(linkTag, fileName):
    """Write the .sif and .noa files for the adjacency matrix in *fileName*.

    linkTag  -- relation name placed between the two node names of each link.
    fileName -- path of the tab-delimited adjacency matrix.

    Returns the tuple (sif path, noa path).
    Raises ValueError when a row has a '1' entry in a position for which the
    header row has no column name.
    """
    pre = os.path.splitext(fileName)[0]
    outFileName = pre + ".sif"
    noaFileName = pre + ".noa"

    rowNames = []
    colNames = []
    used = set()  # every node that takes part in at least one link

    with _open_table(fileName, 'r') as tsvo, \
            _open_table(outFileName, 'w') as sifo:
        tsvin = csv.reader(tsvo, delimiter='\t')
        sifout = csv.writer(sifo, delimiter='\t')
        headerSeen = False
        for row in tsvin:
            if not headerSeen:
                headerSeen = True
                # A real list (not a lazy map) because it is indexed below.
                colNames = [name.strip() for name in row]
                continue
            if not any(cell.strip() for cell in row):
                # Blank line (e.g. at end of file): nothing to read.
                continue
            rowName = row[0].strip()
            rowNames.append(rowName)
            # Entry k of the row lines up with header cell k; cell 0 is the
            # corner cell / row name, so data starts at index 1.
            for col, entry in enumerate(row[1:], 1):
                if entry.strip() != '1':
                    continue
                if col >= len(colNames):
                    raise ValueError(
                        "line {0}: entry {1} of row '{2}' has no column "
                        "name in the header".format(
                            tsvin.line_num, col, rowName))
                colName = colNames[col]
                sifout.writerow([rowName, linkTag, colName])
                used.add(rowName)
                used.add(colName)

        #
        # If bipartite, this orders the row names before the column names.
        # If square, the row order is used.
        #
        nodeOrder = _ordered_unique(rowNames + colNames)
        # Empty names (such as a blank corner cell) are not nodes.
        singletons = [n for n in nodeOrder if n and n not in used]
        for singleton in singletons:
            # Wrapped in a list so the name is written as a single field.
            sifout.writerow([singleton])

    linked = [n for n in nodeOrder if n in used]
    with _open_table(noaFileName, 'w') as noao:
        noao.write("Node Row\n")
        # Each node gets exactly one row number: linked nodes, then singletons.
        for nodeCount, node in enumerate(linked + singletons):
            noao.write(node + " = " + str(nodeCount) + "\n")

    return outFileName, noaFileName


def main(argv=None):
    """Command-line entry point; *argv* defaults to sys.argv."""
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        sys.exit('Usage: [python] adjToSif.py linkTag inFile')
    convert(argv[1], argv[2])


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Given a tab-delimited adjacency matrix file "network.txt" (the first row and first column hold the node names) with 0 and 1 entries, this Python script writes a "network.sif" file, which you import into BioFabric using "File->Import->Import SIF File...". Note that singleton nodes (degree = 0) are included in the .sif file as well and will appear in the BioFabric network as rows without links. When a .sif file is loaded, BioFabric uses the default layout, but this script also creates a "network.noa" node layout file. To use it, load it via "Layout->Layout Using Node Attributes...". The row names and column names in the adjacency matrix can be different. Nodes with degree > 0 are ordered first by row order, followed by column order for any nodes not yet seen, with singleton nodes listed at the end. Thus, the ordering of the matrix rows and columns can be used to specify a custom layout. If the matrix is, e.g., a symmetric square matrix, each link is output twice, once in each direction. If you specify on import that the links are not directed, the duplicate links will be dropped.