Skip to content

Instantly share code, notes, and snippets.

@ringw
Created March 20, 2014 01:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ringw/9655416 to your computer and use it in GitHub Desktop.
Save ringw/9655416 to your computer and use it in GitHub Desktop.
Daniel Ringwalt 02510 PS4
import numpy as np
alphaCycle = np.loadtxt('alphaCycle.txt')
geneNames = np.loadtxt('alphaGenes.txt', dtype=str)
alphaDiscrete = np.zeros_like(alphaCycle, dtype=int)
alphaDiscrete[alphaCycle >= 1] = 1
alphaDiscrete[alphaCycle <= -1] = -1
print "# values >= 1:", np.sum(alphaDiscrete == 1)
print "# values <= -1:", np.sum(alphaDiscrete == -1)
# Undirected, unweighted bipartite graph
# Left nodes are in rows and right nodes are in columns
# Even nodes on right are positive nodes for each timepoint,
# odd are negative nodes
graph = np.zeros((alphaCycle.shape[0], alphaCycle.shape[1]*2), bool)
positiveL, positiveR = np.where(alphaDiscrete == 1)
# Scale right node index by 2
graph[positiveL, positiveR * 2] = True
negativeL, negativeR = np.where(alphaDiscrete == -1)
# Store negative node in odd indices
graph[negativeL, negativeR * 2 + 1] = True
def extract_bicluster():
""" Find largest bi-cluster and set the edges to 0 so that the next
bi-cluster can be found. """
rightSubsets = dict()
for l in xrange(graph.shape[0]):
# Neighbor indices on right
neighs, = np.where(graph[l])
# Power set is size 2**(len(neighs))
for p in xrange(1, 2**(len(neighs))):
rightSubset = []
# Extract nth bit from p
for n in xrange(len(neighs)):
if (p >> n) & 1:
rightSubset.append(neighs[n])
# Make rightSubset hashable
rightSubset = tuple(rightSubset)
if rightSubset not in rightSubsets:
# Only one left node is connected to subset so far
subsetLeft = [l]
else:
subsetLeft = rightSubsets[rightSubset] + [l]
rightSubsets[rightSubset] = subsetLeft
maxSubset = 0
subset = None
for rightSubset in rightSubsets.keys():
leftSubset = rightSubsets[rightSubset]
# Complete subgraph, so number of edges is product of number of nodes
# on left and right
subsetSize = len(leftSubset) * len(rightSubset)
if subsetSize > maxSubset:
maxSubset = subsetSize
subset = (leftSubset, list(rightSubset))
leftSubset, rightSubset = subset
print "Bicluster dimensions:", len(leftSubset), "genes,", len(rightSubset), "timepoints"
print "Genes:", " ".join(list(geneNames[leftSubset]))
positiveTimes = [t/2 for t in rightSubset if t % 2 == 0]
if len(positiveTimes):
print "Timepoints for positive expression:", positiveTimes
negativeTimes = [t/2 for t in rightSubset if t % 2 == 1]
if len(negativeTimes):
print "Timepoints for negative expression:", negativeTimes
print ""
# Zero out this subset so that we can find another bicluster
for left in leftSubset:
graph[left, rightSubset] = 0
for i in xrange(5):
extract_bicluster()
import numpy as np
alphaCycle = np.loadtxt('alphaCycle.txt')
geneNames = np.loadtxt('alphaGenes.txt', dtype=str)
alphaDiscrete = np.zeros_like(alphaCycle, dtype=int)
alphaDiscrete[alphaCycle >= 1] = 1
alphaDiscrete[alphaCycle <= -1] = -1
print "# values >= 1:", np.sum(alphaDiscrete == 1)
print "# values <= -1:", np.sum(alphaDiscrete == -1)
# Undirected, unweighted bipartite graph
# Left nodes are in rows and right nodes are in columns
# Even nodes on right are positive nodes for each timepoint,
# odd are negative nodes
graph = np.zeros((alphaCycle.shape[0], alphaCycle.shape[1]*2), bool)
positiveL, positiveR = np.where(alphaDiscrete == 1)
# Scale right node index by 2
graph[positiveL, positiveR * 2] = True
negativeL, negativeR = np.where(alphaDiscrete == -1)
# Store negative node in odd indices
graph[negativeL, negativeR * 2 + 1] = True
def extract_bicluster():
""" Find largest bi-cluster and set the edges to 0 so that the next
bi-cluster can be found. """
rightSubsets = dict()
for l in xrange(graph.shape[0]):
# Neighbor indices on right
neighs, = np.where(graph[l])
# Power set is size 2**(len(neighs))
for p in xrange(1, 2**(len(neighs))):
rightSubset = []
# Extract nth bit from p
for n in xrange(len(neighs)):
if (p >> n) & 1:
rightSubset.append(neighs[n])
# Make rightSubset hashable
rightSubset = tuple(rightSubset)
if rightSubset not in rightSubsets:
# Only one left node is connected to subset so far
subsetLeft = [l]
else:
subsetLeft = rightSubsets[rightSubset] + [l]
rightSubsets[rightSubset] = subsetLeft
maxSubset = 0
subset = None
for rightSubset in rightSubsets.keys():
leftSubset = rightSubsets[rightSubset]
# Complete subgraph, so number of edges is product of number of nodes
# on left and right
# Additionally, subtract 1 from each number of nodes so that we don't
# select only one gene or only one timepoint.
subsetSize = (len(leftSubset) - 1) * (len(rightSubset) - 1)
if subsetSize > maxSubset:
maxSubset = subsetSize
subset = (leftSubset, list(rightSubset))
leftSubset, rightSubset = subset
print "Bicluster dimensions:", len(leftSubset), "genes,", len(rightSubset), "timepoints"
print "Genes:", " ".join(list(geneNames[leftSubset]))
positiveTimes = [t/2 for t in rightSubset if t % 2 == 0]
if len(positiveTimes):
print "Timepoints for positive expression:", positiveTimes
negativeTimes = [t/2 for t in rightSubset if t % 2 == 1]
if len(negativeTimes):
print "Timepoints for negative expression:", negativeTimes
print ""
# Zero out this subset so that we can find another bicluster
for left in leftSubset:
graph[left, rightSubset] = 0
for i in xrange(5):
extract_bicluster()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment