Created
March 20, 2014 01:19
-
-
Save ringw/9655416 to your computer and use it in GitHub Desktop.
Daniel Ringwalt 02510 PS4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
alphaCycle = np.loadtxt('alphaCycle.txt') | |
geneNames = np.loadtxt('alphaGenes.txt', dtype=str) | |
alphaDiscrete = np.zeros_like(alphaCycle, dtype=int) | |
alphaDiscrete[alphaCycle >= 1] = 1 | |
alphaDiscrete[alphaCycle <= -1] = -1 | |
print "# values >= 1:", np.sum(alphaDiscrete == 1) | |
print "# values <= -1:", np.sum(alphaDiscrete == -1) | |
# Undirected, unweighted bipartite graph | |
# Left nodes are in rows and right nodes are in columns | |
# Even nodes on right are positive nodes for each timepoint, | |
# odd are negative nodes | |
graph = np.zeros((alphaCycle.shape[0], alphaCycle.shape[1]*2), bool) | |
positiveL, positiveR = np.where(alphaDiscrete == 1) | |
# Scale right node index by 2 | |
graph[positiveL, positiveR * 2] = True | |
negativeL, negativeR = np.where(alphaDiscrete == -1) | |
# Store negative node in odd indices | |
graph[negativeL, negativeR * 2 + 1] = True | |
def extract_bicluster(): | |
""" Find largest bi-cluster and set the edges to 0 so that the next | |
bi-cluster can be found. """ | |
rightSubsets = dict() | |
for l in xrange(graph.shape[0]): | |
# Neighbor indices on right | |
neighs, = np.where(graph[l]) | |
# Power set is size 2**(len(neighs)) | |
for p in xrange(1, 2**(len(neighs))): | |
rightSubset = [] | |
# Extract nth bit from p | |
for n in xrange(len(neighs)): | |
if (p >> n) & 1: | |
rightSubset.append(neighs[n]) | |
# Make rightSubset hashable | |
rightSubset = tuple(rightSubset) | |
if rightSubset not in rightSubsets: | |
# Only one left node is connected to subset so far | |
subsetLeft = [l] | |
else: | |
subsetLeft = rightSubsets[rightSubset] + [l] | |
rightSubsets[rightSubset] = subsetLeft | |
maxSubset = 0 | |
subset = None | |
for rightSubset in rightSubsets.keys(): | |
leftSubset = rightSubsets[rightSubset] | |
# Complete subgraph, so number of edges is product of number of nodes | |
# on left and right | |
subsetSize = len(leftSubset) * len(rightSubset) | |
if subsetSize > maxSubset: | |
maxSubset = subsetSize | |
subset = (leftSubset, list(rightSubset)) | |
leftSubset, rightSubset = subset | |
print "Bicluster dimensions:", len(leftSubset), "genes,", len(rightSubset), "timepoints" | |
print "Genes:", " ".join(list(geneNames[leftSubset])) | |
positiveTimes = [t/2 for t in rightSubset if t % 2 == 0] | |
if len(positiveTimes): | |
print "Timepoints for positive expression:", positiveTimes | |
negativeTimes = [t/2 for t in rightSubset if t % 2 == 1] | |
if len(negativeTimes): | |
print "Timepoints for negative expression:", negativeTimes | |
print "" | |
# Zero out this subset so that we can find another bicluster | |
for left in leftSubset: | |
graph[left, rightSubset] = 0 | |
for i in xrange(5): | |
extract_bicluster() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
alphaCycle = np.loadtxt('alphaCycle.txt') | |
geneNames = np.loadtxt('alphaGenes.txt', dtype=str) | |
alphaDiscrete = np.zeros_like(alphaCycle, dtype=int) | |
alphaDiscrete[alphaCycle >= 1] = 1 | |
alphaDiscrete[alphaCycle <= -1] = -1 | |
print "# values >= 1:", np.sum(alphaDiscrete == 1) | |
print "# values <= -1:", np.sum(alphaDiscrete == -1) | |
# Undirected, unweighted bipartite graph | |
# Left nodes are in rows and right nodes are in columns | |
# Even nodes on right are positive nodes for each timepoint, | |
# odd are negative nodes | |
graph = np.zeros((alphaCycle.shape[0], alphaCycle.shape[1]*2), bool) | |
positiveL, positiveR = np.where(alphaDiscrete == 1) | |
# Scale right node index by 2 | |
graph[positiveL, positiveR * 2] = True | |
negativeL, negativeR = np.where(alphaDiscrete == -1) | |
# Store negative node in odd indices | |
graph[negativeL, negativeR * 2 + 1] = True | |
def extract_bicluster(): | |
""" Find largest bi-cluster and set the edges to 0 so that the next | |
bi-cluster can be found. """ | |
rightSubsets = dict() | |
for l in xrange(graph.shape[0]): | |
# Neighbor indices on right | |
neighs, = np.where(graph[l]) | |
# Power set is size 2**(len(neighs)) | |
for p in xrange(1, 2**(len(neighs))): | |
rightSubset = [] | |
# Extract nth bit from p | |
for n in xrange(len(neighs)): | |
if (p >> n) & 1: | |
rightSubset.append(neighs[n]) | |
# Make rightSubset hashable | |
rightSubset = tuple(rightSubset) | |
if rightSubset not in rightSubsets: | |
# Only one left node is connected to subset so far | |
subsetLeft = [l] | |
else: | |
subsetLeft = rightSubsets[rightSubset] + [l] | |
rightSubsets[rightSubset] = subsetLeft | |
maxSubset = 0 | |
subset = None | |
for rightSubset in rightSubsets.keys(): | |
leftSubset = rightSubsets[rightSubset] | |
# Complete subgraph, so number of edges is product of number of nodes | |
# on left and right | |
# Additionally, subtract 1 from each number of nodes so that we don't | |
# select only one gene or only one timepoint. | |
subsetSize = (len(leftSubset) - 1) * (len(rightSubset) - 1) | |
if subsetSize > maxSubset: | |
maxSubset = subsetSize | |
subset = (leftSubset, list(rightSubset)) | |
leftSubset, rightSubset = subset | |
print "Bicluster dimensions:", len(leftSubset), "genes,", len(rightSubset), "timepoints" | |
print "Genes:", " ".join(list(geneNames[leftSubset])) | |
positiveTimes = [t/2 for t in rightSubset if t % 2 == 0] | |
if len(positiveTimes): | |
print "Timepoints for positive expression:", positiveTimes | |
negativeTimes = [t/2 for t in rightSubset if t % 2 == 1] | |
if len(negativeTimes): | |
print "Timepoints for negative expression:", negativeTimes | |
print "" | |
# Zero out this subset so that we can find another bicluster | |
for left in leftSubset: | |
graph[left, rightSubset] = 0 | |
for i in xrange(5): | |
extract_bicluster() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment