ringw/bicluster.py

## bicluster.py
import numpy as np

alphaCycle = np.loadtxt('alphaCycle.txt')
geneNames = np.loadtxt('alphaGenes.txt', dtype=str)
alphaDiscrete = np.zeros_like(alphaCycle, dtype=int)
alphaDiscrete[alphaCycle >= 1] = 1
alphaDiscrete[alphaCycle <= -1] = -1

print "# values >= 1:", np.sum(alphaDiscrete == 1)
print "# values <= -1:", np.sum(alphaDiscrete == -1)

# Undirected, unweighted bipartite graph
# Left nodes are in rows and right nodes are in columns
# Even nodes on right are positive nodes for each timepoint,
# odd are negative nodes
graph = np.zeros((alphaCycle.shape[0], alphaCycle.shape[1]*2), bool)
positiveL, positiveR = np.where(alphaDiscrete == 1)
# Scale right node index by 2
graph[positiveL, positiveR * 2] = True
negativeL, negativeR = np.where(alphaDiscrete == -1)
# Store negative node in odd indices
graph[negativeL, negativeR * 2 + 1] = True

def extract_bicluster():
    """ Find largest bi-cluster and set the edges to 0 so that the next
        bi-cluster can be found. """
    rightSubsets = dict()

    for l in xrange(graph.shape[0]):
        # Neighbor indices on right
        neighs, = np.where(graph[l])
        # Power set is size 2**(len(neighs))
        for p in xrange(1, 2**(len(neighs))):
            rightSubset = []
            # Extract nth bit from p
            for n in xrange(len(neighs)):
                if (p >> n) & 1:
                    rightSubset.append(neighs[n])
            # Make rightSubset hashable
            rightSubset = tuple(rightSubset)
            if rightSubset not in rightSubsets:
                # Only one left node is connected to subset so far
                subsetLeft = [l]
            else:
                subsetLeft = rightSubsets[rightSubset] + [l]
            rightSubsets[rightSubset] = subsetLeft

    maxSubset = 0
    subset = None
    for rightSubset in rightSubsets.keys():
        leftSubset = rightSubsets[rightSubset]
        # Complete subgraph, so number of edges is product of number of nodes
        # on left and right
        subsetSize = len(leftSubset) * len(rightSubset)
        if subsetSize > maxSubset:
            maxSubset = subsetSize
            subset = (leftSubset, list(rightSubset))

    leftSubset, rightSubset = subset
    print "Bicluster dimensions:", len(leftSubset), "genes,", len(rightSubset), "timepoints"
    print "Genes:", " ".join(list(geneNames[leftSubset]))
    positiveTimes = [t/2 for t in rightSubset if t % 2 == 0]
    if len(positiveTimes):
        print "Timepoints for positive expression:", positiveTimes
    negativeTimes = [t/2 for t in rightSubset if t % 2 == 1]
    if len(negativeTimes):
        print "Timepoints for negative expression:", negativeTimes
    print ""
    # Zero out this subset so that we can find another bicluster
    for left in leftSubset:
        graph[left, rightSubset] = 0

for i in xrange(5):
    extract_bicluster()

## bicluster2.py
import numpy as np

alphaCycle = np.loadtxt('alphaCycle.txt')
geneNames = np.loadtxt('alphaGenes.txt', dtype=str)
alphaDiscrete = np.zeros_like(alphaCycle, dtype=int)
alphaDiscrete[alphaCycle >= 1] = 1
alphaDiscrete[alphaCycle <= -1] = -1

print "# values >= 1:", np.sum(alphaDiscrete == 1)
print "# values <= -1:", np.sum(alphaDiscrete == -1)

# Undirected, unweighted bipartite graph
# Left nodes are in rows and right nodes are in columns
# Even nodes on right are positive nodes for each timepoint,
# odd are negative nodes
graph = np.zeros((alphaCycle.shape[0], alphaCycle.shape[1]*2), bool)
positiveL, positiveR = np.where(alphaDiscrete == 1)
# Scale right node index by 2
graph[positiveL, positiveR * 2] = True
negativeL, negativeR = np.where(alphaDiscrete == -1)
# Store negative node in odd indices
graph[negativeL, negativeR * 2 + 1] = True

def extract_bicluster():
    """ Find largest bi-cluster and set the edges to 0 so that the next
        bi-cluster can be found. """
    rightSubsets = dict()

    for l in xrange(graph.shape[0]):
        # Neighbor indices on right
        neighs, = np.where(graph[l])
        # Power set is size 2**(len(neighs))
        for p in xrange(1, 2**(len(neighs))):
            rightSubset = []
            # Extract nth bit from p
            for n in xrange(len(neighs)):
                if (p >> n) & 1:
                    rightSubset.append(neighs[n])
            # Make rightSubset hashable
            rightSubset = tuple(rightSubset)
            if rightSubset not in rightSubsets:
                # Only one left node is connected to subset so far
                subsetLeft = [l]
            else:
                subsetLeft = rightSubsets[rightSubset] + [l]
            rightSubsets[rightSubset] = subsetLeft

    maxSubset = 0
    subset = None
    for rightSubset in rightSubsets.keys():
        leftSubset = rightSubsets[rightSubset]
        # Complete subgraph, so number of edges is product of number of nodes
        # on left and right
        # Additionally, subtract 1 from each number of nodes so that we don't
        # select only one gene or only one timepoint.
        subsetSize = (len(leftSubset) - 1) * (len(rightSubset) - 1)
        if subsetSize > maxSubset:
            maxSubset = subsetSize
            subset = (leftSubset, list(rightSubset))

    leftSubset, rightSubset = subset
    print "Bicluster dimensions:", len(leftSubset), "genes,", len(rightSubset), "timepoints"
    print "Genes:", " ".join(list(geneNames[leftSubset]))
    positiveTimes = [t/2 for t in rightSubset if t % 2 == 0]
    if len(positiveTimes):
        print "Timepoints for positive expression:", positiveTimes
    negativeTimes = [t/2 for t in rightSubset if t % 2 == 1]
    if len(negativeTimes):
        print "Timepoints for negative expression:", negativeTimes
    print ""
    # Zero out this subset so that we can find another bicluster
    for left in leftSubset:
        graph[left, rightSubset] = 0

for i in xrange(5):
    extract_bicluster()
	import numpy as np

	alphaCycle = np.loadtxt('alphaCycle.txt')
	geneNames = np.loadtxt('alphaGenes.txt', dtype=str)
	alphaDiscrete = np.zeros_like(alphaCycle, dtype=int)
	alphaDiscrete[alphaCycle >= 1] = 1
	alphaDiscrete[alphaCycle <= -1] = -1

	print "# values >= 1:", np.sum(alphaDiscrete == 1)
	print "# values <= -1:", np.sum(alphaDiscrete == -1)

	# Undirected, unweighted bipartite graph
	# Left nodes are in rows and right nodes are in columns
	# Even nodes on right are positive nodes for each timepoint,
	# odd are negative nodes
	graph = np.zeros((alphaCycle.shape[0], alphaCycle.shape[1]*2), bool)
	positiveL, positiveR = np.where(alphaDiscrete == 1)
	# Scale right node index by 2
	graph[positiveL, positiveR * 2] = True
	negativeL, negativeR = np.where(alphaDiscrete == -1)
	# Store negative node in odd indices
	graph[negativeL, negativeR * 2 + 1] = True

	def extract_bicluster():
	""" Find largest bi-cluster and set the edges to 0 so that the next
	bi-cluster can be found. """
	rightSubsets = dict()

	for l in xrange(graph.shape[0]):
	# Neighbor indices on right
	neighs, = np.where(graph[l])
	# Power set is size 2**(len(neighs))
	for p in xrange(1, 2**(len(neighs))):
	rightSubset = []
	# Extract nth bit from p
	for n in xrange(len(neighs)):
	if (p >> n) & 1:
	rightSubset.append(neighs[n])
	# Make rightSubset hashable
	rightSubset = tuple(rightSubset)
	if rightSubset not in rightSubsets:
	# Only one left node is connected to subset so far
	subsetLeft = [l]
	else:
	subsetLeft = rightSubsets[rightSubset] + [l]
	rightSubsets[rightSubset] = subsetLeft

	maxSubset = 0
	subset = None
	for rightSubset in rightSubsets.keys():
	leftSubset = rightSubsets[rightSubset]
	# Complete subgraph, so number of edges is product of number of nodes
	# on left and right
	subsetSize = len(leftSubset) * len(rightSubset)
	if subsetSize > maxSubset:
	maxSubset = subsetSize
	subset = (leftSubset, list(rightSubset))

	leftSubset, rightSubset = subset
	print "Bicluster dimensions:", len(leftSubset), "genes,", len(rightSubset), "timepoints"
	print "Genes:", " ".join(list(geneNames[leftSubset]))
	positiveTimes = [t/2 for t in rightSubset if t % 2 == 0]
	if len(positiveTimes):
	print "Timepoints for positive expression:", positiveTimes
	negativeTimes = [t/2 for t in rightSubset if t % 2 == 1]
	if len(negativeTimes):
	print "Timepoints for negative expression:", negativeTimes
	print ""
	# Zero out this subset so that we can find another bicluster
	for left in leftSubset:
	graph[left, rightSubset] = 0

	for i in xrange(5):
	extract_bicluster()