# Block LU decomposition with Ray (gist by @kvchen, December 5, 2016)
import numpy as np
import ray
# Assumed import: the gist relies on Ray's (circa 2016) remote array helpers,
# referenced below as ra.linalg.lu.remote and ra.dot.remote.
import ray.array.remote as ra


@ray.remote
def lu_decomp_invert(lu_decomp):
    """Takes the inverse of each of the components in the P, L, U
    decomposition. Needed as a helper function for the block-level LU decomp.
    """
    return tuple(np.linalg.inv(x) for x in lu_decomp)
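

# For reference: partitioning A into 2x2 blocks, an LU factorization with
# per-block pivoting has the form
#
#     [A11 A12]   [P11  0 ] [L11  0 ] [U11 U12]
#     [A21 A22] = [ 0  P22] [L21 L22] [ 0  U22]
#
# which gives U12 = inv(L11) inv(P11) A12 and L21 = inv(P22) A21 inv(U11),
# with the trailing block handled recursively through a Schur complement of
# A22. block_lu below follows this structure, computing the per-block LU
# decompositions and their inverses in parallel with Ray.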
@ray.remote(num_return_vals=3)
def block_lu(a, block_size=100):
    """Returns the LU decomposition of a square matrix.

    Parameters
    ----------
    a : array_like
        Square matrix to decompose.
    block_size : int, optional
        Side length of the square blocks used in the decomposition.

    Returns
    -------
    p : array_like
    l : array_like
    u : array_like
    """
    # Base case: the matrix fits within a single block, so decompose it directly.
    if a.shape[0] <= block_size or a.shape[1] <= block_size:
        return ray.get(ra.linalg.lu.remote(a))

    p, l, u = np.zeros(a.shape), np.zeros(a.shape), np.zeros(a.shape)
    num_blocks = int(np.ceil(float(a.shape[0]) / block_size))

    # Compute all single-node LU decompositions in parallel
    block_decomps_remote = []
    a_modified = a
    for idx in range(num_blocks):
        a11 = a_modified[:block_size, :block_size]
        block_decomps_remote.append(ra.linalg.lu.remote(a11))

        a12 = a_modified[:block_size, block_size:]
        a21 = a_modified[block_size:, :block_size]
        a22 = a_modified[block_size:, block_size:]

        # Compute the Schur complement and continue with the trailing block
        a_modified = a22 - np.dot(a21, np.dot(np.linalg.inv(a11), a12))

    block_decomps = ray.get(block_decomps_remote)

    # Compute the inverses for each of the LU components in parallel
    block_decomp_inverses = ray.get([lu_decomp_invert.remote(decomp)
                                     for decomp in block_decomps_remote])

    # Place each block's permutation matrix on the diagonal of P up front,
    # since the coalescing loop below reads the trailing part of P.
    for idx, (p11, _, _) in enumerate(block_decomps):
        block_low = block_size * idx
        block_high = block_low + block_size
        p[block_low:block_high, block_low:block_high] = p11

    # Perform coalescing
    for idx, (plu, plu_inverse) in enumerate(zip(block_decomps,
                                                 block_decomp_inverses)):
        p11, l11, u11 = plu
        p11_inverse, l11_inverse, u11_inverse = plu_inverse

        block_low = block_size * idx
        block_high = block_low + block_size

        if idx < num_blocks - 1:
            a12 = a[:block_size, block_size:]
            a21 = a[block_size:, :block_size]
            a22 = a[block_size:, block_size:]

            # The inverse of a permutation matrix is just its transpose
            p22_inverse = p[block_high:, block_high:].T

            # u12 = inv(l11) inv(p11) a12 and l21 = inv(p22) a21 inv(u11)
            # TODO(kvchen): Change this to use distributed block
            # multiplication. Not sure if it'll be any more efficient.
            u12, l21 = ray.get([
                ra.dot.remote(l11_inverse, ra.dot.remote(p11_inverse, a12)),
                ra.dot.remote(p22_inverse, ra.dot.remote(a21, u11_inverse)),
            ])

            l[block_high:, block_low:block_high] = l21
            u[block_low:block_high, block_high:] = u12

            # Take the Schur complement and recurse on the trailing submatrix
            a = a22 - np.dot(l21, u12)

        l[block_low:block_high, block_low:block_high] = l11
        u[block_low:block_high, block_low:block_high] = u11

    return p, l, u
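

# A minimal usage sketch, not part of the original gist. It assumes a local
# Ray scheduler can be started with ray.init() (the exact initialization API
# varied across early Ray releases) and that the matrix side length is a
# multiple of block_size. It reports how closely P @ L @ U reconstructs the
# input matrix.
if __name__ == "__main__":
    ray.init()

    a = np.random.rand(400, 400)
    p_id, l_id, u_id = block_lu.remote(a, 100)
    p, l, u = ray.get([p_id, l_id, u_id])

    reconstruction = np.dot(p, np.dot(l, u))
    print("max reconstruction error:", np.abs(reconstruction - a).max())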