Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# import the libraries
from pyspark.mllib.linalg import Matrices
from pyspark.mllib.linalg.distributed import BlockMatrix
# Distribute the sub-matrix blocks as an RDD of ((block_row, block_col), matrix)
# pairs. `sc` is not defined here; it is expected to be an existing SparkContext
# (e.g. the one supplied by the PySpark shell). Matrices.dense reads the value
# list in column-major order.
blocks = sc.parallelize(
    [
        (position, Matrices.dense(3, 3, values))
        for position, values in (
            ((0, 0), [1, 2, 1, 2, 1, 2, 1, 2, 1]),
            ((1, 1), [3, 4, 5, 3, 4, 5, 3, 4, 5]),
            ((2, 0), [1, 1, 1, 1, 1, 1, 1, 1, 1]),
        )
    ]
)

# Assemble the distributed matrix; every block is 3 rows by 3 columns.
b_matrix = BlockMatrix(blocks, rowsPerBlock=3, colsPerBlock=3)

# Report the block dimensions: columns per block first, then rows per block.
print(b_matrix.colsPerBlock)  # >> 3
print(b_matrix.rowsPerBlock)  # >> 3

# Collect all blocks into a single local matrix and display it as an array.
local_mat = b_matrix.toLocalMatrix()
print(local_mat.toArray())
# Expected values of the resulting 9 x 6 array (block positions that were not
# supplied are filled with zeros):
#   [[1., 2., 1., 0., 0., 0.],
#    [2., 1., 2., 0., 0., 0.],
#    [1., 2., 1., 0., 0., 0.],
#    [0., 0., 0., 3., 3., 3.],
#    [0., 0., 0., 4., 4., 4.],
#    [0., 0., 0., 5., 5., 5.],
#    [1., 1., 1., 0., 0., 0.],
#    [1., 1., 1., 0., 0., 0.],
#    [1., 1., 1., 0., 0., 0.]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment