Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Indexed Row Matrix
from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix
# Row vectors of the matrix; a vector's position in this list is used as its row index.
row_vectors = [
    [0, 1, 2],
    [1, 2, 3],
    [3, 4, 5],
    [4, 2, 3],
    [2, 2, 5],
    [4, 5, 5],
]
# Build an RDD of IndexedRow(index, vector) entries.
# NOTE(review): `sc` is not defined in this snippet — presumably the SparkContext
# supplied by the PySpark shell/notebook; confirm before running standalone.
indexed_rows = sc.parallelize(
    [IndexedRow(row_index, vector) for row_index, vector in enumerate(row_vectors)]
)
# Wrap the RDD in an IndexedRowMatrix.
indexed_rows_matrix = IndexedRowMatrix(indexed_rows)
# Report the matrix dimensions: rows first, then columns.
row_count = indexed_rows_matrix.numRows()
print(row_count)
# >> 6
column_count = indexed_rows_matrix.numCols()
print(column_count)
# >> 3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment