Skip to content

Instantly share code, notes, and snippets.

@oddskool
Last active January 20, 2023 17:07
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oddskool/27476a1e22df357de798 to your computer and use it in GitHub Desktop.
Save oddskool/27476a1e22df357de798 to your computer and use it in GitHub Desktop.
Load CSV data into a SciPy CSR (compressed sparse row) matrix
import array
import csv
import numpy as np
from scipy.sparse import csr_matrix
def csv_to_csr(f):
    """Read the content of CSV file ``f`` and return it as a CSR matrix.

    The file is consumed one record at a time and only the non-zero entries
    are kept, so the dense matrix is never materialised.

    Parameters
    ----------
    f : iterable of str
        An open text file (or any iterable of CSV lines) accepted by
        ``csv.reader``. Every field must be parseable by ``float``.

    Returns
    -------
    scipy.sparse.csr_matrix
        Float64 matrix with one row per CSV record. The number of columns
        is the length of the longest record; shorter records (including
        blank lines, which ``csv.reader`` yields as empty records) are
        implicitly padded with zeros. Empty input gives a ``(0, 0)`` matrix.

    Raises
    ------
    ValueError
        If a field cannot be converted by ``float``.
    """
    # "d" (C double) matches the float64 dtype of the result; a
    # single-precision "f" buffer would round every value on the way in.
    data = array.array("d")
    indices = array.array("i")
    indptr = array.array("i", [0])
    n_rows = 0
    n_cols = 0
    for record in csv.reader(f):
        # Build a real list: on Python 3 ``map`` is lazy and NumPy would
        # wrap the iterator in a 0-d object array instead of converting it.
        values = np.array([float(field) for field in record], dtype=float)
        nonzero = np.where(values)[0]
        # ``tolist`` hands plain Python floats/ints to the typed buffers.
        data.extend(values[nonzero].tolist())
        indices.extend(nonzero.tolist())
        indptr.append(indptr[-1] + len(nonzero))
        n_rows += 1
        # Track the widest record so the shape always covers every stored
        # column index, whatever the length of the last record.
        n_cols = max(n_cols, len(record))
    return csr_matrix((data, indices, indptr),
                      dtype=float, shape=(n_rows, n_cols))
if __name__ == '__main__':
    # Self-test: write a small dense matrix to a temporary CSV file, load it
    # back through csv_to_csr and check that the round trip is lossless.
    import os

    tmp_file = 'tmp.csv'
    original_matrix = np.array([[0.0, 1.0, 2.0],
                                [4.5, 0.0, 0.1],
                                [3.0, 0.0, 3.0]])
    try:
        with open(tmp_file, 'w') as fd:
            for row in original_matrix:
                # fd.write works on both Python 2 and 3, unlike ``print >>fd``.
                fd.write(','.join(str(value) for value in row) + '\n')
        # Read back inside a ``with`` so the handle is closed before unlink.
        with open(tmp_file) as fd:
            m = csv_to_csr(fd)
        print(original_matrix)
        print(m.todense())
        assert np.allclose(m.todense(), original_matrix)
    finally:
        # The file may not exist if the initial open failed; do not let the
        # cleanup mask the original error.
        if os.path.exists(tmp_file):
            os.unlink(tmp_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment