Skip to content

Instantly share code, notes, and snippets.

Last active Jun 8, 2020
What would you like to do?
Sparse matrix representation for goodreads data
import json
from scipy.sparse import coo_matrix, save_npz
import numpy as np
# Script: load the goodreads interaction data from JSON, turn it into a
# scipy COO sparse matrix (one row per entry of "interactions", one column
# per referenced index) and store it on disk in .npz format.
print("reading data...")
# JSON text is UTF-8 by specification, so do not depend on the locale default.
with open("goodreads/interactions.json", encoding="utf-8") as f:
    data = json.load(f)
books = data["books"]
# Each element of `interactions` is iterated below as a sequence of
# non-negative integer column indices.
# NOTE(review): schema inferred from how the loop uses it -- confirm against
# the producer of interactions.json.
interactions = data["interactions"]

print("building matrix...")
# setup for COO matrix builder: parallel lists holding the value, the row
# index and the column index of every non-zero entry
data, I, J = [], [], []
max_col = 0
for i, row in enumerate(interactions):
    for col in row:
        # Record one (row, col) entry per interaction.
        # NOTE(review): the stored value 1 (implicit feedback) is an
        # assumption; the file carries no explicit weight at this point.
        I.append(i)
        J.append(col)
        data.append(1)
        if col > max_col:
            # we need the max column index so we may know the size of matrix
            max_col = col

# len(interactions) equals the last loop index + 1 but stays defined when
# `interactions` is empty (the loop variable `i` would not exist then).
# NOTE(review): dtype is left to scipy's default for the value list; pass
# dtype=... here if a specific element type is required.
U = coo_matrix(
    (data, (I, J)),
    shape=(len(interactions), max_col + 1),
)

# save raw sparse matrix
print("saving to disk...")
save_npz("goodreads/U.npz", U)

print("cleaning up...")
# Drop the large intermediate Python lists; only U (and books) stay alive.
del interactions, data, I, J
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment