# rwalk/goodreads_matrix.py

## goodreads_matrix.py
import json
from scipy.sparse import coo_matrix, save_npz
import numpy as np


# Build a sparse 0/1 interaction matrix U from goodreads/interactions.json
# and store it as goodreads/U.npz.
#
# Row r of U corresponds to the r-th entry of `interactions`; every value in
# that entry is used as a column index.
# NOTE(review): presumably rows are users and columns are book indices --
# confirm against whatever produces interactions.json.

print("reading data...")
# JSON is UTF-8 by specification; do not depend on the locale's default.
with open("goodreads/interactions.json", encoding="utf-8") as f:
    payload = json.load(f)
# `books` is not used in this section; it is kept in scope in case code
# further down the file relies on it.
books = payload["books"]
interactions = payload["interactions"]
del payload  # the two lists above are all we need from the parsed document

print("building matrix...")
# setup for COO matrix builder: parallel row-index / column-index lists
I, J = [], []
for i, row in enumerate(interactions):
    I.extend([i] * len(row))
    J.extend(row)

# one stored value of 1.0 per (row, column) pair
data = np.ones(len(J), dtype=np.float64)

# we need the max column index so we know the width of the matrix
max_col = max(J, default=0)

# Take the row count from len() instead of the loop variable `i`, which is
# undefined (NameError) when `interactions` is empty.
U = coo_matrix(
    (data, (I, J)),
    shape=(len(interactions), max_col + 1),
    dtype=np.float64,
).tocsr()

# save raw sparse matrix
print("saving to disk...")
save_npz("goodreads/U.npz", U)

# drop the large intermediates; only U (and books) are needed from here on
print("cleaning up...")
del interactions, data, I, J
	import json
	from scipy.sparse import coo_matrix, save_npz
	import numpy as np


	# Build a sparse 0/1 interaction matrix U from goodreads/interactions.json
	# and store it as goodreads/U.npz.
	#
	# Row r of U corresponds to the r-th entry of `interactions`; every value
	# in that entry is used as a column index.
	# NOTE(review): presumably rows are users and columns are book indices --
	# confirm against whatever produces interactions.json.
	# NOTE(review): this section duplicates the script above with a leading
	# tab on every line; the nested indentation had been flattened and is
	# restored here.

	print("reading data...")
	# JSON is UTF-8 by specification; do not depend on the locale's default.
	with open("goodreads/interactions.json", encoding="utf-8") as f:
		payload = json.load(f)
	# `books` is not used in this section; it is kept in scope in case code
	# further down the file relies on it.
	books = payload["books"]
	interactions = payload["interactions"]
	del payload  # the two lists above are all we need from the parsed document

	print("building matrix...")
	# setup for COO matrix builder: parallel row-index / column-index lists
	I, J = [], []
	for i, row in enumerate(interactions):
		I.extend([i] * len(row))
		J.extend(row)

	# one stored value of 1.0 per (row, column) pair
	data = np.ones(len(J), dtype=np.float64)

	# we need the max column index so we know the width of the matrix
	max_col = max(J, default=0)

	# Take the row count from len() instead of the loop variable `i`, which is
	# undefined (NameError) when `interactions` is empty.
	U = coo_matrix(
		(data, (I, J)),
		shape=(len(interactions), max_col + 1),
		dtype=np.float64,
	).tocsr()

	# save raw sparse matrix
	print("saving to disk...")
	save_npz("goodreads/U.npz", U)

	# drop the large intermediates; only U (and books) are needed from here on
	print("cleaning up...")
	del interactions, data, I, J