Skip to content

Instantly share code, notes, and snippets.

@alexland
Last active August 29, 2015 14:06
Show Gist options
  • Save alexland/2a4ced2b332cd6bb2fd8 to your computer and use it in GitHub Desktop.
Save alexland/2a4ced2b332cd6bb2fd8 to your computer and use it in GitHub Desktop.
populating a scipy sparse DOK matrix
def str2num(t):
    """
    Convert an (index, value) pair of strings to numeric types.

    returns: 2-tuple (int(t[0]), float(t[1]))
    pass in: t, an indexable (e.g. list or tuple) whose first two items
        can be parsed as an int and a float respectively; any further
        items are ignored
    raises: ValueError if an item cannot be parsed;
        IndexError if t has fewer than two items
    """
    return int(t[0]), float(t[1])
def parse_line(line):
    """
    Parse one whitespace-delimited record: 'score idx:val idx:val ...'.

    returns:
        (i) score, the first field of the line, returned as the
            unconverted str;
        (ii) adjacency dict mapping int index -> float value
            (one row in sparse 2D array); empty when the line carries
            a score but no 'idx:val' tokens
    pass in: line as str
    raises: ValueError if line is empty or whitespace only
    """
    fields = line.split()
    if not fields:
        raise ValueError("cannot parse an empty line")
    # First field is the score; every remaining field is an 'idx:val' token.
    score, *tokens = fields
    pairs = (token.split(':') for token in tokens)
    return score, dict(map(str2num, pairs))
def file_opener(data_file):
    """
    Read a data file and parse each non-blank line with parse_line.

    returns: list with one parse_line(line) result per non-blank line,
        in file order
    pass in: data_file as str; a leading '~' is expanded to the user's
        home directory
    """
    # Imported locally so the function does not rely on a module-level import.
    import os

    dfile = os.path.expanduser(data_file)
    with open(dfile, 'r', encoding='utf-8') as fh:
        # Iterate the file lazily; blank lines (e.g. a trailing newline at
        # end of file) carry no record and are skipped.
        return [parse_line(line) for line in fh if line.strip()]
def data2sparse_array(data):
    """
    Build a score vector and a sparse DOK matrix from parsed records.

    returns:
        (i) S, NumPy array of shape (nrows, 1) holding float(score)
            for each record;
        (ii) D, scipy.sparse dok_matrix of shape (nrows, max_key + 1)
            where D[i, j] is the value stored under key j in record i's
            adjacency dict; keys absent from a record stay unset
    pass in: data, a sequence of (score, adjacency dict) pairs such as
        the list built from parse_line results; adjacency keys are used
        directly as column indices
    """
    import numpy as NP
    from scipy.sparse import dok_matrix as DOK

    nrows = len(data)
    # Largest column index over all records; -1 when no record has any
    # entry, so that ncols becomes 0.
    max_key_val = max((max(adict) for _, adict in data if adict), default=-1)
    # Keys index columns directly, so key k needs k + 1 columns.
    ncols = max_key_val + 1
    D = DOK((nrows, ncols), dtype=float)
    S = NP.empty((nrows, 1))
    for i, (s, adict) in enumerate(data):
        S[i] = float(s)
        # Write only the entries that exist, keeping the matrix sparse.
        for j, v in adict.items():
            D[i, j] = v
    return S, D
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment