Last active
August 29, 2015 14:06
-
-
Save alexland/2a4ced2b332cd6bb2fd8 to your computer and use it in GitHub Desktop.
populating a scipy sparse DOK matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def str2num(t):
    """Convert an ``(index, value)`` pair of strings to ``(int, float)``.

    Args:
        t: an indexable whose first element is the text of an integer
            and whose second element is the text of a float, e.g. the
            result of ``"3:4.5".split(':')``. Elements past the second
            are ignored.

    Returns:
        A 2-tuple ``(int(t[0]), float(t[1]))``.

    Raises:
        IndexError: if ``t`` has fewer than two elements.
        ValueError: if either element cannot be converted.
    """
    return (int(t[0]), float(t[1]))
def parse_line(line):
    """Parse one text record into its score and its sparse row.

    A record is a score followed by zero or more whitespace-separated
    ``index:value`` tokens, for example ``"1.5 0:2.0 3:4.5"``.

    Args:
        line: one line of text; surrounding whitespace is ignored.

    Returns:
        A 2-tuple ``(score, adjacency)`` where ``score`` is the first
        field exactly as it appears in the text (a ``str``) and
        ``adjacency`` is a dict mapping each ``int`` index to its
        ``float`` value. A record that carries only a score yields an
        empty dict.

    Raises:
        ValueError: if the line is blank, or a token's index/value
            cannot be converted to a number.
        IndexError: if a token does not contain a ``:`` separator.
    """
    parts = line.split(maxsplit=1)
    if not parts:
        raise ValueError("cannot parse an empty line")

    score = parts[0]
    # A score with nothing after it is a legitimate all-zero row.
    tokens = parts[1].split() if len(parts) > 1 else []

    adjacency = {}
    for token in tokens:
        fields = token.strip().split(':')
        adjacency[int(fields[0])] = float(fields[1])
    return score, adjacency
def file_opener(data_file):
    """Read a data file and parse every non-blank line with ``parse_line``.

    Args:
        data_file: path to a UTF-8 text file, as a str. A leading ``~``
            is expanded to the current user's home directory.

    Returns:
        A list with one ``parse_line`` result per non-blank line, in
        file order.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Local import keeps the function usable even when the module does
    # not import ``os`` at the top level.
    import os

    dfile = os.path.expanduser(data_file)
    with open(dfile, 'r', encoding='utf-8') as fh:
        # Iterate the handle directly instead of materialising readlines();
        # whitespace-only lines (e.g. a trailing newline) carry no record.
        return [parse_line(line) for line in fh if line.strip()]
def data2sparse_array(data):
    """Build a score vector and a sparse DOK matrix from parsed records.

    Args:
        data: a sequence of ``(score, adjacency)`` pairs, where
            ``score`` is a number or numeric string and ``adjacency``
            is a dict mapping 0-based column index (int) to value
            (float) for one row.

    Returns:
        A 2-tuple ``(S, D)``: ``S`` is a NumPy float array of shape
        ``(nrows, 1)`` holding the scores; ``D`` is a
        ``scipy.sparse.dok_matrix`` of shape ``(nrows, ncols)`` where
        ``ncols`` is one more than the largest column index seen.
        Only the entries present in each ``adjacency`` dict are stored.
    """
    import numpy as NP
    from scipy.sparse import dok_matrix

    nrows = len(data)
    # Column indices are used directly as 0-based positions, so the width
    # is the largest index plus one; rows without entries contribute nothing.
    largest_index = max(
        (max(adict, default=-1) for _, adict in data),
        default=-1,
    )
    ncols = largest_index + 1

    D = dok_matrix((nrows, ncols), dtype=float)
    S = NP.empty((nrows, 1))
    for i, (s, adict) in enumerate(data):
        S[i] = float(s)
        # Write only the stored entries; absent cells stay implicit zeros.
        for j, value in adict.items():
            D[i, j] = value
    return S, D
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment