Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
転置インデックスもどきのデータをベクトル化するためのPython3のプログラム片。
# Vectorize inverted-index-like data into a sparse bag-of-words matrix.
#
# The JSON file at `inputfile` is read as a mapping from each word to a list
# of "<article id>,<frequency>" strings (both parts must parse as integers).
# After this block runs:
#   words      -- the words, in the mapping's iteration order (one column each)
#   articleids -- the distinct article ids, ascending (one row each)
#   bow        -- lil_matrix of shape (alen, wlen) holding the frequencies
with open(inputfile, encoding='utf-8') as fh:
    freqlist = json.load(fh)

# Dict keys are unique, so a word's position in this list is its column index.
words = list(freqlist)

# Parse every posting exactly once, remembering (article id, column, frequency)
# and collecting the distinct article ids in a set for O(1) membership tests.
postings = []
seen_ids = set()
for wi, entries in enumerate(freqlist.values()):
    for entry in entries:
        # Unpacking raises ValueError unless the entry has exactly two
        # comma-separated integer fields.
        a, f = map(int, entry.split(","))
        seen_ids.add(a)
        postings.append((a, wi, f))

articleids = sorted(seen_ids)
alen = len(articleids)
wlen = len(words)

# Precompute article id -> row index rather than calling list.index() for
# every posting, which made the fill loop quadratic.
row_of = {a: i for i, a in enumerate(articleids)}

bow = lil_matrix((alen, wlen))
for a, wi, f in postings:
    # A repeated (article, word) pair overwrites the earlier frequency.
    bow[row_of[a], wi] = f

# Count the cells that hold a non-zero frequency.
nzelemnum = len(bow.nonzero()[0])
print("{0:d} {1:d} {2:d}".format(alen, wlen, nzelemnum))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment