Skip to content

Instantly share code, notes, and snippets.

@seikichi
Created October 19, 2010 20:21
Show Gist options
  • Save seikichi/635023 to your computer and use it in GitHub Desktop.
Save seikichi/635023 to your computer and use it in GitHub Desktop.
Rで学ぶクラスタ解析のコードをpythonで書いただけ
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Python port of the code from "Rで学ぶクラスタ解析" (Learning Cluster Analysis with R)
import numpy as np
import scipy as sp
def table(result, answer):
    """Build a contingency table of cluster assignments versus true labels.

    Args:
        result: Sequence of cluster ids, one per sample.
        answer: Sequence of reference (gold) labels, one per sample. It is
            expected to be the same length as ``result``.

    Returns:
        A 2-D integer array whose rows follow the sorted distinct values of
        ``result`` and whose columns follow the sorted distinct values of
        ``answer``. Entry ``[i, j]`` is the number of samples placed in
        cluster ``i`` whose reference label is ``j``.
    """
    result, answer = np.asarray(result), np.asarray(answer)
    # np.unique returns the distinct values already sorted.
    labels = np.unique(answer)
    rows = []
    for cluster in np.unique(result):
        # Select the members of this cluster once, not once per label.
        members = answer[result == cluster]
        rows.append([np.count_nonzero(members == label) for label in labels])
    return np.array(rows)
def mypurity(ct):
    """Return the purity of a clustering from its contingency table.

    Purity is the sum over clusters (rows) of the largest class count in
    that cluster, divided by the total number of samples.

    Args:
        ct: 2-D array-like of counts, rows = clusters, columns = classes.

    Returns:
        The purity as a float.
    """
    ct = np.asarray(ct)
    # Vectorized row-wise maximum instead of calling Python's max per row.
    return ct.max(axis=1).sum() / float(ct.sum())
def myentropy(ct):
    """Return the normalized, size-weighted entropy of a clustering.

    For every cluster (row) the entropy of its class distribution is
    computed; the per-cluster entropies are averaged with weights equal to
    each cluster's share of all samples, then divided by ``log(n_classes)``.

    Args:
        ct: 2-D array-like of counts, rows = clusters, columns = classes.

    Returns:
        The normalized entropy as a float. A table with fewer than two
        class columns yields ``0.0``, because the normalizer ``log(1)`` is
        zero and a single class carries no uncertainty.
    """
    # Work in floating point throughout so every division is a true division.
    ct = np.array(ct, dtype=float)
    n_classes = ct.shape[1]
    if n_classes < 2:
        return 0.0

    row_totals = ct.sum(axis=1)
    has_members = row_totals[:, None] != 0
    # Class distribution inside each cluster; empty clusters stay all-zero
    # instead of turning into 0/0 = nan.
    probs = np.divide(ct, row_totals[:, None],
                      out=np.zeros_like(ct), where=has_members)

    # Use the convention 0 * log(0) == 0 by skipping zero probabilities.
    plogp = np.zeros_like(probs)
    nonzero = probs != 0
    plogp[nonzero] = probs[nonzero] * np.log(probs[nonzero])
    cluster_entropy = -plogp.sum(axis=1)

    weights = row_totals / ct.sum()
    return np.sum(weights * cluster_entropy) / np.log(n_classes)
def myeval(ans, goldans):
    """Print the entropy and purity of a clustering against gold labels.

    Args:
        ans: Sequence of cluster ids produced by the clustering.
        goldans: Sequence of reference labels for the same samples.

    Returns:
        None. The two scores are written to standard output.
    """
    ct = table(ans, goldans)
    # print() calls replace the Python 2 print statements, which are a
    # syntax error on Python 3; the emitted text is unchanged.
    print("Entropy: ", myentropy(ct))
    print("Purity: ", mypurity(ct))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment