Skip to content

Instantly share code, notes, and snippets.

@xccds
Created June 9, 2016 08:55
Show Gist options
  • Save xccds/a1385c23bba4b5ad16ef18e8c8727525 to your computer and use it in GitHub Desktop.
Save xccds/a1385c23bba4b5ad16ef18e8c8727525 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 数据读入和整理\n",
"iris = pd.read_csv('iris.csv')\n",
"X = iris.iloc[:,:4].values\n",
"mapdict ={k:v for v, k in enumerate(iris.Species.unique())}\n",
"y = iris.Species.map(mapdict).values\n",
"n = len(y)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 切分数据\n",
"train_index = np.random.choice(n, int(0.6*n), replace = False)\n",
"test_index = np.setdiff1d(np.arange(n),train_index)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 计算权重矩阵\n",
"sigma = X.var(axis = 0)\n",
"weights = np.zeros((n,n))\n",
"\n",
"def weight_func(ind1,ind2,X=X,sigma=sigma):\n",
"    return np.exp(-np.sum((X[ind1]-X[ind2])**2/sigma))\n",
"\n",
"for i in range(n):\n",
"    for j in range(n):\n",
"        weights[i,j] = weight_func(i,j)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"## 标准化为转移矩阵: t[i,j] = weights[i,j] / weights[j,:].sum(); weights is symmetric, so each column of t sums to 1\n",
"t = weights/weights.sum(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# y转换形式\n",
"y_m = np.zeros((n, len(np.unique(y))))\n",
"for i in range(n):\n",
" y_m[i,y[i]] = 1"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"## unlabel初始化, label记住\n",
"y_m[test_index] = np.random.random(y_m[test_index].shape)\n",
"clamp = y_m[train_index]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"## 迭代计算\n",
"iter_n = 50\n",
"for _ in range(iter_n):\n",
" y_m = t.dot(y_m) # LP\n",
" y_m = (y_m.T/y_m.sum(axis=1)).T # normalize\n",
" y_m[train_index] = clamp # clamp"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.93333333333333335"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 预测准确率\n",
"predict = y_m[test_index].argmax(axis=1)\n",
"np.sum(y[test_index] == predict)/float(len(predict))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment