Skip to content

Instantly share code, notes, and snippets.

Created June 18, 2013 10:59
Show Gist options
  • Save anonymous/5804425 to your computer and use it in GitHub Desktop.
Save anonymous/5804425 to your computer and use it in GitHub Desktop.
# -*- coding:utf-8 -*-
# Single-kernel multiclass classification program
# 13-5-31 modified trainflag and testflag
# Import modules
import liyfun as liy # load the data funcs
import numpy as np # numpy tool
from shogun.Features import RealFeatures, MulticlassLabels
from shogun.Kernel import Chi2Kernel, CustomKernel , GaussianKernel
from shogun.Distance import ChiSquareDistance
from shogun.Classifier import MulticlassLibSVM
from shogun.Evaluation import CrossValidation, CrossValidationResult
from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
from shogun.Evaluation import StratifiedCrossValidationSplitting, MulticlassAccuracy
from shogun.ModelSelection import ModelSelectionParameters, GridSearchModelSelection, R_EXP, R_LINEAR
from shogun.ModelSelection import ParameterCombination
def multiclass(ind):
    """Grid-search C and the Chi2 kernel width for a multiclass LibSVM.

    Loads the train/test index sets and the feature matrix for split
    ``ind``, then runs a stratified cross-validated grid search over the
    SVM constant ``C`` and the Chi2 kernel ``width`` using the training
    samples only. The best parameter combination is applied to a local
    ``MulticlassLibSVM`` instance.

    Relies on two module-level names that are not defined in the visible
    part of this file:

    * ``filedir`` -- passed to ``liy.load`` together with each file name.
    * ``subsets`` -- passed to ``StratifiedCrossValidationSplitting``
      (presumably the number of cross-validation folds).

    TODO(review): confirm both are defined before this function is called.

    Args:
        ind: Split number. Entry ``ind - 1`` of ``trainflag.mat`` and
            ``testflag.mat`` is used, and ``ind`` is embedded in the
            feature file name.

    Returns:
        The width of the Chi2 kernel selected by the grid search.
        (The original computed this value and discarded it.)
    """
    # Training sample indices for this split. The "- 1" shifts the stored
    # indices down by one (presumably 1-based MATLAB indices -> 0-based;
    # verify against how the .mat files were written).
    name = 'trainflag.mat'
    temp = liy.load(filedir, name)
    trainflag = np.squeeze(np.array(temp[ind - 1])[0]) - 1

    # Test sample indices, extracted the same way.
    name = 'testflag.mat'
    temp = liy.load(filedir, name)
    testflag = np.squeeze(np.array(temp[ind - 1])[0]) - 1

    # Feature matrix, transposed after loading so that each column is one
    # data point (the layout RealFeatures is given below).
    wordsnum = 50
    name = 'bhist_testnum_%s_wordsnum_%s.mat' % (ind, wordsnum)
    basicdata = np.float64(np.transpose(liy.load(filedir, name)))
    # NOTE(review): presumably normalises every column to unit 1-norm;
    # confirm against liyfun.unitnorm.
    basicdata = liy.unitnorm(basicdata, 1, 'col')

    train = basicdata[:, trainflag]
    features = RealFeatures(train)

    # Sample i gets class label i // 100, i.e. 600 samples in 6 consecutive
    # groups of 100. Floor division is required: with true division
    # (Python 3 "/") the labels would be fractional instead of 0..5.
    labels = (np.arange(0, 600) // 100).astype(np.float64)
    trainlabel = labels[trainflag]
    # NOTE(review): the test labels are prepared here but nothing in this
    # function consumes them yet.
    reallabel = labels[testflag]
    trainlabel = MulticlassLabels(trainlabel)

    # Classifier whose parameters are being tuned.
    classifier = MulticlassLibSVM()
    # Splitting strategy for cross-validation.
    splitting_strategy = StratifiedCrossValidationSplitting(trainlabel, subsets)
    # Evaluation criterion.
    evaluation_criterion = MulticlassAccuracy()
    # Cross-validation object tying the pieces above together.
    cross = CrossValidation(classifier, features, trainlabel,
                            splitting_strategy, evaluation_criterion)
    cross.set_num_runs(1)  # number of repetitions

    # Build the parameter tree explored by the grid search.
    root = ModelSelectionParameters()  # root node
    c = ModelSelectionParameters("C")  # parameter 1: SVM constant C
    root.append_child(c)
    # Exponents 0..5 in steps of 1 with base 10: C in 10^0, 10^1, ..., 10^5.
    c.build_values(0.0, 5.0, R_EXP, 1.0, 10.0)

    chi2_kernel = Chi2Kernel()
    # Kernel node; the kernel's own parameters hang below it.
    param_chi2_kernel = ModelSelectionParameters("kernel", chi2_kernel)
    chi2_kernel_width = ModelSelectionParameters("width")
    # Linear range starting at 0.05, step 0.5, upper bound 30.
    chi2_kernel_width.build_values(0.05, 30, R_LINEAR, 0.5)
    param_chi2_kernel.append_child(chi2_kernel_width)
    root.append_child(param_chi2_kernel)  # attach kernel parameters to the model

    grid_search = GridSearchModelSelection(root, cross)
    print_state = False
    best_combination = grid_search.select_model(print_state)
    best_combination.apply_to_machine(classifier)

    # Read back the kernel width that the best combination installed.
    k = classifier.get_kernel()
    w = Chi2Kernel.obtain_from_generic(k).get_width()
    return w
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment