Created
December 16, 2013 04:22
-
-
Save zippera/7982279 to your computer and use it in GitHub Desktop.
人工智能作业,对图片分类。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#coding=utf-8 | |
import os | |
from numpy import array | |
from scipy.cluster.vq import vq, kmeans, whiten, kmeans2 | |
from sklearn import svm | |
from sklearn import cross_validation | |
ncluster = 6 #聚类数 | |
######## | |
#读入数据 | |
root = '/Users/moxie/Documents/dat' #目录 | |
dir1 = root + '/kl' #恐龙文件夹 | |
dir2 = root + '/qc' #汽车文件夹 | |
#kl数据读取 | |
kl = [] #恐龙文件夹 | |
inf_kl = [] #局部特征的个数 | |
for i in range(95): | |
pic = [] | |
f_name = '%s.txt' % i | |
file1 = os.path.join(dir1,f_name) | |
for line in open(file1,'rb'): | |
localf = [] #每行 | |
for dms in line.strip().split(): | |
localf.append(float(dms)) | |
pic.append(localf) #每个图片 | |
inf = pic.pop(0) | |
kl.extend(pic) | |
count = int(inf[0]) | |
inf_kl.append(count) | |
#qc数据读取 | |
qc = [] | |
inf_qc = [] | |
for i in range(95): | |
pic = [] | |
f_name = '%s.txt' % i | |
file2 = os.path.join(dir2,f_name) | |
for line in open(file2,'rb'): | |
localf = [] | |
for dms in line.strip().split(): | |
localf.append(float(dms)) | |
pic.append(localf) | |
inf = pic.pop(0) | |
qc.extend(pic) | |
count = int(inf[0]) | |
inf_qc.append(count) | |
kl.extend(qc) #所有的局部特征 | |
inf_kl.extend(inf_qc) #所有的特征数量信息 | |
#print inf_kl,len(inf_kl) | |
####### | |
#kmeans 处理为 k 类,同时标记每个数据的所属类别,之后用;另外,均值就是代表点了;根据均值点建立词典。 | |
features = array(kl) | |
whitened = whiten(features) #scale一致化 | |
cent,label = kmeans2(whitened,ncluster) #label 为每个局部特征所属类别 | |
####### | |
#把每个图片表示为 bow 形式 | |
#求 x | |
codebook = list(set(label)) | |
x = [] | |
begin = 0 | |
label = list(label) | |
#print 'label:' , label | |
for n in inf_kl: | |
end = begin + n | |
temp = label[begin:end] | |
picf = [] #每个图片 | |
for ii in codebook: | |
nn = temp.count(ii) | |
picf.append(nn) | |
x.append(picf) | |
begin = end | |
#print len(x),len(picf) | |
#求 y | |
y = [0] * 95 + [1] * 95 | |
#print x,y,len(x),len(y) | |
####### | |
#分开为 train 和 test | |
X_train, X_test, y_train, y_test = cross_validation.train_test_split(x, y, | |
test_size=0.3, random_state=0) | |
#x_test = x[:32] + x[158:] | |
#x_train = x[32:158] | |
#y_test = y[:32] + y[158:] | |
#y_train = y[32:158] | |
#分类 | |
clf = svm.SVC(kernel='linear', C=1) | |
clf.fit(X_train,y_train) | |
print '正确率为:',clf.score(X_test,y_test) | |
#正确率:0.982456140351 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment