Created
November 18, 2012 12:35
-
-
Save royguo/4105011 to your computer and use it in GitHub Desktop.
linear_perceptron
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#coding:utf-8 | |
""" | |
author : royguo1988@gmail.com | |
""" | |
import random | |
class DataPrepare(object):
    """Split a data file into a training file and a test file.

    Every non-blank line of the input is written to the training file with
    probability ``train_ratio`` (80% by default) and to the test file
    otherwise.
    """

    def __init__(self, data_file, train_data, test_data):
        """Open the input file for reading and both output files for writing.

        Args:
            data_file: path of the file holding all samples.
            train_data: path of the training file to (over)write.
            test_data: path of the test file to (over)write.

        Raises:
            IOError/OSError: if any of the files cannot be opened; files
                that were already opened are closed before re-raising.
        """
        # Pre-set the handles so close()/__del__ stay safe when an open() fails.
        self.data_file = None
        self.train_data = None
        self.test_data = None
        try:
            self.data_file = open(data_file, 'r')
            self.train_data = open(train_data, 'w')
            self.test_data = open(test_data, 'w')
        except (IOError, OSError):
            self.close()
            raise

    def close(self):
        """Close every file that was opened; safe to call more than once."""
        for name in ('data_file', 'train_data', 'test_data'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()

    def __del__(self):
        # Last-resort cleanup; prefer calling close() explicitly.
        self.close()

    def prepare(self, train_ratio=0.8):
        """Distribute the input lines over the training and test files.

        Args:
            train_ratio: probability that a line goes to the training file.

        Blank lines are skipped instead of ending the scan, so an empty line
        in the middle of the input no longer drops the remaining samples.
        """
        for raw_line in self.data_file:
            line = raw_line.strip()
            if not line:
                continue
            if random.random() <= train_ratio:
                self.train_data.write(line + '\n')
            else:
                self.test_data.write(line + '\n')
        # Make the output visible to readers even before the files are closed.
        self.train_data.flush()
        self.test_data.flush()
class ModelTraining(object):
    """Train a two-feature linear perceptron on samples read from a file."""

    def __init__(self, train_data):
        """Load the training samples.

        Args:
            train_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: ``label x1 x2``.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        # Samples are kept as dicts of the raw string fields.
        self.samples = []
        with open(train_data, 'r') as f:
            for line_no, line in enumerate(f, 1):
                arr = line.split()
                if not arr:
                    # Skip blank lines instead of treating them as end of data.
                    continue
                if len(arr) != 3:
                    raise ValueError('%s:%d: expected "label x1 x2", got %r'
                                     % (train_data, line_no, line))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def caculate(self, learning_rate=0.001, max_iterations=None):
        """Adjust w and b until every training sample is classified correctly.

        Args:
            learning_rate: size of each adjustment of w and b.
            max_iterations: optional cap on the number of adjustments; with
                the default ``None`` the loop runs until convergence, which
                never happens when the samples are not linearly separable.

        Returns:
            A tuple ``(w, b)`` with ``w`` a two-element list of weights and
            ``b`` the bias. If ``max_iterations`` is reached first, the
            current (not converged) values are returned.
        """
        # Initial w and b; per the original author these were estimated from
        # a plot of the data and may be chosen arbitrarily.
        w, b = [0.05, 0.5], 0.05
        p = learning_rate
        i = 0
        # After every adjustment all samples are checked again from the start.
        w_plus, b_plus = self.verifyWB(w, b, p)
        while w_plus != [0, 0] or b_plus != 0:
            if max_iterations is not None and i >= max_iterations:
                break
            i += 1
            # Spacing reproduces the output of the original print statement.
            print('第  %s  次调整, w =  %s  b =  %s' % (i, w, b))
            w[0] += w_plus[0]
            w[1] += w_plus[1]
            b += b_plus
            w_plus, b_plus = self.verifyWB(w, b, p)
        print('w =  %s  b =  %s' % (w, b))
        return w, b

    def verifyWB(self, w, b, p):
        """Check w and b against all samples.

        Returns:
            ``(w_plus, b_plus)``: the increments derived from the first
            misclassified sample, or ``([0, 0], 0)`` when every sample is
            classified correctly.
        """
        w_plus, b_plus = [0, 0], 0
        for s in self.samples:
            label = int(s['label'])
            predict = self.result(w, b, s)
            # Samples whose prediction matches the label need no correction.
            if predict != label:
                coefficient = (p / 2) * (label - predict)
                w_plus = [coefficient * float(s['x1']),
                          coefficient * float(s['x2'])]
                b_plus = coefficient
                break
        return w_plus, b_plus

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
class ModelEvaluate(object):
    """Evaluate the trained weights w and bias b on the test samples."""

    def __init__(self, test_data, w, b):
        """Store the model parameters and load the test samples.

        Args:
            test_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: ``label x1 x2``.
            w: two-element sequence of weights.
            b: bias term.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        self.w = w
        self.b = b
        # Samples are kept as dicts of the raw string fields.
        self.samples = []
        with open(test_data, 'r') as f:
            for line_no, line in enumerate(f, 1):
                arr = line.split()
                if not arr:
                    # Skip blank lines instead of treating them as end of data.
                    continue
                if len(arr) != 3:
                    raise ValueError('%s:%d: expected "label x1 x2", got %r'
                                     % (test_data, line_no, line))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def evaluate(self):
        """Print the sample count and the accuracy, and return the accuracy.

        Returns:
            The fraction of samples whose predicted label equals the stored
            label, or ``0.0`` when there are no samples (the random split
            can leave the test set empty, which used to divide by zero).
        """
        total = len(self.samples)
        print('测试样本总是: %s' % total)
        correct_count = sum(
            1 for s in self.samples
            if self.result(self.w, self.b, s) == int(s['label']))
        accuracy = float(correct_count) / total if total else 0.0
        print('正确率为: %s' % accuracy)
        return accuracy

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
if __name__ == '__main__':
    # Split the raw data into a training file and a test file.
    dp = DataPrepare('data.txt', 'train_data.txt', 'test_data.txt')
    try:
        dp.prepare()
    finally:
        # Close (and thereby flush) the files explicitly before they are read
        # back, instead of relying on `dp = None` to trigger __del__.
        for handle in (dp.data_file, dp.train_data, dp.test_data):
            handle.close()
    # Train on the training split, then score the result on the test split.
    mt = ModelTraining('train_data.txt')
    w, b = mt.caculate()
    me = ModelEvaluate('test_data.txt', w, b)
    me.evaluate()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-1 -0.303844 -0.458237 | |
-1 -7.727905 -0.384403 | |
-1 -1.693569 -0.724698 | |
1 -6.202481 0.580145 | |
-1 8.869004 -0.343454 | |
1 -5.391438 0.591225 | |
1 7.559267 0.385052 | |
-1 7.263065 -0.239257 | |
-1 -3.519450 -0.989159 | |
-1 -9.948223 -0.421481 | |
-1 -7.707347 -0.895640 | |
-1 1.238391 -0.757743 | |
1 7.213038 0.198066 | |
1 -4.405302 0.414567 | |
-1 -4.246700 0.057275 | |
-1 7.229273 -0.632374 | |
1 8.380911 0.210671 | |
-1 -2.509012 -0.520105 | |
1 3.491775 0.275496 | |
1 6.276548 1.063988 | |
-1 0.755912 -0.225401 | |
-1 -9.992536 -0.522414 | |
-1 -9.495737 -0.027652 | |
1 10.545063 0.394088 | |
-1 0.848230 -0.356873 | |
1 6.588944 0.498700 | |
1 -0.926625 0.220477 | |
1 7.022405 0.376469 | |
1 -2.220649 0.406389 | |
-1 -0.699247 -0.733574 | |
1 1.406911 0.550811 | |
-1 6.251736 -0.859889 | |
-1 9.100554 -0.297695 | |
-1 -9.118529 -0.454069 | |
-1 -6.245038 -0.472838 | |
-1 -1.417224 -0.322209 | |
1 1.408517 0.377613 | |
-1 6.244810 -0.703489 | |
1 -8.633542 0.546162 | |
-1 -3.936660 -0.047634 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class DataPrepare(object):
    """Split a data file into a training file and a test file.

    Every non-blank line of the input is written to the training file with
    probability ``train_ratio`` (80% by default) and to the test file
    otherwise.
    """

    def __init__(self, data_file, train_data, test_data):
        """Open the input file for reading and both output files for writing.

        Args:
            data_file: path of the file holding all samples.
            train_data: path of the training file to (over)write.
            test_data: path of the test file to (over)write.

        Raises:
            IOError/OSError: if any of the files cannot be opened; files
                that were already opened are closed before re-raising.
        """
        # Pre-set the handles so close()/__del__ stay safe when an open() fails.
        self.data_file = None
        self.train_data = None
        self.test_data = None
        try:
            self.data_file = open(data_file, 'r')
            self.train_data = open(train_data, 'w')
            self.test_data = open(test_data, 'w')
        except (IOError, OSError):
            self.close()
            raise

    def close(self):
        """Close every file that was opened; safe to call more than once."""
        for name in ('data_file', 'train_data', 'test_data'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()

    def __del__(self):
        # Last-resort cleanup; prefer calling close() explicitly.
        self.close()

    def prepare(self, train_ratio=0.8):
        """Distribute the input lines over the training and test files.

        Args:
            train_ratio: probability that a line goes to the training file.

        Blank lines are skipped instead of ending the scan, so an empty line
        in the middle of the input no longer drops the remaining samples.
        """
        for raw_line in self.data_file:
            line = raw_line.strip()
            if not line:
                continue
            if random.random() <= train_ratio:
                self.train_data.write(line + '\n')
            else:
                self.test_data.write(line + '\n')
        # Make the output visible to readers even before the files are closed.
        self.train_data.flush()
        self.test_data.flush()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ModelEvaluate(object):
    """Evaluate the trained weights w and bias b on the test samples."""

    def __init__(self, test_data, w, b):
        """Store the model parameters and load the test samples.

        Args:
            test_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: ``label x1 x2``.
            w: two-element sequence of weights.
            b: bias term.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        self.w = w
        self.b = b
        # Samples are kept as dicts of the raw string fields.
        self.samples = []
        with open(test_data, 'r') as f:
            for line_no, line in enumerate(f, 1):
                arr = line.split()
                if not arr:
                    # Skip blank lines instead of treating them as end of data.
                    continue
                if len(arr) != 3:
                    raise ValueError('%s:%d: expected "label x1 x2", got %r'
                                     % (test_data, line_no, line))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def evaluate(self):
        """Print the sample count and the accuracy, and return the accuracy.

        Returns:
            The fraction of samples whose predicted label equals the stored
            label, or ``0.0`` when there are no samples (the random split
            can leave the test set empty, which used to divide by zero).
        """
        total = len(self.samples)
        print('测试样本总是: %s' % total)
        correct_count = sum(
            1 for s in self.samples
            if self.result(self.w, self.b, s) == int(s['label']))
        accuracy = float(correct_count) / total if total else 0.0
        print('正确率为: %s' % accuracy)
        return accuracy

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ModelTraining(object):
    """Train a two-feature linear perceptron on samples read from a file."""

    def __init__(self, train_data):
        """Load the training samples.

        Args:
            train_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: ``label x1 x2``.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        # Samples are kept as dicts of the raw string fields.
        self.samples = []
        with open(train_data, 'r') as f:
            for line_no, line in enumerate(f, 1):
                arr = line.split()
                if not arr:
                    # Skip blank lines instead of treating them as end of data.
                    continue
                if len(arr) != 3:
                    raise ValueError('%s:%d: expected "label x1 x2", got %r'
                                     % (train_data, line_no, line))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def caculate(self, learning_rate=0.001, max_iterations=None):
        """Adjust w and b until every training sample is classified correctly.

        Args:
            learning_rate: size of each adjustment of w and b.
            max_iterations: optional cap on the number of adjustments; with
                the default ``None`` the loop runs until convergence, which
                never happens when the samples are not linearly separable.

        Returns:
            A tuple ``(w, b)`` with ``w`` a two-element list of weights and
            ``b`` the bias. If ``max_iterations`` is reached first, the
            current (not converged) values are returned.
        """
        # Initial w and b; per the original author these were estimated from
        # a plot of the data and may be chosen arbitrarily.
        w, b = [0.05, 0.5], 0.05
        p = learning_rate
        i = 0
        # After every adjustment all samples are checked again from the start.
        w_plus, b_plus = self.verifyWB(w, b, p)
        while w_plus != [0, 0] or b_plus != 0:
            if max_iterations is not None and i >= max_iterations:
                break
            i += 1
            # Spacing reproduces the output of the original print statement.
            print('第  %s  次调整, w =  %s  b =  %s' % (i, w, b))
            w[0] += w_plus[0]
            w[1] += w_plus[1]
            b += b_plus
            w_plus, b_plus = self.verifyWB(w, b, p)
        print('w =  %s  b =  %s' % (w, b))
        # The trained parameters must be returned for the caller to use them.
        return w, b

    def verifyWB(self, w, b, p):
        """Check w and b against all samples.

        Returns:
            ``(w_plus, b_plus)``: the increments derived from the first
            misclassified sample, or ``([0, 0], 0)`` when every sample is
            classified correctly.
        """
        w_plus, b_plus = [0, 0], 0
        for s in self.samples:
            label = int(s['label'])
            predict = self.result(w, b, s)
            # Samples whose prediction matches the label need no correction.
            if predict != label:
                coefficient = (p / 2) * (label - predict)
                w_plus = [coefficient * float(s['x1']),
                          coefficient * float(s['x2'])]
                b_plus = coefficient
                break
        return w_plus, b_plus

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment