Skip to content

Instantly share code, notes, and snippets.

@royguo
Created November 18, 2012 12:35
Show Gist options
  • Save royguo/4105011 to your computer and use it in GitHub Desktop.
Save royguo/4105011 to your computer and use it in GitHub Desktop.
linear_perceptron
#!/usr/bin/env python
#coding:utf-8
"""
author : royguo1988@gmail.com
"""
import random
class DataPrepare(object):
    """Split a data file into a training file and a test file.

    Each non-blank input line is written to the training file with
    probability 0.8 and to the test file otherwise, so the split is roughly
    80% / 20% but not exact.

    The three files are opened when the object is constructed (the two output
    files are truncated at that point).  Call ``close()`` -- or use the object
    as a context manager -- to release them deterministically; ``__del__``
    remains only as a fallback.
    """

    def __init__(self, data_file, train_data, test_data):
        """Open the input file for reading and both output files for writing.

        Args:
            data_file: path of the file holding all samples, one per line.
            train_data: path the training split is written to.
            test_data: path the test split is written to.
        """
        # Pre-set the attributes so close()/__del__ stay safe even when one
        # of the open() calls below raises.
        self.data_file = None
        self.train_data = None
        self.test_data = None
        try:
            self.data_file = open(data_file, 'r')
            self.train_data = open(train_data, 'w')
            self.test_data = open(test_data, 'w')
        except Exception:
            # Do not leak the handles that were opened before the failure.
            self.close()
            raise

    def close(self):
        """Close all three files; calling it more than once is harmless."""
        for name in ('data_file', 'train_data', 'test_data'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __del__(self):
        # Fallback only: finalizer timing is interpreter-specific.
        self.close()

    def prepare(self):
        """Distribute every non-blank input line to one of the two outputs.

        Lines are stripped of surrounding whitespace before being written.
        Blank lines are skipped instead of ending the split early.  Both
        output files are flushed before returning so the data can be read
        back even while this object is still alive.
        """
        for raw in self.data_file:
            line = raw.strip()
            if not line:
                continue
            if random.random() <= 0.8:
                target = self.train_data
            else:
                target = self.test_data
            target.write(line + '\n')
        self.train_data.flush()
        self.test_data.flush()
class ModelTraining(object):
    """Fit a two-feature linear perceptron to labelled samples from a file.

    Every non-blank line of the training file must hold three
    whitespace-separated fields: ``label x1 x2``.  The samples are kept in
    ``self.samples`` as dicts with the keys ``'label'``, ``'x1'`` and
    ``'x2'``; the values stay the strings read from the file and are
    converted with ``int``/``float`` where they are used.
    """

    def __init__(self, train_data):
        """Load the training samples from the file at path ``train_data``.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        # Samples in list form, one dict per input line.
        self.samples = []
        with open(train_data, 'r') as f:
            for line_no, raw in enumerate(f, 1):
                arr = raw.split()
                if not arr:
                    # Skip blank lines rather than treating the first one
                    # as the end of the data.
                    continue
                if len(arr) != 3:
                    # Explicit check: an assert would vanish under -O.
                    raise ValueError(
                        '%s:%d: expected "label x1 x2", got %r'
                        % (train_data, line_no, raw))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def caculate(self, w=None, b=0.05, p=0.001, max_iterations=None):
        """Adjust ``w`` and ``b`` until every sample is classified correctly.

        After each adjustment all samples are checked again from the start.
        Progress is printed on every adjustment.

        Args:
            w: initial two-element weight list; defaults to ``[0.05, 0.5]``.
                The original author estimated the defaults from a plot of
                the data, but any starting values may be used.
            b: initial bias.
            p: learning rate, i.e. the size of each adjustment step.
            max_iterations: optional cap on the number of adjustments.
                ``None`` (the default) means no cap, in which case the loop
                only ends once every sample is classified correctly.

        Returns:
            The tuple ``(w, b)`` with the final weights and bias.
        """
        # Copy so neither a caller's list nor a shared default is mutated.
        w = [0.05, 0.5] if w is None else list(w)
        i = 0
        w_plus, b_plus = self.verifyWB(w, b, p)
        while w_plus != [0, 0] or b_plus != 0:
            if max_iterations is not None and i >= max_iterations:
                break
            i += 1
            # Single-argument print keeps the exact text of the former
            # Python 2 print statement and also parses under Python 3.
            print('第  %s  次调整, w =  %s  b =  %s' % (i, w, b))
            w[0] += w_plus[0]
            w[1] += w_plus[1]
            b += b_plus
            w_plus, b_plus = self.verifyWB(w, b, p)
        print('w =  %s  b =  %s' % (w, b))
        return w, b

    def verifyWB(self, w, b, p):
        """Check ``w`` and ``b`` against the samples, in order.

        Returns:
            ``(w_plus, b_plus)``: the corrections derived from the first
            misclassified sample, or ``([0, 0], 0)`` when every sample is
            classified correctly.
        """
        w_plus, b_plus = [0, 0], 0
        for s in self.samples:
            label = int(s['label'])
            predict = self.result(w, b, s)
            # Correctly predicted samples need no correction.
            if predict != label:
                # p / 2.0 avoids integer truncation for an int p on Python 2.
                coefficient = (p / 2.0) * (label - predict)
                w_plus = [coefficient * float(s['x1']),
                          coefficient * float(s['x2'])]
                b_plus = coefficient
                break
        return w_plus, b_plus

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
class ModelEvaluate(object):
    """Test the ``w`` and ``b`` produced by training against the test data.

    Every non-blank line of the test file must hold three
    whitespace-separated fields: ``label x1 x2``.  The samples are kept in
    ``self.samples`` as dicts with the keys ``'label'``, ``'x1'`` and
    ``'x2'``; the values stay the strings read from the file.
    """

    def __init__(self, test_data, w, b):
        """Store the model and load the samples from path ``test_data``.

        Args:
            test_data: path of the test sample file.
            w: two-element weight sequence of the trained model.
            b: bias of the trained model.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        self.w = w
        self.b = b
        # Samples in list form, one dict per input line.
        self.samples = []
        with open(test_data, 'r') as f:
            for line_no, raw in enumerate(f, 1):
                arr = raw.split()
                if not arr:
                    # Skip blank lines rather than treating the first one
                    # as the end of the data.
                    continue
                if len(arr) != 3:
                    # Explicit check: an assert would vanish under -O.
                    raise ValueError(
                        '%s:%d: expected "label x1 x2", got %r'
                        % (test_data, line_no, raw))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def evaluate(self):
        """Print the sample count and the accuracy, and return the accuracy.

        Returns:
            The fraction of samples whose predicted label equals the stored
            label, as a float.  An empty test set yields ``0.0`` instead of
            raising ``ZeroDivisionError``.
        """
        total = len(self.samples)
        # Single-argument print keeps the exact text of the former Python 2
        # print statement and also parses under Python 3.
        print('测试样本总是: %s' % total)
        correct_count = 0
        for s in self.samples:
            if self.result(self.w, self.b, s) == int(s['label']):
                correct_count += 1
        accuracy = float(correct_count) / total if total else 0.0
        print('正确率为: %s' % accuracy)
        return accuracy

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
if __name__ == '__main__':
    # Split data.txt into a training file and a test file.
    dp = DataPrepare('data.txt', 'train_data.txt', 'test_data.txt')
    try:
        dp.prepare()
    finally:
        # Close the handles explicitly so the split files are completely
        # written before they are read back below; relying on __del__
        # running at a particular moment is interpreter-specific.
        for handle in (dp.data_file, dp.train_data, dp.test_data):
            handle.close()
    # Train on the training split, then score the model on the test split.
    mt = ModelTraining('train_data.txt')
    w, b = mt.caculate()
    me = ModelEvaluate('test_data.txt', w, b)
    me.evaluate()
-1 -0.303844 -0.458237
-1 -7.727905 -0.384403
-1 -1.693569 -0.724698
1 -6.202481 0.580145
-1 8.869004 -0.343454
1 -5.391438 0.591225
1 7.559267 0.385052
-1 7.263065 -0.239257
-1 -3.519450 -0.989159
-1 -9.948223 -0.421481
-1 -7.707347 -0.895640
-1 1.238391 -0.757743
1 7.213038 0.198066
1 -4.405302 0.414567
-1 -4.246700 0.057275
-1 7.229273 -0.632374
1 8.380911 0.210671
-1 -2.509012 -0.520105
1 3.491775 0.275496
1 6.276548 1.063988
-1 0.755912 -0.225401
-1 -9.992536 -0.522414
-1 -9.495737 -0.027652
1 10.545063 0.394088
-1 0.848230 -0.356873
1 6.588944 0.498700
1 -0.926625 0.220477
1 7.022405 0.376469
1 -2.220649 0.406389
-1 -0.699247 -0.733574
1 1.406911 0.550811
-1 6.251736 -0.859889
-1 9.100554 -0.297695
-1 -9.118529 -0.454069
-1 -6.245038 -0.472838
-1 -1.417224 -0.322209
1 1.408517 0.377613
-1 6.244810 -0.703489
1 -8.633542 0.546162
-1 -3.936660 -0.047634
import random
class DataPrepare(object):
    """Split a data file into a training file and a test file.

    Each non-blank input line is written to the training file with
    probability 0.8 and to the test file otherwise, so the split is roughly
    80% / 20% but not exact.

    The three files are opened when the object is constructed (the two output
    files are truncated at that point).  Call ``close()`` -- or use the object
    as a context manager -- to release them deterministically; ``__del__``
    remains only as a fallback.
    """

    def __init__(self, data_file, train_data, test_data):
        """Open the input file for reading and both output files for writing.

        Args:
            data_file: path of the file holding all samples, one per line.
            train_data: path the training split is written to.
            test_data: path the test split is written to.
        """
        # Pre-set the attributes so close()/__del__ stay safe even when one
        # of the open() calls below raises.
        self.data_file = None
        self.train_data = None
        self.test_data = None
        try:
            self.data_file = open(data_file, 'r')
            self.train_data = open(train_data, 'w')
            self.test_data = open(test_data, 'w')
        except Exception:
            # Do not leak the handles that were opened before the failure.
            self.close()
            raise

    def close(self):
        """Close all three files; calling it more than once is harmless."""
        for name in ('data_file', 'train_data', 'test_data'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __del__(self):
        # Fallback only: finalizer timing is interpreter-specific.
        self.close()

    def prepare(self):
        """Distribute every non-blank input line to one of the two outputs.

        Lines are stripped of surrounding whitespace before being written.
        Blank lines are skipped instead of ending the split early.  Both
        output files are flushed before returning so the data can be read
        back even while this object is still alive.
        """
        for raw in self.data_file:
            line = raw.strip()
            if not line:
                continue
            if random.random() <= 0.8:
                target = self.train_data
            else:
                target = self.test_data
            target.write(line + '\n')
        self.train_data.flush()
        self.test_data.flush()
class ModelEvaluate(object):
    """Test the ``w`` and ``b`` produced by training against the test data.

    Every non-blank line of the test file must hold three
    whitespace-separated fields: ``label x1 x2``.  The samples are kept in
    ``self.samples`` as dicts with the keys ``'label'``, ``'x1'`` and
    ``'x2'``; the values stay the strings read from the file.
    """

    def __init__(self, test_data, w, b):
        """Store the model and load the samples from path ``test_data``.

        Args:
            test_data: path of the test sample file.
            w: two-element weight sequence of the trained model.
            b: bias of the trained model.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        self.w = w
        self.b = b
        # Samples in list form, one dict per input line.
        self.samples = []
        with open(test_data, 'r') as f:
            for line_no, raw in enumerate(f, 1):
                arr = raw.split()
                if not arr:
                    # Skip blank lines rather than treating the first one
                    # as the end of the data.
                    continue
                if len(arr) != 3:
                    # Explicit check: an assert would vanish under -O.
                    raise ValueError(
                        '%s:%d: expected "label x1 x2", got %r'
                        % (test_data, line_no, raw))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def evaluate(self):
        """Print the sample count and the accuracy, and return the accuracy.

        Returns:
            The fraction of samples whose predicted label equals the stored
            label, as a float.  An empty test set yields ``0.0`` instead of
            raising ``ZeroDivisionError``.
        """
        total = len(self.samples)
        # Single-argument print keeps the exact text of the former Python 2
        # print statement and also parses under Python 3.
        print('测试样本总是: %s' % total)
        correct_count = 0
        for s in self.samples:
            if self.result(self.w, self.b, s) == int(s['label']):
                correct_count += 1
        accuracy = float(correct_count) / total if total else 0.0
        print('正确率为: %s' % accuracy)
        return accuracy

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
class ModelTraining(object):
    """Fit a two-feature linear perceptron to labelled samples from a file.

    Every non-blank line of the training file must hold three
    whitespace-separated fields: ``label x1 x2``.  The samples are kept in
    ``self.samples`` as dicts with the keys ``'label'``, ``'x1'`` and
    ``'x2'``; the values stay the strings read from the file and are
    converted with ``int``/``float`` where they are used.
    """

    def __init__(self, train_data):
        """Load the training samples from the file at path ``train_data``.

        Raises:
            ValueError: if a non-blank line does not have exactly 3 fields.
        """
        # Samples in list form, one dict per input line.
        self.samples = []
        with open(train_data, 'r') as f:
            for line_no, raw in enumerate(f, 1):
                arr = raw.split()
                if not arr:
                    # Skip blank lines rather than treating the first one
                    # as the end of the data.
                    continue
                if len(arr) != 3:
                    # Explicit check: an assert would vanish under -O.
                    raise ValueError(
                        '%s:%d: expected "label x1 x2", got %r'
                        % (train_data, line_no, raw))
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def caculate(self, w=None, b=0.05, p=0.001, max_iterations=None):
        """Adjust ``w`` and ``b`` until every sample is classified correctly.

        After each adjustment all samples are checked again from the start.
        Progress is printed on every adjustment.

        Args:
            w: initial two-element weight list; defaults to ``[0.05, 0.5]``.
                The original author estimated the defaults from a plot of
                the data, but any starting values may be used.
            b: initial bias.
            p: learning rate, i.e. the size of each adjustment step.
            max_iterations: optional cap on the number of adjustments.
                ``None`` (the default) means no cap, in which case the loop
                only ends once every sample is classified correctly.

        Returns:
            The tuple ``(w, b)`` with the final weights and bias.  Returning
            the result is what lets a caller hand the trained model on.
        """
        # Copy so neither a caller's list nor a shared default is mutated.
        w = [0.05, 0.5] if w is None else list(w)
        i = 0
        w_plus, b_plus = self.verifyWB(w, b, p)
        while w_plus != [0, 0] or b_plus != 0:
            if max_iterations is not None and i >= max_iterations:
                break
            i += 1
            # Single-argument print keeps the exact text of the former
            # Python 2 print statement and also parses under Python 3.
            print('第  %s  次调整, w =  %s  b =  %s' % (i, w, b))
            w[0] += w_plus[0]
            w[1] += w_plus[1]
            b += b_plus
            w_plus, b_plus = self.verifyWB(w, b, p)
        print('w =  %s  b =  %s' % (w, b))
        return w, b

    def verifyWB(self, w, b, p):
        """Check ``w`` and ``b`` against the samples, in order.

        Returns:
            ``(w_plus, b_plus)``: the corrections derived from the first
            misclassified sample, or ``([0, 0], 0)`` when every sample is
            classified correctly.
        """
        w_plus, b_plus = [0, 0], 0
        for s in self.samples:
            label = int(s['label'])
            predict = self.result(w, b, s)
            # Correctly predicted samples need no correction.
            if predict != label:
                # p / 2.0 avoids integer truncation for an int p on Python 2.
                coefficient = (p / 2.0) * (label - predict)
                w_plus = [coefficient * float(s['x1']),
                          coefficient * float(s['x2'])]
                b_plus = coefficient
                break
        return w_plus, b_plus

    def result(self, w, b, s):
        """Return 1 if ``w . x + b`` is positive for sample ``s``, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment