royguo/DataFile.txt

## all.py
#!/usr/bin/env python
#coding:utf-8
"""
    author : royguo1988@gmail.com
"""
import random

class DataPrepare(object):
    """Randomly split a data file into a training file and a test file.

    Every non-blank input line is stripped and written, followed by a
    newline, to the training file when ``random.random() <= 0.8`` (about 80%
    of the lines) and to the test file otherwise.

    Attributes:
        data_file: input file object, opened for reading.
        train_data: output file object for the training lines.
        test_data: output file object for the test lines.
    """

    def __init__(self, data_file, train_data, test_data):
        """Open the input file for reading and both outputs for writing.

        Args:
            data_file: path of the file to split.
            train_data: path of the training file (truncated/created).
            test_data: path of the test file (truncated/created).
        """
        self.data_file = open(data_file, 'r')
        self.train_data = open(train_data, 'w')
        self.test_data = open(test_data, 'w')

    def close(self):
        """Close every file that was successfully opened (idempotent)."""
        # __init__ may have failed part-way, so an attribute can be missing.
        for name in ('data_file', 'train_data', 'test_data'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()

    def __del__(self):
        """Close the files when the object is garbage collected."""
        self.close()

    def prepare(self):
        """Distribute the remaining input lines over the two output files."""
        for raw in self.data_file:
            line = raw.strip()
            if not line:
                # Skip blank lines instead of treating the first one as the
                # end of the data, which silently dropped the rest.
                continue
            if random.random() <= 0.8:
                self.train_data.write(line + '\n')
            else:
                self.test_data.write(line + '\n')
        # Make the result readable right away, without relying on __del__.
        self.train_data.flush()
        self.test_data.flush()

class ModelTraining(object):
    """Fit a two-feature linear classifier with perceptron-style updates.

    Attributes:
        samples: list of dicts with the keys 'label', 'x1' and 'x2'; the
            values are kept as the strings read from the training file.
    """

    def __init__(self, train_data):
        """Load the training samples.

        Args:
            train_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: label, x1 and x2.
        """
        self.samples = []
        with open(train_data, 'r') as f:
            for raw in f:
                arr = raw.split()
                if not arr:
                    # A blank line is skipped; it no longer ends the input.
                    continue
                assert len(arr) == 3, 'expected 3 fields, got %r' % (raw,)
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def caculate(self, max_iter=None):
        """Adjust w and b until every training sample is classified correctly.

        Args:
            max_iter: optional upper bound on the number of adjustments.
                The default ``None`` means no bound, in which case the loop
                does not end if no w, b classifies every sample correctly.

        Returns:
            Tuple ``(w, b)`` with ``w`` a two-element list and ``b`` a float.
            When ``max_iter`` is reached the current values are returned.
        """
        # Starting values; estimated from a plot of the data in R, but any
        # other starting point may be used.
        w, b = [0.05, 0.5], 0.05
        # Growth rate: the magnitude of each adjustment of w and b.
        p = 0.001
        # After every adjustment all samples are checked again from the start.
        i = 0
        w_plus, b_plus = self.verifyWB(w, b, p)
        while w_plus != [0, 0] or b_plus != 0:
            if max_iter is not None and i >= max_iter:
                break
            i += 1
            # Single-argument print: same output as the Python 2 print
            # statement, and still valid syntax on Python 3.
            print('%s %s %s %s %s %s'
                  % ('第 ', i, ' 次调整, w = ', w, ' b = ', b))
            w[0] += w_plus[0]
            w[1] += w_plus[1]
            b += b_plus
            w_plus, b_plus = self.verifyWB(w, b, p)
        print('%s %s %s %s' % ('w = ', w, ' b = ', b))
        return w, b

    def verifyWB(self, w, b, p):
        """Check w and b against the samples.

        Returns:
            ``(w_plus, b_plus)``: the adjustment derived from the first
            misclassified sample, or ``([0, 0], 0)`` when every sample is
            classified correctly.
        """
        w_plus, b_plus = [0, 0], 0
        for s in self.samples:
            label = int(s['label'])
            predict = self.result(w, b, s)
            # Correctly classified samples are ignored.
            if predict != label:
                # For labels of +1/-1 the difference is +2/-2, so the step
                # is p in the direction of the true label.
                coefficient = (p / 2) * (label - predict)
                w_plus = [coefficient * float(s['x1']),
                          coefficient * float(s['x2'])]
                b_plus = coefficient
                break
        return w_plus, b_plus

    def result(self, w, b, s):
        """Return 1 if ``w[0]*x1 + w[1]*x2 + b > 0`` for sample s, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1


class ModelEvaluate(object):
    """Evaluate the w and b computed by the model against test data.

    Attributes:
        w: two-element sequence of weights.
        b: bias term.
        samples: list of dicts with the keys 'label', 'x1' and 'x2'; the
            values are kept as the strings read from the test file.
    """

    def __init__(self, test_data, w, b):
        """Store the model parameters and load the test samples.

        Args:
            test_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: label, x1 and x2.
            w: two-element sequence of weights.
            b: bias term.
        """
        self.w = w
        self.b = b
        self.samples = []
        with open(test_data, 'r') as f:
            for raw in f:
                arr = raw.split()
                if not arr:
                    # A blank line is skipped; it no longer ends the input.
                    continue
                assert len(arr) == 3, 'expected 3 fields, got %r' % (raw,)
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def evaluate(self):
        """Print the sample count and the accuracy, and return the accuracy.

        Returns:
            Fraction of samples whose predicted label equals the stored
            label, as a float; 0.0 when there are no samples.
        """
        total = len(self.samples)
        # Single-argument print: same output as the Python 2 print
        # statement, and still valid syntax on Python 3.
        print('%s %s' % ('测试样本总是:', total))
        correct_count = 0
        for s in self.samples:
            if self.result(self.w, self.b, s) == int(s['label']):
                correct_count += 1
        # An empty test set used to raise ZeroDivisionError here.
        accuracy = float(correct_count) / total if total else 0.0
        print('%s %s' % ('正确率为：', accuracy))
        return accuracy

    def result(self, w, b, s):
        """Return 1 if ``w[0]*x1 + w[1]*x2 + b > 0`` for sample s, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1

if __name__ == '__main__':
    # Split data.txt into a training file and a test file.
    dp = DataPrepare('data.txt','train_data.txt','test_data.txt')
    dp.prepare()
    # Rebind to None so that __del__ runs and closes the files before they
    # are opened again below.
    dp = None

    # Train on the training file; caculate() returns the fitted w and b,
    # so no placeholder initialisation of w and b is needed.
    mt = ModelTraining('train_data.txt')
    w,b = mt.caculate()

    # Report the accuracy of w and b on the held-out test file.
    me = ModelEvaluate('test_data.txt',w,b)
    me.evaluate()

## DataFile.txt
-1  -0.303844 -0.458237
-1  -7.727905 -0.384403
-1  -1.693569 -0.724698
1  -6.202481 0.580145
-1  8.869004 -0.343454
1  -5.391438 0.591225
1  7.559267 0.385052
-1  7.263065 -0.239257
-1  -3.519450 -0.989159
-1  -9.948223 -0.421481
-1  -7.707347 -0.895640
-1  1.238391 -0.757743
1  7.213038 0.198066
1  -4.405302 0.414567
-1  -4.246700 0.057275
-1  7.229273 -0.632374
1  8.380911 0.210671
-1  -2.509012 -0.520105
1  3.491775 0.275496
1  6.276548 1.063988
-1  0.755912 -0.225401
-1  -9.992536 -0.522414
-1  -9.495737 -0.027652
1  10.545063 0.394088
-1  0.848230 -0.356873
1  6.588944 0.498700
1  -0.926625 0.220477
1  7.022405 0.376469
1  -2.220649 0.406389
-1  -0.699247 -0.733574
1  1.406911 0.550811
-1  6.251736 -0.859889
-1  9.100554 -0.297695
-1  -9.118529 -0.454069
-1  -6.245038 -0.472838
-1  -1.417224 -0.322209
1  1.408517 0.377613
-1  6.244810 -0.703489
1  -8.633542 0.546162
-1  -3.936660 -0.047634

## DataPrepare.py
import random

class DataPrepare(object):
    """Randomly split a data file into a training file and a test file.

    Every non-blank input line is stripped and written, followed by a
    newline, to the training file when ``random.random() <= 0.8`` (about 80%
    of the lines) and to the test file otherwise.

    Attributes:
        data_file: input file object, opened for reading.
        train_data: output file object for the training lines.
        test_data: output file object for the test lines.
    """

    def __init__(self, data_file, train_data, test_data):
        """Open the input file for reading and both outputs for writing.

        Args:
            data_file: path of the file to split.
            train_data: path of the training file (truncated/created).
            test_data: path of the test file (truncated/created).
        """
        self.data_file = open(data_file, 'r')
        self.train_data = open(train_data, 'w')
        self.test_data = open(test_data, 'w')

    def close(self):
        """Close every file that was successfully opened (idempotent)."""
        # __init__ may have failed part-way, so an attribute can be missing.
        for name in ('data_file', 'train_data', 'test_data'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()

    def __del__(self):
        """Close the files when the object is garbage collected."""
        self.close()

    def prepare(self):
        """Distribute the remaining input lines over the two output files."""
        for raw in self.data_file:
            line = raw.strip()
            if not line:
                # Skip blank lines instead of treating the first one as the
                # end of the data, which silently dropped the rest.
                continue
            if random.random() <= 0.8:
                self.train_data.write(line + '\n')
            else:
                self.test_data.write(line + '\n')
        # Make the result readable right away, without relying on __del__.
        self.train_data.flush()
        self.test_data.flush()

## ModelEvaluate.py
# -*- coding: utf-8 -*-
# The encoding declaration above is required by Python 2 because of the
# non-ASCII string literals below; it must stay within the first two lines
# of the file.
class ModelEvaluate(object):
    """Evaluate the w and b computed by the model against test data.

    Attributes:
        w: two-element sequence of weights.
        b: bias term.
        samples: list of dicts with the keys 'label', 'x1' and 'x2'; the
            values are kept as the strings read from the test file.
    """

    def __init__(self, test_data, w, b):
        """Store the model parameters and load the test samples.

        Args:
            test_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: label, x1 and x2.
            w: two-element sequence of weights.
            b: bias term.
        """
        self.w = w
        self.b = b
        self.samples = []
        with open(test_data, 'r') as f:
            for raw in f:
                arr = raw.split()
                if not arr:
                    # A blank line is skipped; it no longer ends the input.
                    continue
                assert len(arr) == 3, 'expected 3 fields, got %r' % (raw,)
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def evaluate(self):
        """Print the sample count and the accuracy, and return the accuracy.

        Returns:
            Fraction of samples whose predicted label equals the stored
            label, as a float; 0.0 when there are no samples.
        """
        total = len(self.samples)
        # Single-argument print: same output as the Python 2 print
        # statement, and still valid syntax on Python 3.
        print('%s %s' % ('测试样本总是:', total))
        correct_count = 0
        for s in self.samples:
            if self.result(self.w, self.b, s) == int(s['label']):
                correct_count += 1
        # An empty test set used to raise ZeroDivisionError here.
        accuracy = float(correct_count) / total if total else 0.0
        print('%s %s' % ('正确率为：', accuracy))
        return accuracy

    def result(self, w, b, s):
        """Return 1 if ``w[0]*x1 + w[1]*x2 + b > 0`` for sample s, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1

## ModelTraining.py
# -*- coding: utf-8 -*-
# The encoding declaration above is required by Python 2 because of the
# non-ASCII string literals below; it must stay within the first two lines
# of the file.
class ModelTraining(object):
    """Fit a two-feature linear classifier with perceptron-style updates.

    Attributes:
        samples: list of dicts with the keys 'label', 'x1' and 'x2'; the
            values are kept as the strings read from the training file.
    """

    def __init__(self, train_data):
        """Load the training samples.

        Args:
            train_data: path of a text file whose non-blank lines each hold
                three whitespace-separated fields: label, x1 and x2.
        """
        self.samples = []
        with open(train_data, 'r') as f:
            for raw in f:
                arr = raw.split()
                if not arr:
                    # A blank line is skipped; it no longer ends the input.
                    continue
                assert len(arr) == 3, 'expected 3 fields, got %r' % (raw,)
                self.samples.append(
                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

    def caculate(self, max_iter=None):
        """Adjust w and b until every training sample is classified correctly.

        Args:
            max_iter: optional upper bound on the number of adjustments.
                The default ``None`` means no bound, in which case the loop
                does not end if no w, b classifies every sample correctly.

        Returns:
            Tuple ``(w, b)`` with ``w`` a two-element list and ``b`` a float.
            When ``max_iter`` is reached the current values are returned.
        """
        # Starting values; estimated from a plot of the data in R, but any
        # other starting point may be used.
        w, b = [0.05, 0.5], 0.05
        # Growth rate: the magnitude of each adjustment of w and b.
        p = 0.001
        # After every adjustment all samples are checked again from the start.
        i = 0
        w_plus, b_plus = self.verifyWB(w, b, p)
        while w_plus != [0, 0] or b_plus != 0:
            if max_iter is not None and i >= max_iter:
                break
            i += 1
            # Single-argument print: same output as the Python 2 print
            # statement, and still valid syntax on Python 3.
            print('%s %s %s %s %s %s'
                  % ('第 ', i, ' 次调整, w = ', w, ' b = ', b))
            w[0] += w_plus[0]
            w[1] += w_plus[1]
            b += b_plus
            w_plus, b_plus = self.verifyWB(w, b, p)
        print('%s %s %s %s' % ('w = ', w, ' b = ', b))
        # The fitted parameters are returned so that callers can unpack
        # them (previously the method returned None).
        return w, b

    def verifyWB(self, w, b, p):
        """Check w and b against the samples.

        Returns:
            ``(w_plus, b_plus)``: the adjustment derived from the first
            misclassified sample, or ``([0, 0], 0)`` when every sample is
            classified correctly.
        """
        w_plus, b_plus = [0, 0], 0
        for s in self.samples:
            label = int(s['label'])
            predict = self.result(w, b, s)
            # Correctly classified samples are ignored.
            if predict != label:
                # For labels of +1/-1 the difference is +2/-2, so the step
                # is p in the direction of the true label.
                coefficient = (p / 2) * (label - predict)
                w_plus = [coefficient * float(s['x1']),
                          coefficient * float(s['x2'])]
                b_plus = coefficient
                break
        return w_plus, b_plus

    def result(self, w, b, s):
        """Return 1 if ``w[0]*x1 + w[1]*x2 + b > 0`` for sample s, else -1."""
        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
        if c > 0:
            return 1
        return -1
	#!/usr/bin/env python
	#coding:utf-8
	"""
	author : royguo1988@gmail.com
	"""
	import random

	class DataPrepare(object):
	    """Randomly split a data file into a training file and a test file.

	    Every non-blank input line is stripped and written, followed by a
	    newline, to the training file when ``random.random() <= 0.8`` (about
	    80% of the lines) and to the test file otherwise.

	    Attributes:
	        data_file: input file object, opened for reading.
	        train_data: output file object for the training lines.
	        test_data: output file object for the test lines.
	    """

	    def __init__(self, data_file, train_data, test_data):
	        """Open the input file for reading and both outputs for writing.

	        Args:
	            data_file: path of the file to split.
	            train_data: path of the training file (truncated/created).
	            test_data: path of the test file (truncated/created).
	        """
	        self.data_file = open(data_file, 'r')
	        self.train_data = open(train_data, 'w')
	        self.test_data = open(test_data, 'w')

	    def close(self):
	        """Close every file that was successfully opened (idempotent)."""
	        # __init__ may have failed part-way, so an attribute can be missing.
	        for name in ('data_file', 'train_data', 'test_data'):
	            handle = getattr(self, name, None)
	            if handle is not None:
	                handle.close()

	    def __del__(self):
	        """Close the files when the object is garbage collected."""
	        self.close()

	    def prepare(self):
	        """Distribute the remaining input lines over the two output files."""
	        for raw in self.data_file:
	            line = raw.strip()
	            if not line:
	                # Skip blank lines instead of treating the first one as
	                # the end of the data, which silently dropped the rest.
	                continue
	            if random.random() <= 0.8:
	                self.train_data.write(line + '\n')
	            else:
	                self.test_data.write(line + '\n')
	        # Make the result readable right away, without relying on __del__.
	        self.train_data.flush()
	        self.test_data.flush()

	class ModelTraining(object):
	    """Fit a two-feature linear classifier with perceptron-style updates.

	    Attributes:
	        samples: list of dicts with the keys 'label', 'x1' and 'x2'; the
	            values are kept as the strings read from the training file.
	    """

	    def __init__(self, train_data):
	        """Load the training samples.

	        Args:
	            train_data: path of a text file whose non-blank lines each
	                hold three whitespace-separated fields: label, x1, x2.
	        """
	        self.samples = []
	        with open(train_data, 'r') as f:
	            for raw in f:
	                arr = raw.split()
	                if not arr:
	                    # A blank line is skipped; it no longer ends the input.
	                    continue
	                assert len(arr) == 3, 'expected 3 fields, got %r' % (raw,)
	                self.samples.append(
	                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

	    def caculate(self, max_iter=None):
	        """Adjust w and b until every training sample is classified correctly.

	        Args:
	            max_iter: optional upper bound on the number of adjustments.
	                The default ``None`` means no bound, in which case the
	                loop does not end if no w, b classifies every sample
	                correctly.

	        Returns:
	            Tuple ``(w, b)`` with ``w`` a two-element list and ``b`` a
	            float. When ``max_iter`` is reached the current values are
	            returned.
	        """
	        # Starting values; estimated from a plot of the data in R, but any
	        # other starting point may be used.
	        w, b = [0.05, 0.5], 0.05
	        # Growth rate: the magnitude of each adjustment of w and b.
	        p = 0.001
	        # After every adjustment all samples are checked again.
	        i = 0
	        w_plus, b_plus = self.verifyWB(w, b, p)
	        while w_plus != [0, 0] or b_plus != 0:
	            if max_iter is not None and i >= max_iter:
	                break
	            i += 1
	            # Single-argument print: same output as the Python 2 print
	            # statement, and still valid syntax on Python 3.
	            print('%s %s %s %s %s %s'
	                  % ('第 ', i, ' 次调整, w = ', w, ' b = ', b))
	            w[0] += w_plus[0]
	            w[1] += w_plus[1]
	            b += b_plus
	            w_plus, b_plus = self.verifyWB(w, b, p)
	        print('%s %s %s %s' % ('w = ', w, ' b = ', b))
	        return w, b

	    def verifyWB(self, w, b, p):
	        """Check w and b against the samples.

	        Returns:
	            ``(w_plus, b_plus)``: the adjustment derived from the first
	            misclassified sample, or ``([0, 0], 0)`` when every sample is
	            classified correctly.
	        """
	        w_plus, b_plus = [0, 0], 0
	        for s in self.samples:
	            label = int(s['label'])
	            predict = self.result(w, b, s)
	            # Correctly classified samples are ignored.
	            if predict != label:
	                # For labels of +1/-1 the difference is +2/-2, so the
	                # step is p in the direction of the true label.
	                coefficient = (p / 2) * (label - predict)
	                w_plus = [coefficient * float(s['x1']),
	                          coefficient * float(s['x2'])]
	                b_plus = coefficient
	                break
	        return w_plus, b_plus

	    def result(self, w, b, s):
	        """Return 1 if ``w[0]*x1 + w[1]*x2 + b > 0`` for sample s, else -1."""
	        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
	        if c > 0:
	            return 1
	        return -1


	class ModelEvaluate(object):
	    """Evaluate the w and b computed by the model against test data.

	    Attributes:
	        w: two-element sequence of weights.
	        b: bias term.
	        samples: list of dicts with the keys 'label', 'x1' and 'x2'; the
	            values are kept as the strings read from the test file.
	    """

	    def __init__(self, test_data, w, b):
	        """Store the model parameters and load the test samples.

	        Args:
	            test_data: path of a text file whose non-blank lines each
	                hold three whitespace-separated fields: label, x1, x2.
	            w: two-element sequence of weights.
	            b: bias term.
	        """
	        self.w = w
	        self.b = b
	        self.samples = []
	        with open(test_data, 'r') as f:
	            for raw in f:
	                arr = raw.split()
	                if not arr:
	                    # A blank line is skipped; it no longer ends the input.
	                    continue
	                assert len(arr) == 3, 'expected 3 fields, got %r' % (raw,)
	                self.samples.append(
	                    {'label': arr[0], 'x1': arr[1], 'x2': arr[2]})

	    def evaluate(self):
	        """Print the sample count and the accuracy, and return the accuracy.

	        Returns:
	            Fraction of samples whose predicted label equals the stored
	            label, as a float; 0.0 when there are no samples.
	        """
	        total = len(self.samples)
	        # Single-argument print: same output as the Python 2 print
	        # statement, and still valid syntax on Python 3.
	        print('%s %s' % ('测试样本总是:', total))
	        correct_count = 0
	        for s in self.samples:
	            if self.result(self.w, self.b, s) == int(s['label']):
	                correct_count += 1
	        # An empty test set used to raise ZeroDivisionError here.
	        accuracy = float(correct_count) / total if total else 0.0
	        print('%s %s' % ('正确率为：', accuracy))
	        return accuracy

	    def result(self, w, b, s):
	        """Return 1 if ``w[0]*x1 + w[1]*x2 + b > 0`` for sample s, else -1."""
	        c = w[0] * float(s['x1']) + w[1] * float(s['x2']) + b
	        if c > 0:
	            return 1
	        return -1

	if __name__ == '__main__':
	    # Split data.txt into a training file and a test file.
	    dp = DataPrepare('data.txt','train_data.txt','test_data.txt')
	    dp.prepare()
	    # Rebind to None so that __del__ runs and closes the files before
	    # they are opened again below.
	    dp = None

	    # Train on the training file; caculate() returns the fitted w and b,
	    # so no placeholder initialisation of w and b is needed.
	    mt = ModelTraining('train_data.txt')
	    w,b = mt.caculate()

	    # Report the accuracy of w and b on the held-out test file.
	    me = ModelEvaluate('test_data.txt',w,b)
	    me.evaluate()
	-1 -0.303844 -0.458237
	-1 -7.727905 -0.384403
	-1 -1.693569 -0.724698
	1 -6.202481 0.580145
	-1 8.869004 -0.343454
	1 -5.391438 0.591225
	1 7.559267 0.385052
	-1 7.263065 -0.239257
	-1 -3.519450 -0.989159
	-1 -9.948223 -0.421481
	-1 -7.707347 -0.895640
	-1 1.238391 -0.757743
	1 7.213038 0.198066
	1 -4.405302 0.414567
	-1 -4.246700 0.057275
	-1 7.229273 -0.632374
	1 8.380911 0.210671
	-1 -2.509012 -0.520105
	1 3.491775 0.275496
	1 6.276548 1.063988
	-1 0.755912 -0.225401
	-1 -9.992536 -0.522414
	-1 -9.495737 -0.027652
	1 10.545063 0.394088
	-1 0.848230 -0.356873
	1 6.588944 0.498700
	1 -0.926625 0.220477
	1 7.022405 0.376469
	1 -2.220649 0.406389
	-1 -0.699247 -0.733574
	1 1.406911 0.550811
	-1 6.251736 -0.859889
	-1 9.100554 -0.297695
	-1 -9.118529 -0.454069
	-1 -6.245038 -0.472838
	-1 -1.417224 -0.322209
	1 1.408517 0.377613
	-1 6.244810 -0.703489
	1 -8.633542 0.546162
	-1 -3.936660 -0.047634