dongguosheng/lr.py

## lr.py
# -*- coding: utf-8 -*-

'''
Feature vectors are column vectors.
X, n * m, n -> feature dim, m -> sample number
Y, 1 * m, row vector
theta/weight/beta, n * 1, column vector
gradient, n * 1, column vector

gradient = X * (Y - sigmoid(theta' * X))'
heissen = X * (X' * sigmoid(theta' * X) * (1 - sigmoid(theta' * X)))

cost function:
    log-likelihood(theta) = Y * log(Pr(X; theta)) + (1 - Y) * log(1 - Pr(X; theta))
    Pr(X; theta) = sigmoid(theta' * X)

Regularization to be added.

'''

import numpy as np
from scipy import optimize as opt

def sigmoid_derive(X):
    return X * (1 - X)

def sigmoid(X):
    return 1.0 / (1 + np.exp(- X))

def gradient(theta, X, y):
    y_predict = sigmoid(np.dot(theta.transpose(), X))
    grad = np.dot(X, (y - y_predict).transpose())
    return grad

def heissen(theta, X):
    # cal heissen
    y_predict = sigmoid(np.dot(theta.transpose(), X))
    right = sigmoid_derive(y_predict)
    heis = np.dot(X, X.transpose() * right.transpose())
    return heis

def gradient_for_scipy(theta, X, y):
    # TODO:
    pass

def cost(theta, X, y):
    '''
    Maximum Likelihood Estimate, MLE(极大似然估计).
    '''
    h = sigmoid(np.dot(theta.transpose(), X))
    c = -y * np.log(h) - (1 - y) * np.log(1 - h)
    return np.sum(c)

def train(X, y, method='g'):
    # append intercept term
    intercept = np.ones((1, X.shape[1]))
    X_ = np.append(intercept, X, axis=0)
    # init theta, column vector
    theta = 0.01 * np.array([np.random.randn(X_.shape[0])]).transpose()
    print 'Theta Init: '
    print theta
    if method == 'g':
        return gradient_descent(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500)
    elif method == 'h':
        return newton_method(theta, X_, y, alpha=0.01, epsilon=0.001, max_iter=1500)
    else:
        return opt.fmin_bfgs(cost, theta, fprime=gradient_for_scipy, args=(X_, y))

def gradient_descent(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500):
    '''
    Simple Gradient Descent Impletation.
    '''
    grad = gradient(theta, X_, y)
    cost_old = cost(theta, X_, y)
    theta = update_theta(grad, theta, alpha)
    cnt = 1
    for i in range(max_iter):
        grad = gradient(theta, X_, y)
        theta = update_theta(grad, theta, alpha)
        cost_new = cost(theta, X_, y)
        cnt += 1
        print 'iter %d, cost: %f' % (cnt, cost_new)
        if abs(cost_new - cost_old) < epsilon:
            break
        else:
            cost_old = cost_new
    return theta

def newton_method(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500):
    '''
    Simple Newton Method Impletation.
    '''
    grad = gradient(theta, X_, y)
    heis = heissen(theta, X_)
    cost_old = cost(theta, X_, y)
    theta = update_theta(grad, theta, np.asmatrix(heis).getI())
    cnt = 1
    for i in range(max_iter):
        grad = gradient(theta, X_, y)
        heis = heissen(theta, X_)
        theta = update_theta(grad, theta, np.asmatrix(heis).getI())
        cost_new = cost(theta, X_, y)
        cnt += 1
        print 'iter %d, cost: %f' % (cnt, cost_new)
        if abs(cost_new - cost_old) < epsilon:
            break
        else:
            cost_old = cost_new
    return theta

def update_theta(gradient, theta, alpha):
    theta += alpha * gradient
    return theta

def predict(X, theta):
    return sigmoid(np.dot(theta.transpose(), X))

def load_dataset(input_file, dim):
    '''
    input format: LIBLINEAR input format.
    '''
    X = ''
    label = []
    with open(input_file) as f:
        for line in f:
            tmp_list = line.rstrip().split()
            if tmp_list[0] == '+1':
                label.append(1.0)
            else:
                label.append(0.0)
            feature_list = []
            j = 1
            for i in range(dim):
                index, val = tmp_list[j].split(':')
                if int(index) == i + 1:
                    feature_list.append(float(val))
                    j += 1
                else:
                    feature_list.append(.0)
            assert len(feature_list) == dim
            if isinstance(X, str) and X == '':
                X = np.array([feature_list])
            else:
                X = np.append(X, [feature_list], axis=0)

    return (X.transpose(), np.array([label]))

def main():
    X, y = load_dataset('./heart_scale', 13)
    method = 'h'    # g for gradient descent, h for newton method, else for scipy bfgs
    theta = train(X, y, method=method)
    print theta
    intercept = np.ones((1, X.shape[1]))
    X_ = np.append(intercept, X, axis=0)
    prob = predict(X_, theta)
    # print prob.flatten()
    predict_label = [1 if p > 0.5 else 0 for p in prob.flatten()]
    # print predict_label
    # print y.flatten()
    print 'Precision: {precision}%'.format(precision=sum(1 if i == j else 0 for i, j in zip(y.flatten(), predict_label)) / 270.0 * 100)

if __name__ == '__main__':
    main()
	# -- coding: utf-8 --

	'''
	Feature vectors are column vectors.
	X, n * m, n -> feature dim, m -> sample number
	Y, 1 * m, row vector
	theta/weight/beta, n * 1, column vector
	gradient, n * 1, column vector

	gradient = X * (Y - sigmoid(theta' * X))'
	heissen = X * (X' * sigmoid(theta' * X) * (1 - sigmoid(theta' * X)))

	cost function:
	log-likelihood(theta) = Y * log(Pr(X; theta)) + (1 - Y) * log(1 - Pr(X; theta))
	Pr(X; theta) = sigmoid(theta' * X)

	Regularization to be added.

	'''

	import numpy as np
	from scipy import optimize as opt

	def sigmoid_derive(X):
	return X * (1 - X)

	def sigmoid(X):
	return 1.0 / (1 + np.exp(- X))

	def gradient(theta, X, y):
	y_predict = sigmoid(np.dot(theta.transpose(), X))
	grad = np.dot(X, (y - y_predict).transpose())
	return grad

	def heissen(theta, X):
	# cal heissen
	y_predict = sigmoid(np.dot(theta.transpose(), X))
	right = sigmoid_derive(y_predict)
	heis = np.dot(X, X.transpose() * right.transpose())
	return heis

	def gradient_for_scipy(theta, X, y):
	# TODO:
	pass

	def cost(theta, X, y):
	'''
	Maximum Likelihood Estimate, MLE(极大似然估计).
	'''
	h = sigmoid(np.dot(theta.transpose(), X))
	c = -y * np.log(h) - (1 - y) * np.log(1 - h)
	return np.sum(c)

	def train(X, y, method='g'):
	# append intercept term
	intercept = np.ones((1, X.shape[1]))
	X_ = np.append(intercept, X, axis=0)
	# init theta, column vector
	theta = 0.01 * np.array([np.random.randn(X_.shape[0])]).transpose()
	print 'Theta Init: '
	print theta
	if method == 'g':
	return gradient_descent(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500)
	elif method == 'h':
	return newton_method(theta, X_, y, alpha=0.01, epsilon=0.001, max_iter=1500)
	else:
	return opt.fmin_bfgs(cost, theta, fprime=gradient_for_scipy, args=(X_, y))

	def gradient_descent(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500):
	'''
	Simple Gradient Descent Impletation.
	'''
	grad = gradient(theta, X_, y)
	cost_old = cost(theta, X_, y)
	theta = update_theta(grad, theta, alpha)
	cnt = 1
	for i in range(max_iter):
	grad = gradient(theta, X_, y)
	theta = update_theta(grad, theta, alpha)
	cost_new = cost(theta, X_, y)
	cnt += 1
	print 'iter %d, cost: %f' % (cnt, cost_new)
	if abs(cost_new - cost_old) < epsilon:
	break
	else:
	cost_old = cost_new
	return theta

	def newton_method(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500):
	'''
	Simple Newton Method Impletation.
	'''
	grad = gradient(theta, X_, y)
	heis = heissen(theta, X_)
	cost_old = cost(theta, X_, y)
	theta = update_theta(grad, theta, np.asmatrix(heis).getI())
	cnt = 1
	for i in range(max_iter):
	grad = gradient(theta, X_, y)
	heis = heissen(theta, X_)
	theta = update_theta(grad, theta, np.asmatrix(heis).getI())
	cost_new = cost(theta, X_, y)
	cnt += 1
	print 'iter %d, cost: %f' % (cnt, cost_new)
	if abs(cost_new - cost_old) < epsilon:
	break
	else:
	cost_old = cost_new
	return theta

	def update_theta(gradient, theta, alpha):
	theta += alpha * gradient
	return theta

	def predict(X, theta):
	return sigmoid(np.dot(theta.transpose(), X))

	def load_dataset(input_file, dim):
	'''
	input format: LIBLINEAR input format.
	'''
	X = ''
	label = []
	with open(input_file) as f:
	for line in f:
	tmp_list = line.rstrip().split()
	if tmp_list[0] == '+1':
	label.append(1.0)
	else:
	label.append(0.0)
	feature_list = []
	j = 1
	for i in range(dim):
	index, val = tmp_list[j].split(':')
	if int(index) == i + 1:
	feature_list.append(float(val))
	j += 1
	else:
	feature_list.append(.0)
	assert len(feature_list) == dim
	if isinstance(X, str) and X == '':
	X = np.array([feature_list])
	else:
	X = np.append(X, [feature_list], axis=0)

	return (X.transpose(), np.array([label]))

	def main():
	X, y = load_dataset('./heart_scale', 13)
	method = 'h' # g for gradient descent, h for newton method, else for scipy bfgs
	theta = train(X, y, method=method)
	print theta
	intercept = np.ones((1, X.shape[1]))
	X_ = np.append(intercept, X, axis=0)
	prob = predict(X_, theta)
	# print prob.flatten()
	predict_label = [1 if p > 0.5 else 0 for p in prob.flatten()]
	# print predict_label
	# print y.flatten()
	print 'Precision: {precision}%'.format(precision=sum(1 if i == j else 0 for i, j in zip(y.flatten(), predict_label)) / 270.0 * 100)

	if __name__ == '__main__':
	main()