Last active
February 29, 2016 13:05
-
-
Save dongguosheng/63a9c336ce903e23030d to your computer and use it in GitHub Desktop.
a simple logistic regression implementation, test on heart_scale dataset.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
''' | |
Feature vectors are column vectors. | |
X, n * m, n -> feature dim, m -> sample number | |
Y, 1 * m, row vector | |
theta/weight/beta, n * 1, column vector | |
gradient, n * 1, column vector | |
gradient = X * (Y - sigmoid(theta' * X))' | |
heissen = X * (X' * sigmoid(theta' * X) * (1 - sigmoid(theta' * X))) | |
cost function: | |
log-likelihood(theta) = Y * log(Pr(X; theta)) + (1 - Y) * log(1 - Pr(X; theta)) | |
Pr(X; theta) = sigmoid(theta' * X) | |
Regularization to be added. | |
''' | |
import numpy as np | |
from scipy import optimize as opt | |
def sigmoid_derive(X): | |
return X * (1 - X) | |
def sigmoid(X): | |
return 1.0 / (1 + np.exp(- X)) | |
def gradient(theta, X, y): | |
y_predict = sigmoid(np.dot(theta.transpose(), X)) | |
grad = np.dot(X, (y - y_predict).transpose()) | |
return grad | |
def heissen(theta, X): | |
# cal heissen | |
y_predict = sigmoid(np.dot(theta.transpose(), X)) | |
right = sigmoid_derive(y_predict) | |
heis = np.dot(X, X.transpose() * right.transpose()) | |
return heis | |
def gradient_for_scipy(theta, X, y): | |
# TODO: | |
pass | |
def cost(theta, X, y): | |
''' | |
Maximum Likelihood Estimate, MLE(极大似然估计). | |
''' | |
h = sigmoid(np.dot(theta.transpose(), X)) | |
c = -y * np.log(h) - (1 - y) * np.log(1 - h) | |
return np.sum(c) | |
def train(X, y, method='g'): | |
# append intercept term | |
intercept = np.ones((1, X.shape[1])) | |
X_ = np.append(intercept, X, axis=0) | |
# init theta, column vector | |
theta = 0.01 * np.array([np.random.randn(X_.shape[0])]).transpose() | |
print 'Theta Init: ' | |
print theta | |
if method == 'g': | |
return gradient_descent(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500) | |
elif method == 'h': | |
return newton_method(theta, X_, y, alpha=0.01, epsilon=0.001, max_iter=1500) | |
else: | |
return opt.fmin_bfgs(cost, theta, fprime=gradient_for_scipy, args=(X_, y)) | |
def gradient_descent(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500): | |
''' | |
Simple Gradient Descent Impletation. | |
''' | |
grad = gradient(theta, X_, y) | |
cost_old = cost(theta, X_, y) | |
theta = update_theta(grad, theta, alpha) | |
cnt = 1 | |
for i in range(max_iter): | |
grad = gradient(theta, X_, y) | |
theta = update_theta(grad, theta, alpha) | |
cost_new = cost(theta, X_, y) | |
cnt += 1 | |
print 'iter %d, cost: %f' % (cnt, cost_new) | |
if abs(cost_new - cost_old) < epsilon: | |
break | |
else: | |
cost_old = cost_new | |
return theta | |
def newton_method(theta, X_, y, alpha=0.01, epsilon=0.01, max_iter=1500): | |
''' | |
Simple Newton Method Impletation. | |
''' | |
grad = gradient(theta, X_, y) | |
heis = heissen(theta, X_) | |
cost_old = cost(theta, X_, y) | |
theta = update_theta(grad, theta, np.asmatrix(heis).getI()) | |
cnt = 1 | |
for i in range(max_iter): | |
grad = gradient(theta, X_, y) | |
heis = heissen(theta, X_) | |
theta = update_theta(grad, theta, np.asmatrix(heis).getI()) | |
cost_new = cost(theta, X_, y) | |
cnt += 1 | |
print 'iter %d, cost: %f' % (cnt, cost_new) | |
if abs(cost_new - cost_old) < epsilon: | |
break | |
else: | |
cost_old = cost_new | |
return theta | |
def update_theta(gradient, theta, alpha): | |
theta += alpha * gradient | |
return theta | |
def predict(X, theta): | |
return sigmoid(np.dot(theta.transpose(), X)) | |
def load_dataset(input_file, dim): | |
''' | |
input format: LIBLINEAR input format. | |
''' | |
X = '' | |
label = [] | |
with open(input_file) as f: | |
for line in f: | |
tmp_list = line.rstrip().split() | |
if tmp_list[0] == '+1': | |
label.append(1.0) | |
else: | |
label.append(0.0) | |
feature_list = [] | |
j = 1 | |
for i in range(dim): | |
index, val = tmp_list[j].split(':') | |
if int(index) == i + 1: | |
feature_list.append(float(val)) | |
j += 1 | |
else: | |
feature_list.append(.0) | |
assert len(feature_list) == dim | |
if isinstance(X, str) and X == '': | |
X = np.array([feature_list]) | |
else: | |
X = np.append(X, [feature_list], axis=0) | |
return (X.transpose(), np.array([label])) | |
def main(): | |
X, y = load_dataset('./heart_scale', 13) | |
method = 'h' # g for gradient descent, h for newton method, else for scipy bfgs | |
theta = train(X, y, method=method) | |
print theta | |
intercept = np.ones((1, X.shape[1])) | |
X_ = np.append(intercept, X, axis=0) | |
prob = predict(X_, theta) | |
# print prob.flatten() | |
predict_label = [1 if p > 0.5 else 0 for p in prob.flatten()] | |
# print predict_label | |
# print y.flatten() | |
print 'Precision: {precision}%'.format(precision=sum(1 if i == j else 0 for i, j in zip(y.flatten(), predict_label)) / 270.0 * 100) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment