Last active
March 17, 2016 01:38
-
-
Save laughing/e874813a34d9b50ce52f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from collections import defaultdict | |
class LR_SGD(object):
    """Online logistic regression trained with plain stochastic gradient descent.

    Examples are sparse feature mappings ``{feature_key: value}`` and labels
    are expected to be 0 or 1. Weights are kept in ``self.w`` (a
    ``defaultdict(float)``), ``self.eta`` is the base learning rate and
    ``self.t`` counts the training examples seen so far.
    """

    def __init__(self, eta=1):
        self.w = defaultdict(float)
        self.eta = eta
        self.t = 0

    def sign(self, z):
        """Return 1 when ``z`` is non-negative, otherwise -1."""
        return 1 if z >= 0 else -1

    def sigmoid(self, z):
        """Return the logistic function of ``z``, saturated outside [-20, 20].

        Saturating early also keeps ``math.exp`` from overflowing on very
        negative inputs.
        """
        if z > 20:
            return 1.0
        if z < -20:
            return 0.0
        return 1.0 / (1 + math.exp(-z))

    def predict(self, x):
        """Return the predicted probability of the positive class for ``x``.

        ``x`` maps feature keys to values. Prediction never modifies the
        model: weights are read with ``.get`` so that features the model has
        not seen are not inserted into the ``defaultdict`` as a side effect.
        """
        w = self.w
        s = sum(w.get(k, 0.0) * v for k, v in x.items())
        return self.sigmoid(s)

    def train(self, y, x):
        """Take one SGD step on the example ``(y, x)``.

        The step size decays as ``eta / (1 + log(t))`` where ``t`` is the
        number of examples seen, including this one.
        """
        self.t += 1
        error = self.predict(x) - y
        eta = self.eta / (1.0 + math.log(self.t))
        for k, v in x.items():
            self.w[k] -= eta * error * v
class LR_RDA_ADAGRAD(LR_SGD):
    """Logistic regression with L1-thresholded dual averaging and per-feature step sizes.

    For every feature the model keeps the running sum of gradients
    (``self.u``) and of squared gradients (``self.G``). After each example
    the weight is recomputed in closed form from those sums: it is set to
    zero while the average absolute gradient is at most ``l``, which yields
    sparse weight vectors.

    Parameters:
        eta: base learning rate, divided by ``sqrt(G[k])`` per feature.
        l: L1 threshold applied to the average gradient magnitude.
    """

    def __init__(self, eta=1, l=1E-2):
        super(LR_RDA_ADAGRAD, self).__init__(eta)
        self.G = defaultdict(float)  # per-feature sum of squared gradients
        self.u = defaultdict(float)  # per-feature sum of gradients
        self.l = l

    def train(self, y, x):
        """Update the model with one example ``(y, x)``.

        ``y`` is the 0/1 label and ``x`` maps feature keys to values.
        """
        self.t += 1
        error = self.predict(x) - y
        for k, v in x.items():
            g = error * v
            self.G[k] += g * g
            self.u[k] += g
            if self.G[k] == 0:
                # No non-zero gradient has been accumulated for this feature
                # yet (zero feature value or zero error). Dividing by
                # sqrt(G) would raise ZeroDivisionError; the weight simply
                # stays at zero.
                self.w[k] = 0
                continue
            eta = self.eta / math.sqrt(self.G[k])
            u = math.fabs(self.u[k]) / self.t
            if u <= self.l:
                # Average gradient is inside the L1 threshold: truncate.
                self.w[k] = 0
            else:
                self.w[k] = -self.sign(self.u[k]) * eta * self.t * (u - self.l)
class LR_SGD_ADADELTA(LR_SGD):
    """Logistic regression trained with ADADELTA-style per-feature updates.

    Instead of a learning rate, each feature's step is scaled by the ratio
    of two exponentially decayed running averages: squared past updates
    (``self.Edx``) over squared gradients (``self.Eg``).

    Parameters:
        rho: decay factor of both running averages.
        epsilon: constant added under the square root in ``rms``; it keeps
            the denominator non-zero and lets the first update be non-zero.
    """

    def __init__(self, rho=0.99, epsilon=1E-6):
        # Initialise the shared base-class state (w, eta, t) the same way the
        # sibling subclasses do, so no inherited attribute is left undefined.
        # eta and t are not used by this class's own update rule.
        super(LR_SGD_ADADELTA, self).__init__()
        self.rho = rho
        self.epsilon = epsilon
        self.Eg = defaultdict(float)   # running average of squared gradients
        self.Edx = defaultdict(float)  # running average of squared updates

    def rms(self, z):
        """Return ``sqrt(z + epsilon)``."""
        return math.sqrt(z + self.epsilon)

    def train(self, y, x):
        """Update the model with one example ``(y, x)``.

        ``y`` is the 0/1 label and ``x`` maps feature keys to values.
        """
        error = self.predict(x) - y
        rho = self.rho
        for k, v in x.items():
            g = error * v
            self.Eg[k] = rho * self.Eg[k] + (1 - rho) * g * g
            # The step uses the update average from *before* this step and
            # the gradient average that already includes this gradient.
            dx = -self.rms(self.Edx[k]) / self.rms(self.Eg[k]) * g
            self.w[k] += dx
            self.Edx[k] = rho * self.Edx[k] + (1 - rho) * dx * dx
class LR_FTRL_Proximal(LR_SGD):
    """Logistic regression with per-coordinate FTRL-Proximal updates.

    Per feature, ``self.z`` accumulates adjusted gradients and ``self.n``
    accumulates squared gradients. Weights are recomputed in closed form
    after each example and are exactly zero while ``|z| <= l1``.

    Parameters:
        l1: L1 regularization strength (sparsity threshold on ``z``).
        l2: L2 regularization strength, added to the weight denominator.
        alpha: learning-rate scale.
        beta: learning-rate offset added to ``sqrt(n)``.
    """

    def __init__(self, l1=1, l2=0, alpha=1, beta=1):
        # The base-class learning rate is passed as 0; this class's train()
        # does not read it.
        super(LR_FTRL_Proximal, self).__init__(0)
        self.l1 = l1
        self.l2 = l2
        self.alpha = alpha
        self.beta = beta
        self.z = defaultdict(float)
        self.n = defaultdict(float)

    def train(self, y, x):
        """Update the model with one example ``(y, x)``.

        ``y`` is the 0/1 label and ``x`` maps feature keys to values.
        """
        residual = self.predict(x) - y
        for feature, value in x.items():
            grad = residual * value
            grad_sq = grad * grad

            # Change in the per-coordinate learning-rate term caused by
            # adding this gradient.
            n_before = self.n[feature]
            sigma = (math.sqrt(n_before + grad_sq) - math.sqrt(n_before)) / self.alpha
            self.z[feature] += grad - sigma * self.w[feature]
            self.n[feature] += grad_sq

            z_now = self.z[feature]
            if math.fabs(z_now) <= self.l1:
                # Inside the L1 threshold: the weight is truncated to zero.
                self.w[feature] = 0
                continue

            shrunk = z_now - self.sign(z_now) * self.l1
            scale = (self.beta + math.sqrt(self.n[feature])) / self.alpha + self.l2
            self.w[feature] = -shrunk / scale
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import math | |
import sgd | |
# The script takes exactly two arguments: the training file and the test file.
# Exit with a usage message instead of an opaque unpacking ValueError.
if len(sys.argv) != 3:
    sys.exit("usage: %s TRAIN_PATH TEST_PATH" % sys.argv[0])
_, train_path, test_path = sys.argv
def parse(line):
    """Parse one ``label index:value index:value ...`` line.

    Returns ``(label, features)`` where ``label`` is an int with negative
    labels mapped to 0, and ``features`` maps int feature indices to values
    scaled so the vector has unit Euclidean length.

    Tokens may be separated by any run of whitespace. A vector whose norm is
    zero (no features, or only zero values) is returned unscaled instead of
    dividing by zero.

    Raises:
        ValueError: if the line is empty or the label or a value is not numeric.
        IndexError: if a feature token has no ``:`` separator.
    """
    tokens = line.split()
    if not tokens:
        raise ValueError("cannot parse an empty line")

    label = int(tokens[0])
    if label < 0:
        label = 0

    d = {}
    s = 0
    for token in tokens[1:]:
        parts = token.split(":")
        v = float(parts[1])
        d[int(parts[0])] = v
        s += v * v

    s = math.sqrt(s)
    if s > 0:
        d = {k: v / s for k, v in d.items()}
    return (label, d)
# Train online: a single pass over the training file, one example per line.
clf = sgd.LR_SGD_ADADELTA()
with open(train_path) as train_file:
    for line in train_file:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines, e.g. at the end of the file
        y, x = parse(line)
        clf.train(y, x)

# Evaluate: threshold the predicted probability at 0.5 and count matches.
correct = 0
total = 0
with open(test_path) as test_file:
    for line in test_file:
        line = line.strip()
        if not line:
            continue
        y, x = parse(line)
        py = 1 if clf.predict(x) > 0.5 else 0
        if y == py:
            correct += 1
        total += 1

# An empty test set has no defined accuracy; report it instead of dividing by zero.
if total == 0:
    sys.exit("no test examples found in %s" % test_path)

acc = float(correct) / total
# The parenthesised single-argument form prints identically on Python 2 and 3.
print("%0.4f" % acc)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment