Created
January 22, 2019 15:32
-
-
Save MitI-7/8401202c9b99c38ab1a078708ed7e903 to your computer and use it in GitHub Desktop.
FactorizationMachines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from math import exp, log | |
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar *x*.

    The two branches are algebraically identical; each one only ever calls
    ``exp`` on a non-positive argument, so large-magnitude inputs underflow
    to 0.0 instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    # For negative x, exp(-x) could overflow; use e**x / (1 + e**x) instead.
    ex = exp(x)
    return ex / (1 + ex)
class FactorizationMachines:
    """Second-order factorization machine for binary labels in {-1, +1}.

    The score of a sample ``x`` is::

        w0 + <w, x> + sum_{i<j} <V[i], V[j]> * x[i] * x[j]

    Parameters are fitted by per-sample gradient descent on the logistic
    loss ``log(1 + exp(-y * score))`` with an L2 penalty of strength
    ``lamb`` on ``w0``, ``w`` and ``V``.

    Args:
        epoch: Number of full passes over the training data.
        k: Number of latent factors per feature (columns of ``V``).
        eta: Learning rate of the gradient steps.
        seed: Value passed to ``np.random.seed``; note that this seeds
            NumPy's *global* random state.
    """

    def __init__(self, epoch: int = 1000, k: int = 3, eta: float = 0.01, seed: int = 0):
        # Global seeding is kept on purpose: it is an externally visible
        # side effect of constructing the model.
        np.random.seed(seed)
        self.n = None        # number of features, set by fit()
        self.epoch = epoch
        self.k = k
        self.lamb = 0.01     # L2 regularisation strength
        self.eta = eta
        self.sigma = 0.001   # scale of the random initialisation of V
        self.w0 = 0.0        # global bias
        self.w = None        # linear weights, shape (n,)
        self.V = None        # latent factors, shape (n, k)

    @staticmethod
    def _sigmoid(z: float) -> float:
        """Logistic function that never calls ``exp`` on a positive value."""
        if z >= 0:
            return 1.0 / (1.0 + exp(-z))
        ez = exp(z)
        return ez / (1.0 + ez)

    def fit(self, X: np.ndarray, Y: np.ndarray, verbose=True):
        """Train the model on samples ``X`` with labels ``Y``.

        Args:
            X: 2-D array-like of shape (samples, features).
            Y: Sequence of labels, each -1 or 1, one per row of ``X``.
            verbose: When true, print the training log loss after each epoch.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            ValueError: If ``X`` is not 2-D, the lengths of ``X`` and ``Y``
                differ, or ``Y`` contains a value other than -1 or 1.
        """
        X = np.asarray(X, dtype=np.float64)
        Y = np.asarray(Y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (samples, features)")
        if len(X) != len(Y):
            raise ValueError("X and Y must contain the same number of samples")
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not np.isin(Y, (-1, 1)).all():
            raise ValueError("labels in Y must be -1 or 1")

        self.n = X.shape[1]
        self.w = np.zeros(self.n, np.float64)
        self.V = self.sigma * np.random.randn(self.n, self.k)

        for epoch in range(self.epoch):
            for x, y in zip(X, Y):
                self._update(x=x, y=y, p=self._predict(x))
            if verbose:
                print("epoch:{0} log loss={1}".format(epoch, self.test(X, Y)))
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the raw model score of every row of ``X``.

        The result is a score, not a probability: a positive score
        corresponds to label 1 and a negative score to label -1.

        Raises:
            ValueError: If a non-empty ``X`` is not 2-D.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.shape[0] == 0:
            return np.empty(0, dtype=np.float64)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (samples, features)")
        vx = X @ self.V                     # (samples, k): sum_i V[i, f] * x[i]
        v2x2 = (X ** 2) @ (self.V ** 2)     # (samples, k): sum_i V[i, f]^2 * x[i]^2
        return self.w0 + X @ self.w + 0.5 * (vx ** 2 - v2x2).sum(axis=1)

    def test(self, X: np.ndarray, Y: np.ndarray) -> float:
        """Return the mean logistic loss of the model on ``(X, Y)``."""
        margins = np.asarray(Y, dtype=np.float64) * self.predict(X)
        # -log(sigmoid(m)) == log(1 + exp(-m)); logaddexp evaluates it
        # without overflow or log(0) for margins of any magnitude.
        return float(np.mean(np.logaddexp(0.0, -margins)))

    def _predict(self, x: np.ndarray) -> float:
        """Return the raw model score of a single sample ``x``."""
        x = np.asarray(x, dtype=np.float64)
        vx = x @ self.V                     # (k,)
        v2x2 = (x ** 2) @ (self.V ** 2)     # (k,)
        # 0.5 * sum_f (vx_f^2 - v2x2_f) equals the sum over all feature
        # pairs i < j of <V[i], V[j]> * x[i] * x[j].
        return float(self.w0 + np.dot(self.w, x) + 0.5 * np.sum(vx ** 2 - v2x2))

    def _update(self, x, y, p) -> None:
        """Apply one gradient step for sample ``x`` with label ``y``.

        Args:
            x: Feature vector of the sample.
            y: Label of the sample, -1 or 1.
            p: Raw model score of ``x`` under the current parameters.
        """
        x = np.asarray(x, dtype=np.float64)
        vx = x @ self.V
        # Derivative of log(1 + exp(-y * p)) with respect to the score p.
        delta = y * (self._sigmoid(y * p) - 1.0)

        self.w0 -= self.eta * (delta + 2 * self.lamb * self.w0)
        self.w -= self.eta * (delta * x + 2 * self.lamb * self.w)
        # d(score)/dV[i, f] = x[i] * (vx[f] - x[i] * V[i, f]), evaluated for
        # all entries at once from the pre-update V.
        col = x[:, None]
        h = col * (vx[None, :] - col * self.V)
        self.V -= self.eta * (delta * h + 2 * self.lamb * self.V)
def main():
    """Train the model on scikit-learn's breast-cancer data and print a confusion matrix.

    The 0/1 targets of the dataset are mapped to -1/+1, features are
    standardised with statistics from the training split only, and the
    confusion matrix is computed on the held-out split.
    """
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import StandardScaler

    data = load_breast_cancer()
    X, y = data["data"], data["target"]
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # The model expects labels in {-1, +1}.
    y_train = [-1 if label == 0 else 1 for label in y_train]
    y_test = [-1 if label == 0 else 1 for label in y_test]

    sc = StandardScaler()
    sc.fit(X_train)
    X_train = sc.transform(X_train)
    X_test = sc.transform(X_test)

    model = FactorizationMachines(epoch=100)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    # predict() returns raw scores, not probabilities, so the decision
    # boundary is 0 (equivalent to sigmoid(score) > 0.5).
    print(confusion_matrix(y_test, [1 if p > 0 else -1 for p in y_pred]))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment