Last active
April 23, 2018 15:57
-
-
Save sylvainma/946ac9a46a4cd1d0f42ac1ff7fc11c55 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(MASS) | |
distXY <- function(X, Y, M = diag(dim(X)[2])) {
  # Pairwise SQUARED generalized (Mahalanobis-style) distances between the
  # rows of X and the rows of Y under the metric matrix M:
  #   D2xy[i, j] = (x_i - y_j)' M (x_i - y_j)
  # expanded as  x'Mx - 2 x'My + y'My  so everything is matrix algebra.
  #
  # Args:
  #   X, Y  numeric matrices (rows = observations); plain vectors are
  #         promoted to single-row matrices.
  #   M     symmetric positive-definite metric (default: identity, i.e.
  #         ordinary squared Euclidean distance). Evaluated lazily, so the
  #         default works even when X arrives as a vector.
  # Returns: an nrow(X) x nrow(Y) matrix of squared distances.
  if (!is.matrix(X)) {
    X <- matrix(X, nrow = 1)
  }
  if (!is.matrix(Y)) {
    Y <- matrix(Y, nrow = 1)
  }
  nx <- nrow(X)
  ny <- nrow(Y)
  # chol(M) gives upper-triangular R with M = t(R) %*% R; hoisted so it is
  # computed once instead of twice (original called chol(M) for both h.x
  # and h.y).
  R <- chol(M)
  h.x <- rowSums((X %*% t(R))^2)  # h.x[i] = x_i' M x_i
  h.y <- rowSums((Y %*% t(R))^2)  # h.y[j] = y_j' M y_j
  ones.x <- rep(1, nx)
  ones.y <- rep(1, ny)
  D2xy <- h.x %*% t(ones.y) - 2 * X %*% M %*% t(Y) + ones.x %*% t(h.y)
  # Explicit return: the original relied on the invisible value of the final
  # assignment, which works for callers but prints nothing at top level.
  D2xy
}
# Adaptive K-means: K-means with a per-cluster metric matrix V[,,k] used as a
# Mahalanobis-style distance, re-estimated from each cluster's covariance and
# renormalized to a fixed volume rho[k]. Runs n_ess random restarts and keeps
# the trial with the lowest total within-cluster cost J.
#
# Args:
#   X       numeric matrix, n observations x p variables.
#   K       number of clusters.
#   rho     per-cluster volume parameters (default: all ones).
#   n_iter  iteration cap per trial.
#   n_ess   number of random restarts ("essais" = trials).
#   eps     convergence threshold on the squared movement of the centers.
#   min_det smallest acceptable covariance determinant before falling back
#           to a scaled identity metric.
# Returns: list(J, iterations, cluster, mu, V) from the best trial.
kmeans_adapt <- function(X, K, rho=seq(from=1, to=1, length.out=K), n_iter=100, n_ess=1, eps=1e-5, min_det=1e-7) {
  p <- ncol(X)
  n <- nrow(X)
  # Variables keeping the best results across the trials
  J_opt <- NULL
  V_opt <- NULL
  mu_opt <- NULL
  cluster_opt <- NULL
  i_opt <- NULL
  for (ess in 1:n_ess) {
    # Initialization of mu and V: centers are K rows sampled from X,
    # metrics start as identity scaled to volume rho[k].
    mu <- X[sample(nrow(X), size=K),, drop=FALSE]
    V <- array(diag(p), dim=c(p, p, K))
    for (k in 1:K) {
      V[,, k] <- rho[k]^(-1/p) * V[,, k]
    }
    mu.prec <- NULL
    delta <- Inf
    cluster <- NULL
    i <- 0
    # NOTE(review): `i <= n_iter` allows n_iter + 1 passes — off-by-one vs.
    # the parameter name; confirm intended cap.
    while (delta > eps && i <= n_iter) {
      # Parameter update
      # Partition step: distance of every observation to every center under
      # that center's metric. The fill value 9 is an arbitrary placeholder;
      # every column is overwritten below.
      distances <- matrix(9, ncol=K, nrow=n)
      for (k in 1:K) {
        # solve(V[,, k]) = inverse metric; distXY returns SQUARED distances.
        distances[, k] <- distXY(X, mu[k,], solve(V[,, k]))
        # DEBUG: negative distances (numerical round-off can push squared
        # distances slightly below zero)
        if (sum(distances[, k] < 0)) {
          print(distances[(distances[, k] < 0), k])
        }
        distances[(distances[, k] < 0), k] <- 0 # clamp negative distances
        distances[, k] <- sqrt(distances[, k])
      }
      # Assign each observation to its nearest center.
      cluster <- apply(distances, 1, which.min)
      # Centers and covariance matrices
      mu.prec <- mu
      # Per-cluster cost accumulator: a vector of K zeros.
      J <- seq(0, 0, length.out=K)
      for (k in 1:K) {
        # Observations of the current cluster
        # NOTE(review): an empty cluster makes colMeans/cov produce NaN/NA;
        # only the covariance case is caught below — confirm acceptable.
        P_k <- X[(cluster == k),, drop=FALSE]
        # New center
        mu[k,] <- colMeans(P_k)
        # New covariance matrix
        V[,, k] <- cov(P_k)
        # Normalization of the covariance matrix to volume rho[k]
        determinant <- det(V[,, k])
        if (is.nan(determinant) || is.na(determinant) || determinant < min_det) {
          # Degenerate covariance (singleton/empty cluster or near-singular):
          # fall back to the scaled identity used at initialization.
          warning("Production de NaN / NA\n")
          V[,, k] <- rho[k]^(-1/p) * diag(p)
        } else {
          V[,, k] <- (rho[k] * determinant)^(-1/p) * V[,, k]
        }
        # Sum of squared distances to the current cluster's center
        # (distances were square-rooted above, hence the ^2 here).
        J[k] <- sum(distances[(cluster == k), k, drop=FALSE]^2)
      }
      # Total cost: sum over clusters
      J <- sum(J)
      # Convergence measured by total squared movement of the centers.
      delta <- sum((mu - mu.prec)^2)
      i <- i + 1
    }
    # Keep this trial if it beats the best cost seen so far.
    if (is.null(J_opt) || J < J_opt) {
      J_opt <- J
      V_opt <- V
      mu_opt <- mu
      cluster_opt <- cluster
      i_opt <- i
    }
  }
  list(J=J_opt, iterations=i_opt, cluster=cluster_opt, mu=mu_opt, V=V_opt)
}
# Demo: run adaptive k-means on the iris data set -----------------------------
data(iris)

# Features: the four numeric measurements; z: the true Species labels.
X <- iris[, -5]
z <- iris[, 5]

# Fixed seed so the random restarts are reproducible.
set.seed(5)
iris.kmeans_adapt <- kmeans_adapt(as.matrix(X), K = 1, n_ess = 10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment