Skip to content

Instantly share code, notes, and snippets.

@sylvainma
Last active April 23, 2018 15:57
Show Gist options
Save sylvainma/946ac9a46a4cd1d0f42ac1ff7fc11c55 to your computer and use it in GitHub Desktop.
library(MASS)
distXY <- function(X, Y, M=diag(dim(X)[2]))
{
  # Pairwise squared distances between the rows of X and the rows of Y in the
  # metric defined by the positive-definite matrix M (default: identity, i.e.
  # plain squared Euclidean distances).
  #
  # X, Y  numeric matrices (or vectors, treated as single rows) with the same
  #       number of columns.
  # M     p x p positive-definite metric matrix.
  #
  # Returns an nx x ny matrix D2 with
  #   D2[i, j] = (X[i,] - Y[j,]) %*% M %*% (X[i,] - Y[j,])
  # (values can be very slightly negative due to floating-point rounding).
  if (!is.matrix(X))
  {
    X <- matrix(X, nrow=1)
  }
  if (!is.matrix(Y))
  {
    Y <- matrix(Y, nrow=1)
  }
  # Cholesky factor R (upper triangular) with M = t(R) %*% R; computed once
  # instead of once per quadratic form.
  R <- chol(M)
  # h.x[i] = X[i,] %*% M %*% X[i,] since rowSums((X %*% t(R))^2) = diag(X M X')
  h.x <- rowSums((X %*% t(R))^2)
  h.y <- rowSums((Y %*% t(R))^2)
  # Expand (x - y)' M (x - y) = x'Mx - 2 x'My + y'My for all pairs at once.
  # Returned explicitly: the original ended on an assignment, which returns
  # its value invisibly.
  outer(h.x, h.y, "+") - 2 * X %*% M %*% t(Y)
}
kmeans_adapt <- function(X, K, rho=seq(from=1, to=1, length.out=K), n_iter=100, n_ess=1, eps=1e-5, min_det=1e-7) {
  # Adaptive k-means: each cluster k carries its own metric matrix V[,,k],
  # normalised so that det(V[,,k]) = 1 / rho[k]; points are assigned to the
  # nearest centre under the induced Mahalanobis distance (via distXY).
  #
  # X        numeric matrix (n x p) of observations.
  # K        number of clusters.
  # rho      per-cluster volume parameters (default: all 1).
  # n_iter   maximum number of iterations per run.
  # n_ess    number of random restarts; the run with the lowest J is kept.
  # eps      convergence threshold on the squared displacement of the centres.
  # min_det  smallest acceptable determinant before resetting V[,,k].
  #
  # Returns a list: J (final criterion), iterations, cluster (assignments),
  # mu (K x p centres), V (p x p x K metric matrices).
  p <- ncol(X)
  n <- nrow(X)
  # Best results kept across the restarts
  J_opt <- NULL
  V_opt <- NULL
  mu_opt <- NULL
  cluster_opt <- NULL
  i_opt <- NULL
  for (ess in seq_len(n_ess)) {
    # Initialisation: mu from K random rows of X, V as scaled identities
    mu <- X[sample(n, size=K),, drop=FALSE]
    V <- array(diag(p), dim=c(p, p, K))
    for (k in seq_len(K)) {
      V[,, k] <- rho[k]^(-1/p) * V[,, k]
    }
    mu.prec <- NULL
    delta <- Inf
    cluster <- NULL
    i <- 0
    while (delta > eps && i <= n_iter) {
      # --- Assignment step ---
      # (was matrix(9, ...): a stray debug fill value; 0 is the neutral init)
      distances <- matrix(0, ncol=K, nrow=n)
      for (k in seq_len(K)) {
        d2 <- distXY(X, mu[k,], solve(V[,, k]))
        # Floating-point rounding can make squared distances slightly
        # negative; clamp at 0 before taking the square root.
        distances[, k] <- sqrt(pmax(d2, 0))
      }
      cluster <- apply(distances, 1, which.min)
      # --- Update step: centres and covariance matrices ---
      mu.prec <- mu
      J <- numeric(K)
      for (k in seq_len(K)) {
        # Observations currently assigned to cluster k
        P_k <- X[(cluster == k),, drop=FALSE]
        if (nrow(P_k) == 0) {
          # Empty cluster: colMeans/cov would produce NaN centres and crash
          # the next assignment step; keep the previous centre and metric.
          next
        }
        # New centre
        mu[k,] <- colMeans(P_k)
        # New covariance matrix
        V[,, k] <- cov(P_k)
        # Normalise so that det(V[,,k]) = 1 / rho[k]; reset to a scaled
        # identity when the covariance is degenerate (NA/NaN or ~singular).
        determinant <- det(V[,, k])
        if (is.nan(determinant) || is.na(determinant) || determinant < min_det) {
          warning("Production de NaN / NA\n")
          V[,, k] <- rho[k]^(-1/p) * diag(p)
        } else {
          V[,, k] <- (rho[k] * determinant)^(-1/p) * V[,, k]
        }
        # Within-cluster sum of squared distances to the centre
        J[k] <- sum(distances[(cluster == k), k]^2)
      }
      # Total criterion and centre displacement for the convergence test
      J <- sum(J)
      delta <- sum((mu - mu.prec)^2)
      i <- i + 1
    }
    # Keep the best restart (lowest criterion)
    if (is.null(J_opt) || J < J_opt) {
      J_opt <- J
      V_opt <- V
      mu_opt <- mu
      cluster_opt <- cluster
      i_opt <- i
    }
  }
  list(J=J_opt, iterations=i_opt, cluster=cluster_opt, mu=mu_opt, V=V_opt)
}
# Demo: run adaptive k-means on the iris data set.
data(iris)
# Four numeric measurements (sepal/petal length and width)
X <- iris[,1:4]
# True species labels; not used by the clustering itself
z <- iris[,5]
# Fixed seed so the random restarts inside kmeans_adapt are reproducible
set.seed(5)
# NOTE(review): K = 1 fits a single cluster with 10 restarts — confirm this
# was intended rather than e.g. K = 3 for the three iris species.
iris.kmeans_adapt <- kmeans_adapt(as.matrix(X), 1, n_ess=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment