Skip to content

Instantly share code, notes, and snippets.

@mathieubray
Created August 4, 2017 12:57
Show Gist options
  • Save mathieubray/d83ce9c13fcb60f723f957c13ad85ac5 to your computer and use it in GitHub Desktop.
Save mathieubray/d83ce9c13fcb60f723f957c13ad85ac5 to your computer and use it in GitHub Desktop.
Tensor Decompositions as applied to Simulated KPD Data
library(dplyr)
library(rTensor)
library(igraph)
library(sna)
library(ggplot2)
library(tidyr)
# Alpha NTF. Algorithm 1. Flatz 2013
# Input: Non-negative N-way tensor Y and rank J
# Output: Component matrices A, objective values at each iteration
alphaNTF <- function(Y, A, J, alpha=2, tol=1e-5, max_k=10000){
N <- Y@num_modes
normalize.component <- function(Q){
R <- t(rep(1,nrow(Q))) %*% Q %>%
as.vector %>%
diag %>%
solve
return(Q %*% R)
}
# Normalize to unit length
A.l <- A %>% map(normalize.component)
A[1:(N-1)] <- A.l[1:(N-1)]
k <- 0
diff <- 1
prev.diff <- 1
objs <- rep(0,max_k)
while (k < max_k & diff > tol & prev.diff > tol){
k <- k + 1
Y.hat <- A
for(n in 1:N){
Y.hat.n <- Y.hat[[n]] %*% t(khatri_rao_list(Y.hat[-n],reverse=TRUE))
A[[n]] <- A[[n]] * ((k_unfold(Y,n)@data / Y.hat.n)^alpha %*% khatri_rao_list(A.l[-n],reverse=TRUE))^(1/alpha)
A.l[[n]] <- A[[n]] %>% normalize.component
if (n != N){
A[[n]] <- A.l[[n]]
}
}
diff <- norm(k_unfold(Y,1)@data - A[[1]] %*% t(khatri_rao_list(A[-1],reverse=TRUE)),"F")
objs[k] <- diff
if (k >= 2){
prev.diff <- abs(objs[k] - objs[k-1])
}
}
return(list(A=A, objs=objs[1:k]))
}
# FAST HALS NTF. Algorithm 4. Flatz 2013
# Input: Non-negative N-way tensor Y and rank J
# Output: N component matrices A
fastHALS <- function(Y, A, J, tol=1e-5, max_k=10000){
N <- Y@num_modes
I <- array(rep(0,J^3),dim=c(J,J,J)) %>% as.tensor
for (j in 1:J){
I[j,j,j] <- 1
}
normalize.matrix.columns <- function(Q){
return(apply(Q, 2, function(x){ x/sqrt(sum(x^2)) }))
}
#Normalize
A.l <- A %>% map(normalize.matrix.columns)
A[1:(N-1)] <- A.l[1:(N-1)]
k <- 0
diff <- 1
prev.diff <- 1
objs <- rep(0,max_k)
T.1 <- map(A, function(X) { return(t(X) %*% X) }) %>% hadamard_list
while (k <= max_k & diff > tol & prev.diff > tol){
k <- k+1
gamma <- t(A[[N]]) %*% A[[N]] %>% diag
for(n in 1:N){
if (n == N){
gamma <- rep(1,J)
}
T.2 <- k_unfold(Y,n)@data %*% khatri_rao_list(A[-n],reverse=TRUE)
T.3 <- T.1 / (t(A[[n]]) %*% A[[n]])
for(j in 1:J){
b <- A[[n]][,j] * gamma[j] + T.2[,j] - A[[n]] %*% T.3[,j]
b <- ifelse(b>0,b,0)
if (n != N){
b <- b %>% normalize.matrix.columns
}
A[[n]][,j] <- b
}
T.1 <- T.3 *(t(A[[n]]) %*% A[[n]])
}
Y.hat <- I %>% ttl(A,ms=c(1,2,3))
diff <- fnorm(Y - Y.hat)
objs[k] <- diff
if (k >= 2){
prev.diff <- abs(objs[k] - objs[k-1])
}
}
return(list(A=A, objs=objs[1:k]))
}
# Generate Adjacency Tensor
set.seed(90707)
per_mr <- 4 # Number of Nodes Arriving between each Match Run
num_mrs <- 25 # Number of Match Runs
num_nodes <- per_mr * num_mrs # Number of Total Nodes
prob_type <- c(0.4,0.1,0.1,0.4)
prob_connect <- matrix(c(0.5,0.5,0.25,0.25,0.25,0.25,0.05,0.05,0.5,0.5,0.25,0.25,0.25,0.25,0.05,0.05),nrow=4)
node_types <- sample(1:length(prob_type),num_nodes,replace=T,prob=prob_type) # Randomly Assign Type to Each Node
crossmatch_prob <- c(0.9,0.75,0.75,0.5) # Probability of Connection Remaining After Evaluation
matrix.list <- array(0,dim=c(num_nodes,num_nodes,num_mrs)) # Initialize a Matrix List
old.matrix <- matrix(0,nrow=num_nodes,ncol=num_nodes) # Initialize First Matrix
for (it in 1:num_mrs){
new.matrix <- old.matrix
# Renege
if (it > 1){
for (i in 1:(per_mr*(it-1))){
for (j in 1:(per_mr*(it-1))){
if (new.matrix[i,j] == 1){
q <- rbinom(1,1,crossmatch_prob[node_types[j]])
new.matrix[i,j] <- q
}
}
}
}
# New Connections
for (i in 1:(per_mr*it)){
for (j in 1:(per_mr*it)){
if (i != j & !(i <= per_mr*(it-1) & j <= per_mr*(it-1))){
q <- rbinom(1,1,prob_connect[node_types[i],node_types[j]])
new.matrix[i,j] <- q
}
}
}
matrix.list[,,it] <- new.matrix
old.matrix <- new.matrix
}
kpd <- as.tensor(matrix.list)
# Perform Decompositions
R <- 2
init <- list(matrix(runif(R * num_nodes,0,1),nrow=num_nodes,ncol=R),
matrix(runif(R * num_nodes,0,1),nrow=num_nodes,ncol=R),
matrix(runif(R * num_mrs,0,1),nrow=num_mrs,ncol=R))
maxits <- 1000
tol <- 1e-5
decomp.cp <- cp(kpd, num_components = R, max_iter = maxits, tol = tol)
decomp.hals <- fastHALS(kpd, init, R, max_k = maxits, tol = tol)
decomp.alpha <- alphaNTF(kpd, init, R, alpha = 1.5, max_k = maxits, tol = tol)
# CP
B <- decomp.cp$U
norms <- decomp.cp
value <- decomp.cp$fnorm_resid
# HALS - Uncomment to plot
B <- decomp.hals$A
norms <- decomp.hals$objs
value <- decomp.hals$objs %>% last
# Alpha - Uncomment to plot
B <- decomp.alpha$A
norms <- decomp.alpha$objs
value <- decomp.alpha$objs %>% last
# Component Plot
node.frame <- rbind(data.frame(B[[1]],Node_Type=as.factor(node_types),u="Donor"),
data.frame(B[[2]],Node_Type=as.factor(node_types),u="Candidate"))
ggplot(data=node.frame, aes(x=X1,y=X2,color=Node_Type)) +
facet_wrap(~u) +
geom_point(size=4,alpha=0.5) +
theme_bw() +
xlab("Component 1") +
ylab("Component 2") +
ggtitle("Component Plot")
## Convergence Plot
norm.table <- data.frame(Iteration=1:length(norms),Objective=norms) %>%
filter(Iteration > 5)
ggplot(data=norm.table,aes(x=Iteration,y=Objective)) +
geom_path() +
theme_bw() +
ggtitle("Convergence Plot") +
geom_hline(yintercept=value,color="red",linetype="dashed") +
geom_label(aes(label=paste0(round(value,3)),x=10,y=value, color="red"),show.legend=FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment