mathieubray/TensorKPD.R

## TensorKPD.R
library(dplyr)
library(rTensor)
library(igraph)
library(sna)
library(ggplot2)
library(tidyr)

# Alpha NTF. Algorithm 1. Flatz 2013

# Input: Non-negative N-way tensor Y and rank J
# Output: Component matrices A, objective values at each iteration
alphaNTF <- function(Y, A, J, alpha=2, tol=1e-5, max_k=10000){

  N <- Y@num_modes

  normalize.component <- function(Q){

    R <- t(rep(1,nrow(Q))) %*% Q %>%
      as.vector %>%
      diag %>%
      solve

    return(Q %*% R)
  }

  # Normalize to unit length
  A.l <- A %>% map(normalize.component)
  A[1:(N-1)] <- A.l[1:(N-1)]

  k <- 0
  diff <- 1
  prev.diff <- 1
  objs <- rep(0,max_k)

  while (k < max_k & diff > tol & prev.diff > tol){

    k <- k + 1

    Y.hat <- A

    for(n in 1:N){

      Y.hat.n <- Y.hat[[n]] %*% t(khatri_rao_list(Y.hat[-n],reverse=TRUE))

      A[[n]] <- A[[n]] * ((k_unfold(Y,n)@data / Y.hat.n)^alpha %*% khatri_rao_list(A.l[-n],reverse=TRUE))^(1/alpha)

      A.l[[n]] <- A[[n]] %>% normalize.component

      if (n != N){
        A[[n]] <- A.l[[n]]
      }

    }

    diff <- norm(k_unfold(Y,1)@data - A[[1]] %*% t(khatri_rao_list(A[-1],reverse=TRUE)),"F")

    objs[k] <- diff

    if (k >= 2){
      prev.diff <- abs(objs[k] - objs[k-1])
    }

  }

  return(list(A=A, objs=objs[1:k]))

}


# FAST HALS NTF. Algorithm 4. Flatz 2013

# Input: Non-negative N-way tensor Y and rank J
# Output: N component matrices A
fastHALS <- function(Y, A, J, tol=1e-5, max_k=10000){

  N <- Y@num_modes

  I <- array(rep(0,J^3),dim=c(J,J,J)) %>% as.tensor
  for (j in 1:J){
    I[j,j,j] <- 1
  }

  normalize.matrix.columns <- function(Q){
    return(apply(Q, 2, function(x){ x/sqrt(sum(x^2)) }))
  }

  #Normalize
  A.l <- A %>% map(normalize.matrix.columns)
  A[1:(N-1)] <- A.l[1:(N-1)]

  k <- 0
  diff <- 1
  prev.diff <- 1
  objs <- rep(0,max_k)

  T.1 <-  map(A, function(X) { return(t(X) %*% X) }) %>% hadamard_list

  while (k <= max_k & diff > tol & prev.diff > tol){

    k <- k+1

    gamma <- t(A[[N]]) %*% A[[N]] %>% diag

    for(n in 1:N){
      if (n == N){
        gamma <- rep(1,J)
      }

      T.2 <- k_unfold(Y,n)@data %*% khatri_rao_list(A[-n],reverse=TRUE)

      T.3 <- T.1 / (t(A[[n]]) %*% A[[n]])

      for(j in 1:J){

        b <- A[[n]][,j] * gamma[j] + T.2[,j] - A[[n]] %*% T.3[,j]

        b <- ifelse(b>0,b,0)

        if (n != N){
          b <- b %>% normalize.matrix.columns
        }

        A[[n]][,j] <- b
      }

      T.1 <- T.3 *(t(A[[n]]) %*% A[[n]])

    }

    Y.hat <- I %>% ttl(A,ms=c(1,2,3))

    diff <- fnorm(Y - Y.hat)

    objs[k] <- diff

    if (k >= 2){
      prev.diff <- abs(objs[k] - objs[k-1])
    }
  }

  return(list(A=A, objs=objs[1:k]))
}


# Generate Adjacency Tensor

set.seed(90707)

per_mr <- 4 # Number of Nodes Arriving between each Match Run
num_mrs <- 25 # Number of Match Runs
num_nodes <- per_mr * num_mrs # Number of Total Nodes

prob_type <- c(0.4,0.1,0.1,0.4)
prob_connect <- matrix(c(0.5,0.5,0.25,0.25,0.25,0.25,0.05,0.05,0.5,0.5,0.25,0.25,0.25,0.25,0.05,0.05),nrow=4)

node_types <- sample(1:length(prob_type),num_nodes,replace=T,prob=prob_type) # Randomly Assign Type to Each Node

crossmatch_prob <- c(0.9,0.75,0.75,0.5) # Probability of Connection Remaining After Evaluation


matrix.list <- array(0,dim=c(num_nodes,num_nodes,num_mrs)) # Initialize a Matrix List

old.matrix <- matrix(0,nrow=num_nodes,ncol=num_nodes) # Initialize First Matrix

for (it in 1:num_mrs){

  new.matrix <- old.matrix

  # Renege

  if (it > 1){
    for (i in 1:(per_mr*(it-1))){
      for (j in 1:(per_mr*(it-1))){
        if (new.matrix[i,j] == 1){

          q <- rbinom(1,1,crossmatch_prob[node_types[j]])

          new.matrix[i,j] <- q
        }
      }
    }
  }

  # New Connections

  for (i in 1:(per_mr*it)){
    for (j in 1:(per_mr*it)){
      if (i != j & !(i <= per_mr*(it-1) & j <= per_mr*(it-1))){
        q <- rbinom(1,1,prob_connect[node_types[i],node_types[j]])

        new.matrix[i,j] <- q
      }
    }
  }


  matrix.list[,,it] <- new.matrix

  old.matrix <- new.matrix

}


kpd <- as.tensor(matrix.list)


# Perform Decompositions

R <- 2

init <- list(matrix(runif(R * num_nodes,0,1),nrow=num_nodes,ncol=R),
                  matrix(runif(R * num_nodes,0,1),nrow=num_nodes,ncol=R),
                  matrix(runif(R * num_mrs,0,1),nrow=num_mrs,ncol=R))

maxits <- 1000
tol <- 1e-5

decomp.cp <- cp(kpd, num_components = R, max_iter = maxits, tol = tol)
decomp.hals <- fastHALS(kpd, init, R, max_k = maxits, tol = tol)
decomp.alpha <- alphaNTF(kpd, init, R, alpha = 1.5, max_k = maxits, tol = tol)


# CP
B <- decomp.cp$U
norms <- decomp.cp
value <- decomp.cp$fnorm_resid

# HALS - Uncomment to plot
B <- decomp.hals$A
norms <- decomp.hals$objs
value <- decomp.hals$objs %>% last

# Alpha - Uncomment to plot
B <- decomp.alpha$A
norms <- decomp.alpha$objs
value <- decomp.alpha$objs %>% last


# Component Plot

node.frame <- rbind(data.frame(B[[1]],Node_Type=as.factor(node_types),u="Donor"),
                    data.frame(B[[2]],Node_Type=as.factor(node_types),u="Candidate"))


ggplot(data=node.frame, aes(x=X1,y=X2,color=Node_Type)) +
  facet_wrap(~u) +
  geom_point(size=4,alpha=0.5) +
  theme_bw() +
  xlab("Component 1") +
  ylab("Component 2") +
  ggtitle("Component Plot")


## Convergence Plot

norm.table <- data.frame(Iteration=1:length(norms),Objective=norms) %>%
  filter(Iteration > 5)

ggplot(data=norm.table,aes(x=Iteration,y=Objective)) +
  geom_path() +
  theme_bw() +
  ggtitle("Convergence Plot") +
  geom_hline(yintercept=value,color="red",linetype="dashed") +
  geom_label(aes(label=paste0(round(value,3)),x=10,y=value, color="red"),show.legend=FALSE)
	library(dplyr)
	library(rTensor)
	library(igraph)
	library(sna)
	library(ggplot2)
	library(tidyr)

	# Alpha NTF. Algorithm 1. Flatz 2013

	# Input: Non-negative N-way tensor Y and rank J
	# Output: Component matrices A, objective values at each iteration
	alphaNTF <- function(Y, A, J, alpha=2, tol=1e-5, max_k=10000){

	N <- Y@num_modes

	normalize.component <- function(Q){

	R <- t(rep(1,nrow(Q))) %*% Q %>%
	as.vector %>%
	diag %>%
	solve

	return(Q %*% R)
	}

	# Normalize to unit length
	A.l <- A %>% map(normalize.component)
	A[1:(N-1)] <- A.l[1:(N-1)]

	k <- 0
	diff <- 1
	prev.diff <- 1
	objs <- rep(0,max_k)

	while (k < max_k & diff > tol & prev.diff > tol){

	k <- k + 1

	Y.hat <- A

	for(n in 1:N){

	Y.hat.n <- Y.hat[[n]] %*% t(khatri_rao_list(Y.hat[-n],reverse=TRUE))

	A[[n]] <- A[[n]] * ((k_unfold(Y,n)@data / Y.hat.n)^alpha %*% khatri_rao_list(A.l[-n],reverse=TRUE))^(1/alpha)

	A.l[[n]] <- A[[n]] %>% normalize.component

	if (n != N){
	A[[n]] <- A.l[[n]]
	}

	}

	diff <- norm(k_unfold(Y,1)@data - A[[1]] %*% t(khatri_rao_list(A[-1],reverse=TRUE)),"F")

	objs[k] <- diff

	if (k >= 2){
	prev.diff <- abs(objs[k] - objs[k-1])
	}

	}

	return(list(A=A, objs=objs[1:k]))

	}


	# FAST HALS NTF. Algorithm 4. Flatz 2013

	# Input: Non-negative N-way tensor Y and rank J
	# Output: N component matrices A
	fastHALS <- function(Y, A, J, tol=1e-5, max_k=10000){

	N <- Y@num_modes

	I <- array(rep(0,J^3),dim=c(J,J,J)) %>% as.tensor
	for (j in 1:J){
	I[j,j,j] <- 1
	}

	normalize.matrix.columns <- function(Q){
	return(apply(Q, 2, function(x){ x/sqrt(sum(x^2)) }))
	}

	#Normalize
	A.l <- A %>% map(normalize.matrix.columns)
	A[1:(N-1)] <- A.l[1:(N-1)]

	k <- 0
	diff <- 1
	prev.diff <- 1
	objs <- rep(0,max_k)

	T.1 <- map(A, function(X) { return(t(X) %*% X) }) %>% hadamard_list

	while (k <= max_k & diff > tol & prev.diff > tol){

	k <- k+1

	gamma <- t(A[[N]]) %*% A[[N]] %>% diag

	for(n in 1:N){
	if (n == N){
	gamma <- rep(1,J)
	}

	T.2 <- k_unfold(Y,n)@data %*% khatri_rao_list(A[-n],reverse=TRUE)

	T.3 <- T.1 / (t(A[[n]]) %*% A[[n]])

	for(j in 1:J){

	b <- A[[n]][,j] * gamma[j] + T.2[,j] - A[[n]] %*% T.3[,j]

	b <- ifelse(b>0,b,0)

	if (n != N){
	b <- b %>% normalize.matrix.columns
	}

	A[[n]][,j] <- b
	}

	T.1 <- T.3 (t(A[[n]]) %% A[[n]])

	}

	Y.hat <- I %>% ttl(A,ms=c(1,2,3))

	diff <- fnorm(Y - Y.hat)

	objs[k] <- diff

	if (k >= 2){
	prev.diff <- abs(objs[k] - objs[k-1])
	}
	}

	return(list(A=A, objs=objs[1:k]))
	}


	# Generate Adjacency Tensor

	set.seed(90707)

	per_mr <- 4 # Number of Nodes Arriving between each Match Run
	num_mrs <- 25 # Number of Match Runs
	num_nodes <- per_mr * num_mrs # Number of Total Nodes

	prob_type <- c(0.4,0.1,0.1,0.4)
	prob_connect <- matrix(c(0.5,0.5,0.25,0.25,0.25,0.25,0.05,0.05,0.5,0.5,0.25,0.25,0.25,0.25,0.05,0.05),nrow=4)

	node_types <- sample(1:length(prob_type),num_nodes,replace=T,prob=prob_type) # Randomly Assign Type to Each Node

	crossmatch_prob <- c(0.9,0.75,0.75,0.5) # Probability of Connection Remaining After Evaluation



	matrix.list <- array(0,dim=c(num_nodes,num_nodes,num_mrs)) # Initialize a Matrix List

	old.matrix <- matrix(0,nrow=num_nodes,ncol=num_nodes) # Initialize First Matrix

	for (it in 1:num_mrs){

	new.matrix <- old.matrix

	# Renege

	if (it > 1){
	for (i in 1:(per_mr*(it-1))){
	for (j in 1:(per_mr*(it-1))){
	if (new.matrix[i,j] == 1){

	q <- rbinom(1,1,crossmatch_prob[node_types[j]])

	new.matrix[i,j] <- q
	}
	}
	}
	}

	# New Connections

	for (i in 1:(per_mr*it)){
	for (j in 1:(per_mr*it)){
	if (i != j & !(i <= per_mr(it-1) & j <= per_mr(it-1))){
	q <- rbinom(1,1,prob_connect[node_types[i],node_types[j]])

	new.matrix[i,j] <- q
	}
	}
	}



	matrix.list[,,it] <- new.matrix

	old.matrix <- new.matrix

	}


	kpd <- as.tensor(matrix.list)





	# Perform Decompositions

	R <- 2

	init <- list(matrix(runif(R * num_nodes,0,1),nrow=num_nodes,ncol=R),
	matrix(runif(R * num_nodes,0,1),nrow=num_nodes,ncol=R),
	matrix(runif(R * num_mrs,0,1),nrow=num_mrs,ncol=R))

	maxits <- 1000
	tol <- 1e-5

	decomp.cp <- cp(kpd, num_components = R, max_iter = maxits, tol = tol)
	decomp.hals <- fastHALS(kpd, init, R, max_k = maxits, tol = tol)
	decomp.alpha <- alphaNTF(kpd, init, R, alpha = 1.5, max_k = maxits, tol = tol)


	# CP
	B <- decomp.cp$U
	norms <- decomp.cp
	value <- decomp.cp$fnorm_resid

	# HALS - Uncomment to plot
	B <- decomp.hals$A
	norms <- decomp.hals$objs
	value <- decomp.hals$objs %>% last

	# Alpha - Uncomment to plot
	B <- decomp.alpha$A
	norms <- decomp.alpha$objs
	value <- decomp.alpha$objs %>% last


	# Component Plot

	node.frame <- rbind(data.frame(B[[1]],Node_Type=as.factor(node_types),u="Donor"),
	data.frame(B[[2]],Node_Type=as.factor(node_types),u="Candidate"))


	ggplot(data=node.frame, aes(x=X1,y=X2,color=Node_Type)) +
	facet_wrap(~u) +
	geom_point(size=4,alpha=0.5) +
	theme_bw() +
	xlab("Component 1") +
	ylab("Component 2") +
	ggtitle("Component Plot")



	## Convergence Plot

	norm.table <- data.frame(Iteration=1:length(norms),Objective=norms) %>%
	filter(Iteration > 5)

	ggplot(data=norm.table,aes(x=Iteration,y=Objective)) +
	geom_path() +
	theme_bw() +
	ggtitle("Convergence Plot") +
	geom_hline(yintercept=value,color="red",linetype="dashed") +
	geom_label(aes(label=paste0(round(value,3)),x=10,y=value, color="red"),show.legend=FALSE)