# mikelove/tsne_snapping.R

## tsne_snapping.R
# Simulation: how does the separation of two sub-populations in a t-SNE
# embedding respond as their true separation grows?
#
# For each of `niter` values of `mu`, simulate `n` points in `m` dimensions
# (two groups of n/2 rows whose means are -mu/2 and +mu/2 in the first
# `m_inform` columns; the remaining columns are standard-normal noise), embed
# them with Rtsne, and record
#   intradist: mean distance of embedded points to their own group centroid
#   interdist: distance between the two group centroids in the embedding
library(Rtsne)
library(ggplot2)

n <- 50
m <- 40
m_inform <- 10
# see comment below on raising perplexity to 16 for n=50 and 30 for n=100
perplexity <- 10
niter <- 200

# Each group is built from n/2 rows, so an odd n cannot be split evenly.
if (n %% 2 != 0) {
  stop("n must be even so it can be split into two equal groups",
       call. = FALSE)
}
half <- n / 2

set.seed(1)
intradist <- numeric(niter)
interdist <- numeric(niter)
mus <- seq(from = 0, to = 3, length.out = niter)
cols <- rep(1:2, each = half)

# Euclidean distance from every row of `pts` to the point `center`.
dist_to_center <- function(pts, center) {
  # t(pts) has one column per point, so `center` recycles down each column.
  sqrt(colSums((t(pts) - center)^2))
}

for (i in seq_len(niter)) {
  mu <- mus[i]
  cat(i, "")

  # Draw order (group 1, group 2, noise) is kept fixed so that results under
  # set.seed(1) do not change.
  informative <- rbind(
    matrix(rnorm(half * m_inform, -mu / 2), nrow = half, ncol = m_inform),
    matrix(rnorm(half * m_inform, mu / 2), nrow = half, ncol = m_inform)
  )
  noise <- matrix(rnorm(n * (m - m_inform)), nrow = n)
  x <- cbind(informative, noise)

  res <- Rtsne(x, perplexity = perplexity)

  #plot(res$Y, col=cols, pch=20, xlab="", ylab="")
  # drop = FALSE keeps a matrix even if a group has a single row.
  emb1 <- res$Y[cols == 1, , drop = FALSE]
  emb2 <- res$Y[cols == 2, , drop = FALSE]
  mid1 <- colMeans(emb1)
  mid2 <- colMeans(emb2)
  intradist[i] <- mean(c(dist_to_center(emb1, mid1),
                         dist_to_center(emb2, mid2)))
  interdist[i] <- sqrt(sum((mid1 - mid2)^2))
}

# make plot
# The group means differ by `mu` in each of the `m_inform` informative
# columns, so the distance between the two centers is sqrt(m_inform) * mu.
dat <- data.frame(
  mu = sqrt(m_inform) * rep(mus, 2),
  dist = c(intradist, interdist),
  type = rep(c("intra", "inter"), each = niter)
)
print(
  ggplot(dat, aes(x = mu, y = dist, colour = type)) +
    geom_point() +
    geom_smooth() +
    xlab("distance between sub-population centers") +
    ylab("distance recovered by t-SNE") +
    ggtitle(paste(n, "points"))
)
	# Simulation: how does the separation of two sub-populations in a t-SNE
	# embedding respond as their true separation grows?
	#
	# For each of `niter` values of `mu`, simulate `n` points in `m` dimensions
	# (two groups of n/2 rows whose means are -mu/2 and +mu/2 in the first
	# `m_inform` columns; the remaining columns are standard-normal noise),
	# embed them with Rtsne, and record
	#   intradist: mean distance of embedded points to their own group centroid
	#   interdist: distance between the two group centroids in the embedding
	library(Rtsne)
	library(ggplot2)

	n <- 50
	m <- 40
	m_inform <- 10
	# see comment below on raising perplexity to 16 for n=50 and 30 for n=100
	perplexity <- 10
	niter <- 200

	# Each group is built from n/2 rows, so an odd n cannot be split evenly.
	if (n %% 2 != 0) {
	  stop("n must be even so it can be split into two equal groups",
	       call. = FALSE)
	}
	half <- n / 2

	set.seed(1)
	intradist <- numeric(niter)
	interdist <- numeric(niter)
	mus <- seq(from = 0, to = 3, length.out = niter)
	cols <- rep(1:2, each = half)

	# Euclidean distance from every row of `pts` to the point `center`.
	dist_to_center <- function(pts, center) {
	  # t(pts) has one column per point, so `center` recycles down each column.
	  sqrt(colSums((t(pts) - center)^2))
	}

	for (i in seq_len(niter)) {
	  mu <- mus[i]
	  cat(i, "")

	  # Draw order (group 1, group 2, noise) is kept fixed so that results
	  # under set.seed(1) do not change.
	  informative <- rbind(
	    matrix(rnorm(half * m_inform, -mu / 2), nrow = half, ncol = m_inform),
	    matrix(rnorm(half * m_inform, mu / 2), nrow = half, ncol = m_inform)
	  )
	  noise <- matrix(rnorm(n * (m - m_inform)), nrow = n)
	  x <- cbind(informative, noise)

	  res <- Rtsne(x, perplexity = perplexity)

	  #plot(res$Y, col=cols, pch=20, xlab="", ylab="")
	  # drop = FALSE keeps a matrix even if a group has a single row.
	  emb1 <- res$Y[cols == 1, , drop = FALSE]
	  emb2 <- res$Y[cols == 2, , drop = FALSE]
	  mid1 <- colMeans(emb1)
	  mid2 <- colMeans(emb2)
	  intradist[i] <- mean(c(dist_to_center(emb1, mid1),
	                         dist_to_center(emb2, mid2)))
	  interdist[i] <- sqrt(sum((mid1 - mid2)^2))
	}

	# make plot
	# The group means differ by `mu` in each of the `m_inform` informative
	# columns, so the distance between the two centers is sqrt(m_inform) * mu.
	dat <- data.frame(
	  mu = sqrt(m_inform) * rep(mus, 2),
	  dist = c(intradist, interdist),
	  type = rep(c("intra", "inter"), each = niter)
	)
	print(
	  ggplot(dat, aes(x = mu, y = dist, colour = type)) +
	    geom_point() +
	    geom_smooth() +
	    xlab("distance between sub-population centers") +
	    ylab("distance recovered by t-SNE") +
	    ggtitle(paste(n, "points"))
	)