@davidckatz
Last active September 13, 2019 15:52
Impute missing 3D landmarks using partial least squares regression
INSTRUCTIONS (see the file below for the R script)
This function imputes missing landmark data in one specimen based on the locations of its recorded landmarks. The predictive relationship is estimated using two-block partial least squares analysis (PLS) on a set of complete-case observations. The complete-case data are divided into two blocks: one includes all landmarks that were recorded in the incomplete specimen, the other all landmarks that were not. PLS identifies pairs of axes that maximize the covariance between the two blocks, and the regression of the missing block on the recorded block's scores is used to predict the missing landmarks in the incomplete case.
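As a rough illustration of the core prediction step only (the full procedure also involves Procrustes alignment and iteration, as described below), a minimal two-block PLS sketch in R might look like the following; object names (Y1 for the recorded block, Y2 for the missing block) are made up for this example:
# toy mean-centered data: 20 complete cases, 6 "recorded" and 3 "missing" variables
set.seed(1)
Y1 <- scale(matrix(rnorm(20*6), 20, 6), scale=FALSE)
Y2 <- scale(matrix(rnorm(20*3), 20, 3), scale=FALSE)
# SVD of the cross-block covariance matrix gives paired axes that maximize covariance
R12 <- t(Y1) %*% Y2 / (nrow(Y1) - 1)
U <- svd(R12)$u
# scores of the recorded block on its singular axes, then a regression
# predicting the missing block from those scores
scores1 <- Y1 %*% U
fit <- lm(Y2 ~ scores1)
# predicted "missing" values for a case's recorded variables (row 1 used as a stand-in)
new.scores <- matrix(Y1[1,], 1) %*% U
pred.Y2 <- cbind(1, new.scores) %*% coef(fit)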
INPUTS
1. n*(p*k) matrix of observations (X) in which row 1 is the case with missing data and the remaining rows are complete cases. Coordinates are ordered x1, y1, z1, x2, y2, z2, ..., and missing coordinates are entered as NA (see the example after this list).
2. Optionally, a vector of group identities (group.id) for all specimens, with length = nrow(X).
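For concreteness, a hypothetical sketch of how these inputs might be assembled (names and dimensions are illustrative only):
# 10 complete specimens plus 1 incomplete specimen, 20 landmarks in 3D
p <- 20
complete.mat <- matrix(rnorm(10*p*3), nrow=10)   # columns ordered x1, y1, z1, x2, y2, z2, ...
incomplete <- rnorm(p*3)
incomplete[4:9] <- NA                            # landmarks 2 and 3 unrecorded
X <- rbind(incomplete, complete.mat)             # row 1 must be the incomplete case
group.id <- c("A", rep(c("A", "B"), each=5))     # optional; length must equal nrow(X)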
VALUES (returns)
1. Vector of imputed and recorded landmark coordinates (x1, y1, z1, ...) rescaled to the estimated centroid size of the incomplete specimen.
NOTES
1. As written, only accepts 3D landmark data.
2. Support functions nested within impute.pls2B include a complete set of functions for Generalized Procrustes Analysis.
3. If the group identity vector is supplied, a pooled within-group covariance matrix is computed. If not, the ordinary covariance matrix for the complete dataset is computed. Pooling may produce better estimates because it reduces the influence of among-group deviations on the covariance matrix. The group.id option should only be used when at least one complete case is from the same group as the missing case (see the sketch after these notes).
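For intuition, a minimal sketch of the pooled (group mean-centered) covariance idea; pooled.cov is an illustrative helper, not part of the script:
# subtract each column's group mean, then take the covariance of the residuals
pooled.cov <- function(m, grp) {
  centered <- m - apply(m, 2, ave, grp)
  t(centered) %*% centered / (nrow(m) - 1)
}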
STEP-THROUGH
1. Generalized Procrustes Analysis on complete cases.
2. If group.id is included, pooling.
3. Compute initial estimate of missing coordinate data based on fitting the incomplete case to the complete case consensus configuration.
4. Iterate PLS estimation procedure until the difference between the current and previous estimated shapes differs by less than an a priori defined threshold.
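Assuming X and group.id are set up as in the hypothetical INPUTS example above, a call might look like this (object names are illustrative):
# impute the missing coordinates of specimen 1
imputed.config <- impute.pls2B(X, group.id=group.id)   # or impute.pls2B(X) to skip pooling
# the returned vector (x1, y1, z1, ...) is rescaled to the estimated centroid size
# of the incomplete specimen; reshape to a p x 3 matrix if desired
imputed.lm.matrix <- matrix(imputed.config, ncol=3, byrow=TRUE)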
# See the file above for instructions.
impute.pls2B <- function(X, group.id=NA)
{
################################################
# FUNCTIONS UTILIZED DURING IMPUTATION
# Generalized Procrustes Analysis
GPA <- function(d){
# REMOVE LOCATION
# array of centroid data
cent.array <- array.of.centroids(d)
# translate each specimen by the centroid of included landmarks
trans.array <- d-cent.array
# REMOVE SCALE
# vector of centroid sizes
cent.size <- rep(NA, dim(trans.array)[1])
for(i in 1:dim(trans.array)[1])
{cent.size[i]<-c.size(trans.array[i,,], cent=centroid(trans.array[i,,]))}
# scale to unit size
s.array <- trans.array/cent.size
# REMOVE ROTATION
# initial fitting of all specimens to the first specimen in the array
gpa.fitting <- gpa.iteration(scaled=s.array)
r.array <- gpa.fitting$x
r.mean <- gpa.fitting$mean.x
scaled.mean <- gpa.fitting$scaled.mean
list(r.array = r.array, r.mean=r.mean,
cent.size = cent.size, scaled.mean=scaled.mean)
}
# SUPPORT FUNCTIONS
# array of centroids (used to translate specimen array to origin)
array.of.centroids <- function(d)
{
# matrix of centroids
cent.matrix <- apply(d, 1, centroid)
# make conformable arrays of centroids for each specimen
cent.array <- array(NA, dim=c(dim(d)))
# fill centroid array
for(j in 1:dim(cent.array)[1]){
cent.array[j,,] <- matrix(rep(cent.matrix[,j], dim(cent.array)[2]),
dim(cent.array)[2], dim(cent.array)[3], byrow=TRUE)
}
return(cent.array)
}
# centroid size of landmark configuration
c.size <- function(d, cent=centroid(d))
{
centroid.mat <- matrix(cent, dim(d)[1], dim(d)[2], byrow=TRUE)
summed.sq.dists <- sum(apply((d-centroid.mat)^2, 1, sum))
cs <- sqrt(summed.sq.dists)
return(cs)
}
# centroid of landmark configuration
centroid <- function(centroid.of)
{
centroid.unity.vector <- matrix(1, nrow(centroid.of), 1)
C <- t(1/nrow(centroid.of) * t(centroid.of) %*% centroid.unity.vector)
return (C)
}
# iterative rotations for Procrustes fitting
gpa.iteration <- function(scaled){ # requires centered, scaled n*p*k array of landmark data. k=3
# FIRST ROTATION
x <- array(NA, dim=c(dim(scaled)))
# rotation of each specimen by its own rotation matrix
for (n in 1:dim(scaled)[1]) {
x.mean <- scaled[1,,]
rm <- rotation.matrix(x.mean, scaled[n,,])$RM #rotation matrix for each specimen
x[n,,] <- scaled[n,,] %*% rm # rotate all specimens to first
}
# ITERATIVE ROTATIONS
delta.sos <- array.sos(x, x.mean) # provides the criterion for when to stop iterating
sos.old <- delta.sos
while (delta.sos > .001)
{
# shrinkage prevention! makes the mean specimen of unit centroid size
unscaled.mean <- apply(x, c(2, 3), mean)
scaled.mean <- unscaled.mean/c.size(unscaled.mean)
# rotations
for(m in 1:dim(x)[1])
{
rm <- rotation.matrix(scaled.mean, x[m,,])$RM
x[m,,] <- x[m,,] %*% rm
}
# data to evaluate the while loop criterion
sos.new <- array.sos(x, scaled.mean)
delta.sos <- sos.old-sos.new
sos.old <- sos.new
}
# recompute mean a final time
mean.x <- apply(x, c(2, 3), mean)
list(x = x, mean.x = mean.x, scaled.mean=scaled.mean)
}
# sum of squares (proc dist) for an array of landmark configurations
array.sos <- function(x, C)
{
#find the distance of each coordinate from the mean
diff <- sweep(x, c(2,3), C, "-")
#sum the squares of each coordinate distance
sos <- sum(diff^2, na.rm=TRUE)
return(sos)
}
# translate a matrix (uses geometry rather than matrix multiplication;
# geometry makes it easier to handle rows with missing data)
translate <- function(translate.what, translate.by=translate.what, imputed.lms=NA)
{
unity.vector <-
matrix(1, nrow(translate.what), 1)
#translation matrix
trans.matrix <-
unity.vector %*% centroid(translate.by)
#translation by translation matrix
translate <-
translate.what - trans.matrix
return(translate)
}
# rotation matrix to fit one shape to another
rotation.matrix <- function(hold.this, rotate.this)
{
#decompose
decomp <- svd(t(hold.this) %*% rotate.this)
U <- decomp$u
V <- decomp$v
D <- decomp$d
#if there is a 0 element to D, change it to a 1
if(any(D==0)) {D[which(D==0)] <- 1}
# preliminary rotation matrix
pre.S <- matrix(0, length(D), length(D))
diag(pre.S) <- sign(D)
pre.RM <- V %*% pre.S %*% t(U)
determ <- det(pre.RM)
# test for reflection. if yes, adjust rotation matrix.
if (determ >= 0) {S <- pre.S} else {
S <- matrix(0, length(D), length(D))
for (i in 1:length(D))
{S[i,i] <- ifelse (D[i]==min(D), -(sign(D[i])), sign(D[i]))}
}
#Produce the appropriate rotation matrix
RM <- V%*%S%*%t(U)
list(RM=RM, determ=determ)
}
# ordinary Procrustes analysis (fit one specimen to a reference).
# assumes reference (hold.this) is already centered and scaled.
opa.one <- function(hold.this, rotate.this)
{
# centroid size of the configuration before scaling
cs <- c.size(rotate.this)
# translate and scale the specimen to be rotated
rotate.this <- translate(rotate.this)/cs
#rotation matrix
rm <- rotation.matrix(hold.this, rotate.this)$RM
# rotated configuration
opa <- rotate.this%*%rm
list(opa=opa, cs=cs)
}
#converts n*p*k array to n*(p*k) matrix of proper setup
lm.array.to.matrix <- function(lm.array)
{
lm.matrix <- aperm(lm.array, c(1,3,2))
dim(lm.matrix) <-
c(dim(lm.array)[1], dim(lm.array)[2] * dim(lm.array)[3])
return(lm.matrix)
}
# centering data on group (pop, species, etc) means
group.centering <- function(m, grp.vec)
{
groups <- unique(grp.vec)
#empty matrix for group means
mean.mat <- matrix(NA, nrow=length(groups), ncol=ncol(m))
#loop to fill matrix of group means
for(i in 1:length(groups))
{
grp.dat <- subset(m, grp.vec == groups[i])
mean.mat[i,] <- apply(grp.dat, 2, mean)
}
#expanded matrix of mean with nrow = nrow(m)
matches <- match(grp.vec, groups)
matched.means <- mean.mat[matches,]
#within group residuals
group.centered <- m - matched.means
list(group.centered = group.centered,
matched.means = matched.means,
groups = groups,
mean.mat=mean.mat)
}
#########################################################################
# DATA PREP: COMPLETE CASES
# rearrange the complete-case matrix as an n*p*k 3D array
d <- aperm(array(X[-1,], dim=c(nrow(X)-1, 3, ncol(X)/3)), c(1,3,2))
# GPA of complete cases
cc.gpa <- GPA(d)
cc.array <- cc.gpa$r.array
cc.mat <- lm.array.to.matrix(cc.array)
cc.mean <- cc.gpa$r.mean
cc.cs <- cc.gpa$cent.size
cc.scaledmu <- cc.gpa$scaled.mean
# set up the residuals used for the covariance matrix, depending on whether
# there is group id data.
# Scenario 1: there is group data
if(length(group.id)>1){
cc.group.id <- group.id[-1]
ctr <- group.centering(cc.mat,cc.group.id)
resid <- ctr$group.centered
mus <- ctr$mean.mat
grps <- ctr$groups
matched.means <- ctr$matched.means
missing.mean <- mus[which(grps==group.id[1]),]} else {
# Scenario 2: no group id information
matched.means <- matrix(as.vector(t(cc.mean)),nrow(cc.mat),ncol(cc.mat), byrow=T)
missing.mean <- matched.means[1,]
resid <- cc.mat-matched.means
mus <- matched.means[1,]
}
#################################################################
# DATA PREP: INCOMPLETE SPECIMEN
# matrix for the incomplete case (and its centroid size)
imp.case <- matrix(X[1,],ncol(X)/3,3,byrow=T)
# identify missing landmarks
imputed.lms <- which(is.na(imp.case[,1]))
# estimate the full centroid size of the missing case: centroid size of the
# recorded landmarks divided by the proportion of total CS attributable to
# those landmarks on the mean shape
mean.form <- (cc.mean/c.size(cc.mean))*mean(cc.cs)
cs.ratio <- c.size(mean.form[-imputed.lms,])/c.size(mean.form)
missing.cs <- c.size(imp.case[-imputed.lms,])/cs.ratio
##########################################################
# FIRST IMPUTATION OF MISSING LANDMARKS (SUB IN THE MEAN COORDINATES)
# empty vector for the imputed outcome
imputed <- rep(NA, ncol(X))
# translate partial complete case so partial centroid is at origin
part.mean <- translate(translate.what=cc.mean,translate.by=cc.mean[-imputed.lms,])
# translate and scale the incomplete specimen to match the centroid size of the recorded landmarks in part.mean
imp.t <- translate(imp.case, imp.case[-imputed.lms,])
imp.ts <- (imp.t/c.size(imp.case[-imputed.lms,]))*c.size(part.mean[-imputed.lms,])
# useful index information for vector form of imputed specimen
missing.coords <- which(is.na(as.vector(t(imp.ts))))
# ordinary procrustes analysis
imp.rm <- rotation.matrix(part.mean[-imputed.lms,],imp.ts[-imputed.lms,])$RM
missing.fit <- imp.ts%*%imp.rm
# swap in mean landmark coordinates for missing coordinates
missing.fit[imputed.lms,]<-part.mean[imputed.lms,]
# translate and scale to cs=1; this completed configuration starts the PLS loop
one<-translate(missing.fit)/c.size(missing.fit)
######################################################
# ITERATIVE PLS PREDICTION
# initial values for while loop criterion
proc.dist <- 99
while(proc.dist > 0.0001)
{
#PLS
#define blocks
predictor.block <- resid[,-missing.coords]
missing.block <- resid[,missing.coords]
two.blocks <- cbind(predictor.block, missing.block)
#covariance matrix and cross block covariance matrix
R <- (t(two.blocks)%*%two.blocks)/(nrow(two.blocks)-1)
R1 <- R[1:ncol(predictor.block),1:ncol(predictor.block)]
R2 <- R[-(1:ncol(predictor.block)),-(1:ncol(predictor.block))]
R12 <- R[1:nrow(R1), -(1:ncol(R1))]
#singular value decomposition of cross block covariance matrix
pls.svd <- svd(R12)
S<-diag(pls.svd$d)
U<-pls.svd$u
V<-pls.svd$v
#scores
predictor.scores <- predictor.block%*%U
# PLS PREDICTIVE EQUATION
#linear model to predict missing scores from block of available scores
pls.lm <- lm(missing.block~predictor.scores)$coefficients
# IMPUTATION BASED ON pls.lm
# score of imputed specimen (predictor block lms) on the singular value axes
one <- opa.one(cc.scaledmu,one)$opa
one.vec <- as.vector(t(one))
# current imputed shape as matrix
#one<-matrix(missing.vec, length(one)/3,3,byrow=T)
# residuals and predictor scores for imputed shape
imp.resids <- one.vec - missing.mean
imp.predictor.scores <- imp.resids[-missing.coords]%*%U
#Predict coordinates with regression (ALPHA + XB + ADD.BACK.MEAN)
pred.coords <- pls.lm[1,] + imp.predictor.scores%*%pls.lm[-1,] + missing.mean[missing.coords]
#fill in the imputation matrix row for the specimen.
for(j in 1:length(missing.coords)){one.vec[missing.coords[j]]<-pred.coords[j]}
# new imputed shape as matrix
redone <- matrix(one.vec,nrow(one),ncol(one), byrow=T)
redone <- translate(redone)/c.size(redone)
# TEST CRITERION FOR ESTIMATING AGAIN
one.redone <- opa.one(one, redone)$opa
proc.dist <- sqrt(sum((one.redone-one)^2))
one <- redone
print(proc.dist)
}
imputed<-as.vector(t(one))*missing.cs
return(imputed)
}