# taxometric_function.R
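###########################################################################################################
#
# Note: the programs below call several helpers -- Remove.Missing, GenData, Skew, and Kurtosis -- that
# this file assumes are defined elsewhere in the full source. GenData (Ruscio's bootstrap routine for
# generating comparison populations) is not sketched here. For the simpler helpers, minimal sketches
# consistent with how the programs below use them might look as follows; these are illustrative
# assumptions, not the author's own definitions.
#
###########################################################################################################
Remove.Missing <- function(Data.Set) # listwise deletion, as described at the call sites below
{
Data.Set <- as.matrix(Data.Set)
Data.Set[complete.cases(Data.Set), , drop = FALSE] # keep only rows with no missing values
}
Skew <- function(x) # conventional third standardized moment (an assumed definition)
mean((x - mean(x))^3) / sd(x)^3
Kurtosis <- function(x) # conventional excess kurtosis (an assumed definition)
mean((x - mean(x))^4) / sd(x)^4 - 3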
###########################################################################################################
#
# MAXEIG -- performs MAXEIG, but can also be used to perform MAXCOV.
#
# Written by John Ruscio
# Last modified January 9, 2012
#
###########################################################################################################
#
# Preliminary program to read research data and submit it for analysis and then, if requested, analyze
# comparison data to test the fit of the averaged curves.
#
###########################################################################################################
MAXEIG <- function(Data.Set, Comp.Data = T, N.Pop = 100000, N.Samples = 100, Supplied.Class = F,
Supplied.P = 0, Ind.Triplets = F, Ind.Comp = F, Intervals = F, N.Int = 15,
Windows = 25, Overlap = .90, Calc.Cov = F, St.Ind = F, Replications = 0,
Gr.Orient = 1, Gr.Rows = 3, Gr.Cols = 2, Gr.Smooth = 0, Gr.Common.Y = F, Gr.Ref = 2,
Gr.Avg = F, Gr.Ind = F, Classify = 1, Gr.Bayes = F, Save.Class = 0, File.Output = F,
File.Name = "Output.txt", Seed = 1)
{
Data.Set <- Remove.Missing(Data.Set) # remove any missing data (listwise deletion)
N <- dim(Data.Set)[1] # determine sample size
I <- dim(Data.Set)[2] # determine number of indicators; adjust if one column contains class assignments
if (Supplied.Class) I <- I - 1 # subtract to allow for class assignment data, if provided
if ((version$major == 1) & (version$minor < 9)) library(eda)
else library(stats) # load R code for running medians smoother
set.seed(Seed) # set random number seed to allow exact replications of results
# automatically fix some potential parameter inconsistencies or omissions
if (Comp.Data) Gr.Avg <- T # show the averaged curve that will be compared to those of comparison data
if ((Comp.Data) & (N.Samples == 1)) N.Samples <- 100 # do not allow a single sample of comparison data
if (Ind.Comp) Ind.Triplets <- T # for this input method, triplets must be used
if (Ind.Comp & (I < 4)) Ind.Comp <- F # cannot form composites using individual variables
if (Calc.Cov) Ind.Triplets <- T # covariances can only be calculated using two outputs
if (Ind.Comp) Classify <- 1 # cannot use Bayes' Theorem with composite input indicators
if (Ind.Comp | !Ind.Triplets) Gr.Ind <- F # cannot average curves for each indicator
if ((Classify == 1) & (Save.Class == 2)) Save.Class <- 1 # cannot return Bayesian prob if not generated
if (Gr.Avg) St.Ind <- T # always standardize indicators if graphs will be averaged
if (St.Ind) Data.Set[,1:I] <- apply(Data.Set[,1:I], 2, scale) # standardize data if requested
# check for tied scores and set number of replications
Ties <- F
for (i in 1:I)
if (length(unique(Data.Set[,i])) < N) Ties <- T
if ((Replications == 0) & (Ties)) Replications <- 10 # default is 10 replications if any tied scores
if (Replications == 0) Replications <- 1
if ((Replications > 1) & (!Ties)) Replications <- 1 # eliminate unnecessary replications
# if output to file is requested, redirect all text output
if (File.Output) sink(File.Name)
# label supplied data and submit to main program for analysis; class assignments will be returned
Research.Results <- MAXEIG.Main(Data.Set, Research.Data = T, Comp.Data, N.Pop, N.Samples,
Supplied.P, Supplied.Class, N, I, Ind.Triplets, Ind.Comp, Intervals,
N.Int, Windows, Overlap, Calc.Cov, St.Ind, Replications,
Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth,Gr.Common.Y, Gr.Ref, Gr.Avg,
Gr.Ind, Classify, Save.Class, Gr.Bayes, File.Output, File.Name)
if (Comp.Data)
{
P.Est.M <- Research.Results[[1]]
P.Est.SD <- Research.Results[[2]]
Class.Assign <- Research.Results[[3]]
MAXEIG.Results <- Research.Results[[4]]
}
else
{
P.Est.M <- Research.Results[[1]]
Class.Assign <- Research.Results[[2]]
}
# if comparison data requested, then generate and submit samples for analysis
if (Comp.Data)
{
# create matrices to store results for each comparison data set
if (Intervals) N.Subsamples <- N.Int
else N.Subsamples <- Windows
Tax.Y.Values <- matrix(nrow = N.Samples, ncol = N.Subsamples)
Dim.Y.Values <- matrix(nrow = N.Samples, ncol = N.Subsamples)
if (Ind.Comp) N.Curves <- (I * (I - 1)) / 2
else if (Ind.Triplets) N.Curves <- (I * (I - 1) * (I - 2)) / 2
else N.Curves <- I
Tax.P.Ests <- matrix(nrow = N.Samples, ncol = N.Curves)
Dim.P.Ests <- matrix(nrow = N.Samples, ncol = N.Curves)
# generate and analyze categorical data
set.seed(Seed)
if (!Supplied.Class) # if class assignments were not provided, generate using base rate
{
Data.Set <- cbind(Data.Set, rep(0, N))
if (Supplied.P == 0) P <- P.Est.M # use estimated P in research data if no P was provided
else P <- Supplied.P
if ((P * N) < 20) P <- 20 / N # ensure at least 20 taxon cases in the generated population
if ((N - (P * N)) < 20) P <- (N - 20) / N # ensure at least 20 complement cases
for (i in 1:I)
Data.Set[, (I + 1)] <- Data.Set[, (I + 1)] + Data.Set[,i]
Data.Set <- Data.Set[sort.list(Data.Set[,(I + 1)]),]
P.Est.Series <- c(rep(2, round(N * P)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Set[,(I + 1)] <- c(Q.Est.Series, P.Est.Series)
}
Tax.Data <- Data.Set[(Data.Set[,(I + 1)] == 2), 1:I]
Com.Data <- Data.Set[(Data.Set[,(I + 1)] == 1), 1:I]
P.Data <- dim(Tax.Data)[1] / (dim(Tax.Data)[1] + dim(Com.Data)[1])
cat("\nGenerating and analyzing comparison data\n")
Sim.Tax <- GenData(Tax.Data, round(N.Pop * P.Data))
Sim.Com <- GenData(Com.Data, (N.Pop - round(N.Pop * P.Data)))
Sim.Pop <- rbind(Sim.Tax, Sim.Com)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat(" Categorical population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
MAXEIG.Tax.Results <- MAXEIG.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop, N.Samples,
Supplied.P, Supplied.Class, N, I, Ind.Triplets, Ind.Comp,
Intervals, N.Int, Windows, Overlap, Calc.Cov,
St.Ind, Replications, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth,
Gr.Common.Y, Gr.Ref, Gr.Avg, Gr.Ind, Classify, Save.Class,
Gr.Bayes, File.Output, File.Name)
Tax.Y.Values[Sims,] <- MAXEIG.Tax.Results[[2]]
Tax.P.Ests[Sims,] <- MAXEIG.Tax.Results[[3]]
}
cat(" Analysis of",N.Samples,"samples of categorical comparison data completed\n")
# generate and analyze dimensional samples
set.seed(Seed)
Sim.Pop <- GenData(Data.Set[, 1:I], N.Pop)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat("\n Dimensional population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
MAXEIG.Dim.Results <- MAXEIG.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop, N.Samples,
Supplied.P, Supplied.Class, N, I, Ind.Triplets, Ind.Comp,
Intervals, N.Int, Windows, Overlap, Calc.Cov,
St.Ind, Replications, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth,
Gr.Common.Y, Gr.Ref, Gr.Avg, Gr.Ind, Classify, Save.Class,
Gr.Bayes, File.Output, File.Name)
Dim.Y.Values[Sims,] <- MAXEIG.Dim.Results[[2]]
Dim.P.Ests[Sims,] <- MAXEIG.Dim.Results[[3]]
}
cat(" Analysis of",N.Samples,"samples of dimensional comparison data completed\n")
# summarize base rate estimates for comparison data
P.Est.Summary <- matrix(nrow = (N.Samples + 2), ncol = 4)
for (Sims in 1:N.Samples)
{
P.Est.Summary[Sims,1] <- mean(Tax.P.Ests[Sims,], na.rm = T)
P.Est.Summary[Sims,2] <- sd(Tax.P.Ests[Sims,], na.rm = T)
P.Est.Summary[Sims,3] <- mean(Dim.P.Ests[Sims,], na.rm = T)
P.Est.Summary[Sims,4] <- sd(Dim.P.Ests[Sims,], na.rm = T)
}
for (i in 1:4)
{
P.Est.Summary[Sims + 1,i] <- mean(P.Est.Summary[1:Sims,i])
P.Est.Summary[Sims + 2,i] <- sd(P.Est.Summary[1:Sims,i])
}
Row.Labels <- c(1:Sims,paste("M for",N.Samples,"samples"),paste("SD for",N.Samples,"samples"))
Col.Labels <- c("Categorical M","SD","Dimensional M","SD")
dimnames(P.Est.Summary) <- list(Row.Labels,Col.Labels)
cat("\n\nBase rate estimates for comparison data:\n\n")
if (N.Samples == 1)
{
cat(" Categorical sample: M =",round(P.Est.Summary[1,1],4),", SD =",
round(P.Est.Summary[1,2],4),"\n")
cat(" Dimensional sample: M =",round(P.Est.Summary[1,3],4),", SD =",
round(P.Est.Summary[1,4],4),"\n")
}
else print(round(P.Est.Summary[((N.Samples+1):(N.Samples+2)),],4))
# plot sampling distributions of Ms/SDs of base rate estimates
if (N.Samples > 1)
{
dev.new(width=7, height=3.5)
par(mfrow=c(1,2), omi=c(.25,.25,.25,.25), cex=.75)
Y.M.Max <- max(c(density(P.Est.Summary[1:Sims,1])$y,density(P.Est.Summary[1:Sims,3])$y)) * 1.25
Y.SD.Max <- max(c(density(P.Est.Summary[1:Sims,2])$y,density(P.Est.Summary[1:Sims,4])$y)) * 1.25
plot(density(P.Est.Summary[1:Sims,1], adjust = .75), xlim = c(0,1), ylim = c(0, Y.M.Max),
xlab = "Ms of Base Rate Estimates", ylab = "", main = "", lwd = 2, axes = F)
axis(1)
lines(density(P.Est.Summary[1:Sims,3], adjust = .75), lwd = 2, lty = 2)
abline(v = P.Est.M, lwd = 2, lty = 3)
plot(density(P.Est.Summary[1:Sims,2], adjust = .75), xlim = c(0,.5), ylim = c(0, Y.SD.Max),
xlab = "SDs of Base Rate Estimates", ylab = "", main = "", lwd = 2, axes = F)
axis(1)
lines(density(P.Est.Summary[1:Sims,4], adjust = .75), lwd = 2, lty = 2)
abline(v = P.Est.SD, lwd = 2, lty = 3)
mtext("BASE RATE ESTIMATES FOR COMPARISON DATA",outer=T)
}
# plot averaged curves for comparison data
X.Values <- MAXEIG.Dim.Results[[1]]
X.Range <- c(min(X.Values), max(X.Values))
Tax.Y <- vector("numeric", length(X.Values))
for (i in 1:length(Tax.Y))
Tax.Y[i] <- mean(Tax.Y.Values[,i],na.rm=T)
Dim.Y <- vector("numeric", length(X.Values))
for (i in 1:length(Dim.Y))
Dim.Y[i] <- mean(Dim.Y.Values[,i],na.rm=T)
Y.Values <- c(MAXEIG.Results,Tax.Y.Values,Dim.Y.Values)
Y.Range <- c(min(0, min(Y.Values)), max(Y.Values) * 1.10)
# label the x axis in accord with input method and indicator
if (Intervals) X.Label <- paste(N.Int,"Intervals")
else X.Label <- paste(Windows,"Windows")
# label the y axis in accord with calculation type and output indicators
if (Calc.Cov) Y.Label <- "Covariance"
else Y.Label <- "Eigenvalue"
# 2-panel plot for comparison data
Tax.Sum <- Dim.Sum <- matrix(0, nrow = 6, ncol = length(X.Values))
for (i in 1:length(X.Values))
{
Tax.Sum[,i] <- summary(Tax.Y.Values[,i])
Dim.Sum[,i] <- summary(Dim.Y.Values[,i])
}
dev.new(width=7, height=3.5)
par(mfrow=c(1,2), omi=c(.25,.25,.25,.25), cex=.75)
plot(X.Values, MAXEIG.Results, type = "o", pch = 16, lwd = 2, ylim = Y.Range,
xlim = X.Range, ylab = Y.Label, xlab = X.Label, main = "Categorical Comparison Data")
for (i in 1:(length(X.Values)-1))
polygon(x = c(X.Values[i], X.Values[i], X.Values[i+1], X.Values[i+1]),
y = c(Tax.Sum[2,i], Tax.Sum[5,i], Tax.Sum[5,i+1], Tax.Sum[2,i+1]), col = 8, lty = 0)
lines(X.Values, Tax.Sum[1,])
lines(X.Values, Tax.Sum[6,])
points(X.Values, MAXEIG.Results, type = "o", pch = 16)
lines(X.Values, MAXEIG.Results, lwd = 2)
if ((Gr.Ref == 1) | ((Gr.Ref == 2) & (Y.Range[1] < 0))) abline(h = 0)
plot(X.Values, MAXEIG.Results, type = "o", pch = 16, lwd = 2, ylim = Y.Range,
xlim = X.Range, ylab = Y.Label, xlab = X.Label, main = "Dimensional Comparison Data")
for (i in 1:(length(X.Values)-1))
polygon(x = c(X.Values[i], X.Values[i], X.Values[i+1], X.Values[i+1]),
y = c(Dim.Sum[2,i], Dim.Sum[5,i], Dim.Sum[5,i+1], Dim.Sum[2,i+1]), col = 8, lty = 0)
lines(X.Values, Dim.Sum[1,])
lines(X.Values, Dim.Sum[6,])
points(X.Values, MAXEIG.Results, type = "o", pch = 16)
lines(X.Values, MAXEIG.Results, lwd = 2)
if ((Gr.Ref == 1) | ((Gr.Ref == 2) & (Y.Range[1] < 0))) abline(h = 0)
# calculate and print comparison curve fit index (CCFI)
Fit.RMSR.t <- sqrt(sum((Tax.Y - MAXEIG.Results)^2)/N.Subsamples)
Fit.RMSR.d <- sqrt(sum((Dim.Y - MAXEIG.Results)^2)/N.Subsamples)
CCFI <- Fit.RMSR.d / (Fit.RMSR.d + Fit.RMSR.t)
cat("\n\nComparison curve fit index (CCFI) =",round(Fit.RMSR.d,4),"/ (",round(Fit.RMSR.d,4),
"+",round(Fit.RMSR.t,4),") =",round(CCFI,3),"\n")
cat(" Note: CCFI values can range from 0 (dimensional) to 1 (categorical). The more a CCFI\n")
cat(" value deviates from .50, the stronger the result. When .40 < CCFI < .60,\n")
cat(" this should be interpreted with caution.\n\n")
}
if (File.Output) sink()
if (Save.Class > 0) return(Class.Assign)
}
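###########################################################################################################
#
# Illustrative call (hypothetical data, not part of the original file). With four indicators and the
# defaults shown above, one might run:
#
#   dat <- matrix(rnorm(300 * 4), ncol = 4)   # N = 300 cases, I = 4 indicators
#   classes <- MAXEIG(dat, N.Samples = 100, Save.Class = 1, Seed = 1)
#
# This analyzes the research data, generates and analyzes 100 categorical and 100 dimensional
# comparison samples, prints the CCFI, and returns class assignments because Save.Class > 0.
#
###########################################################################################################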
###########################################################################################################
#
# Main program shows the overall structure through calls to four subsidiary programs
#
###########################################################################################################
MAXEIG.Main <- function(Data, Research.Data, Comp.Data, N.Pop, N.Samples, Supplied.P, Supplied.Class, N, I,
Ind.Triplets, Ind.Comp, Intervals, N.Int, Windows, Overlap, Calc.Cov,
St.Ind, Replications, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth, Gr.Common.Y, Gr.Ref,
Gr.Avg, Gr.Ind, Classify, Save.Class, Gr.Bayes, File.Output, File.Name)
{
# set up matrix to store input/output configurations for each curve to be generated
Analysis.Structure <- MAXEIG.Structure(Ind.Triplets, Ind.Comp, I, Intervals)
Curve.Info <- Analysis.Structure[[1]]
Structure <- Analysis.Structure[[2]]
Input.Type <- Analysis.Structure[[3]]
# carry out analyses by calling calculation procedure once per curve
Curves <- list(1)
for (Curve.Num in 1:length(Curve.Info[,1]))
Curves[[Curve.Num]] <- MAXEIG.Calculate(Data[,1:I], N, I, Curve.Info, Curve.Num, Structure, Input.Type,
Ind.Triplets, Ind.Comp, Intervals, N.Int, Windows, Overlap, Calc.Cov, Replications)
if (!Research.Data | Comp.Data)
{
# calculate averaged curve
X.Values <- rep(0,length(Curves[[1]][,1]))
for (i in 1:length(Curve.Info[,1]))
X.Values <- X.Values + Curves[[i]][,1]
X.Values <- X.Values / length(Curve.Info[,1])
Y.Values <- rep(0,length(X.Values))
for (i in 1:length(Curve.Info[,1]))
Y.Values <- Y.Values + Curves[[i]][,3]
Y.Values <- Y.Values / length(Curve.Info[,1])
# calculate base rate estimates
Data.Matrix <- cbind(Data[,1:I], 1:N, rep(0,N), rep(0,N))
N.Curves <- length(Curve.Info[,1])
P.Estimates <- vector("numeric",N.Curves) # store base rate estimates
for (i in 1:N.Curves)
{
if (Structure < 3) Input <- Curve.Info[i,2]
else
{
Data.Matrix[,(I + 2)] <- rep(0, N)
for (j in 1:I)
Data.Matrix[,(I + 2)] <- Data.Matrix[,(I + 2)] + Data.Matrix[,j]
Input <- I + 2
}
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,Input]),]
# determine curve data to use (smoothed or unsmoothed)
if (Gr.Smooth == 0) Curve.Data <- cbind(Curves[[i]][,1], Curves[[i]][,3], Curves[[i]][,2])
if (Gr.Smooth == 1)
{
Lowess.Y <- lowess(Curves[[i]][,1], Curves[[i]][,3])$y
for (j in 1:(length(Curves[[i]][,1])-1))
if (Curves[[i]][j,1] == Curves[[i]][(j + 1),1])
Lowess.Y <- c(Lowess.Y[1:j],Lowess.Y[j],Lowess.Y[(j+1):length(Lowess.Y)])
Lowess.Y <- Lowess.Y[1:length(Curves[[i]][,1])]
Curve.Data <- cbind(Curves[[i]][,1], Lowess.Y, Curves[[i]][,2])
}
if (Gr.Smooth == 2) Curve.Data <- cbind(Curves[[i]][,1], smooth(Curves[[i]][,3]), Curves[[i]][,2])
# estimate hitmax, P
Hitmax.Value <- max(Curve.Data[,2]) # y value at peak
Hitmax.Locs <- (1:(length(Curve.Data[,1])))[Curve.Data[,2] == Hitmax.Value] # peak subsample(s)
Hitmax.Loc <- round(median(Hitmax.Locs))
Hitmax.Case <- trunc(mean(Curves[[i]][Hitmax.Loc,4:5])) # case at midpoint of peak subsample(s)
Ints <- length(Curve.Data[,1])
p <- vector("numeric",Ints)
n <- vector("numeric",Ints)
K <- 4 * Hitmax.Value
for (j in 1:Ints)
{
if ((K ^ 2 - 4 * K * Curve.Data[j,2]) >= 0)
quad1 <- (K + sqrt(K ^ 2 - 4 * K * Curve.Data[j,2])) / ( 2 * K)
else quad1 <- 1
if ((K ^ 2 - 4 * K * Curve.Data[j,2]) >= 0)
quad2 <- (K - sqrt(K ^ 2 - 4 * K * Curve.Data[j,2])) / ( 2 * K)
else quad2 <- 0
if (j < Hitmax.Loc) p[j] <- min(1,quad2)
if (j == Hitmax.Loc) p[j] <- .5
if (j > Hitmax.Loc) p[j] <- max(quad1,0)
if ((Curve.Data[j,2] < 0) & (j < Hitmax.Loc)) p[j] <- 0
if ((Curve.Data[j,2] < 0) & (j > Hitmax.Loc)) p[j] <- 1
n[j] <- Curve.Data[j,3]
}
P.Estimates[i] <- sum(p * n) / sum(n)
}
}
# plot the results
if (Research.Data) P.Avg <- MAXEIG.Plot(I, Curve.Info, Curves, Structure, Input.Type, Ind.Triplets,
Ind.Comp, Intervals, Calc.Cov, Gr.Orient, Gr.Rows, Gr.Cols,
Gr.Smooth, Gr.Common.Y, Gr.Ref, Gr.Avg, Gr.Ind)
# summarize analytic specifications
if (Research.Data) MAXEIG.Summarize(Data[,1:I], N, I, Curve.Info, Curves, Structure, Input.Type, Ind.Triplets,
Ind.Comp, Intervals, N.Int, Windows, Overlap, Calc.Cov, Replications,
Gr.Smooth, Gr.Avg, P.Avg, Classify, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Bayes, Comp.Data,
N.Pop, N.Samples, Supplied.P, Supplied.Class)
# generate and report estimates of latent parameters and, if requested, classify cases
if (Research.Data) Class.Assign <- MAXEIG.P.Est(Data, N, I, Curve.Info, Curves, Structure, Input.Type,
Ind.Triplets, Ind.Comp, Intervals, N.Int, Windows, Overlap,
Calc.Cov, Replications, Gr.Smooth, Gr.Avg, P.Avg, Classify, Gr.Orient, Gr.Rows,
Gr.Cols, Gr.Bayes, Save.Class, Comp.Data, Supplied.P, Supplied.Class)
if (Research.Data & Comp.Data) return(list(Class.Assign[[2]],Class.Assign[[3]],Class.Assign[[1]],Y.Values))
if (Research.Data & !Comp.Data) return(list(Class.Assign[[2]],Class.Assign[[1]]))
if (!Research.Data) return(list(X.Values,Y.Values,P.Estimates))
}
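###########################################################################################################
#
# Note on the base rate estimation above (a reading of the code, for reference): each subsample's
# eigenvalue (or covariance) y is treated as y = K * p * (1 - p), where p is the taxon proportion in
# that subsample and K = 4 * Hitmax.Value, so that y attains its maximum K / 4 when p = .5 at the
# hitmax. Solving this quadratic for p gives
#
#   p = (K +/- sqrt(K^2 - 4 * K * y)) / (2 * K)
#
# The code takes the smaller root (quad2) below the hitmax and the larger root (quad1) above it, and
# the curve's base rate estimate is the n-weighted mean of the subsample p values, sum(p * n) / sum(n).
#
###########################################################################################################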
###########################################################################################################
#
# Program to determine structure of analysis and set up matrix to contain relevant curve information
#
###########################################################################################################
MAXEIG.Structure <- function(Ind.Triplets, Ind.Comp, I, Intervals)
{
# determine structure of the requested analysis:
# 1 = each indicator serves as input once, with all others as output
# 2 = triplets of indicators serve in all (input, output, output) combinations
# 3 = triplets of indicators, with each possible pair removed as outputs, input = sum of others
if (Ind.Comp) Structure <- 3
else if (Ind.Triplets) Structure <- 2
else Structure <- 1
# based on structure, create matrix listing input and/or output indicators to use for each curve
if (Structure == 1)
{
N.Curves <- I
Curve.Info <- matrix(nrow = N.Curves, ncol = 2)
Curve.Info[,1] <- 1:N.Curves
Curve.Info[,2] <- 1:N.Curves
}
if (Structure == 2)
{
if (I == 3) N.Curves <- 3
else N.Curves <- I * (I - 1) * (I - 2) / 2 # one curve per (input, output, output) combination
Curve.Info <- matrix(nrow = N.Curves, ncol = 4)
Curve.Info[,1] <- 1:N.Curves
Counter <- 1
for (Triplet.A in 1:I)
for (Triplet.B in 1:I)
for (Triplet.C in 1:I)
if ((Triplet.A != Triplet.B) & (Triplet.A != Triplet.C) & (Triplet.B < Triplet.C))
{
Curve.Info[Counter,2] <- Triplet.A
Curve.Info[Counter,3] <- Triplet.B
Curve.Info[Counter,4] <- Triplet.C
Counter <- Counter + 1
}
}
if (Structure == 3)
{
N.Curves <- (I * (I - 1)) / 2
Curve.Info <- matrix(nrow = N.Curves, ncol = 3)
Curve.Info[,1] <- 1:N.Curves
Outputs <- 1
for (Output.A in 1:(I - 1))
for (Output.B in (Output.A + 1):I)
{
Curve.Info[Outputs,2] <- Output.A
Curve.Info[Outputs,3] <- Output.B
Outputs <- Outputs + 1
}
}
# determine type of input divisions:
# 1 = equal-sized intervals
# 2 = SD intervals (no longer allowed)
# 3 = overlapping windows
if (Intervals) Input.Type <- 1
else Input.Type <- 3
return(list(Curve.Info, Structure, Input.Type))
}
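###########################################################################################################
#
# Worked example (not part of the original file): for I = 4 indicators, MAXEIG.Structure yields
#
#   Structure 1: 4 curves; Curve.Info[,2] lists the input indicator 1..4 (all others serve as outputs)
#   Structure 2: 4 * 3 * 2 / 2 = 12 curves; rows hold (input, output, output) triplets such as
#                (1,2,3), (1,2,4), (1,3,4), (2,1,3), ... with the two outputs unordered
#   Structure 3: (4 * 3) / 2 = 6 curves, one per output pair; input = sum of the remaining indicators
#
###########################################################################################################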
###########################################################################################################
#
# Program to subdivide the input and calculate eigenvalues (or, if requested, covariances)
#
###########################################################################################################
MAXEIG.Calculate <- function(Data, N, I, Curve.Info, Curve.Num, Structure, Input.Type, Ind.Triplets,
Ind.Comp, Intervals, N.Int, Windows, Overlap, Calc.Cov,
Replications)
{
# make room in data for summed indicator plus random numbers for resorting at each replication
Data.Matrix <- cbind(Data, rep(0,N), rnorm(N, mean = 0, sd = 1))
# determine input column, creating sum of other variables if necessary, and sort by input
if (Structure == 3)
{
Output1 <- Curve.Info[Curve.Num,2]
Output2 <- Curve.Info[Curve.Num,3]
Input <- I + 1
for (i in 1:I)
Data.Matrix[,(I + 1)] <- Data.Matrix[,(I + 1)] + Data.Matrix[,i]
Data.Matrix[,(I + 1)] <- Data.Matrix[,(I + 1)] - Data.Matrix[,Output1] - Data.Matrix[,Output2]
}
else Input <- Curve.Info[Curve.Num, 2]
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,Input]),]
# divide input as appropriate for a given input method
# generate matrix containing case numbers that form endpoints of each subsample
# generate appropriate labels for x axis of graph
if (Input.Type == 1) # create equal-sized intervals; label = mean value within each interval
{
Cuts <- matrix(nrow = 2, ncol = N.Int)
temp <- seq(from = 1, to = N, by = round(N / N.Int))
Cuts[1,] <- temp[1:N.Int]
Cuts[2,] <- c((temp[2:N.Int] - 1),N)
X.Labels <- vector("numeric", N.Int)
for (i in 1:N.Int)
X.Labels[i] <- round(mean(Data.Matrix[Cuts[1,i]:Cuts[2,i],Input]),2)
}
if (Input.Type == 3) # create overlapping windows; label = mean value within each window
{
N.Win <- N / (Windows * (1 - Overlap) + Overlap) # determine total n per window
N.Overlap <- N.Win * Overlap # determine how many cases overlap with adjacent windows
Cuts <- matrix(nrow = 2, ncol = Windows)
Cuts[1,] <- round(seq(from = 1, by = (N.Win - N.Overlap), length = Windows))
Cuts[2,] <- round(seq(from = N.Win, to = N, by = (N.Win - N.Overlap)))
X.Labels <- vector("numeric", Windows)
for (i in 1:Windows)
X.Labels[i] <- round(mean(Data.Matrix[Cuts[1,i]:Cuts[2,i],Input]),2)
}
# create matrix to store curve results
N.Subsamples <- length(X.Labels)
Curve.Results <- matrix(rep(0, N.Subsamples * 5), nrow = N.Subsamples, ncol = 5)
# having set up subsamples along the input and created a storage structure, perform the calculations
# replications loop; results are cumulated, subsequently divided by number of replications to yield average
for (Rep in 1:Replications)
{
# shuffle cases, then sort by input
Data.Matrix <- Data.Matrix[sample(1:N, N, replace = F),]
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,Input]),]
# begin calculations, storing results in either matrix or array as follows:
# 1st column = labels for x axis
# 2nd column = n per interval
# 3rd column = eigenvalues (or covariances)
# 4th, 5th columns = endpoints of each interval (case numbers)
# calculate values for one series of subsamples
Curve.Results[,4] <- Cuts[1,]
Curve.Results[,5] <- Cuts[2,]
Curve.Results[,1] <- X.Labels
if (Calc.Cov) # calculate covariances
{
if (Structure == 2) Outputs <- c(Curve.Info[Curve.Num,3], Curve.Info[Curve.Num,4])
if (Structure == 3) Outputs <- c(Curve.Info[Curve.Num,2], Curve.Info[Curve.Num,3])
for (i in 1:N.Subsamples)
{
Curve.Results[i,2] <- Cuts[2,i] - Cuts[1,i] + 1
Curve.Results[i,3] <- Curve.Results[i,3] + var(Data.Matrix[Cuts[1,i]:Cuts[2,i],Outputs])[2]
}
}
else # calculate eigenvalues
{
if (Structure == 1)
{
Outputs <- c(1:I)
Outputs <- Outputs[Outputs != Input]
}
if (Structure == 2) Outputs <- c(Curve.Info[Curve.Num,3], Curve.Info[Curve.Num,4])
if (Structure == 3) Outputs <- c(Curve.Info[Curve.Num,2], Curve.Info[Curve.Num,3])
for (i in 1:N.Subsamples)
{
Curve.Results[i,2] <- Cuts[2,i] - Cuts[1,i] + 1
Cov.Matrix <- var(Data.Matrix[Cuts[1,i]:Cuts[2,i],Outputs])
Data.No.Diag <- Cov.Matrix - diag(diag(Cov.Matrix))
Curve.Results[i,3] <- Curve.Results[i,3] + eigen(Data.No.Diag)$values[1]
}
}
}
# divide cumulated covariances or eigenvalues by number of replications to yield average values
Curve.Results[,3] <- Curve.Results[,3] / Replications
return(Curve.Results)
}
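###########################################################################################################
#
# Worked example of the overlapping-windows arithmetic above (not part of the original file): with
# N = 600 cases, Windows = 25, and Overlap = .90,
#
#   N.Win     = 600 / (25 * (1 - .90) + .90) = 600 / 3.4 = 176.5 cases per window
#   N.Overlap = 176.5 * .90 = 158.8 cases shared with each adjacent window
#
# so consecutive windows begin about N.Win - N.Overlap = 17.6 cases apart and the 25 rounded windows
# jointly span all 600 sorted cases.
#
###########################################################################################################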
###########################################################################################################
#
# Program to plot and label a panel of curves
#
###########################################################################################################
MAXEIG.Plot <- function(I, Curve.Info, Curves, Structure, Input.Type, Ind.Triplets, Ind.Comp, Intervals,
Calc.Cov, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth, Gr.Common.Y,
Gr.Ref, Gr.Avg, Gr.Ind)
{
P.Avg <- 0
# create and label new graph sheet
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
Page <- 1
N.Curves <- length(Curve.Info[,1])
# if applying a common Y scale for all graphs, set the range here (algorithm accentuates peak/flatness)
if (Gr.Common.Y)
{
Ys <- 0
for (i in 1:N.Curves)
Ys <- c(Ys, Curves[[i]][,3])
Y.Min <- min(Ys[2:length(Ys)])
Y.Max <- max(Ys[2:length(Ys)])
Y.M <- mean(Ys[2:length(Ys)])
Y.SD <- sd(Ys[2:length(Ys)])
Y.Range <- c(min(0, Y.Min), max(((Y.Max / Y.SD) * (Y.M / 1.5)), (Y.Max * 1.10)))
}
# plot each curve
for (i in 1:N.Curves)
{
if (Structure == 3) Input <- I + 1
else Input <- Curve.Info[i,2]
if (Structure == 1)
{
Outputs <- c(1:I)
Outputs <- Outputs[Outputs != Input]
}
if (Structure == 2) Outputs <- c(Curve.Info[i,3], Curve.Info[i,4])
if (Structure == 3) Outputs <- c(Curve.Info[i,2], Curve.Info[i,3])
X.Values <- Curves[[i]][,1]
Y.Values <- Curves[[i]][,3]
X.Range <- c(min(X.Values), max(X.Values))
if (!Gr.Common.Y)
Y.Range <- c(min(0, min(Y.Values)), max(((max(Y.Values) / sd(Y.Values))
* (mean(Y.Values) / 1.5)), max(Y.Values) * 1.10))
# label the x axis in accord with input method and indicator
if (Input.Type == 1) X.Label <- paste(length(X.Values),"Intervals, Indicator",Input)
if (Input.Type == 3) X.Label <- paste(length(X.Values),"Windows, Indicator",Input)
if (Ind.Comp)
{
if (Input.Type == 1) X.Label <- paste(length(X.Values),"Intervals")
if (Input.Type == 3) X.Label <- paste(length(X.Values),"Windows")
}
# label the y axis in accord with calculation type and output indicators
if (Calc.Cov) Y.Label <- paste("Covariance, Ind",Outputs[1],"and",Outputs[2])
else if (Ind.Triplets) Y.Label <- paste("Eigenvalue, Ind",Outputs[1],"and",Outputs[2])
else Y.Label <- "Eigenvalue, All Other Vars"
# check to see whether new graph sheet needs to be created
if ((i > 1) & ((i - 1) %% (Gr.Rows * Gr.Cols) == 0))
{
if (Calc.Cov) mtext(paste("MAXCOV CURVES, page ",Page,sep=""),outer=T)
else mtext(paste("MAXEIG CURVES, page ",Page,sep=""),outer=T)
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
Page <- Page + 1
}
# plot the graph, plus smoothed curve and/or reference line at 0 if requested
plot(X.Values, Y.Values, type = "p", pch = 16, ylim = Y.Range, xlim = X.Range,
ylab = Y.Label, xlab = X.Label)
if (Gr.Smooth == 0) # no smoothing, connect "raw" data points
lines(X.Values, Y.Values) # solid line for raw data only
else
lines(X.Values, Y.Values, lty = 3) # dotted line for raw data, solid for smoothed
if (Gr.Smooth == 1) # LOWESS smoother
lines(lowess(X.Values, Y.Values))
if (Gr.Smooth == 2) # running medians smoother
lines(X.Values,smooth(Y.Values))
if ((Gr.Ref == 1) | ((Gr.Ref == 2) & (Y.Range[1] < 0))) abline(h = 0)
}
if (Calc.Cov) mtext(paste("MAXCOV CURVES, page ",Page,sep=""),outer=T)
else mtext(paste("MAXEIG CURVES, page ",Page,sep=""),outer=T)
# plot curves averaged for each indicator
if (Gr.Ind)
{
# create a new graph sheet
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
Page <- 1
# average y values for curves using the same input indicator
N.Subsamples <- length(Curves[[1]][,1])
Ind.Graphs <- matrix(nrow = I, ncol = N.Subsamples)
N.Each.Ind <- N.Curves / I
for (i in 1:I)
{
Ind.Curves <- ((i - 1) * N.Each.Ind + 1):(i * N.Each.Ind)
Ys <- vector("numeric",N.Subsamples)
for (j in 1:N.Subsamples)
for (k in Ind.Curves)
Ys[j] <- Ys[j] + Curves[[k]][j,3]
Ind.Graphs[i,] <- Ys / N.Each.Ind
}
# plot a curve for each indicator
for (i in 1:I)
{
Input <- i
X.Values <- Curves[[i * N.Each.Ind]][,1]
X.Range <- c(min(X.Values), max(X.Values))
Ind.Curves <- ((i - 1) * N.Each.Ind + 1):(i * N.Each.Ind)
Y.Values <- Ind.Graphs[i,]
for (j in Ind.Curves)
Y.Values <- c(Y.Values,Curves[[j]][,3])
Y.Range <- c(min(0, min(Y.Values)), max(((max(Y.Values) / sd(Y.Values))
* (mean(Y.Values) / 1.5)), max(Y.Values) * 1.10))
# label the x axis in accord with input method and indicator
if (Input.Type == 1) X.Label <- paste(length(X.Values),"Intervals, Indicator",Input)
if (Input.Type == 3) X.Label <- paste(length(X.Values),"Windows, Indicator",Input)
# label the y axis in accord with calculation type
if (Calc.Cov) Y.Label <- "Covariance"
else Y.Label <- "Eigenvalue"
# check to see whether new graph sheet needs to be created
if ((i > 1) & ((i - 1) %% (Gr.Rows * Gr.Cols) == 0))
{
if (Calc.Cov) mtext(paste("MAXCOV INDICATOR CURVES, page ",Page,sep=""),outer=T)
else mtext(paste("MAXEIG INDICATOR CURVES, page ",Page,sep=""),outer=T)
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
Page <- Page + 1
}
# plot the averaged graph, with dotted lines for each individual curve
plot(X.Values, Ind.Graphs[i,], type = "l", pch = 16, ylim = Y.Range, xlim = X.Range,
ylab = Y.Label, xlab = X.Label, lwd = 2)
if ((Gr.Ref == 1) | ((Gr.Ref == 2) & (Y.Range[1] < 0))) abline(h = 0)
for (j in Ind.Curves)
lines(Curves[[j]][,1],Curves[[j]][,3], lty = 3, lwd = 2)
}
if (Calc.Cov) mtext(paste("MAXCOV INDICATOR CURVES, page ",Page,sep=""),outer=T)
else mtext(paste("MAXEIG INDICATOR CURVES, page ",Page,sep=""),outer=T)
}
if (Gr.Avg)
{
# create and label new graph sheet
dev.new(width=3.7, height=3.5)
par(omi=c(.25,.25,.25,.25), cex = .75)
X.Values <- rep(0,length(Curves[[1]][,1]))
for (i in 1:length(Curve.Info[,1]))
X.Values <- X.Values + Curves[[i]][,1]
X.Values <- X.Values / length(Curve.Info[,1])
Y.Values <- rep(0,length(X.Values))
for (i in 1:length(Curve.Info[,1]))
Y.Values <- Y.Values + Curves[[i]][,3]
Y.Values <- Y.Values / length(Curve.Info[,1])
Hitmax.Value <- max(Y.Values) # y value at peak
Hitmax.Locs <- (1:(length(Y.Values)))[Y.Values == Hitmax.Value] # peak subsample(s)
Hitmax.Loc <- round(median(Hitmax.Locs))
Hitmax.Case <- trunc(mean(Curves[[1]][Hitmax.Loc,4:5])) # case at midpoint of peak subsample(s)
Ints <- length(X.Values)
p <- vector("numeric",Ints)
n <- vector("numeric",Ints)
K <- 4 * Hitmax.Value
for (j in 1:Ints)
{
if ((K ^ 2 - 4 * K * Y.Values[j]) >= 0)
quad1 <- (K + sqrt(K ^ 2 - 4 * K * Y.Values[j])) / ( 2 * K)
else quad1 <- 1
if ((K ^ 2 - 4 * K * Y.Values[j]) >= 0)
quad2 <- (K - sqrt(K ^ 2 - 4 * K * Y.Values[j])) / ( 2 * K)
else quad2 <- 0
if (j < Hitmax.Loc) p[j] <- min(1,quad2)
if (j == Hitmax.Loc) p[j] <- .5
if (j > Hitmax.Loc) p[j] <- max(quad1,0)
}
P.Avg <- mean(p)
X.Range <- c(min(X.Values), max(X.Values))
Y.Values.Avg <- Y.Values
for (i in 1:N.Curves)
Y.Values <- c(Y.Values,Curves[[i]][,3])
Y.Range <- c(min(0, min(Y.Values)), max(((max(Y.Values) / sd(Y.Values))
* (mean(Y.Values) / 1.5)), max(Y.Values) * 1.10))
# label the x axis in accord with input method and indicator
if (Input.Type == 1) X.Label <- paste(length(X.Values),"Intervals")
if (Input.Type == 3) X.Label <- paste(length(X.Values),"Windows")
# label the y axis in accord with calculation type and output indicators
if (Calc.Cov) Y.Label <- "Covariance"
else Y.Label <- "Eigenvalue"
# plot the averaged graph, with dotted lines for each individual curve
if (Calc.Cov) Title <- "Averaged MAXCOV Curve"
else Title <- "Averaged MAXEIG Curve"
plot(X.Values, Y.Values.Avg, type = "l", pch = 16, ylim = Y.Range, xlim = X.Range,
ylab = Y.Label, xlab = X.Label, main = Title, lwd = 2)
if ((Gr.Ref == 1) | ((Gr.Ref == 2) & (Y.Range[1] < 0))) abline(h = 0)
for (i in 1:N.Curves)
lines(Curves[[i]][,1],Curves[[i]][,3], lty = 3, lwd = 2)
}
if (Gr.Avg) return(P.Avg)
}
###########################################################################################################
#
# Program to produce text summary (data parameters, analytic overview)
#
###########################################################################################################
MAXEIG.Summarize <- function(Data, N, I, Curve.Info, Curves, Structure, Input.Type, Ind.Triplets, Ind.Comp,
Intervals, N.Int, Windows, Overlap, Calc.Cov, Replications, Gr.Smooth, Gr.Avg,
P.Avg, Classify, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Bayes, Comp.Data, N.Pop, N.Samples,
Supplied.P, Supplied.Class)
{
# report relevant analytic information and program specifications
if (Calc.Cov) cat("\n\nSUMMARY OF MAXCOV ANALYTIC SPECIFICATIONS\n")
else cat("\n\nSUMMARY OF MAXEIG ANALYTIC SPECIFICATIONS\n")
cat("\nSample size: ",N,"\n")
cat("Number of indicator variables: ",I,"\n")
cat("Replications: ",Replications,"\n")
if (Intervals) Subsample.Label <- paste(N.Int,"intervals")
if (!Intervals) Subsample.Label <- paste(Windows,"windows with",Overlap,"overlap")
cat("Subsamples: ",Subsample.Label,"\n")
if (!Intervals) cat("n per window at",Windows,"windows: ", round(N /
(Windows * (1 - Overlap) + Overlap)),"\n")
if (Input.Type == 1) cat("n per interval: ",round(N / N.Int),"\n")
if (Ind.Triplets & !Ind.Comp) Ind.Label <-
"Variables serve in all possible (input, output, output) triplets"
if (Ind.Triplets & Ind.Comp) Ind.Label <-
paste("Two variables at a time serve as outputs, input = sum of remaining",(I-2),"variables")
if (!Ind.Triplets) Ind.Label <- "Each variable serves once as input, with all other variables as outputs"
cat("Indicators: ",Ind.Label,"\n")
cat("Total number of curves: ",length(Curve.Info[,1]),"\n")
if (Gr.Smooth == 0) cat("Y values smoothed for graphing and estimation: No\n")
if (Gr.Smooth == 1) cat("Y values smoothed for graphing and estimation: Yes, LOWESS method\n")
if (Gr.Smooth == 2) cat("Y values smoothed for graphing and estimation: Yes, running medians\n")
if (Calc.Cov) cat("Base rate estimation: General covariance mixture theorem\n")
else cat("Base rate estimation: Adapted general covariance mixture theorem\n")
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 1))
cat("Classification of cases: Cut total score at estimated base rate\n")
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 2))
cat("Classification of cases: Bayes' Theorem\n")
if (Supplied.P > 0) cat("Classification of cases: Cut total score at supplied base rate\n")
if (Supplied.Class) cat("Classification of cases: Supplied with data\n")
if (Comp.Data)
{
cat("Size of finite populations of comparison data: ",N.Pop,"\n")
cat("Number of samples of comparison data drawn from each population: ",N.Samples,"\n")
}
cat("\n")
}
###########################################################################################################
#
# Function to generate and report estimates of the taxon base rate and other latent parameters
#
###########################################################################################################
MAXEIG.P.Est <- function(Data, N, I, Curve.Info, Curves, Structure, Input.Type, Ind.Triplets, Ind.Comp,
Intervals, N.Int, Windows, Overlap, Calc.Cov, Replications, Gr.Smooth, Gr.Avg,
P.Avg, Classify, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Bayes, Save.Class, Comp.Data, Supplied.P,
Supplied.Class)
{
if (Calc.Cov) cat("\n\nSUMMARY OF MAXCOV PARAMETER ESTIMATES\n")
else cat("\n\nSUMMARY OF MAXEIG PARAMETER ESTIMATES\n")
# estimate input hitmax and base rate estimate for each curve
Data.Matrix <- cbind(Data[,1:I], 1:N, rep(0,N), rep(0,N)) # data plus case identifiers (for sorting into original
# form), summed indicators, class assignments
N.Curves <- length(Curve.Info[,1])
Curve.Est <- matrix(nrow = N.Curves, ncol = 2) # set up matrix to hold hitmax and base rate estimates
# Estimate P via general covariance mixture theorem (whether doing MAXCOV or MAXEIG)
VP.FP <- matrix(nrow = N.Curves, ncol = 2) # set up matrices to hold VP, FP estimates
for (i in 1:N.Curves)
{
if (Structure < 3) Input <- Curve.Info[i,2]
else
{
Data.Matrix[,(I + 2)] <- rep(0, N)
for (j in 1:I)
Data.Matrix[,(I + 2)] <- Data.Matrix[,(I + 2)] + Data.Matrix[,j]
Input <- I + 2
}
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,Input]),]
# determine curve data to use (smoothed or unsmoothed)
if (Gr.Smooth == 0) Curve.Data <- cbind(Curves[[i]][,1], Curves[[i]][,3], Curves[[i]][,2])
if (Gr.Smooth == 1)
{
Lowess.Y <- lowess(Curves[[i]][,1], Curves[[i]][,3])$y
for (j in 1:(length(Curves[[i]][,1])-1))
if (Curves[[i]][j,1] == Curves[[i]][(j + 1),1])
Lowess.Y <- c(Lowess.Y[1:j],Lowess.Y[j],Lowess.Y[(j+1):length(Lowess.Y)])
Lowess.Y <- Lowess.Y[1:length(Curves[[i]][,1])]
Curve.Data <- cbind(Curves[[i]][,1], Lowess.Y, Curves[[i]][,2])
}
if (Gr.Smooth == 2) Curve.Data <- cbind(Curves[[i]][,1], smooth(Curves[[i]][,3]), Curves[[i]][,2])
# estimate hitmax, P, VP, FP
Hitmax.Value <- max(Curve.Data[,2]) # y value at peak
Hitmax.Locs <- (1:(length(Curve.Data[,1])))[Curve.Data[,2] == Hitmax.Value] # peak subsample(s)
Hitmax.Loc <- round(median(Hitmax.Locs))
Hitmax.Case <- trunc(mean(Curves[[i]][Hitmax.Loc,4:5])) # case at midpoint of peak subsample(s)
Ints <- length(Curve.Data[,1])
p <- vector("numeric",Ints)
n <- vector("numeric",Ints)
K <- 4 * Hitmax.Value
VP <- 0
FP <- 0
for (j in 1:Ints)
{
if ((K ^ 2 - 4 * K * Curve.Data[j,2]) >= 0)
quad1 <- (K + sqrt(K ^ 2 - 4 * K * Curve.Data[j,2])) / ( 2 * K)
else quad1 <- 1
if ((K ^ 2 - 4 * K * Curve.Data[j,2]) >= 0)
quad2 <- (K - sqrt(K ^ 2 - 4 * K * Curve.Data[j,2])) / ( 2 * K)
else quad2 <- 0
if (j < Hitmax.Loc) p[j] <- min(1,quad2)
if (j == Hitmax.Loc) p[j] <- .5
if (j > Hitmax.Loc) p[j] <- max(quad1,0)
if ((Curve.Data[j,2] < 0) & (j < Hitmax.Loc)) p[j] <- 0
if ((Curve.Data[j,2] < 0) & (j > Hitmax.Loc)) p[j] <- 1
n[j] <- Curve.Data[j,3]
if (Structure < 3)
{
if (j >= Hitmax.Loc) VP <- VP + p[j] * n[j]
if (j >= Hitmax.Loc) FP <- FP + (1 - p[j]) * n[j]
}
}
Curve.Est[i,1] <- Data.Matrix[Hitmax.Case,Input]
Curve.Est[i,2] <- sum(p * n) / sum(n)
VP.FP[i,1] <- VP / sum(p * n)
VP.FP[i,2] <- FP / sum((1 - p) * n)
}
Row.Labels <- paste("Curve",1:N.Curves)
Col.Labels <- c("Hitmax","P")
dimnames(Curve.Est) <- list(Row.Labels, Col.Labels)
cat("\nEstimated hitmax values and taxon base rates for each curve:\n\n")
print(round(Curve.Est,3))
P.Est.M <- mean(Curve.Est[,2], na.rm = T)
P.Est.SD <- sd(Curve.Est[,2], na.rm = T)
cat("\nSummary of base rate estimates across curves:\n M =",round(P.Est.M,3),
"\n SD =",round(P.Est.SD,3),"\n\n")
# if more than one curve per indicator, gather hitmax, P, VP, FP estimates for each indicator
if (Structure < 3)
{
Ind.Est <- matrix(nrow = I, ncol = 4)
N.Each.Ind <- N.Curves / I
for (i in 1:I)
{
Ind.Curves <- ((i - 1) * N.Each.Ind + 1):(i * N.Each.Ind)
Ind.Est[i,1] <- median(Curve.Est[Ind.Curves,1], na.rm = T)
Ind.Est[i,2] <- mean(Curve.Est[Ind.Curves,2], na.rm = T)
Ind.Est[i,3] <- mean(VP.FP[Ind.Curves,1], na.rm = T)
Ind.Est[i,4] <- mean(VP.FP[Ind.Curves,2], na.rm = T)
}
if ((Structure == 2) & (I > 3))
{
Row.Labels <- paste("Indicator",1:I)
Col.Labels <- c("Hitmax","P","VP","FP")
dimnames(Ind.Est) <- list(Row.Labels, Col.Labels)
cat("\nEstimated hitmax, base rate, VP, FP estimates for each indicator:\n\n")
print(round(Ind.Est,9))
cat("\nSummary of base rate estimates across indicators:\n M =",
round(mean(Ind.Est[,2], na.rm = T),3), "\n SD =",
round(sd(Ind.Est[,2], na.rm = T),3),"\n\n")
}
if ((Structure == 1) | (I == 3))
{
Row.Labels <- paste("Indicator",1:I)
Col.Labels <- c("Hitmax","P","VP","FP")
dimnames(Ind.Est) <- list(Row.Labels, Col.Labels)
cat("\nEstimated VP, FP values at each indicator's hitmax cut:\n\n")
print(round(Ind.Est[,3:4],9))
cat("\n\n")
}
}
# print base rate estimate for averaged curve(s) if appropriate
if (Gr.Avg) cat("\nBase rate estimate for averaged curve =",round(P.Avg, 3),"\n\n")
# assign cases to classes
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 1)) # use estimated base rate to classify
{
for (i in 1:I)
Data.Matrix[, (I + 2)] <- Data.Matrix[, (I + 2)] + Data.Matrix[,i]
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 2)]),]
P.Est.Series <- c(rep(2, trunc(N * P.Est.M + .5)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,(I + 3)] <- c(Q.Est.Series, P.Est.Series)
}
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 2)) # use Bayes' Theorem to classify
{
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 1)]),]
P.Term <- rep(P.Est.M,N)
Q.Term <- rep(1 - P.Est.M,N)
for (i in 1:I)
if ((Ind.Est[i,3] < .9999) & (Ind.Est[i,4] < .9999))
{
P.Term <- (Data.Matrix[,i] >= Ind.Est[i,1]) * P.Term * Ind.Est[i,3] +
(Data.Matrix[,i] < Ind.Est[i,1]) * P.Term * (1 - Ind.Est[i,3])
Q.Term <- (Data.Matrix[,i] >= Ind.Est[i,1]) * Q.Term * Ind.Est[i,4] +
(Data.Matrix[,i] < Ind.Est[i,1]) * Q.Term * (1 - Ind.Est[i,4])
}
Bayes.Prob <- P.Term / (P.Term + Q.Term)
Data.Matrix[,(I + 3)] <- (Bayes.Prob >= .50) + 1
if (Gr.Bayes)
{
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
hist(Bayes.Prob, breaks = seq(from = 0, to = 1, by = .10), xlab = "Prob(Taxon Member)",
ylab = "Frequency", main="")
mtext("DISTRIBUTION OF BAYESIAN PROBABILITIES", outer=T)
}
}
if (Supplied.P > 0) # use supplied base rate to classify
{
for (i in 1:I)
Data.Matrix[, (I + 2)] <- Data.Matrix[, (I + 2)] + Data.Matrix[,i]
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 2)]),]
P.Est.Series <- c(rep(2, trunc(N * Supplied.P + .5)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,(I + 3)] <- c(Q.Est.Series, P.Est.Series)
}
if (Supplied.Class) # use supplied classification
Data.Matrix <- cbind(Data[,1:I], (1:N), rep(0,N), Data[, (I + 1)])
# Construct full-sample and within-groups data files
Data.f <- as.matrix(Data.Matrix[,(1:I)])
Nt <- sum(Data.Matrix[,(I + 3)]) - N
Nc <- N - Nt
Data.t <- Data.Matrix[(Data.Matrix[,(I + 3)] == 2),1:I]
Data.c <- Data.Matrix[(Data.Matrix[,(I + 3)] == 1),1:I]
for (i in 1:I)
{
if (min(Data.t[,i]) == max(Data.t[,i])) Data.t[,i] <- Data.t[,i] + rnorm(Nt, 0, .0001)
if (min(Data.c[,i]) == max(Data.c[,i])) Data.c[,i] <- Data.c[,i] + rnorm(Nc, 0, .0001)
}
# Calculate distributions for each indicator (full sample, taxon, complement)
Dist.Full <- matrix(nrow = I + 2, ncol = 4)
Dist.Tax <- matrix(nrow = I + 2, ncol = 4)
Dist.Comp <- matrix(nrow = I + 2, ncol = 4)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("M","SD","Skew","Kurtosis")
dimnames(Dist.Full) <- list(Rows, Cols)
dimnames(Dist.Tax) <- list(Rows, Cols)
dimnames(Dist.Comp) <- list(Rows, Cols)
for (i in 1:I)
{
Dist.Full[i,1] <- mean(Data.f[,i])
Dist.Tax[i,1] <- mean(Data.t[,i])
Dist.Comp[i,1] <- mean(Data.c[,i])
Dist.Full[i,2] <- sd(Data.f[,i])
Dist.Tax[i,2] <- sd(Data.t[,i])
Dist.Comp[i,2] <- sd(Data.c[,i])
Dist.Full[i,3] <- Skew(Data.f[,i])
Dist.Tax[i,3] <- Skew(Data.t[,i])
Dist.Comp[i,3] <- Skew(Data.c[,i])
Dist.Full[i,4] <- Kurtosis(Data.f[,i])
Dist.Tax[i,4] <- Kurtosis(Data.t[,i])
Dist.Comp[i,4] <- Kurtosis(Data.c[,i])
}
for (i in 1:4)
{
Dist.Full[I + 1,i] <- mean(Dist.Full[1:I,i])
Dist.Tax[I + 1,i] <- mean(Dist.Tax[1:I,i])
Dist.Comp[I + 1,i] <- mean(Dist.Comp[1:I,i])
Dist.Full[I + 2,i] <- sd(Dist.Full[1:I,i])
Dist.Tax[I + 2,i] <- sd(Dist.Tax[1:I,i])
Dist.Comp[I + 2,i] <- sd(Dist.Comp[1:I,i])
}
cat("\nIndicator distributions in the full sample ( N =",N,"):\n\n")
print(round(Dist.Full,3))
cat("\nIndicator distributions in the taxon ( n =",Nt,"):\n\n")
print(round(Dist.Tax,3))
cat("\nIndicator distributions in the complement ( n =",Nc,"):\n\n")
print(round(Dist.Comp,3))
# calculate validity for each indicator
Ind.Val <- matrix(nrow = I + 2, ncol = 2)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("Raw Units","Cohen's d")
dimnames(Ind.Val) <- list(Rows, Cols)
for (i in 1:I)
{
Ind.Val[i,1] <- Dist.Tax[i,1] - Dist.Comp[i,1]
Ind.Val[i,2] <- Ind.Val[i,1] / sqrt(((Nt - 1) * Dist.Tax[i,2]^2 + (Nc - 1) * Dist.Comp[i,2]^2)
/ (Nt + Nc - 2))
}
Ind.Val[I + 1,1] <- mean(Ind.Val[1:I,1])
Ind.Val[I + 1,2] <- mean(Ind.Val[1:I,2])
Ind.Val[I + 2,1] <- sd(Ind.Val[1:I,1])
Ind.Val[I + 2,2] <- sd(Ind.Val[1:I,2])
cat("\nBetween-group validity:\n\n")
print(round(Ind.Val,3))
# Calculate, print, and summarize full-sample and within-group correlations
Cor.Matrix <- cor(Data.f[,1:I])
Rows <- paste("Ind",1:I)
Cols <- paste("Ind",1:I)
dimnames(Cor.Matrix) <- list(Rows, Cols)
cat("\nIndicator correlations:\n\nFull Sample ( N =",N,"):\n\n")
print(round(Cor.Matrix,3))
Cor.Taxon <- cor(Data.t)
Cor.Comp <- cor(Data.c)
dimnames(Cor.Taxon) <- list(Rows, Cols)
dimnames(Cor.Comp) <- list(Rows, Cols)
cat("\nTaxon ( n =",Nt,"):\n\n")
print(round(Cor.Taxon,3))
cat("\nComplement ( n =",Nc,"):\n\n")
print(round(Cor.Comp,3))
Cor.Summary <- matrix(nrow = 3, ncol = 2)
dimnames(Cor.Summary) <- list(c("Full Sample","Taxon", "Complement"), c("M", "SD"))
Cor.Summary[1,1] <- mean(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,1] <- mean(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,1] <- mean(Cor.Comp[lower.tri(Cor.Comp)])
Cor.Summary[1,2] <- sd(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,2] <- sd(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,2] <- sd(Cor.Comp[lower.tri(Cor.Comp)])
cat("\nSummary of indicator correlations:\n\n")
print(round(Cor.Summary,3))
cat("\n")
# return list of class assignments and mean base rate estimate (after resorting by original case numbers)
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 1)]),]
if (Save.Class < 2) return(list(Data.Matrix[,(I + 3)],P.Est.M,P.Est.SD))
else return(list(round(Bayes.Prob,9),P.Est.M,P.Est.SD))
}
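###########################################################################################################
#
# Note on the Bayesian classification above (a reading of the code, for reference): each indicator is
# dichotomized at its hitmax cut, VP and FP serve as P(score >= hitmax | taxon) and
# P(score >= hitmax | complement), and the indicator likelihoods are multiplied into the prior base
# rate under an assumption of independence:
#
#   Prob(taxon | scores) = P.Term / (P.Term + Q.Term)
#
# Cases with Bayes.Prob >= .50 are assigned to the taxon (class code 2), the rest to the complement.
#
###########################################################################################################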
###########################################################################################################
#
# MAXSLOPE
#
# Written by John Ruscio
# Last modified January 9, 2012
#
###########################################################################################################
#
# Preliminary program to read data, call for comparison data if/as requested, submit each data set to
# main program along with an identifying label.
#
###########################################################################################################
MAXSLOPE <- function(Data.Set, Comp.Data = T, N.Pop = 100000, N.Samples = 100, Supplied.Class = F,
Supplied.P = 0, File.Output = F, File.Name = "Output.txt", Seed = 1)
{
Data.Set <- Remove.Missing(Data.Set) # remove any missing data (listwise deletion)
N <- dim(Data.Set)[1] # determine sample size
I <- dim(Data.Set)[2] # determine number of indicators; adjust if one column contains class assignments
if (Supplied.Class) I <- I - 1
if (I > 2)
{
cat("*** More than two indicators were submitted for analysis; only the first two indicators will be used ***\n\n")
if (Supplied.Class) Data.Set <- Data.Set[,c(1,2,(I + 1))]
else Data.Set <- Data.Set[,1:2]
I <- 2
}
Data.Set[,1:2] <- apply(Data.Set[,1:2], 2, scale) # standardize data
set.seed(Seed) # set random number seed to allow exact replications of results
if ((Comp.Data) & (N.Samples == 1)) N.Samples <- 100 # do not allow a single sample of comparison data
# if output to file is requested, redirect all text output
if (File.Output) sink(File.Name)
Research.Results <- MAXSLOPE.Main(Data.Set, Research.Data = T, Comp.Data, N.Pop, N.Samples, N,
Supplied.P, Supplied.Class)
Xs <- Research.Results[[1]]
Slopes <- Research.Results[[2]]
P.Est <- Research.Results[[3]]
# if comparison data were requested, then generate and submit data for analysis
if (Comp.Data)
{
# create matrices to store results for comparison
Tax.Slopes <- Dim.Slopes <- matrix(0, nrow = N.Samples, ncol = N - 1)
# generate and analyze categorical data
set.seed(Seed)
if (!Supplied.Class) # if class assignments were not provided, generate using base rate
{
Data.Set <- cbind(Data.Set, rep(0, N))
if (Supplied.P == 0) P <- P.Est # use estimated P in research data if no P was provided
else P <- Supplied.P
if ((P * N) < 20) P <- 20 / N # ensure at least 20 taxon cases in the generated population
if ((N - (P * N)) < 20) P <- (N - 20) / N # ensure at least 20 complement cases
for (i in 1:I)
Data.Set[, (I + 1)] <- Data.Set[, (I + 1)] + Data.Set[,i]
Data.Set <- Data.Set[sort.list(Data.Set[,(I + 1)]),]
P.Est.Series <- c(rep(2, round(N * P)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Set[,(I + 1)] <- c(Q.Est.Series, P.Est.Series)
}
Tax.Data <- Data.Set[(Data.Set[,(I + 1)] == 2), 1:I]
Com.Data <- Data.Set[(Data.Set[,(I + 1)] == 1), 1:I]
P.Data <- dim(Tax.Data)[1] / (dim(Tax.Data)[1] + dim(Com.Data)[1])
cat("\nGenerating and analyzing comparison data\n")
Sim.Tax <- GenData(Tax.Data, round(N.Pop * P.Data))
Sim.Com <- GenData(Com.Data, (N.Pop - round(N.Pop * P.Data)))
Sim.Pop <- rbind(Sim.Tax, Sim.Com)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat(" Categorical population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
Tax.Slopes[Sims,] <- MAXSLOPE.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop,
N.Samples, N, Supplied.P, Supplied.Class)
}
cat(" Analysis of",N.Samples,"samples of categorical comparison data completed\n")
# generate and analyze dimensional samples
set.seed(Seed)
Sim.Pop <- GenData(Data.Set[, 1:I], N.Pop)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat("\n Dimensional population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
Dim.Slopes[Sims,] <- MAXSLOPE.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop,
N.Samples, N, Supplied.P, Supplied.Class)
}
cat(" Analysis of",N.Samples,"samples of dimensional comparison data completed\n")
# 2-panel plot for comparison data
Tax.Min <- Tax.Max <- Tax.Q1 <- Tax.Q2 <- Tax.Q3 <- Dim.Min <- Dim.Max <- Dim.Q1 <- Dim.Q2 <- Dim.Q3 <- rep(0, length(Xs))
for (i in 1:length(Xs))
{
Tax.Min[i] <- min(Tax.Slopes[(Tax.Slopes[,i] != -Inf),i])
Tax.Max[i] <- max(Tax.Slopes[(Tax.Slopes[,i] != Inf),i])
Tax.Q1[i] <- sort(Tax.Slopes[,i])[N.Samples * .25]
Tax.Q2[i] <- sort(Tax.Slopes[,i])[N.Samples * .50]
Tax.Q3[i] <- sort(Tax.Slopes[,i])[N.Samples * .75]
Dim.Min[i] <- min(Dim.Slopes[(Dim.Slopes[,i] != -Inf),i])
Dim.Max[i] <- max(Dim.Slopes[(Dim.Slopes[,i] != Inf),i])
Dim.Q1[i] <- sort(Dim.Slopes[,i])[N.Samples * .25]
Dim.Q2[i] <- sort(Dim.Slopes[,i])[N.Samples * .50]
Dim.Q3[i] <- sort(Dim.Slopes[,i])[N.Samples * .75]
}
Min.X <- min(Xs)
Max.X <- max(Xs)
Min.Y <- min(c(Slopes, Tax.Min[Tax.Min != -Inf], Dim.Min[Dim.Min != -Inf]), na.rm = T)
Max.Y <- max(c(Slopes, Tax.Max[Tax.Max != Inf], Dim.Max[Dim.Max != Inf]), na.rm = T)
dev.new(width=7, height=3.5)
par(mfrow=c(1,2), omi=c(.25,.25,.25,.25), cex=.75)
plot(x = Xs, y = Slopes, type = "o", pch = 16, lwd = 2, xlim = c(Min.X, Max.X),
ylim = c(Min.Y, Max.Y), xlab = "Indicator Score", ylab = "LOWESS Slope", main = "Categorical Comparison Data")
for (i in 1:(length(Xs)-1))
polygon(x = c(Xs[i], Xs[i], Xs[i+1], Xs[i+1]),
y = c(Tax.Q1[i], Tax.Q3[i], Tax.Q3[i+1], Tax.Q1[i+1]), col = 8, lty = 0)
lines(Xs, Tax.Min)
lines(Xs, Tax.Max)
points(Xs, Slopes, type = "o", pch = 16)
lines(Xs, Slopes, lwd = 2)
plot(x = Xs, y = Slopes, type = "o", pch = 16, lwd = 2, xlim = c(Min.X, Max.X),
ylim = c(Min.Y, Max.Y), xlab = "Indicator Score", ylab = "LOWESS Slope", main = "Dimensional Comparison Data")
for (i in 1:(length(Xs)-1))
polygon(x = c(Xs[i], Xs[i], Xs[i+1], Xs[i+1]),
y = c(Dim.Q1[i], Dim.Q3[i], Dim.Q3[i+1], Dim.Q1[i+1]), col = 8, lty = 0)
lines(Xs, Dim.Min)
lines(Xs, Dim.Max)
points(Xs, Slopes, type = "o", pch = 16)
lines(Xs, Slopes, lwd = 2)
# calculate and print comparison curve fit index (CCFI)
Fit.RMSR.t <- sqrt(sum((Slopes - Tax.Q2)^2)/(N - 1))
Fit.RMSR.d <- sqrt(sum((Slopes - Dim.Q2)^2)/(N - 1))
CCFI <- Fit.RMSR.d / (Fit.RMSR.d + Fit.RMSR.t)
cat("\n\nComparison curve fit index (CCFI) =",round(Fit.RMSR.d,4),"/ (",round(Fit.RMSR.d,4),
"+",round(Fit.RMSR.t,4),") =",round(CCFI,3),"\n")
cat(" Note: CCFI values can range from 0 (dimensional) to 1 (categorical). The more a CCFI\n")
cat(" value deviates from .50, the stronger the result. When .40 < CCFI < .60,\n")
cat(" this should be interpreted with caution.\n\n")
}
cat("\n")
if (File.Output) sink()
}
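###########################################################################################################
#
# Illustrative call (hypothetical data, not part of the original file). MAXSLOPE uses exactly two
# indicators, regressing each on the other with LOWESS and averaging the two slope curves:
#
#   dat <- matrix(rnorm(300 * 2), ncol = 2)   # N = 300 cases, 2 indicators
#   MAXSLOPE(dat, N.Samples = 100, Seed = 1)
#
###########################################################################################################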
###########################################################################################################
#
# Main program -- calculate, plot, and summarize results -- no subsidiary programs are called
#
###########################################################################################################
MAXSLOPE.Main <- function(Data, Research.Data, Comp.Data, N.Pop, N.Samples, N, Supplied.P, Supplied.Class)
{
# perform two MAXSLOPE analyses
x1 <- y2 <- Data[,1]
x2 <- y1 <- Data[,2]
low1 <- lowess(y1 ~ x1)
low2 <- lowess(y2 ~ x2)
slope1 <- (low1$y[2:N] - low1$y[1:(N - 1)]) /
(low1$x[2:N] - low1$x[1:(N - 1)])
slope2 <- (low2$y[2:N] - low2$y[1:(N - 1)]) /
(low2$x[2:N] - low2$x[1:(N - 1)])
# replace missing slopes with nearest neighbor
while(sum(is.nan(slope1)) > 0)
{
for (i in 2:(N - 1))
if (is.nan(slope1[i]) & !is.nan(slope1[i - 1])) slope1[i] <- slope1[i - 1]
for (i in 1:(N - 2))
if (is.nan(slope1[i]) & !is.nan(slope1[i + 1])) slope1[i] <- slope1[i + 1]
}
while(sum(is.nan(slope2)) > 0)
{
for (i in 2:(N - 1))
if (is.nan(slope2[i]) & !is.nan(slope2[i - 1])) slope2[i] <- slope2[i - 1]
for (i in 1:(N - 2))
if (is.nan(slope2[i]) & !is.nan(slope2[i + 1])) slope2[i] <- slope2[i + 1]
}
# average the curves, estimate the base rate, and return results
Y <- (slope1 + slope2) / 2
X <- (low1$x[1:(N - 1)] + low2$x[1:(N - 1)]) / 2
my <- max(Y)
my.x <- max(X[Y == my])
my.n <- sum(X >= my.x) - .5 * (sum(Y == my) - 1)
P.Est <- my.n / N
if (Research.Data)
{
# create and label new graph sheet
dev.new(width=3.7, height=3.5)
par(omi=c(.25,.25,.25,.25), cex = .75)
# plot results as slopes by x values
X.Range <- c(min(X), max(X))
Y.Range <- c(min(0, min(Y)), max(((max(Y) / sd(Y))
* (mean(Y) / 1.5)), max(Y) * 1.10))
plot(X, Y, type = "o", pch = 16, xlim = X.Range, ylim = Y.Range,
main = "MAXSLOPE Curve", xlab = "Indicator Score", ylab = "LOWESS Slope")
# report relevant analytic information and program specifications
cat("\n\nSUMMARY OF MAXSLOPE ANALYTIC SPECIFICATIONS\n")
cat("\nSample size: ",N,"\n")
if ((Supplied.P == 0) & (!Supplied.Class))
cat("Classification of cases: Cut total score at estimated base rate\n")
if (Supplied.P > 0) cat("Classification of cases: Cut total score at supplied base rate\n")
if (Supplied.Class) cat("Classification of cases: Supplied with data\n")
if (Comp.Data)
{
cat("Size of finite populations of comparison data: ",N.Pop,"\n")
cat("Number of samples of comparison data drawn from each population: ",N.Samples,"\n")
}
cat("\n")
# report estimated taxon base rate
cat("\n\nSUMMARY OF MAXSLOPE PARAMETER ESTIMATES\n")
cat("\nEstimated taxon base rate =", round(P.Est, 3), "\n\n")
# assign cases to classes using estimated base rate
if ((Supplied.P == 0) & (!Supplied.Class))
{
Data.Matrix <- cbind(Data, 1:N)
Data.Matrix[,3] <- apply(Data.Matrix[,1:2], 1, sum)
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,3]),]
P.Est.Series <- c(rep(2, round(N * P.Est)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,3] <- c(Q.Est.Series, P.Est.Series)
}
# assign cases to classes using supplied base rate
if ((Supplied.P > 0) & (!Supplied.Class))
{
Data.Matrix <- cbind(Data, 1:N)
Data.Matrix[,3] <- apply(Data.Matrix[,1:2], 1, sum)
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,3]),]
P.Est.Series <- c(rep(2, round(N * Supplied.P)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,3] <- c(Q.Est.Series, P.Est.Series)
}
# assign cases to classes using supplied classification
if (Supplied.Class)
Data.Matrix <- Data
# Construct full-sample and within-groups data files
Data.f <- as.matrix(Data.Matrix[,1:2])
Nt <- sum(Data.Matrix[,3]) - N
Nc <- N - Nt
Data.t <- Data.Matrix[(Data.Matrix[,3] == 2),1:2]
Data.c <- Data.Matrix[(Data.Matrix[,3] == 1),1:2]
for (i in 1:2)
{
if (min(Data.t[,i]) == max(Data.t[,i])) Data.t[,i] <- Data.t[,i] + rnorm(Nt, 0, .0001)
if (min(Data.c[,i]) == max(Data.c[,i])) Data.c[,i] <- Data.c[,i] + rnorm(Nc, 0, .0001)
}
# Calculate distributions for each indicator (full sample, taxon, complement)
Dist.Full <- Dist.Tax <- Dist.Comp <- matrix(nrow = 4, ncol = 4)
Rows <- c(paste("Ind",1:2), "M", "SD")
Cols <- c("M","SD","Skew","Kurtosis")
dimnames(Dist.Full) <- list(Rows, Cols)
dimnames(Dist.Tax) <- list(Rows, Cols)
dimnames(Dist.Comp) <- list(Rows, Cols)
for (i in 1:2)
{
Dist.Full[i,1] <- mean(Data.f[,i])
Dist.Tax[i,1] <- mean(Data.t[,i])
Dist.Comp[i,1] <- mean(Data.c[,i])
Dist.Full[i,2] <- sd(Data.f[,i])
Dist.Tax[i,2] <- sd(Data.t[,i])
Dist.Comp[i,2] <- sd(Data.c[,i])
Dist.Full[i,3] <- Skew(Data.f[,i])
Dist.Tax[i,3] <- Skew(Data.t[,i])
Dist.Comp[i,3] <- Skew(Data.c[,i])
Dist.Full[i,4] <- Kurtosis(Data.f[,i])
Dist.Tax[i,4] <- Kurtosis(Data.t[,i])
Dist.Comp[i,4] <- Kurtosis(Data.c[,i])
}
for (i in 1:4)
{
Dist.Full[3,i] <- mean(Dist.Full[1:2,i])
Dist.Tax[3,i] <- mean(Dist.Tax[1:2,i])
Dist.Comp[3,i] <- mean(Dist.Comp[1:2,i])
Dist.Full[4,i] <- sd(Dist.Full[1:2,i])
Dist.Tax[4,i] <- sd(Dist.Tax[1:2,i])
Dist.Comp[4,i] <- sd(Dist.Comp[1:2,i])
}
cat("\nIndicator distributions in the full sample ( N =",N,"):\n\n")
print(round(Dist.Full,3))
cat("\nIndicator distributions in the taxon ( n =",Nt,"):\n\n")
print(round(Dist.Tax,3))
cat("\nIndicator distributions in the complement ( n =",Nc,"):\n\n")
print(round(Dist.Comp,3))
# calculate validity for each indicator
Ind.Val <- matrix(nrow = 4, ncol = 2)
Rows <- c(paste("Ind",1:2), "M", "SD")
Cols <- c("Raw Units","Cohen's d")
dimnames(Ind.Val) <- list(Rows, Cols)
for (i in 1:2)
{
Ind.Val[i,1] <- Dist.Tax[i,1] - Dist.Comp[i,1]
Ind.Val[i,2] <- Ind.Val[i,1] / sqrt(((Nt - 1) * Dist.Tax[i,2]^2 + (Nc - 1) * Dist.Comp[i,2]^2)
/ (Nt + Nc - 2))
}
Ind.Val[3,1] <- mean(Ind.Val[1:2,1])
Ind.Val[3,2] <- mean(Ind.Val[1:2,2])
Ind.Val[4,1] <- sd(Ind.Val[1:2,1])
Ind.Val[4,2] <- sd(Ind.Val[1:2,2])
cat("\nBetween-group validity:\n\n")
print(round(Ind.Val,3))
# Calculate, print, and summarize full-sample and within-group correlations
cat("\nIndicator correlations:\n\n")
cat("Full Sample r =",round(cor(Data.f)[1,2],3),"\n")
cat(" Taxon r =",round(cor(Data.t)[1,2],3),"\n")
cat(" Complement r =",round(cor(Data.c)[1,2],3),"\n\n")
}
# if research data, return x values, slopes, and base rate estimate;
# if comparison data, return only slopes
if (Research.Data) return(list(X, Y, P.Est))
else return(Y)
}
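# Worked illustration of the base rate estimate above (a sketch with made-up numbers,
# not part of the original program): with 100 curve points and a unique peak at the
# 80th sorted score, 21 points lie at or above the peak, so P.Est = 21/100 = .21.
if (FALSE)
{
    X <- 1:100                                # hypothetical sorted input scores
    Y <- dnorm(X, mean = 80, sd = 10)         # hypothetical slope curve peaking at X = 80
    my <- max(Y)
    my.x <- max(X[Y == my])
    (sum(X >= my.x) - .5 * (sum(Y == my) - 1)) / length(X)   # = .21
}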
###########################################################################################################
#
# L-Mode
#
# Written by John Ruscio
# Last modified January 9, 2012
#
###########################################################################################################
#
# Preliminary program to read data, call for comparison data if/as requested, submit each data set to
# main program along with an identifying label.
#
###########################################################################################################
LMode <- function(Data.Set, Comp.Data = T, N.Pop = 100000, N.Samples = 100, Supplied.Class = F,
Supplied.P = 0, Mode.L = -.001, Mode.R = .001, St.Ind = T, Classify = 1, Save.Class = F,
File.Output = F, File.Name = "Output.txt", Seed = 1)
{
Data.Set <- Remove.Missing(Data.Set) # remove any missing data (listwise deletion)
N <- dim(Data.Set)[1] # determine sample size
I <- dim(Data.Set)[2] # determine number of indicators; adjust if one column contains class assignments
if (Supplied.Class) I <- I - 1
if (St.Ind) Data.Set[,1:I] <- apply(Data.Set[,1:I], 2, scale) # standardize data if requested
set.seed(Seed) # set random number seed to allow exact replications of results
if ((Comp.Data) & (N.Samples == 1)) N.Samples <- 100 # do not allow a single sample of comparison data
# if output to file is requested, redirect all text output
if (File.Output) sink(File.Name)
Research.Results <- LMode.Main(Data.Set, Research.Data = T, Comp.Data, N.Pop, N.Samples, N, I,
Mode.L, Mode.R, Classify, Save.Class, Supplied.P, Supplied.Class)
Scores <- Research.Results[[1]]
P.Est <- Research.Results[[2]]
Class.Assign <- Research.Results[[3]]
# if comparison data were requested, then generate and submit data for analysis
if (Comp.Data)
{
# create matrices to store results for comparison
Tax.Scores <- Dim.Scores <- matrix(0, nrow = N.Samples, ncol = N)
# generate and analyze categorical data
set.seed(Seed)
if (!Supplied.Class) # if class assignments were not provided, generate using base rate
{
Data.Set <- cbind(Data.Set, rep(0, N))
if (Supplied.P == 0) P <- P.Est # use estimated P in research data if no P was provided
else P <- Supplied.P
if ((P * N) < 20) P <- 20 / N               # ensure at least 20 taxon members in the comparison population
if ((N - (P * N)) < 20) P <- (N - 20) / N   # ensure at least 20 complement members
for (i in 1:I)
Data.Set[, (I + 1)] <- Data.Set[, (I + 1)] + Data.Set[,i]
Data.Set <- Data.Set[sort.list(Data.Set[,(I + 1)]),]
P.Est.Series <- c(rep(2, round(N * P)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Set[,(I + 1)] <- c(Q.Est.Series, P.Est.Series)
}
Tax.Data <- Data.Set[(Data.Set[,(I + 1)] == 2), 1:I]
Com.Data <- Data.Set[(Data.Set[,(I + 1)] == 1), 1:I]
P.Data <- dim(Tax.Data)[1] / (dim(Tax.Data)[1] + dim(Com.Data)[1])
cat("\nGenerating and analyzing comparison data\n")
Sim.Tax <- GenData(Tax.Data, round(N.Pop * P.Data))
Sim.Com <- GenData(Com.Data, (N.Pop - round(N.Pop * P.Data)))
Sim.Pop <- rbind(Sim.Tax, Sim.Com)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat(" Categorical population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
Tax.Scores[Sims,] <- LMode.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop,
                                N.Samples, N, I, Mode.L, Mode.R, Classify, Save.Class,
                                Supplied.P, Supplied.Class)
}
cat(" Analysis of",N.Samples,"samples of categorical comparison data completed\n")
# generate and analyze dimensional samples
set.seed(Seed)
Sim.Pop <- GenData(Data.Set[, 1:I], N.Pop)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat("\n Dimensional population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
Dim.Scores[Sims,] <- LMode.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop,
                                N.Samples, N, I, Mode.L, Mode.R, Classify, Save.Class,
                                Supplied.P, Supplied.Class)
}
cat(" Analysis of",N.Samples,"samples of dimensional comparison data completed\n")
Res.Density <- density(Scores)
Tax.Density <- Dim.Density <- matrix(0, nrow = N.Samples, ncol = 512)
for (i in 1:N.Samples)
{
Tax.Density[i, ] <- density(Tax.Scores[i,], from = min(Res.Density$x), to = max(Res.Density$x))$y
Dim.Density[i, ] <- density(Dim.Scores[i,], from = min(Res.Density$x), to = max(Res.Density$x))$y
}
Tax.Density.M <- apply(Tax.Density, 2, mean)
Dim.Density.M <- apply(Dim.Density, 2, mean)
# 2-panel plot for comparison data
Tax.Sum <- Dim.Sum <- matrix(0, nrow = 6, ncol = 512)
for (i in 1:512)
{
Tax.Sum[,i] <- summary(Tax.Density[,i])
Dim.Sum[,i] <- summary(Dim.Density[,i])
}
dev.new(width=7, height=3.5)
par(mfrow=c(1,2), omi=c(.25,.25,.25,.25), cex=.75)
Max.Y <- max(c(Res.Density$y, Tax.Sum[6,], Dim.Sum[6,]))
Min.X <- min(Res.Density$x)
Max.X <- max(Res.Density$x)
plot(Res.Density, type = "l", xlim = c(Min.X, Max.X), ylim = c(0, Max.Y), xlab = "Factor Scores",
ylab = "Density", main = "Categorical Comparison Data", lwd = 2)
for (i in 1:511)
polygon(x = c(Res.Density$x[i], Res.Density$x[i], Res.Density$x[i+1], Res.Density$x[i+1]),
y = c(Tax.Sum[2,i], Tax.Sum[5,i], Tax.Sum[5,i+1], Tax.Sum[2,i+1]), col = 8, lty = 0)
lines(Res.Density$x, Tax.Sum[1,])
lines(Res.Density$x, Tax.Sum[6,])
lines(Res.Density, lwd = 2)
plot(Res.Density, type = "l", xlim = c(Min.X, Max.X), ylim = c(0, Max.Y), xlab = "Factor Scores",
ylab = "Density", main = "Dimensional Comparison Data", lwd = 2)
for (i in 1:511)
polygon(x = c(Res.Density$x[i], Res.Density$x[i], Res.Density$x[i+1], Res.Density$x[i+1]),
y = c(Dim.Sum[2,i], Dim.Sum[5,i], Dim.Sum[5,i+1], Dim.Sum[2,i+1]), col = 8, lty = 0)
lines(Res.Density$x, Dim.Sum[1,])
lines(Res.Density$x, Dim.Sum[6,])
lines(Res.Density, lwd = 2)
# calculate and print comparison curve fit index (CCFI)
data <- list(Res.Density, Dim.Density.M)
Fit.RMSR.d <- optimize(f = Fit.Densities, interval = c(-1,1), data)$objective
data <- list(Res.Density, Tax.Density.M)
Fit.RMSR.t <- optimize(f = Fit.Densities, interval = c(-1,1), data)$objective
CCFI <- Fit.RMSR.d / (Fit.RMSR.d + Fit.RMSR.t)
cat("\n\nComparison curve fit index (CCFI) =",round(Fit.RMSR.d,4),"/ (",round(Fit.RMSR.d,4),
"+",round(Fit.RMSR.t,4),") =",round(CCFI,3),"\n")
cat(" Note: CCFI values can range from 0 (dimensional) to 1 (categorical). The more a CCFI\n")
cat(" value deviates from .50, the stronger the result. When .40 < CCFI < .60,\n")
cat(" this should be interpreted with caution.\n\n")
}
cat("\n")
if (File.Output) sink()
if (Save.Class) return(Class.Assign)
}
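# Example usage (a sketch, not part of the original program; assumes this entire file
# has been sourced; CreateData, defined below, is used only to simulate illustrative data):
if (FALSE)
{
    Sim <- CreateData("Tax", N = 600, k = 4, P = .50, d = 2.00)   # indicators in columns 1:4
    LMode(Sim[, 1:4], Comp.Data = TRUE, N.Samples = 100)
}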
###########################################################################################################
#
# Main program -- calculate, plot, and summarize results -- no subsidiary programs are called
#
###########################################################################################################
LMode.Main <- function(Data, Research.Data, Comp.Data, N.Pop, N.Samples, N, I, Mode.L, Mode.R, Classify,
Save.Class, Supplied.P, Supplied.Class)
{
# load R code for running medians smoother
if ((version$major == 1) & (version$minor < 9)) library(eda)
else library(stats) # load R code for running medians smoother
# make room in data file for factor scores (col = I + 1) and class assignments (col = I + 2)
Data.Matrix <- cbind(Data[,1:I], rep(0, N), rep(0, N))
# submit all indicators to factor analysis and store factor scores
Factor.Results <- Factor.Analysis.LMode(Data.Matrix[,1:I])
Data.Matrix[,(I + 1)] <- Factor.Results$scores
Neg.Loadings <- (mean(Factor.Results$loadings) < 0)
if (Neg.Loadings) Data.Matrix[,(I + 1)] <- Data.Matrix[,(I + 1)] * -1
Score.Density <- density(Data.Matrix[,(I + 1)])
if (Research.Data)
{
# compute location of estimated latent modes
Max.L <- max(smooth(Score.Density$y)[Score.Density$x <= Mode.L])
Loc.L <- (1: length(Score.Density$x))[smooth(Score.Density$y) == Max.L]
if (length(Loc.L) > 1) Loc.L <- Loc.L[1]
Max.R <- max(smooth(Score.Density$y)[Score.Density$x >= Mode.R])
Loc.R <- (1: length(Score.Density$x))[smooth(Score.Density$y) == Max.R]
if (length(Loc.R) > 1) Loc.R <- Loc.R[1]
# create and label new graph sheet
dev.new(width=3.7, height=3.5)
par(omi=c(.25,.25,.25,.25), cex = .75)
# plot results as frequency distribution of factor scores, with vertical lines at estimated latent modes
Max.Y <- max(smooth(Score.Density$y))
Min.X <- min(smooth(Score.Density$x))
Max.X <- max(smooth(Score.Density$x))
plot(Score.Density, type = "l", xlim = c(Min.X, Max.X), ylim = c(0, Max.Y),
main = "L-Mode Curve", xlab = "Factor Scores", ylab = "Density")
abline(v = Score.Density$x[Loc.L])
abline(v = Score.Density$x[Loc.R])
# report relevant analytic information and program specifications
cat("\n\nSUMMARY OF L-MODE ANALYTIC SPECIFICATIONS\n")
cat("\nSample size: ",N,"\n")
cat("Number of indicator variables: ",I,"\n")
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 1))
cat("Classification of cases: Cut total score at estimated base rate\n")
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 2))
cat("Classification of cases: Nearest mode\n")
if (Supplied.P > 0) cat("Classification of cases: Cut total score at supplied base rate\n")
if (Supplied.Class) cat("Classification of cases: Supplied with data\n")
if (Comp.Data)
{
cat("Size of finite populations of comparison data: ",N.Pop,"\n")
cat("Number of samples of comparison data drawn from each population: ",N.Samples,"\n")
}
cat("\n")
# estimate (and report) taxon base rate from location of latent modes
cat("\n\nSUMMARY OF L-MODE PARAMETER ESTIMATES\n")
P.R <- 1 / (1 + Score.Density$x[Loc.R] ^ 2)
P.L <- 1 - (1 / (1 + Score.Density$x[Loc.L] ^ 2))
P.Est <- mean(c(P.L, P.R))
cat("\nSummary of taxon base rate estimates:\n")
cat(" Based on location of left mode: ",round(P.L,3),"\n")
cat(" Based on location of right mode: ",round(P.R,3),"\n")
cat(" M =",round(P.Est,3),"\n\n")
# estimate (and report) latent class M on each indicator (using factor loadings)
Ms <- apply(Data.Matrix[,1:I], 2, mean)
SDs <- apply(Data.Matrix[,1:I], 2, sd)
Q.Est <- 1 - P.Est
M.SD <- matrix(nrow = I, ncol = 2)
M.SD[,1] <- Ms + SDs * (Q.Est / sqrt(P.Est * Q.Est) * as.vector(Factor.Results$loadings))
M.SD[,2] <- Ms + SDs * (-P.Est / sqrt(P.Est * Q.Est) * as.vector(Factor.Results$loadings))
Row.Labels <- paste("Indicator",1:I)
Col.Labels <- c("Taxon","Complement")
dimnames(M.SD) <- list(Row.Labels, Col.Labels)
cat("\nEstimated latent group M on each indicator (via factor loadings):\n\n")
print(round(M.SD,3))
cat("\n")
# assign cases to classes using estimated base rate
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 1))
{
Data.Matrix <- cbind(Data.Matrix, 1:N)
Data.Matrix[,(I + 2)] <- apply(Data.Matrix[,1:I], 1, sum)
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 2)]),]
P.Est.Series <- c(rep(2, round(N * P.Est)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,(I + 2)] <- c(Q.Est.Series, P.Est.Series)
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 3)]),]
Data.Matrix <- Data.Matrix[,1:(I + 2)]
}
# assign cases to classes and print base rate estimate from empirical classification
if ((Supplied.P == 0) & (!Supplied.Class) & (Classify == 2))
{
Mean.Diff.C <- abs(Data.Matrix[,(I + 1)] - Score.Density$x[Loc.L])
Mean.Diff.T <- abs(Data.Matrix[,(I + 1)] - Score.Density$x[Loc.R])
for (i in 1:N)
if (Mean.Diff.T[i] <= Mean.Diff.C[i]) Data.Matrix[i,(I + 2)] <- 2
else Data.Matrix[i,(I + 2)] <- 1
cat("\nBase rate estimated from classification of cases:\n")
cat(" P =",round((mean(Data.Matrix[,(I + 2)]) - 1),3),"\n\n")
}
# assign cases to classes using supplied base rate
if (Supplied.P > 0)
{
Data.Matrix <- cbind(Data.Matrix, 1:N)
Data.Matrix[,(I + 2)] <- apply(Data.Matrix[,1:I], 1, sum)
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 2)]),]
P.Est.Series <- c(rep(2, round(N * Supplied.P)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,(I + 2)] <- c(Q.Est.Series, P.Est.Series)
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 3)]),]
Data.Matrix <- Data.Matrix[,1:(I + 2)]
}
# assign cases to classes using supplied classification
if (Supplied.Class)
Data.Matrix <- cbind(Data[,1:I],Data.Matrix[,(I + 1)],Data[,(I + 1)])
# Construct full-sample and within-groups data files
Data.f <- as.matrix(Data.Matrix[,(1:I)])
Nt <- sum(Data.Matrix[,(I + 2)]) - N
Nc <- N - Nt
Data.t <- Data.Matrix[(Data.Matrix[,(I + 2)] == 2),1:I]
Data.c <- Data.Matrix[(Data.Matrix[,(I + 2)] == 1),1:I]
for (i in 1:I)
{
if (min(Data.t[,i]) == max(Data.t[,i])) Data.t[,i] <- Data.t[,i] + rnorm(Nt, 0, .0001)
if (min(Data.c[,i]) == max(Data.c[,i])) Data.c[,i] <- Data.c[,i] + rnorm(Nc, 0, .0001)
}
# Calculate distributions for each indicator (full sample, taxon, complement)
Dist.Full <- matrix(nrow = I + 2, ncol = 4)
Dist.Tax <- matrix(nrow = I + 2, ncol = 4)
Dist.Comp <- matrix(nrow = I + 2, ncol = 4)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("M","SD","Skew","Kurtosis")
dimnames(Dist.Full) <- list(Rows, Cols)
dimnames(Dist.Tax) <- list(Rows, Cols)
dimnames(Dist.Comp) <- list(Rows, Cols)
for (i in 1:I)
{
Dist.Full[i,1] <- mean(Data.f[,i])
Dist.Tax[i,1] <- mean(Data.t[,i])
Dist.Comp[i,1] <- mean(Data.c[,i])
Dist.Full[i,2] <- sd(Data.f[,i])
Dist.Tax[i,2] <- sd(Data.t[,i])
Dist.Comp[i,2] <- sd(Data.c[,i])
Dist.Full[i,3] <- Skew(Data.f[,i])
Dist.Tax[i,3] <- Skew(Data.t[,i])
Dist.Comp[i,3] <- Skew(Data.c[,i])
Dist.Full[i,4] <- Kurtosis(Data.f[,i])
Dist.Tax[i,4] <- Kurtosis(Data.t[,i])
Dist.Comp[i,4] <- Kurtosis(Data.c[,i])
}
for (i in 1:4)
{
Dist.Full[I + 1,i] <- mean(Dist.Full[1:I,i])
Dist.Tax[I + 1,i] <- mean(Dist.Tax[1:I,i])
Dist.Comp[I + 1,i] <- mean(Dist.Comp[1:I,i])
Dist.Full[I + 2,i] <- sd(Dist.Full[1:I,i])
Dist.Tax[I + 2,i] <- sd(Dist.Tax[1:I,i])
Dist.Comp[I + 2,i] <- sd(Dist.Comp[1:I,i])
}
cat("\nIndicator distributions in the full sample ( N =",N,"):\n\n")
print(round(Dist.Full,3))
cat("\nIndicator distributions in the taxon ( n =",Nt,"):\n\n")
print(round(Dist.Tax,3))
cat("\nIndicator distributions in the complement ( n =",Nc,"):\n\n")
print(round(Dist.Comp,3))
# calculate validity for each indicator
Ind.Val <- matrix(nrow = I + 2, ncol = 2)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("Raw Units","Cohen's d")
dimnames(Ind.Val) <- list(Rows, Cols)
for (i in 1:I)
{
Ind.Val[i,1] <- Dist.Tax[i,1] - Dist.Comp[i,1]
Ind.Val[i,2] <- Ind.Val[i,1] / sqrt(((Nt - 1) * Dist.Tax[i,2]^2 + (Nc - 1) * Dist.Comp[i,2]^2)
/ (Nt + Nc - 2))
}
Ind.Val[I + 1,1] <- mean(Ind.Val[1:I,1])
Ind.Val[I + 1,2] <- mean(Ind.Val[1:I,2])
Ind.Val[I + 2,1] <- sd(Ind.Val[1:I,1])
Ind.Val[I + 2,2] <- sd(Ind.Val[1:I,2])
cat("\nBetween-group validity:\n\n")
print(round(Ind.Val,3))
# Print factor score validity estimate
Factor.Val <- matrix(nrow = 1, ncol = 6)
Factor.Val[1,1] <- mean(Data.Matrix[(Data.Matrix[,(I + 2)] == 2),(I + 1)])
Factor.Val[1,2] <- sd(Data.Matrix[(Data.Matrix[,(I + 2)] == 2),(I + 1)])
Factor.Val[1,3] <- mean(Data.Matrix[(Data.Matrix[,(I + 2)] == 1),(I + 1)])
Factor.Val[1,4] <- sd(Data.Matrix[(Data.Matrix[,(I + 2)] == 1),(I + 1)])
Factor.Val[1,5] <- Factor.Val[1,1] - Factor.Val[1,3]
Factor.Val[1,6] <- Factor.Val[1,5] / sqrt(((Nt - 1) * Factor.Val[1,2]^2 + (Nc - 1) * Factor.Val[1,4]^2)
/ (Nt + Nc - 2))
Row.Labels <- "Factor Scores"
Col.Labels <- c("Taxon M", "Taxon SD", "Comp M", "Comp SD", "Raw Units", "Cohen's d")
dimnames(Factor.Val) <- list(Row.Labels, Col.Labels)
cat("\nBetween-group validity on factor scores:\n\n")
print(round(Factor.Val[,5:6],3))
# Calculate, print, and summarize full-sample and within-group correlations
Cor.Matrix <- cor(Data.f[,1:I])
Rows <- paste("Ind",1:I)
Cols <- paste("Ind",1:I)
dimnames(Cor.Matrix) <- list(Rows, Cols)
cat("\nIndicator correlations:\n\nFull Sample ( N =",N,"):\n\n")
print(round(Cor.Matrix,3))
Cor.Taxon <- cor(Data.t)
Cor.Comp <- cor(Data.c)
dimnames(Cor.Taxon) <- list(Rows, Cols)
dimnames(Cor.Comp) <- list(Rows, Cols)
cat("\nTaxon ( n =",Nt,"):\n\n")
print(round(Cor.Taxon,3))
cat("\nComplement ( n =",Nc,"):\n\n")
print(round(Cor.Comp,3))
Cor.Summary <- matrix(nrow = 3, ncol = 2)
dimnames(Cor.Summary) <- list(c("Full Sample","Taxon", "Complement"), c("M", "SD"))
Cor.Summary[1,1] <- mean(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,1] <- mean(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,1] <- mean(Cor.Comp[lower.tri(Cor.Comp)])
Cor.Summary[1,2] <- sd(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,2] <- sd(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,2] <- sd(Cor.Comp[lower.tri(Cor.Comp)])
cat("\nSummary of indicator correlations:\n\n")
print(round(Cor.Summary,3))
cat("\n")
}
# if research data, return factor scores, base rate estimate, and class assignments;
# if comparison data, return only factor scores
if (Research.Data) return(list(Data.Matrix[,(I + 1)], P.Est, Data.Matrix[,(I + 2)]))
else return(Data.Matrix[,(I + 1)])
}
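# Worked illustration of the mode-location base rate estimates above (a sketch with
# made-up mode locations, not part of the original program):
if (FALSE)
{
    x.L <- -0.5                       # hypothetical left mode location
    x.R <- 1.5                        # hypothetical right mode location
    P.R <- 1 / (1 + x.R ^ 2)          # .308, estimate from right mode
    P.L <- 1 - (1 / (1 + x.L ^ 2))    # .200, estimate from left mode
    mean(c(P.L, P.R))                 # .254, averaged estimate
}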
###########################################################################################################
#
# MAMBAC
#
# Written by John Ruscio
# Last modified January 9, 2012
#
###########################################################################################################
#
# Preliminary program to read and submit research data to analysis and then, if requested, perform analyses
# of comparison data to test fit of averaged curves.
#
###########################################################################################################
MAMBAC <- function(Data.Set, Comp.Data = T, N.Pop = 100000, N.Samples = 100, Supplied.Class = F,
Supplied.P = 0, All.Pairs = T, Ind.Comp = F, N.End = 25, N.Cuts = 50, St.Ind = F,
Replications = 0, Gr.Orient = 1, Gr.Rows = 3, Gr.Cols = 2, Gr.Smooth = F,
Gr.Common.Y = F, Gr.Avg = F, Gr.Ind = F, File.Output = F, File.Name = "Output.txt", Seed = 1)
{
Data.Set <- Remove.Missing(Data.Set) # remove any missing data (listwise deletion)
N <- dim(Data.Set)[1] # determine sample size
I <- dim(Data.Set)[2] # determine number of indicators; adjust if one column contains class assignments
if (Supplied.Class) I <- I - 1 # subtract to allow for class assignment data, if provided
set.seed(Seed) # set random number seed to allow exact replications of results
# automatically fix some potential parameter inconsistencies or omissions
if (Comp.Data) Gr.Avg <- T # need averaged curve to compare to those for comparison data
if ((Comp.Data) & (N.Samples == 1)) N.Samples <- 100 # do not allow a single sample of comparison data
if (Ind.Comp)
{
All.Pairs <- F # cannot use both methods
Gr.Ind <- F # cannot average by input indicator if composites are used
}
if (All.Pairs) Ind.Comp <- F # cannot use both methods
if (Ind.Comp & (I < 3))
{
Ind.Comp <- F # cannot form composites using individual variables
All.Pairs <- T
}
if (Gr.Avg) St.Ind <- T # always standardize indicators if graphs will be averaged
if (St.Ind) Data.Set[,1:I] <- apply(Data.Set[,1:I], 2, scale) # standardize data if requested
# check for tied scores and set number of replications
Ties <- F
for (i in 1:I)
if (length(unique(Data.Set[,i])) < N) Ties <- T
if ((Replications == 0) & (Ties)) Replications <- 10 # default is 10 replications if any tied scores
if (Replications == 0) Replications <- 1
if ((Replications > 1) & (!Ties)) Replications <- 1 # eliminate unnecessary replications
# if output to file is requested, redirect all text output
if (File.Output) sink(File.Name)
# submit data to main program for analysis; taxon base rate estimate will be returned
MAMBAC.Results <- MAMBAC.Main(Data.Set, Research.Data = T, Comp.Data, N.Pop, N.Samples, N, I,
All.Pairs, Ind.Comp, N.End, N.Cuts, Replications, Gr.Orient, Gr.Rows,
Gr.Cols, Gr.Smooth, Gr.Common.Y, Gr.Avg, Gr.Ind, Supplied.P,
Supplied.Class)
P.Est.M <- MAMBAC.Results[[3]]
P.Est.SD <- MAMBAC.Results[[4]]
# if comparison data requested, then generate and submit samples for analysis
if (Comp.Data)
{
# create matrices to store results for comparison data
Tax.Y.Values <- matrix(nrow = N.Samples, ncol = N.Cuts)
Dim.Y.Values <- matrix(nrow = N.Samples, ncol = N.Cuts)
if (Ind.Comp) N.Curves <- I
else N.Curves <- I * (I - 1)
Tax.P.Ests <- matrix(nrow = N.Samples, ncol = N.Curves)
Dim.P.Ests <- matrix(nrow = N.Samples, ncol = N.Curves)
# generate and analyze categorical data
set.seed(Seed)
if (!Supplied.Class) # if class assignments were not provided, generate using base rate
{
Data.Set <- cbind(Data.Set, rep(0, N))
if (Supplied.P == 0) P <- P.Est.M # use estimated P in research data if no P was provided
else P <- Supplied.P
if ((P * N) < 20) P <- 20 / N               # ensure at least 20 taxon members in the comparison population
if ((N - (P * N)) < 20) P <- (N - 20) / N   # ensure at least 20 complement members
for (i in 1:I)
Data.Set[, (I + 1)] <- Data.Set[, (I + 1)] + Data.Set[,i]
Data.Set <- Data.Set[sort.list(Data.Set[,(I + 1)]),]
P.Est.Series <- c(rep(2, round(N * P)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Set[,(I + 1)] <- c(Q.Est.Series, P.Est.Series)
}
Tax.Data <- Data.Set[(Data.Set[,(I + 1)] == 2), 1:I]
Com.Data <- Data.Set[(Data.Set[,(I + 1)] == 1), 1:I]
P.Data <- dim(Tax.Data)[1] / (dim(Tax.Data)[1] + dim(Com.Data)[1])
cat("\nGenerating and analyzing comparison data\n")
Sim.Tax <- GenData(Tax.Data, round(N.Pop * P.Data))
Sim.Com <- GenData(Com.Data, (N.Pop - round(N.Pop * P.Data)))
Sim.Pop <- rbind(Sim.Tax, Sim.Com)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat(" Categorical population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
MAMBAC.Tax.Results <- MAMBAC.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop, N.Samples,
N, I, All.Pairs, Ind.Comp, N.End, N.Cuts, Replications,
Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth, Gr.Common.Y,
Gr.Avg, Gr.Ind, Supplied.P, Supplied.Class)
Tax.Y.Values[Sims,] <- MAMBAC.Tax.Results[[1]]
Tax.P.Ests[Sims,] <- MAMBAC.Tax.Results[[2]]
}
cat(" Analysis of",N.Samples,"samples of categorical comparison data completed\n")
# generate and analyze dimensional samples
set.seed(Seed)
Sim.Pop <- GenData(Data.Set[, 1:I], N.Pop)
Res.Corr <- cor(Data.Set[,1:I])[lower.tri(diag(I))] - cor(Sim.Pop)[lower.tri(diag(I))]
RMSR <- sqrt(sum(Res.Corr * Res.Corr) / (.5 * (I * I - I)))
cat("\n Dimensional population generated, RMSR r =",round(RMSR,3),"\n")
for (Sims in 1:N.Samples)
{
Sim.Sample <- Sim.Pop[sample(1:N.Pop, N, replace = T),]
MAMBAC.Dim.Results <- MAMBAC.Main(Sim.Sample, Research.Data = F, Comp.Data, N.Pop, N.Samples,
N, I, All.Pairs, Ind.Comp, N.End, N.Cuts, Replications,
Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth, Gr.Common.Y,
Gr.Avg, Gr.Ind, Supplied.P, Supplied.Class)
Dim.Y.Values[Sims,] <- MAMBAC.Dim.Results[[1]]
Dim.P.Ests[Sims,] <- MAMBAC.Dim.Results[[2]]
}
cat(" Analysis of",N.Samples,"samples of dimensional comparison data completed\n")
# summarize base rate estimates for comparison data
P.Est.Summary <- matrix(nrow = (N.Samples + 2), ncol = 4)
for (Sims in 1:N.Samples)
{
P.Est.Summary[Sims,1] <- mean(Tax.P.Ests[Sims,], na.rm = T)
P.Est.Summary[Sims,2] <- sd(Tax.P.Ests[Sims,], na.rm = T)
P.Est.Summary[Sims,3] <- mean(Dim.P.Ests[Sims,], na.rm = T)
P.Est.Summary[Sims,4] <- sd(Dim.P.Ests[Sims,], na.rm = T)
}
for (i in 1:4)
{
P.Est.Summary[Sims + 1,i] <- mean(P.Est.Summary[1:Sims,i])
P.Est.Summary[Sims + 2,i] <- sd(P.Est.Summary[1:Sims,i])
}
Row.Labels <- c(1:Sims,paste("M for",N.Samples,"samples"),paste("SD for",N.Samples,"samples"))
Col.Labels <- c("Categorical M","SD","Dimensional M","SD")
dimnames(P.Est.Summary) <- list(Row.Labels,Col.Labels)
cat("\n\nBase rate estimates for comparison data:\n\n")
if (N.Samples == 1)
{
cat(" Categorical sample: M =",round(P.Est.Summary[1,1],4),", SD =",
round(P.Est.Summary[1,2],4),"\n")
cat(" Dimensional sample: M =",round(P.Est.Summary[1,3],4),", SD =",
round(P.Est.Summary[1,4],4),"\n")
}
else
print(round(P.Est.Summary[((N.Samples+1):(N.Samples+2)),],4))
# plot sampling distributions of Ms/SDs of base rate estimates
if (N.Samples > 1)
{
dev.new(width=7, height=3.5)
par(mfrow=c(1,2), omi=c(.25,.25,.25,.25), cex=.75)
Y.M.Max <- max(c(density(P.Est.Summary[1:Sims,1])$y,density(P.Est.Summary[1:Sims,3])$y)) * 1.25
Y.SD.Max <- max(c(density(P.Est.Summary[1:Sims,2])$y,density(P.Est.Summary[1:Sims,4])$y)) * 1.25
plot(density(P.Est.Summary[1:Sims,1], adjust = .75), xlim = c(0,1), ylim = c(0, Y.M.Max),
xlab = "Ms of Base Rate Estimates", ylab = "", main = "", lwd = 2, axes = F)
axis(1)
lines(density(P.Est.Summary[1:Sims,3], adjust = .75), lwd = 2, lty = 2)
abline(v = P.Est.M, lwd = 2, lty = 3)
plot(density(P.Est.Summary[1:Sims,2], adjust = .75), xlim = c(0,.5), ylim = c(0, Y.SD.Max),
xlab = "SDs of Base Rate Estimates", ylab = "", main = "", lwd = 2, axes = F)
axis(1)
lines(density(P.Est.Summary[1:Sims,4], adjust = .75), lwd = 2, lty = 2)
abline(v = P.Est.SD, lwd = 2, lty = 3)
mtext("BASE RATE ESTIMATES FOR COMPARISON DATA",outer=T)
}
# plot panel of averaged curves
MAMBAC.Y <- MAMBAC.Results[[2]]
X.Values <- MAMBAC.Results[[1]]
X.Range <- c(min(X.Values), max(X.Values))
Tax.Y <- vector("numeric", length(X.Values))
for (i in 1:length(Tax.Y))
Tax.Y[i] <- mean(Tax.Y.Values[,i])
Dim.Y <- vector("numeric", length(X.Values))
for (i in 1:length(Dim.Y))
Dim.Y[i] <- mean(Dim.Y.Values[,i])
Y.Range <- c(min(MAMBAC.Y,Tax.Y.Values,Dim.Y.Values),max(MAMBAC.Y,Tax.Y.Values,Dim.Y.Values))
# label the x and y axes
X.Label <- paste(N.Cuts,"Cuts")
Y.Label <- "Mean Difference"
# 2-panel plot for comparison data
Tax.Sum <- Dim.Sum <- matrix(0, nrow = 6, ncol = length(X.Values))
for (i in 1:length(X.Values))
{
Tax.Sum[,i] <- summary(Tax.Y.Values[,i])
Dim.Sum[,i] <- summary(Dim.Y.Values[,i])
}
dev.new(width=7, height=3.5)
par(mfrow=c(1,2), omi=c(.25,.25,.25,.25), cex=.75)
plot(X.Values, MAMBAC.Y, type = "o", pch = 16, lwd = 2, ylim = Y.Range,
xlim = X.Range, ylab = Y.Label, xlab = X.Label, main = "Categorical Comparison Data")
for (i in 1:(length(X.Values)-1))
polygon(x = c(X.Values[i], X.Values[i], X.Values[i+1], X.Values[i+1]),
y = c(Tax.Sum[2,i], Tax.Sum[5,i], Tax.Sum[5,i+1], Tax.Sum[2,i+1]), col = 8, lty = 0)
lines(X.Values, Tax.Sum[1,])
lines(X.Values, Tax.Sum[6,])
points(X.Values, MAMBAC.Y, type = "o", pch = 16)
lines(X.Values, MAMBAC.Y, lwd = 2)
plot(X.Values, MAMBAC.Y, type = "o", pch = 16, lwd = 2, ylim = Y.Range,
xlim = X.Range, ylab = Y.Label, xlab = X.Label, main = "Dimensional Comparison Data")
for (i in 1:(length(X.Values)-1))
polygon(x = c(X.Values[i], X.Values[i], X.Values[i+1], X.Values[i+1]),
y = c(Dim.Sum[2,i], Dim.Sum[5,i], Dim.Sum[5,i+1], Dim.Sum[2,i+1]), col = 8, lty = 0)
lines(X.Values, Dim.Sum[1,])
lines(X.Values, Dim.Sum[6,])
points(X.Values, MAMBAC.Y, type = "o", pch = 16)
lines(X.Values, MAMBAC.Y, lwd = 2)
# calculate and print comparison curve fit index (CCFI)
Fit.RMSR.t <- sqrt(sum((Tax.Y - MAMBAC.Y)^2)/N.Cuts)
Fit.RMSR.d <- sqrt(sum((Dim.Y - MAMBAC.Y)^2)/N.Cuts)
CCFI <- Fit.RMSR.d / (Fit.RMSR.d + Fit.RMSR.t)
cat("\n\nComparison curve fit index (CCFI) =",round(Fit.RMSR.d,4),"/ (",round(Fit.RMSR.d,4),
"+",round(Fit.RMSR.t,4),") =",round(CCFI,3),"\n")
cat(" Note: CCFI values can range from 0 (dimensional) to 1 (categorical). The more a CCFI\n")
cat(" value deviates from .50, the stronger the result. When .40 < CCFI < .60,\n")
cat(" this should be interpreted with caution.\n\n")
}
if (File.Output) sink()
}
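# Example usage (a sketch, not part of the original program; assumes this entire file
# has been sourced; CreateData is used only to simulate illustrative data):
if (FALSE)
{
    Sim <- CreateData("Tax", N = 600, k = 4, P = .50, d = 2.00)   # indicators in columns 1:4
    MAMBAC(Sim[, 1:4], Comp.Data = TRUE, N.Samples = 100)
}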
###########################################################################################################
#
# Main program shows the overall structure through calls to four subsidiary programs
#
###########################################################################################################
MAMBAC.Main <- function(Data, Research.Data, Comp.Data, N.Pop, N.Samples, N, I, All.Pairs, Ind.Comp, N.End,
N.Cuts, Replications, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth, Gr.Common.Y,
Gr.Avg, Gr.Ind, Supplied.P, Supplied.Class)
{
# set up matrix to store input/output configurations for each curve to be generated
Curve.Info <- MAMBAC.Structure(I, All.Pairs)
# carry out analyses by calling calculation procedure once per curve
Curves <- list(1)
for (Curve.Num in 1:length(Curve.Info[,1]))
Curves[[Curve.Num]] <- MAMBAC.Calculate(Data[,1:I], N, I, Curve.Info, Curve.Num, All.Pairs, Ind.Comp,
N.End, N.Cuts, Replications)
# plot the results (returns P.Avg for research data, X.Values & Y.Values for comparison data)
MAMBAC.Results <- MAMBAC.Plot(Research.Data, I, Curve.Info, Curves, All.Pairs, Ind.Comp, N.End,
N.Cuts, Replications, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth, Gr.Common.Y,
Gr.Avg, Gr.Ind)
if (Research.Data) P.Avg <- MAMBAC.Results[[1]]
# print summary of analytic specification (for research data only)
if (Research.Data) MAMBAC.Summarize(Data[,1:I], N, I, Curve.Info, Curves, All.Pairs, Ind.Comp, N.End,
N.Cuts, Replications, Gr.Smooth, Gr.Avg, Comp.Data, N.Pop, N.Samples,
Supplied.P, Supplied.Class)
# generate estimates of taxon base rate (for research data only)
if (Research.Data) P.Est <- MAMBAC.P.Est(Data, N, I, Curve.Info, Curves, All.Pairs, Ind.Comp, N.End,
N.Cuts, Replications, Gr.Smooth, Gr.Avg, P.Avg, Supplied.P, Supplied.Class)
if (!Research.Data)
{
N.Curves <- length(Curve.Info[,1])
P.Estimates <- vector("numeric", N.Curves)
for (i in 1:N.Curves)
{
P.Estimates[i] <- 1/((Curves[[i]][length(Curves[[i]][,2]),2]/Curves[[i]][1, 2]) + 1)
if (P.Estimates[i] < 0) P.Estimates[i] <- 0 # correct for possibilities of
if (P.Estimates[i] > 1) P.Estimates[i] <- 1 # negative values at extremes
}
}
if (Research.Data) return(list(MAMBAC.Results[[2]], MAMBAC.Results[[3]], P.Est[[1]], P.Est[[2]]))
else return(list(MAMBAC.Results, P.Estimates))
}
###########################################################################################################
#
# Program to determine structure of analysis and set up matrix to contain relevant curve information
#
###########################################################################################################
MAMBAC.Structure <- function(I, All.Pairs)
{
# based on indicator configuration, create matrix listing input/output indicators to use for each curve
if (All.Pairs)
{
N.Curves <- I * (I - 1)
Curve.Info <- matrix(nrow = N.Curves, ncol = 3)
Curve.Info[,1] <- 1:N.Curves
Counter <- 0
for (i in 1:(I - 1))
{
for (j in (i + 1):I)
{
Curve.Info[Counter + 1, 2:3] <- c(i, j)
Curve.Info[Counter + 2, 2:3] <- c(j, i)
Counter <- Counter + 2
}
}
}
else
{
N.Curves <- I
Curve.Info <- matrix(nrow = N.Curves, ncol = 2)
Curve.Info[,1] <- 1:N.Curves
Curve.Info[,2] <- 1:N.Curves
}
return(Curve.Info)
}
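# Illustration (a sketch, not part of the original program): with I = 3 and
# All.Pairs = T, the matrix lists 3 * 2 = 6 curves, one per ordered input/output
# pair: (1,2), (2,1), (1,3), (3,1), (2,3), (3,2).
if (FALSE)
{
    MAMBAC.Structure(3, All.Pairs = TRUE)
}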
###########################################################################################################
#
# Program to locate cutting scores along the input and calculate mean differences
#
###########################################################################################################
MAMBAC.Calculate <- function(Data, N, I, Curve.Info, Curve.Num, All.Pairs, Ind.Comp, N.End, N.Cuts,
Replications)
{
# make room in data for composite indicator plus random numbers for resorting at each replication
Data.Matrix <- cbind(Data, rep(0,N), rnorm(N, mean = 0, sd = 1))
# determine input column, creating composite indicator if necessary, and sort by input
if (All.Pairs)
{
Input <- Curve.Info[Curve.Num, 2]
Output <- Curve.Info[Curve.Num, 3]
}
else
{
Output <- Curve.Info[Curve.Num,2]
Input <- I + 1
for (i in 1:I)
Data.Matrix[,(I + 1)] <- Data.Matrix[,(I + 1)] + Data.Matrix[,i]
Data.Matrix[,(I + 1)] <- Data.Matrix[,(I + 1)] - Data.Matrix[,Output]
}
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,Input]),]
Cuts <- seq(from = N.End, to = (N - N.End), length = N.Cuts)
X.Labels <- Cuts
# create matrix to store curve results
Num.Cuts <- length(Cuts)
Curve.Results <- matrix(rep(0, Num.Cuts * 2), nrow = Num.Cuts, ncol = 2)
# having set up subsamples along the input and created a storage structure, perform the calculations
# replications loop; results are cumulated, subsequently divided by number of replications to yield average
for (Rep in 1:Replications)
{
# shuffle cases, then sort by input
Data.Matrix <- Data.Matrix[sample(1:N, N, replace = F),]
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,Input]),]
# begin calculations, storing results in either matrix or array as follows:
# 1st column = labels for x axis
# 2nd column = MAMBAC values
Curve.Results[,1] <- X.Labels
for (i in 1:Num.Cuts)
Curve.Results[i,2] <- Curve.Results[i,2] + mean(Data.Matrix[(Cuts[i]+1):N,Output]) -
                      mean(Data.Matrix[1:Cuts[i],Output])
}
# divide cumulated mean differences by number of replications to yield average values
Curve.Results[,2] <- Curve.Results[,2] / Replications
return(Curve.Results)
}
###########################################################################################################
#
# Program to plot and label a panel of curves
#
###########################################################################################################
MAMBAC.Plot <- function(Research.Data, I, Curve.Info, Curves, All.Pairs, Ind.Comp, N.End, N.Cuts,
Replications, Gr.Orient, Gr.Rows, Gr.Cols, Gr.Smooth, Gr.Common.Y, Gr.Avg, Gr.Ind)
{
P.Avg <- 0
N.Curves <- length(Curve.Info[,1])
# create and label new graph sheet
if (Research.Data)
{
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
Page <- 1
# if applying a common Y scale for all graphs, set the range here (algorithm accentuates peak/flatness)
if (Gr.Common.Y)
{
Ys <- 0
for (i in 1:N.Curves)
Ys <- c(Ys, Curves[[i]][,2])
Y.Min <- min(Ys[2:length(Ys)])
Y.Max <- max(Ys[2:length(Ys)])
Y.Range <- c(Y.Min, Y.Max)
}
# plot each curve
for (i in 1:N.Curves)
{
if (All.Pairs)
{
Input <- Curve.Info[i,2]
Output <- Curve.Info[i,3]
}
else
{
Input <- Curve.Info[i,1]
Output <- Curve.Info[i,2]
}
X.Values <- Curves[[i]][,1]
Y.Values <- Curves[[i]][,2]
X.Range <- c(min(X.Values), max(X.Values))
if (!Gr.Common.Y) Y.Range <- c(min(Y.Values), max(Y.Values))
# label the x axis in accord with input method and indicator
if (All.Pairs) X.Label <- paste(N.Cuts,"Cuts on Indicator",Input)
else X.Label <- paste(N.Cuts,"Cuts")
# label the y axis
Y.Label <- paste("Mean Difference, Indicator",Output)
# check to see whether new graph sheet needs to be created
if ((i > 1) & ((i - 1) %% (Gr.Rows * Gr.Cols) == 0))
{
mtext(paste("MAMBAC CURVES, page ",Page,sep=""),outer=T)
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
Page <- Page + 1
}
# plot the graph, plus smoothed curve if requested
plot(X.Values, Y.Values, type = "p", pch = 16, ylim = Y.Range, xlim = X.Range,
ylab = Y.Label, xlab = X.Label)
if (!Gr.Smooth) # no smoothing, connect "raw" data points
lines(X.Values, Y.Values) # solid line for raw data only
else
lines(lowess(X.Values, Y.Values)) # dotted line for LOWESS smoothed curve
}
mtext(paste("MAMBAC CURVES, page ",Page,sep=""),outer=T)
}
if (Gr.Ind & Research.Data)
{
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
# average y values for curves using the same input indicator
Ind.Graphs <- matrix(nrow = I, ncol = N.Cuts)
N.Each.Ind <- N.Curves / I
for (i in 1:I)
{
Ind.Curves <- Curve.Info[,1][Curve.Info[,2] == i]
Ys <- vector("numeric",N.Cuts)
for (j in 1:N.Cuts)
for (k in Ind.Curves)
Ys[j] <- Ys[j] + Curves[[k]][j,2]
Ind.Graphs[i,] <- Ys / N.Each.Ind
}
# plot a curve for each indicator
Page <- 1
for (i in 1:I)
{
Input <- i
X.Values <- Curves[[i * N.Each.Ind]][,1]
X.Range <- c(min(X.Values), max(X.Values))
Ind.Curves <- Curve.Info[,1][Curve.Info[,2] == i]
Y.Values <- Ind.Graphs[i,]
for (j in Ind.Curves)
Y.Values <- c(Y.Values,Curves[[j]][,2])
Y.Range <- c(min(Y.Values), max(Y.Values))
X.Label <- paste(N.Cuts,"Cuts on Indicator",i)
Y.Label <- "Mean Difference"
# check to see whether new graph sheet needs to be created
if ((i > 1) & ((i - 1) %% (Gr.Rows * Gr.Cols) == 0))
{
mtext(paste("MAMBAC INDICATOR CURVES, page ",Page,sep=""),outer=T)
if (Gr.Orient == 1) dev.new(width=8.5, height=11)
else dev.new(width=11, height=8.5)
par(mfrow=c(Gr.Rows, Gr.Cols), omi=c(.25,.25,.25,.25))
Page <- Page + 1
}
# plot the averaged graph, with dotted lines for each individual curve
plot(X.Values, Ind.Graphs[i,], type = "l", pch = 16, ylim = Y.Range, xlim = X.Range,
ylab = Y.Label, xlab = X.Label, lwd = 2)
for (j in Ind.Curves)
lines(Curves[[j]][,1],Curves[[j]][,2], lty = 3, lwd = 2)
}
mtext(paste("MAMBAC INDICATOR CURVES, page",Page),outer=T)
}
if (Gr.Avg)
{
if (Research.Data)
{
dev.new(width=3.7, height=3.5)
par(omi=c(.25,.25,.25,.25), cex = .75)
}
X.Values <- Curves[[1]][,1]
X.Range <- c(min(X.Values), max(X.Values))
Y.Values <- rep(0,length(X.Values))
for (i in 1:N.Curves)
Y.Values <- Y.Values + Curves[[i]][,2]
Y.Values <- Y.Values / N.Curves
Y.Vals <- Y.Values
for (j in 1:N.Curves)
Y.Vals <- c(Y.Vals,Curves[[j]][,2])
Y.Range <- c(min(Y.Vals), max(Y.Vals))
X.Label <- paste(N.Cuts,"Cuts")
Y.Label <- "Mean Difference"
P.Avg <- 1/((Y.Values[length(Y.Values)]/Y.Values[1]) + 1)
if (P.Avg < 0) P.Avg <- 0 # correct for possibilities of negative values at extremes
if (P.Avg > 1) P.Avg <- 1
if (Research.Data)
{
plot(X.Values, Y.Values, type = "l", pch = 16, ylim = Y.Range, xlim = X.Range,
ylab = Y.Label, xlab = X.Label, main = "Averaged MAMBAC Curve", lwd = 2)
for (j in 1:N.Curves)
lines(Curves[[j]][,1],Curves[[j]][,2], lty = 3, lwd = 2)
}
}
if (!Research.Data) return(Y.Values)
else return(list(P.Avg,X.Values,Y.Values))
}
###########################################################################################################
#
# Function to produce text summary of analytic specifications
#
###########################################################################################################
MAMBAC.Summarize <- function(Data, N, I, Curve.Info, Curves, All.Pairs, Ind.Comp, N.End, N.Cuts,
Replications, Gr.Smooth, Gr.Avg, Comp.Data, N.Pop, N.Samples, Supplied.P,
Supplied.Class)
{
# report relevant analytic information and program specifications
cat("\n\nSUMMARY OF MAMBAC ANALYTIC SPECIFICATIONS\n")
cat("\nSample size: ",N,"\n")
cat("Number of indicator variables: ",I,"\n")
cat("Replications: ",Replications,"\n")
cat("Cuts: ",N.Cuts,"evenly-spaced cuts beginning",N.End,"cases from either extreme\n")
if (All.Pairs) Ind.Label <- "Variables serve in all possible input-output pairs"
else Ind.Label <- paste("One variable serves as output, input = sum of remaining",(I-1),"variables")
cat("Indicators: ",Ind.Label,"\n")
cat("Total number of curves: ",length(Curve.Info[,1]),"\n")
if (Gr.Smooth) cat("Y values smoothed for graphing and P estimation: Yes, LOWESS method\n")
else cat("Y values smoothed for graphing and P estimation: No\n")
if ((Supplied.P == 0) & (!Supplied.Class))
cat("Classification of cases: Cut total score at estimated base rate\n")
if (Supplied.P > 0) cat("Classification of cases: Cut total score at supplied base rate\n")
if (Supplied.Class) cat("Classification of cases: Supplied with data\n")
if (Comp.Data)
{
cat("Size of finite populations of comparison data: ",N.Pop,"\n")
cat("Number of samples of comparison data drawn from each population: ",N.Samples,"\n")
}
cat("\n")
}
###########################################################################################################
#
# Function to generate and report estimates of the taxon base rate and other latent parameters
#
###########################################################################################################
MAMBAC.P.Est <- function(Data, N, I, Curve.Info, Curves, All.Pairs, Ind.Comp, N.End, N.Cuts, Replications,
Gr.Smooth, Gr.Avg, P.Avg, Supplied.P, Supplied.Class)
{
cat("\n\nSUMMARY OF MAMBAC PARAMETER ESTIMATES\n")
# estimate base rate for each curve
N.Curves <- length(Curve.Info[,1])
P.Estimates <- matrix(nrow = N.Curves, ncol = 1)
if (Gr.Smooth)
for (i in 1:N.Curves)
{
Num.Y <- length(Curves[[i]][,1])
Lowess.Y <- lowess(Curves[[i]][,1],Curves[[i]][,2])$y
for (j in 1:(Num.Y-1))
if (Curves[[i]][j,1] == Curves[[i]][(j + 1),1])
Lowess.Y <- c(Lowess.Y[1:j],Lowess.Y[j],Lowess.Y[(j+1):length(Lowess.Y)])
Lowess.Y <- Lowess.Y[1:Num.Y]
P.Estimates[i,1] <- 1/((Lowess.Y[Num.Y]/Lowess.Y[1]) + 1)
if (P.Estimates[i,1] < 0) P.Estimates[i,1] <- 0 # correct for possibilities of
if (P.Estimates[i,1] > 1) P.Estimates[i,1] <- 1 # negative values at extremes
}
else
for (i in 1:N.Curves)
{
P.Estimates[i,1] <- 1/((Curves[[i]][length(Curves[[i]][,2]),2]/Curves[[i]][1, 2]) + 1)
if (P.Estimates[i,1] < 0) P.Estimates[i,1] <- 0 # correct for possibilities of
if (P.Estimates[i,1] > 1) P.Estimates[i,1] <- 1 # negative values at extremes
}
Row.Labels <- paste("Curve",1:N.Curves)
Col.Labels <- c("P")
dimnames(P.Estimates) <- list(Row.Labels, Col.Labels)
cat("\nEstimated taxon base rates for each curve:\n\n")
print(round(P.Estimates,3))
P.Est.M <- mean(P.Estimates[,1])
P.Est.SD <- sd(P.Estimates[,1])
cat("\nSummary of base rate estimates across curves:\n M =",round(P.Est.M,3),
"\n SD =",round(P.Est.SD,3),"\n\n")
# if more than one curve per indicator, gather P estimates for each indicator
if (All.Pairs & (I > 2))
{
Ind.Est <- matrix(rep(0, I), nrow = I, ncol = 1)
N.Each.Ind <- N.Curves / I
for (i in 1:N.Curves)
Ind.Est[Curve.Info[i,2]] <- Ind.Est[Curve.Info[i,2]] + P.Estimates[i]
Ind.Est[,1] <- Ind.Est[,1] / N.Each.Ind
Row.Labels <- paste("Indicator",1:I)
Col.Labels <- c("P")
dimnames(Ind.Est) <- list(Row.Labels, Col.Labels)
cat("Estimated taxon base rates for each indicator:\n\n")
print(round(Ind.Est,3))
cat("\n")
cat("Summary of base rate estimates across indicators:\n M =",round(mean(Ind.Est[,1]),3),
"\n SD =",round(sd(Ind.Est[,1]),3),"\n\n")
}
# print base rate estimate for averaged curve if appropriate
if (Gr.Avg)
cat("Base rate estimate for averaged curve =",round(P.Avg, 3),"\n\n")
# assign cases to classes using base-rate classification method with estimated base rate
if ((Supplied.P == 0) & (!Supplied.Class))
{
Data.Matrix <- cbind(Data[,1:I],rep(0,N),rep(0,N))
for (i in 1:I)
Data.Matrix[, (I + 1)] <- Data.Matrix[, (I + 1)] + Data.Matrix[,i]
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 1)]),]
P.Est.Series <- c(rep(2, trunc(N * P.Est.M + .5)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,(I + 2)] <- c(Q.Est.Series, P.Est.Series)
}
# assign cases to classes using base-rate classification method with supplied base rate
if (Supplied.P > 0)
{
Data.Matrix <- cbind(Data[,1:I],rep(0,N),rep(0,N))
for (i in 1:I)
Data.Matrix[, (I + 1)] <- Data.Matrix[, (I + 1)] + Data.Matrix[,i]
Data.Matrix <- Data.Matrix[sort.list(Data.Matrix[,(I + 1)]),]
P.Est.Series <- c(rep(2, trunc(N * Supplied.P + .5)))
Q.Est.Series <- c(rep(1, N - length(P.Est.Series)))
Data.Matrix[,(I + 2)] <- c(Q.Est.Series, P.Est.Series)
}
# assign cases to classes using supplied classification
if (Supplied.Class)
Data.Matrix <- cbind(Data[,1:I], rep(0,N), Data[, (I + 1)])
# Construct full-sample and within-groups data files
Data.f <- as.matrix(Data.Matrix[,(1:I)])
Nt <- sum(Data.Matrix[,(I + 2)]) - N
Nc <- N - Nt
Data.t <- Data.Matrix[(Data.Matrix[,(I + 2)] == 2),1:I]
Data.c <- Data.Matrix[(Data.Matrix[,(I + 2)] == 1),1:I]
for (i in 1:I)
{
if (min(Data.t[,i]) == max(Data.t[,i])) Data.t[,i] <- Data.t[,i] + rnorm(Nt, 0, .0001)
if (min(Data.c[,i]) == max(Data.c[,i])) Data.c[,i] <- Data.c[,i] + rnorm(Nc, 0, .0001)
}
# Calculate distributions for each indicator (full sample, taxon, complement)
Dist.Full <- matrix(nrow = I + 2, ncol = 4)
Dist.Tax <- matrix(nrow = I + 2, ncol = 4)
Dist.Comp <- matrix(nrow = I + 2, ncol = 4)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("M","SD","Skew","Kurtosis")
dimnames(Dist.Full) <- list(Rows, Cols)
dimnames(Dist.Tax) <- list(Rows, Cols)
dimnames(Dist.Comp) <- list(Rows, Cols)
for (i in 1:I)
{
Dist.Full[i,1] <- mean(Data.f[,i])
Dist.Tax[i,1] <- mean(Data.t[,i])
Dist.Comp[i,1] <- mean(Data.c[,i])
Dist.Full[i,2] <- sd(Data.f[,i])
Dist.Tax[i,2] <- sd(Data.t[,i])
Dist.Comp[i,2] <- sd(Data.c[,i])
Dist.Full[i,3] <- Skew(Data.f[,i])
Dist.Tax[i,3] <- Skew(Data.t[,i])
Dist.Comp[i,3] <- Skew(Data.c[,i])
Dist.Full[i,4] <- Kurtosis(Data.f[,i])
Dist.Tax[i,4] <- Kurtosis(Data.t[,i])
Dist.Comp[i,4] <- Kurtosis(Data.c[,i])
}
for (i in 1:4)
{
Dist.Full[I + 1,i] <- mean(Dist.Full[1:I,i])
Dist.Tax[I + 1,i] <- mean(Dist.Tax[1:I,i])
Dist.Comp[I + 1,i] <- mean(Dist.Comp[1:I,i])
Dist.Full[I + 2,i] <- sd(Dist.Full[1:I,i])
Dist.Tax[I + 2,i] <- sd(Dist.Tax[1:I,i])
Dist.Comp[I + 2,i] <- sd(Dist.Comp[1:I,i])
}
cat("\nIndicator distributions in the full sample ( N =",N,"):\n\n")
print(round(Dist.Full,3))
cat("\nIndicator distributions in the taxon ( n =",Nt,"):\n\n")
print(round(Dist.Tax,3))
cat("\nIndicator distributions in the complement ( n =",Nc,"):\n\n")
print(round(Dist.Comp,3))
# calculate validity for each indicator
Ind.Val <- matrix(nrow = I + 2, ncol = 2)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("Raw Units","Cohen's d")
dimnames(Ind.Val) <- list(Rows, Cols)
for (i in 1:I)
{
Ind.Val[i,1] <- Dist.Tax[i,1] - Dist.Comp[i,1]
Ind.Val[i,2] <- Ind.Val[i,1] / sqrt(((Nt - 1) * Dist.Tax[i,2]^2 + (Nc - 1) * Dist.Comp[i,2]^2)
/ (Nt + Nc - 2))
}
Ind.Val[I + 1,1] <- mean(Ind.Val[1:I,1])
Ind.Val[I + 1,2] <- mean(Ind.Val[1:I,2])
Ind.Val[I + 2,1] <- sd(Ind.Val[1:I,1])
Ind.Val[I + 2,2] <- sd(Ind.Val[1:I,2])
cat("\nBetween-group validity:\n\n")
print(round(Ind.Val,3))
# Calculate, print, and summarize full-sample and within-group correlations
Cor.Matrix <- cor(Data.f[,1:I])
Rows <- paste("Ind",1:I)
Cols <- paste("Ind",1:I)
dimnames(Cor.Matrix) <- list(Rows, Cols)
cat("\nIndicator correlations:\n\nFull Sample ( N =",N,"):\n\n")
print(round(Cor.Matrix,3))
Cor.Taxon <- cor(Data.t)
Cor.Comp <- cor(Data.c)
dimnames(Cor.Taxon) <- list(Rows, Cols)
dimnames(Cor.Comp) <- list(Rows, Cols)
cat("\nTaxon ( n =",Nt,"):\n\n")
print(round(Cor.Taxon,3))
cat("\nComplement ( n =",Nc,"):\n\n")
print(round(Cor.Comp,3))
Cor.Summary <- matrix(nrow = 3, ncol = 2)
dimnames(Cor.Summary) <- list(c("Full Sample","Taxon", "Complement"), c("M", "SD"))
Cor.Summary[1,1] <- mean(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,1] <- mean(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,1] <- mean(Cor.Comp[lower.tri(Cor.Comp)])
Cor.Summary[1,2] <- sd(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,2] <- sd(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,2] <- sd(Cor.Comp[lower.tri(Cor.Comp)])
cat("\nSummary of indicator correlations:\n\n")
print(round(Cor.Summary,3))
cat("\n")
# return M and SD of base rate estimates
return(list(P.Est.M,P.Est.SD))
}
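# Worked illustration of the curve-end base rate formula above (a sketch with made-up
# y values, not part of the original program): a right endpoint three times the left
# endpoint implies a taxon base rate of 1 / (3 + 1) = .25.
if (FALSE)
{
    y.first <- .2
    y.last <- .6
    1 / ((y.last / y.first) + 1)   # = .25
}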
################################################################################################################
GenData <- function(Supplied.Data, N, N.Factors = 0, Max.Trials = 5, Initial.Multiplier = 1, seed = 0)
{
# Initialize variables and (if applicable) set random number seed (step 1) -------------------------------------
k <- dim(Supplied.Data)[2] # Number of variables
Data <- matrix(0, nrow = N, ncol = k) # Matrix to store the simulated data
Distributions <- matrix(0, nrow = N, ncol = k) # Matrix to store each variable's score distribution
Iteration <- 0 # Iteration counter
Best.RMSR <- 1 # Lowest RMSR correlation
Trials.Without.Improvement <- 0 # Trial counter
if (seed != 0) set.seed(seed) # If user specified a random seed, set it
# Generate distribution for each variable (step 2) -------------------------------------------------------------
for (i in 1:k)
{
if (min(Supplied.Data[,i]) == max(Supplied.Data[,i]))
Supplied.Data[,i] <- Supplied.Data[,i] + rnorm(dim(Supplied.Data)[1], 0, .0001)
V <- F
while (!V)
{
Distributions[,i] <- sort(sample(Supplied.Data[,i], N, replace = T))
V <- min(Distributions[,i]) != max(Distributions[,i])
}
}
# Calculate and store a copy of the target correlation matrix (step 3) -----------------------------------------
Target.Corr <- cor(Supplied.Data)
Intermediate.Corr <- Target.Corr
# If number of latent factors was not specified, determine it through parallel analysis (step 4) ---------------
if (N.Factors == 0)
{
Eigenvalues.Observed <- eigen(Intermediate.Corr)$values
Eigenvalues.Random <- matrix(0, nrow = 100, ncol = k)
Random.Data <- matrix(0, nrow = N, ncol = k)
for (i in 1:100)
{
for (j in 1:k)
{
V <- F
while (!V)
{
Random.Data[,j] <- sample(Distributions[,j], size = N, replace = TRUE)
V <- min(Random.Data[,j]) != max(Random.Data[,j])
}
}
Eigenvalues.Random[i,] <- eigen(cor(Random.Data))$values
}
Eigenvalues.Random <- apply(Eigenvalues.Random, 2, mean) # calculate mean eigenvalue for each factor
N.Factors <- max(1, sum(Eigenvalues.Observed > Eigenvalues.Random))
}
# Generate random normal data for shared and unique components, initialize factor loadings (steps 5, 6) --------
Shared.Comp <- matrix(rnorm(N * N.Factors, 0, 1), nrow = N, ncol = N.Factors)
Unique.Comp <- matrix(rnorm(N * k, 0, 1), nrow = N, ncol = k)
Shared.Load <- matrix(0, nrow = k, ncol = N.Factors)
Unique.Load <- matrix(0, nrow = k, ncol = 1)
# Begin loop that ends when specified number of iterations pass without improvement in RMSR correlation --------
while (Trials.Without.Improvement < Max.Trials)
{
Iteration <- Iteration + 1
# Calculate factor loadings and apply to reproduce desired correlations (steps 7, 8) ---------------------------
Fact.Anal <- Factor.Analysis(Intermediate.Corr, Corr.Matrix = TRUE, N.Factors = N.Factors)
if (N.Factors == 1) Shared.Load[,1] <- Fact.Anal$loadings
else
for (i in 1:N.Factors)
Shared.Load[,i] <- Fact.Anal$loadings[,i]
Shared.Load[Shared.Load > 1] <- 1
Shared.Load[Shared.Load < -1] <- -1
if (Shared.Load[1,1] < 0) Shared.Load <- Shared.Load * -1
for (i in 1:k)
if (sum(Shared.Load[i,] * Shared.Load[i,]) < 1) Unique.Load[i,1] <-
(1 - sum(Shared.Load[i,] * Shared.Load[i,]))
else Unique.Load[i,1] <- 0
Unique.Load <- sqrt(Unique.Load)
for (i in 1:k)
Data[,i] <- (Shared.Comp %*% t(Shared.Load))[,i] + Unique.Comp[,i] * Unique.Load[i,1]
# Replace normal with nonnormal distributions (step 9) ---------------------------------------------------------
for (i in 1:k)
{
Data <- Data[sort.list(Data[,i]),]
Data[,i] <- Distributions[,i]
}
# Calculate RMSR correlation, compare to lowest value, take appropriate action (steps 10, 11, 12) --------------
Reproduced.Corr <- cor(Data)
Residual.Corr <- Target.Corr - Reproduced.Corr
RMSR <- sqrt(sum(Residual.Corr[lower.tri(Residual.Corr)] * Residual.Corr[lower.tri(Residual.Corr)]) /
(.5 * (k * k - k)))
if (RMSR < Best.RMSR)
{
Best.RMSR <- RMSR
Best.Corr <- Intermediate.Corr
Best.Res <- Residual.Corr
Intermediate.Corr <- Intermediate.Corr + Initial.Multiplier * Residual.Corr
Trials.Without.Improvement <- 0
}
else
{
Trials.Without.Improvement <- Trials.Without.Improvement + 1
Current.Multiplier <- Initial.Multiplier * .5 ^ Trials.Without.Improvement
Intermediate.Corr <- Best.Corr + Current.Multiplier * Best.Res
}
}
# Construct the data set with the lowest RMSR correlation (step 13) --------------------------------------------
Fact.Anal <- Factor.Analysis(Best.Corr, Corr.Matrix = TRUE, N.Factors = N.Factors)
if (N.Factors == 1) Shared.Load[,1] <- Fact.Anal$loadings
else
for (i in 1:N.Factors)
Shared.Load[,i] <- Fact.Anal$loadings[,i]
Shared.Load[Shared.Load > 1] <- 1
Shared.Load[Shared.Load < -1] <- -1
if (Shared.Load[1,1] < 0) Shared.Load <- Shared.Load * -1
for (i in 1:k)
if (sum(Shared.Load[i,] * Shared.Load[i,]) < 1) Unique.Load[i,1] <-
(1 - sum(Shared.Load[i,] * Shared.Load[i,]))
else Unique.Load[i,1] <- 0
Unique.Load <- sqrt(Unique.Load)
for (i in 1:k)
Data[,i] <- (Shared.Comp %*% t(Shared.Load))[,i] + Unique.Comp[,i] * Unique.Load[i,1]
Data <- apply(Data, 2, scale) # standardizes each variable in the matrix
for (i in 1:k)
{
Data <- Data[sort.list(Data[,i]),]
Data[,i] <- Distributions[,i]
}
# Report the results and return the simulated data set (step 14) -----------------------------------------------
return(Data)
}
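# Example usage (a sketch, not part of the original program; "My.Data" is assumed to be
# a matrix or data frame holding indicator variables only):
if (FALSE)
{
    Sim.Pop <- GenData(My.Data, N = 100000)
    round(cor(My.Data) - cor(Sim.Pop), 3)   # residual correlations should be near zero
}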
###########################################################################################################
# Factor analysis programs for R
# Written by Niels Waller, adapted for use with John Ruscio's taxometric programs
###########################################################################################################
Factor.Analysis <- function(Data, Corr.Matrix = FALSE, Max.Iter = 50, N.Factors = 0)
{
Data <- as.matrix(Data)
k <- dim(Data)[2]
if (N.Factors == 0) N.Factors <- k
if (!Corr.Matrix) Cor.Matrix <- cor(Data)
else Cor.Matrix <- Data
Criterion <- .001
Old.H2 <- rep(99, k)
H2 <- rep(0, k)
Change <- 1
Iter <- 0
Factor.Loadings <- matrix(nrow = k, ncol = N.Factors)
while ((Change >= Criterion) & (Iter < Max.Iter))
{
Iter <- Iter + 1
Eig <- eigen(Cor.Matrix)
L <- sqrt(Eig$values[1:N.Factors])
for (i in 1:N.Factors)
Factor.Loadings[,i] <- Eig$vectors[,i] * L[i]
for (i in 1:k)
H2[i] <- sum(Factor.Loadings[i,] * Factor.Loadings[i,])
Change <- max(abs(Old.H2 - H2))
Old.H2 <- H2
diag(Cor.Matrix) <- H2
}
if (N.Factors == k) N.Factors <- sum(Eig$values > 1)
return(list(loadings = Factor.Loadings[,1:N.Factors], factors = N.Factors))
}
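# Example usage (a sketch, not part of the original program): iterative principal-factor
# analysis of a hypothetical equicorrelated matrix, retaining one factor:
if (FALSE)
{
    R <- matrix(.5, nrow = 4, ncol = 4)
    diag(R) <- 1
    Factor.Analysis(R, Corr.Matrix = TRUE, N.Factors = 1)$loadings
}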
Factor.Analysis.LMode <- function(Data, Max.Iter = 50)
{
Data <- as.matrix(Data)
Cor.Matrix <- cor(Data)
Criterion <- .001
Old.H2 <- rep(99,dim(Cor.Matrix)[2])
Change <- 1
Iter <- 0
while ((Change >= Criterion) & (Iter < Max.Iter))
{
Iter <- Iter + 1
Eig <- eigen(Cor.Matrix)
L <- sqrt(Eig$values[1])
Factor.Loadings <- Eig$vectors[,1] * L
H2 <- Factor.Loadings * Factor.Loadings
Change <- max(abs(Old.H2 - H2))
Old.H2 <- H2
diag(Cor.Matrix) <- H2
}
Factor.Scores <- apply(Data %*% Factor.Loadings %*% solve(t(Factor.Loadings) %*% Factor.Loadings), 2, scale)
return(list(loadings = Factor.Loadings, scores = Factor.Scores))
}
###########################################################################################################
# Program used by L-Mode to calculate best RMSR fit for two densities, allowing horizontal shifts (a)
###########################################################################################################
Fit.Densities <- function(a, data)
{
x1 <- x2 <- data[[1]]$x
y1 <- data[[1]]$y
y2 <- data[[2]]
Nx <- length(x1)
w <- (max(y1)-min(y1)) / (max(x1)-min(x1))
x1 <- x1 * w
x2 <- x2 * w
dist <- rep(0, Nx)
for (i in 1:Nx)
dist[i] <- min(sqrt((a + x1[i] - x2)^2 + (y1[i] - y2)^2))
fit <- sqrt(sum(dist^2) / Nx)
return(fit)
}
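# Example usage (a sketch, not part of the original program): RMSR fit between two
# kernel densities evaluated on a common grid, minimized over the horizontal shift a:
if (FALSE)
{
    d1 <- density(rnorm(1000))
    d2 <- density(rnorm(1000, mean = .5), from = min(d1$x), to = max(d1$x))
    optimize(f = Fit.Densities, interval = c(-1, 1), list(d1, d2$y))$objective
}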
###########################################################################################################
#
# CreateData routine for simulating data from a handful of basic parameters
#
# Written by John Ruscio
# Last modified April 5, 2008
#
###########################################################################################################
CreateData <- function (Str, N = 600, k = 4, P = .50, d = 2.00, r = .00, Tax.r = .00, Comp.r = .00,
Skew = 0, Cuts = 0)
{
if (Skew < 0) P <- 1 - P
if (Str == "Dim")
{
newdata <- matrix(rep(0, N * k), ncol = k)
if (r > 0)
R.expected <- r
else
{
r.wg <- mean(c(Tax.r, Comp.r))
R.expected <- (P * (1 - P) * d ^ 2 + r.wg) / (1 + (P * (1 - P) * d ^ 2))
}
Loading.Common <- sqrt(R.expected)
Loading.Error <- sqrt(1 - R.expected)
Random.Common <- rnorm(N, mean = 0, sd = 1)
for (i in 1:k)
newdata[,i] <- Loading.Common * Random.Common + Loading.Error * rnorm(N, mean = 0, sd = 1)
}
else # create categorical data
{
newdata <- matrix(rep(0, N * (k + 1)), ncol = (k + 1))
N.Tax <- round(N * P)
N.Comp <- N - N.Tax
newdata[,(k + 1)] <- c(rep(2, N.Tax),rep(1, N.Comp))
# create taxon (with desired within-group correlation)
Loading.Common <- sqrt(Tax.r)
Loading.Error <- sqrt(1 - Tax.r)
Random.Common <- rnorm(N.Tax, mean = 0, sd = 1)
for (i in 1:k)
newdata[1:N.Tax,i] <- Loading.Common * Random.Common + Loading.Error * rnorm(N.Tax, mean = 0, sd = 1)
# create complement (with desired within-group correlation)
Loading.Common <- sqrt(Comp.r)
Loading.Error <- sqrt(1 - Comp.r)
Random.Common <- rnorm(N.Comp, mean = 0, sd = 1)
for (i in 1:k)
newdata[(N.Tax + 1):N,i] <- Loading.Common * Random.Common + Loading.Error * rnorm(N.Comp, mean = 0, sd = 1)
# add in categorical separation
if (Skew >= 0)
for (i in 1:k)
newdata[,i] <- newdata[,i] + (d * (newdata[, (k + 1)] - 1.5))
else
for (i in 1:k)
newdata[,i] <- newdata[,i] - (d * (newdata[, (k + 1)] - 1.5))
}
if (Skew > 0)
for (i in 1:k)
newdata[,i] <- exp(newdata[,i]/Skew)
if (Skew < 0)
{
for (i in 1:k)
newdata[,i] <- -exp(newdata[,i]/Skew)
if (Str == "Tax") newdata[,(k + 1)] <- 3 - newdata[,(k + 1)]
}
newdata[,1:k] <- apply(newdata[,1:k], 2, scale)
if (Cuts > 0)
{
for (i in 1:k)
{
Sorted.X <- newdata[sort.list(newdata[,i]),i]
Trim <- Sorted.X[round(.01 * N):round(.99 * N)]
Dens <- density(Trim, from = min(Trim), to = max(Trim))
Dens.N <- length(Dens$y)
Cut.Pts <- round(seq(from = 0, to = Dens.N, length = (Cuts + 1)))
Cut.Vals <- vector("numeric",Cuts)
for (j in 1:Cuts)
Cut.Vals[j] <- sum(Dens$y[(Cut.Pts[j] + 1):Cut.Pts[j + 1]])
Vals <- round(Cut.Vals * (N / sum(Cut.Vals)))
if (sum(Vals) != N) Vals[round(Cuts/2)] <- Vals[round(Cuts/2)] + (N - sum(Vals))
for (j in 1:Cuts)
Cut.Vals[j] <- Sorted.X[sum(Vals[1:j])]
Cut.Vals <- c(-100,Cut.Vals,100)
newdata[,i] <- cut(newdata[,i], breaks = Cut.Vals)[1:N]
}
}
newdata <- as.data.frame(newdata)
return(newdata)
}
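# Usage sketch (illustrative; parameter values are arbitrary assumptions): taxonic data carry the
# class codes (1 = complement, 2 = taxon) in the final column; dimensional data have no class column.
if (FALSE)
{
set.seed(1)
Tax.Sample <- CreateData("Tax", N = 600, k = 4, P = .25, d = 2)
Dim.Sample <- CreateData("Dim", N = 600, k = 4, r = .30)
table(Tax.Sample[,5]) # 450 complement (1) and 150 taxon (2) members
}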
###########################################################################################################
#
# Indicator.Dist
#
# This replaces the older "Validity.Est" program
#
# Written by John Ruscio
# Last modified November 10, 2008
#
###########################################################################################################
#
# Program to calculate indicator distributions and correlations in the full sample and within groups,
# as well as between-group separation. Uses the final column of data as a classification variable (1 =
# complement, 2 = taxon). Prints raw and standardized (using pooled variances, weighted by df) validity
# values for each indicator, followed by distributional summary statistics for each indicator and the
# indicator correlation matrix (in the full sample and within groups).
#
###########################################################################################################
Indicator.Dist <- function(Data)
{
# Read N (# cases), I (# items), Nt (number taxon members), Nc (number complement members)
Data <- as.matrix(Data)
N <- dim(Data)[1]
I <- dim(Data)[2] - 1
Nt <- sum(Data[,(I + 1)]) - N
Nc <- N - Nt
Data.f <- Data[,1:I]
Data.t <- Data[(Data[,(I + 1)] == 2),1:I]
Data.c <- Data[(Data[,(I + 1)] == 1),1:I]
for (i in 1:I)
{
if (min(Data.t[,i]) == max(Data.t[,i])) Data.t[,i] <- Data.t[,i] + rnorm(Nt, 0, .0001)
if (min(Data.c[,i]) == max(Data.c[,i])) Data.c[,i] <- Data.c[,i] + rnorm(Nc, 0, .0001)
}
# Calculate distributions for each indicator (full sample, taxon, complement)
Dist.Full <- matrix(nrow = I + 2, ncol = 4)
Dist.Tax <- matrix(nrow = I + 2, ncol = 4)
Dist.Comp <- matrix(nrow = I + 2, ncol = 4)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("M","SD","Skew","Kurtosis")
dimnames(Dist.Full) <- list(Rows, Cols)
dimnames(Dist.Tax) <- list(Rows, Cols)
dimnames(Dist.Comp) <- list(Rows, Cols)
for (i in 1:I)
{
Dist.Full[i,1] <- mean(Data.f[,i])
Dist.Tax[i,1] <- mean(Data.t[,i])
Dist.Comp[i,1] <- mean(Data.c[,i])
Dist.Full[i,2] <- sd(Data.f[,i])
Dist.Tax[i,2] <- sd(Data.t[,i])
Dist.Comp[i,2] <- sd(Data.c[,i])
Dist.Full[i,3] <- Skew(Data.f[,i])
Dist.Tax[i,3] <- Skew(Data.t[,i])
Dist.Comp[i,3] <- Skew(Data.c[,i])
Dist.Full[i,4] <- Kurtosis(Data.f[,i])
Dist.Tax[i,4] <- Kurtosis(Data.t[,i])
Dist.Comp[i,4] <- Kurtosis(Data.c[,i])
}
for (i in 1:4)
{
Dist.Full[I + 1,i] <- mean(Dist.Full[1:I,i])
Dist.Tax[I + 1,i] <- mean(Dist.Tax[1:I,i])
Dist.Comp[I + 1,i] <- mean(Dist.Comp[1:I,i])
Dist.Full[I + 2,i] <- sd(Dist.Full[1:I,i])
Dist.Tax[I + 2,i] <- sd(Dist.Tax[1:I,i])
Dist.Comp[I + 2,i] <- sd(Dist.Comp[1:I,i])
}
cat("\nIndicator distributions in the full sample ( N =",N,"):\n\n")
print(round(Dist.Full,3))
cat("\nIndicator distributions in the taxon ( n =",Nt,"):\n\n")
print(round(Dist.Tax,3))
cat("\nIndicator distributions in the complement ( n =",Nc,"):\n\n")
print(round(Dist.Comp,3))
# calculate validity for each indicator
Ind.Val <- matrix(nrow = I + 2, ncol = 2)
Rows <- c(paste("Ind",1:I), "M", "SD")
Cols <- c("Raw Units","Cohen's d")
dimnames(Ind.Val) <- list(Rows, Cols)
for (i in 1:I)
{
Ind.Val[i,1] <- Dist.Tax[i,1] - Dist.Comp[i,1]
Ind.Val[i,2] <- Ind.Val[i,1] / sqrt(((Nt - 1) * Dist.Tax[i,2]^2 + (Nc - 1) * Dist.Comp[i,2]^2)
/ (Nt + Nc - 2))
}
Ind.Val[I + 1,1] <- mean(Ind.Val[1:I,1])
Ind.Val[I + 1,2] <- mean(Ind.Val[1:I,2])
Ind.Val[I + 2,1] <- sd(Ind.Val[1:I,1])
Ind.Val[I + 2,2] <- sd(Ind.Val[1:I,2])
cat("\nBetween-group validity:\n\n")
print(round(Ind.Val,3))
# Calculate, print, and summarize full-sample and within-group correlations
Cor.Matrix <- cor(Data.f[,1:I])
Rows <- paste("Ind",1:I)
Cols <- paste("Ind",1:I)
dimnames(Cor.Matrix) <- list(Rows, Cols)
cat("\nIndicator correlations:\n\nFull Sample ( N =",N,"):\n\n")
print(round(Cor.Matrix,3))
Cor.Taxon <- cor(Data.t)
Cor.Comp <- cor(Data.c)
dimnames(Cor.Taxon) <- list(Rows, Cols)
dimnames(Cor.Comp) <- list(Rows, Cols)
cat("\nTaxon ( n =",Nt,"):\n\n")
print(round(Cor.Taxon,3))
cat("\nComplement ( n =",Nc,"):\n\n")
print(round(Cor.Comp,3))
Cor.Summary <- matrix(nrow = 3, ncol = 2)
dimnames(Cor.Summary) <- list(c("Full Sample","Taxon", "Complement"), c("M", "SD"))
Cor.Summary[1,1] <- mean(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,1] <- mean(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,1] <- mean(Cor.Comp[lower.tri(Cor.Comp)])
Cor.Summary[1,2] <- sd(Cor.Matrix[lower.tri(Cor.Matrix)])
Cor.Summary[2,2] <- sd(Cor.Taxon[lower.tri(Cor.Taxon)])
Cor.Summary[3,2] <- sd(Cor.Comp[lower.tri(Cor.Comp)])
cat("\nSummary of indicator correlations:\n\n")
print(round(Cor.Summary,3))
cat("\n\n")
}
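# Usage sketch (illustrative only): Indicator.Dist expects indicators plus a final column coded
# 1 (complement) / 2 (taxon), e.g. the output of CreateData("Tax", ...) above.
if (FALSE)
{
set.seed(1)
Sample <- CreateData("Tax", N = 600, k = 4, P = .25, d = 2)
Indicator.Dist(Sample) # prints validities, distributions, and correlation summaries
}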
###########################################################################################################
# Skewness and kurtosis programs for R
# Written by John Ruscio
###########################################################################################################
Skew <- function(x)
{
N <- length(x)
m1 <- mean(x)
m2 <- sum((x - m1) ^ 2) / N
m3 <- sum((x - m1) ^ 3) / N
g1 <- m3 / (m2 ^ (3/2))
G1 <- (sqrt(N * (N - 1)) / (N - 2)) * g1
return(G1)
}
Kurtosis <- function(x)
{
N <- length(x)
m1 <- mean(x)
m2 <- sum((x - m1) ^ 2) / N
m4 <- sum((x - m1) ^ 4) / N
g2 <- (m4 / (m2 ^ 2)) - 3
G2 <- ((N - 1) / ((N - 2) * (N - 3))) * ((N + 1) * g2 + 6)
return(G2)
}
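# Sanity check (illustrative): G1 and G2 are the bias-adjusted statistics most packages report;
# both should be near 0 for a large normal sample and Skew should be clearly positive for a lognormal one.
if (FALSE)
{
set.seed(1)
z <- rnorm(10000)
Skew(z) # approximately 0
Kurtosis(z) # approximately 0 (excess kurtosis)
Skew(exp(z)) # strongly positive
}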
###########################################################################################################
#
# P.Classify program to implement the base-rate classification technique
#
# Written by John Ruscio
# Last modified November 13, 2011
#
###########################################################################################################
P.Classify <- function(x, P.est, cols = 0)
{
N <- dim(x)[1]
if (cols[1] == 0) cols <- 1:dim(x)[2]
s <- apply(x[,cols], 1, sum)
y <- cbind(x, rep(0,N), 1:N, s)
N.Tax <- round(P.est * N)
N.Comp <- N - N.Tax
Classes <- c(rep(1,N.Comp), rep(2,N.Tax))
last.col <- dim(y)[2]
y <- y[sort.list(y[,last.col]),]
y[,last.col - 2] <- Classes
if (max(y[(y[,last.col - 2] == 1), last.col]) == min(y[(y[,last.col - 2] == 2), last.col]))
{
tied.score <- max(y[(y[,last.col - 2] == 1), last.col])
tied.cases <- (y[,last.col] == tied.score)
n.tied1 <- sum((y[,last.col - 2] == 1) & (tied.cases))
n.tied2 <- sum((y[,last.col - 2] == 2) & (tied.cases))
tied.min <- tied.score == min(y[,last.col])
tied.max <- tied.score == max(y[,last.col])
if (n.tied1 > n.tied2)
{
if (!tied.max) y[tied.cases, last.col - 2] <- 1
else y[tied.cases, last.col - 2] <- 2
}
else
{
if (!tied.min) y[tied.cases, last.col - 2] <- 2
else y[tied.cases, last.col - 2] <- 1
}
}
y <- y[sort.list(y[,last.col - 1]),]
return(cbind(x, y[,last.col - 2]))
}
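# Usage sketch (illustrative; P.est and the data are assumptions): the highest-scoring 25% of cases,
# by total score across all indicator columns, are assigned to the taxon (code 2).
if (FALSE)
{
set.seed(1)
X <- matrix(rnorm(200 * 4), ncol = 4)
Classified <- P.Classify(X, P.est = .25)
table(Classified[,5]) # 150 complement (1) and 50 taxon (2) cases
}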
###########################################################################################################
#
# Remove.Missing program to remove missing data (listwise deletion)
#
# Written by John Ruscio
# Last modified May 12, 2010
#
###########################################################################################################
Remove.Missing <- function(x)
{
N <- dim(x)[1]
k <- dim(x)[2]
complete <- rep(T, N)
for (i in 1:N)
for (j in 1:k)
if (is.na(x[i,j])) complete[i] <- F
x.k <- x[complete,]
n <- dim(x.k)[1]
if (n < N)
cat("\n*** ",n," out of ",N," cases (",round(100*n/N,2),"%) retained for analysis; ",
N-n," cases (",round(100*(N-n)/N,2),"%) contained missing data.\n", sep = "")
return(x.k)
}
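# Usage sketch (illustrative): one incomplete row is dropped and a retention note is printed.
if (FALSE)
{
X <- matrix(1:12, ncol = 3)
X[2,3] <- NA
Remove.Missing(X) # returns the 3 complete rows
}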