# thistleknot/BackStepBestSubsetZCAFilter.R

## BackStepBestSubsetZCAFilter.R

# ---- Setup ------------------------------------------------------------------
# Relies on objects defined outside this chunk: `newDF_t` (the data),
# `var_of_int` (name of the response column), the `%notin%` operator
# (presumably the negation of `%in%` -- confirm), `whiten()` and `corrplot()`.

# One-row tally matrices with one column per variable in newDF_t.
sig_table <- matrix(0, ncol = ncol(newDF_t))
colnames(sig_table) <- colnames(newDF_t)
signs_table <- matrix(0, ncol = ncol(newDF_t))
colnames(signs_table) <- colnames(newDF_t)

# Columns whose p-value is >= this threshold get eliminated by the loop below.
p_threshold <- .05

# Every column name except the first. Negative indexing avoids the `2:1`
# sequence that `2:length(x)` produces when there is only one column.
New_Names <- colnames(newDF_t)[-1]
iteration <- 0

dat <- 1:10
n <- length(dat)

# Columns to leave out of the analysis entirely (none by default).
exclude <- c()

# Sentinel >= p_threshold so the elimination loop runs at least once.
max_pvalue <- 1

subset <- newDF_t[, c(colnames(newDF_t) %notin% c(exclude)), drop = FALSE]

# Predictor names are taken from `subset` itself (not newDF_t) so the
# selection stays valid when `exclude` removes columns.
predictor_names <- colnames(subset)[colnames(subset) %notin% c(var_of_int)]

# Response column kept as-is, followed by the ZCA-whitened predictors.
subset_w <- cbind(
  subset[, var_of_int, drop = FALSE],
  as.data.frame(
    whiten(as.matrix(subset[, predictor_names, drop = FALSE]), method = "ZCA")
  )
)

# cbind() puts var_of_int first, so the labels must follow that order;
# reusing colnames(subset) would mislabel columns whenever var_of_int is not
# the first column of the data.
colnames(subset_w) <- c(var_of_int, predictor_names)
rownames(subset_w) <- rownames(subset)

corrplot(cor(subset_w))

# Whitened predictors only (response column removed).
set_ <- subset_w[, predictor_names]

# ---- Backward elimination ---------------------------------------------------
# Repeatedly drop the predictor whose correlation with var_of_int has the
# largest p-value, re-whiten the remaining predictors, and stop once every
# remaining predictor is below p_threshold (or none are left).
# NOTE(review): each pass re-whitens the already-whitened columns of subset_w
# rather than the raw columns of `subset` -- confirm this is intended.
while (max_pvalue >= p_threshold) {
  # Two-sided p-value for each column's correlation with var_of_int, using a
  # normal approximation with sd = 1 / sqrt(n). lower.tail = FALSE avoids the
  # precision loss of computing 1 - pnorm(...) for very small tail areas.
  p_values <- 2 * pnorm(
    abs(cor(subset_w)[, var_of_int, drop = FALSE]),
    mean = 0,
    sd = 1 / sqrt(nrow(subset)),
    lower.tail = FALSE
  )
  #p_values  <- (2 * (1 - pnorm(abs(PCOR(subset)[,var_of_int,drop=FALSE]), mean = 0, sd = 1/sqrt(nrow(subset)))))
  #p_values <- pcor(subset, method = c("spearman"))$p.value[,var_of_int,drop=FALSE]

  # Only predictors are candidates for removal; the response's correlation
  # with itself must never be selected.
  candidates <- setdiff(rownames(p_values), var_of_int)
  if (length(candidates) == 0L) {
    break
  }

  candidate_p <- p_values[candidates, 1]
  worst <- unname(which.max(candidate_p))  # which.max() skips NA/NaN
  if (length(worst) == 0L) {
    stop("No finite p-value among the remaining predictors.", call. = FALSE)
  }

  max_pname <- candidates[worst]
  max_pvalue <- unname(candidate_p[worst])

  if (max_pvalue >= p_threshold) {
    print(max_pvalue)
    print(max_pname)

    # Drop the least significant predictor by name.
    temp <- subset_w[, colnames(subset_w) != max_pname, drop = FALSE]
    kept <- setdiff(colnames(temp), var_of_int)

    temp_ <- subset_w[, var_of_int, drop = FALSE]
    if (length(kept) > 0L) {
      # Re-whiten what is left; skipped when nothing remains, because there
      # is no predictor matrix to whiten.
      temp_ <- cbind(
        temp_,
        as.data.frame(
          whiten(as.matrix(temp[, kept, drop = FALSE]), method = "ZCA")
        )
      )
    }

    # Label in the order cbind() produced: response first, then predictors.
    colnames(temp_) <- c(var_of_int, kept)
    rownames(temp_) <- rownames(temp)
    subset_w <- temp_
  }
}

# ---- Results ----------------------------------------------------------------
# Predictors still present in the last p-value table, i.e. every row except
# the response itself.
winners <- rownames(p_values)[rownames(p_values) %notin% c(var_of_int)]

# Add 1 to the tally of each surviving predictor.
sig_table <- sig_table + as.integer(colnames(newDF_t) %in% winners)

# Correlation plots for the response plus the surviving predictors: partial
# correlations on the raw data, then plain correlations on the whitened and
# on the raw data. (The first call previously lacked its closing parenthesis,
# which made the whole script unparseable.)
corrplot(pcor(subset[, c(var_of_int, winners)], method = "pearson")$estimate)
corrplot(cor(subset_w[, c(var_of_int, winners)]))
corrplot(cor(subset[, c(var_of_int, winners)]))

	# ---- Setup ----------------------------------------------------------------
	# Relies on objects defined outside this chunk: `newDF_t` (the data),
	# `var_of_int` (name of the response column), the `%notin%` operator
	# (presumably the negation of `%in%` -- confirm), `whiten()` and `corrplot()`.

	# One-row tally matrices with one column per variable in newDF_t.
	sig_table <- matrix(0, ncol = ncol(newDF_t))
	colnames(sig_table) <- colnames(newDF_t)
	signs_table <- matrix(0, ncol = ncol(newDF_t))
	colnames(signs_table) <- colnames(newDF_t)

	# Columns whose p-value is >= this threshold get eliminated by the loop below.
	p_threshold <- .05

	# Every column name except the first. Negative indexing avoids the `2:1`
	# sequence that `2:length(x)` produces when there is only one column.
	New_Names <- colnames(newDF_t)[-1]
	iteration <- 0

	dat <- 1:10
	n <- length(dat)

	# Columns to leave out of the analysis entirely (none by default).
	exclude <- c()

	# Sentinel >= p_threshold so the elimination loop runs at least once.
	max_pvalue <- 1

	subset <- newDF_t[, c(colnames(newDF_t) %notin% c(exclude)), drop = FALSE]

	# Predictor names are taken from `subset` itself (not newDF_t) so the
	# selection stays valid when `exclude` removes columns.
	predictor_names <- colnames(subset)[colnames(subset) %notin% c(var_of_int)]

	# Response column kept as-is, followed by the ZCA-whitened predictors.
	subset_w <- cbind(
	  subset[, var_of_int, drop = FALSE],
	  as.data.frame(
	    whiten(as.matrix(subset[, predictor_names, drop = FALSE]), method = "ZCA")
	  )
	)

	# cbind() puts var_of_int first, so the labels must follow that order;
	# reusing colnames(subset) would mislabel columns whenever var_of_int is not
	# the first column of the data.
	colnames(subset_w) <- c(var_of_int, predictor_names)
	rownames(subset_w) <- rownames(subset)

	corrplot(cor(subset_w))

	# Whitened predictors only (response column removed).
	set_ <- subset_w[, predictor_names]

	# ---- Backward elimination -------------------------------------------------
	# Repeatedly drop the predictor whose correlation with var_of_int has the
	# largest p-value, re-whiten the remaining predictors, and stop once every
	# remaining predictor is below p_threshold (or none are left).
	# NOTE(review): each pass re-whitens the already-whitened columns of subset_w
	# rather than the raw columns of `subset` -- confirm this is intended.
	while (max_pvalue >= p_threshold) {
	  # Two-sided p-value for each column's correlation with var_of_int, using a
	  # normal approximation with sd = 1 / sqrt(n). lower.tail = FALSE avoids the
	  # precision loss of computing 1 - pnorm(...) for very small tail areas.
	  p_values <- 2 * pnorm(
	    abs(cor(subset_w)[, var_of_int, drop = FALSE]),
	    mean = 0,
	    sd = 1 / sqrt(nrow(subset)),
	    lower.tail = FALSE
	  )
	  #p_values <- (2 * (1 - pnorm(abs(PCOR(subset)[,var_of_int,drop=FALSE]), mean = 0, sd = 1/sqrt(nrow(subset)))))
	  #p_values <- pcor(subset, method = c("spearman"))$p.value[,var_of_int,drop=FALSE]

	  # Only predictors are candidates for removal; the response's correlation
	  # with itself must never be selected.
	  candidates <- setdiff(rownames(p_values), var_of_int)
	  if (length(candidates) == 0L) {
	    break
	  }

	  candidate_p <- p_values[candidates, 1]
	  worst <- unname(which.max(candidate_p))  # which.max() skips NA/NaN
	  if (length(worst) == 0L) {
	    stop("No finite p-value among the remaining predictors.", call. = FALSE)
	  }

	  max_pname <- candidates[worst]
	  max_pvalue <- unname(candidate_p[worst])

	  if (max_pvalue >= p_threshold) {
	    print(max_pvalue)
	    print(max_pname)

	    # Drop the least significant predictor by name.
	    temp <- subset_w[, colnames(subset_w) != max_pname, drop = FALSE]
	    kept <- setdiff(colnames(temp), var_of_int)

	    temp_ <- subset_w[, var_of_int, drop = FALSE]
	    if (length(kept) > 0L) {
	      # Re-whiten what is left; skipped when nothing remains, because there
	      # is no predictor matrix to whiten.
	      temp_ <- cbind(
	        temp_,
	        as.data.frame(
	          whiten(as.matrix(temp[, kept, drop = FALSE]), method = "ZCA")
	        )
	      )
	    }

	    # Label in the order cbind() produced: response first, then predictors.
	    colnames(temp_) <- c(var_of_int, kept)
	    rownames(temp_) <- rownames(temp)
	    subset_w <- temp_
	  }
	}

	# ---- Results --------------------------------------------------------------
	# Predictors still present in the last p-value table, i.e. every row except
	# the response itself.
	winners <- rownames(p_values)[rownames(p_values) %notin% c(var_of_int)]

	# Add 1 to the tally of each surviving predictor.
	sig_table <- sig_table + as.integer(colnames(newDF_t) %in% winners)

	# Correlation plots for the response plus the surviving predictors: partial
	# correlations on the raw data, then plain correlations on the whitened and
	# on the raw data. (The first call previously lacked its closing parenthesis,
	# which made the whole script unparseable.)
	corrplot(pcor(subset[, c(var_of_int, winners)], method = "pearson")$estimate)
	corrplot(cor(subset_w[, c(var_of_int, winners)]))
	corrplot(cor(subset[, c(var_of_int, winners)]))