Shixiang Wang (王诗翔) ShixiangWang

## platformMap.txt
"title"	"gpl"	"bioc_package"	"manufacturer"	"organism"	"data_row_count"
"Illumina Sentrix Array Matrix (SAM) - GoldenGate Methylation Cancer Panel I"	"GPL15380"	"GGHumanMethCancerPanelv1"	"Illumina"	"Homo sapiens"	1536
"Illumina HumanMethylation27 BeadChip (HumanMethylation27_270596_v.1.2)"	"GPL8490"	"IlluminaHumanMethylation27k"	"Illumina, Inc."	"Homo sapiens"	27578
"Illumina HumanMethylation450 BeadChip (HumanMethylation450_15017482)"	"GPL13534"	"IlluminaHumanMethylation450k"	"Illumina, Inc."	"Homo sapiens"	485577
"GE Healthcare/Amersham Biosciences CodeLink™   ADME Rat 16-Assay Bioarray"	"GPL2898"	"adme16cod"	"GE Healthcare"	"Rattus norvegicus"	1280
"[AG] Affymetrix Arabidopsis Genome Array"	"GPL71"	"ag"	"Affymetrix"	"Arabidopsis thaliana"	8297
"[ATH1-121501] Affymetrix Arabidopsis ATH1 Genome Array"	"GPL198"	"ath1121501"	"Affymetrix"	"Arabidopsis thaliana"	22810
"[Bovine] Affymetrix Bovine Genome Array"	"GPL2112"	"bovine"	"Affymetrix"	"Bos taurus"	24128
"[Canine] Affymetrix Canine Genome 1.0 Array"	"GPL39

## summary_fun.R
#' @function Summary functions for error bar plot.
#' @references http://www.jianshu.com/p/003138ac593b

## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summarized
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,

## getID.R
getID = function(exp_value, method){
  # compute threshold according to method parameter
  if(method=="quartile"){
    symbol_stat <- summary(exp_value)
    ths1 <- as.numeric(symbol_stat[2])
    ths2 <- as.numeric(symbol_stat[5])
    down_id <- names(exp_value[exp_value<=ths1])
    up_id <- names(exp_value[exp_value>=ths2])
  }else if(method=="mean"){
    ths <- mean(exp_value)

## plotDifference.R
pltDiff = function(data=NULL, sample=NULL, target=NULL,
                   group1, group2, group1.name="group1",
                   group2.name="group2", title="data", x_lab="", y_lab="",
                   notch=TRUE){
  require(ggpubr, quietly = TRUE)

  if(!is.data.frame(data)) stop("Wrong input! Please check your dataset. The input data type must be data.frame.")
  a = data.frame(value=data[, target], group=ifelse(!(data[,sample]%in%c(group1,group2)),
                                                 NA, ifelse(data[,sample]%in%group1, group1.name, group2.name)))
  a = a[a$group%in%c(group1.name, group2.name), ]

## t_test.R

T_test = function(data, group1, group2, adj.method="fdr"){
  # the first column of data is identifier
  # the other columns contain gene expression; sample names should be the colnames
  # group1, group2 are used to compare two groups of data
  # adj.method defines which method used to adjust p values

  colnames(data)[1] = "geneSymbol"
  # keep only the samples we want to compare; make sure
  # all names in group1 and group2 match colnames of data

## survival-plot.R
#' @function survival analysis according to gene expression for primary tumor
#' @param geneSymbol identify gene symbol
#' @param exp TCGA format gene expression dataset
#' @param  cli Clinical information dataset
#' @param method method used to divide samples into groups. Options are "quartile", "median", "mean". "quartile" uses the first and third quartiles as thresholds
#' @param trans transform the clinical IDs from '-' separated to '.' separated
#' @return include the plot and p value
#' @author Shixiang Wang

# input test data
	"title" "gpl" "bioc_package" "manufacturer" "organism" "data_row_count"
	"Illumina Sentrix Array Matrix (SAM) - GoldenGate Methylation Cancer Panel I" "GPL15380" "GGHumanMethCancerPanelv1" "Illumina" "Homo sapiens" 1536
	"Illumina HumanMethylation27 BeadChip (HumanMethylation27_270596_v.1.2)" "GPL8490" "IlluminaHumanMethylation27k" "Illumina, Inc." "Homo sapiens" 27578
	"Illumina HumanMethylation450 BeadChip (HumanMethylation450_15017482)" "GPL13534" "IlluminaHumanMethylation450k" "Illumina, Inc." "Homo sapiens" 485577
	"GE Healthcare/Amersham Biosciences CodeLink™ ADME Rat 16-Assay Bioarray" "GPL2898" "adme16cod" "GE Healthcare" "Rattus norvegicus" 1280
	"[AG] Affymetrix Arabidopsis Genome Array" "GPL71" "ag" "Affymetrix" "Arabidopsis thaliana" 8297
	"[ATH1-121501] Affymetrix Arabidopsis ATH1 Genome Array" "GPL198" "ath1121501" "Affymetrix" "Arabidopsis thaliana" 22810
	"[Bovine] Affymetrix Bovine Genome Array" "GPL2112" "bovine" "Affymetrix" "Bos taurus" 24128
	"[Canine] Affymetrix Canine Genome 1.0 Array" "GPL39
	#' @function Summary functions for error bar plot.
	#' @references http://www.jianshu.com/p/003138ac593b

	## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
	## data: a data frame.
	## measurevar: the name of a column that contains the variable to be summarized
	## groupvars: a vector containing names of columns that contain grouping variables
	## na.rm: a boolean that indicates whether to ignore NA's
	## conf.interval: the percent range of the confidence interval (default is 95%)
	summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
	getID = function(exp_value, method){
	# compute threshold according to method parameter
	if(method=="quartile"){
	symbol_stat <- summary(exp_value)
	ths1 <- as.numeric(symbol_stat[2])
	ths2 <- as.numeric(symbol_stat[5])
	down_id <- names(exp_value[exp_value<=ths1])
	up_id <- names(exp_value[exp_value>=ths2])
	}else if(method=="mean"){
	ths <- mean(exp_value)
	pltDiff = function(data=NULL, sample=NULL, target=NULL,
	group1, group2, group1.name="group1",
	group2.name="group2", title="data", x_lab="", y_lab="",
	notch=TRUE){
	require(ggpubr, quietly = TRUE)

	if(!is.data.frame(data)) stop("Wrong input! Please check your dataset. The input data type must be data.frame.")
	a = data.frame(value=data[, target], group=ifelse(!(data[,sample]%in%c(group1,group2)),
	NA, ifelse(data[,sample]%in%group1, group1.name, group2.name)))
	a = a[a$group%in%c(group1.name, group2.name), ]

	T_test = function(data, group1, group2, adj.method="fdr"){
	# the first column of data is identifier
	# the other columns contain gene expression; sample names should be the colnames
	# group1, group2 are used to compare two groups of data
	# adj.method defines which method used to adjust p values

	colnames(data)[1] = "geneSymbol"
	# keep only the samples we want to compare; make sure
	# all names in group1 and group2 match colnames of data
	#' @function survival analysis according to gene expression for primary tumor
	#' @param geneSymbol identify gene symbol
	#' @param exp TCGA format gene expression dataset
	#' @param cli Clinical information dataset
	#' @param method method used to divide samples into groups. Options are "quartile", "median", "mean". "quartile" uses the first and third quartiles as thresholds
	#' @param trans transform the clinical IDs from '-' separated to '.' separated
	#' @return include the plot and p value
	#' @author Shixiang Wang

	# input test data