Last active
December 4, 2017 11:26
-
-
Save ShixiangWang/c060346a6f98f75f7d5ac605814cc621 to your computer and use it in GitHub Desktop.
指定肿瘤表达数据矩阵和基因,根据基因分组绘制生存曲线,并按步长寻找最小p值(粗暴),需要先导入survival和survminer两个包
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @function survival analysis according to gene expression for primary tumor | |
#' @param geneSymbol identify gene symbol | |
#' @param exp TCGA format gene expression dataset | |
#' @param cli Clinical information dataset | |
#' @param method method use to dive samples into groups. Options are "quantile", "median", "mean". the "quartile" use first and third quartile as threshold | |
#' @param trans transform the clinical IDs from '-' separate to '.' separate | |
#' @return include the plot and p value | |
#' @author Shixiang Wang | |
# input test data | |
# exp <- exp.tumor | |
# cli <- cli_info | |
# geneSymbol <- "PDL1" | |
# method="quartile" | |
# | |
surv_analysis <- function(geneSymbol=NULL, exp, cli, method="quartile", trans=FALSE, step=10){ | |
if(any(c(is.null(geneSymbol), is.null(exp), is.null(cli)))){ | |
stop("Wrong data! Please check your input.") | |
} | |
if(trans){ | |
cli$sampleID = gsub("-",".",cli$sampleID, fixed = TRUE) | |
} | |
tumorSample <- cli[cli$sample_type=="Primary Tumor",]$sampleID | |
# exp <- exp[, c(1, na.omit(match(tumorSample, colnames(exp) )))] | |
tumorSampleID <- tumorSample[ na.omit(match(colnames(exp), tumorSample)) ] | |
symbol_exp <- exp[exp$sample==geneSymbol, tumorSampleID] | |
exp_value <- unlist(symbol_exp) | |
group_surv <- list() | |
# compute threshold according to method parameter | |
if(method=="quartile"){ | |
symbol_stat <- summary(exp_value) | |
ths1 <- as.numeric(symbol_stat[2]) | |
ths2 <- as.numeric(symbol_stat[5]) | |
down_id <- names(exp_value[exp_value<ths1]) | |
up_id <- names(exp_value[exp_value>ths2]) | |
}else if(method=="mean"){ | |
ths <- mean(exp_value) | |
down_id <- names(exp_value[exp_value<=ths]) | |
up_id <- names(exp_value[exp_value>ths]) | |
}else if(method=="median"){ | |
ths <- median(exp_value) | |
down_id <- names(exp_value[exp_value<=ths]) | |
up_id <- names(exp_value[exp_value>ths]) | |
}else{ | |
stop("The method you input have something wrong! Here only 'quantile', 'mean', 'median' provided. Please check one of them.")} | |
# compute the cutoff which p value is minimal | |
os_mat <- subset(cli, sampleID%in%c(up_id, down_id)) | |
os_mat$group <- "high" | |
os_mat[os_mat$sampleID%in%down_id,]$group <- "low" | |
search_cutoff <- function(mat, step=20){ | |
# mat <- os_mat | |
days <- max(na.omit(mat$X_OS)) | |
cutoff_list <- {} | |
pval_list <- {} | |
cutoff <- list() | |
while(days > 300){ | |
mat <- mat[mat$X_OS<=days,] | |
sdf <- survdiff(Surv(X_OS, X_EVENT)~group,data = mat) | |
p.val <- 1 - pchisq(sdf$chisq, length(sdf$n) - 1) | |
cutoff_list <- c(cutoff_list, days) | |
pval_list <- c(pval_list, p.val) | |
days <- days - step | |
} | |
min_pval <- min(pval_list) | |
min_cutoff <- cutoff_list[which(pval_list==min_pval)] | |
cutoff$cutoff_list <- cutoff_list | |
cutoff$pval_list <- pval_list | |
cutoff$min_pval <- min_pval | |
cutoff$min_cutoff <- min_cutoff | |
return(cutoff) | |
} | |
cutoff <- search_cutoff(os_mat, step=step) | |
surv <- list() | |
surv$os_mat <- os_mat | |
surv$cutoff <- cutoff | |
return(surv) | |
} | |
plot_surv <- function(os_mat, cutoff, pval=TRUE){ | |
fit <- survfit(Surv(X_OS, X_EVENT) ~ group, data = os_mat[os_mat$X_OS<=cutoff,]) | |
ggsurv <- ggsurvplot( | |
fit, # survfit object with calculated statistics. | |
data = os_mat[os_mat$X_OS<=cutoff,], # data used to fit survival curves. | |
risk.table = FALSE, # show risk table. | |
pval = pval, # show p-value of log-rank test. | |
conf.int = FALSE, # show confidence intervals for | |
# point estimates of survival curves. | |
palette = c("red", "blue"), | |
# palette = c("#E7B800", "#2E9FDF"), | |
# xlim = c(0,5000), # present narrower X axis, but not affect | |
# survival estimates. | |
xlab = "", # customize X axis label. | |
ylab = "", | |
# break.time.by = 100, # break X axis in time intervals by 500. | |
# ggtheme = theme_light(), # customize plot and risk table with a theme. | |
risk.table.y.text.col = T,# colour risk table text annotations. | |
risk.table.height = 0.25, # the height of the risk table | |
risk.table.y.text = FALSE,# show bars instead of names in text annotations | |
# in legend of risk table. | |
ncensor.plot = FALSE, # plot the number of censored subjects at time t | |
ncensor.plot.height = 0.25, | |
# conf.int.style = "step", # customize style of confidence intervals | |
# surv.median.line = "hv", # add the median survival pointer. | |
legend.labs = | |
c("APOBEC3B high expression", "APOBEC3B low expression"), # change legend labels. | |
legend.title = "", | |
size = 0.5, | |
censor.size = 2 | |
) | |
ggsurv <- ggpar( | |
ggsurv, | |
font.caption = c(6, "plain", "black"), | |
font.tickslab = c(8, "plain", "black"), | |
font.legend = c(6, "plain", "black"), | |
ggtheme = theme_survminer()+theme(legend.background=element_rect(fill=NA), legend.key.height = unit(3,"mm"), | |
line = element_line(size=.1), legend.position =c(0.8, 0.9)) | |
) | |
ggsurv$plot | |
} | |
# tt <- surv_analysis("APOBEC3B", exp, cli, method = "mean") | |
# sort(tt$cutoff$pval_list)[1:20] | |
# tt$cutoff$cutoff_list[match(sort(tt$cutoff$pval_list)[1:20], tt$cutoff$pval_list)] | |
# | |
# | |
# max(tt_2$cutoff$cutoff_list[match(sort(tt_2$cutoff$pval_list)[1:90], tt_2$cutoff$pval_list)]) | |
# sort(tt_2$cutoff$cutoff_list[match(sort(tt_2$cutoff$pval_list)[1:90], tt_2$cutoff$pval_list)]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment