Skip to content

Instantly share code, notes, and snippets.

@wenjie1991
Last active September 14, 2017 06:08
Show Gist options
  • Save wenjie1991/5f8e93d506408c31ad936fca48edc5a2 to your computer and use it in GitHub Desktop.
Save wenjie1991/5f8e93d506408c31ad936fca48edc5a2 to your computer and use it in GitHub Desktop.
CNV_SRSF6.R
source("https://github.com/wenjie1991/bioinfo/blob/master/R/scatterBar.R")
source("https://github.com/wenjie1991/bioinfo/blob/master/R/ucscCancer_extractor.R")
#=================================================
# read data
#=================================================
# read_csv
suppressMessages(dat.CNV <- read.ucscCancer.dir("../../../UCSC_cancer/COADREAD/TCGA_COADREAD_gistic2-2015-02-24/"))
suppressMessages(dat.mRNA <- read.ucscCancer.dir("../../../UCSC_cancer/COADREAD/TCGA_COADREAD_exp_HiSeqV2-2015-02-24"))
#=================================================
# prepare data
#=================================================
gene <- "SRSF6"
gene.mRNA <- extractProbe.ucscCancer("SFRS6", dat.mRNA)$gm %>% unlist
# gene.mRNA <- extractProbe.ucscCancer(gene, dat.mRNA)$gm %>% unlist
gene.CNV <- extractProbe.ucscCancer(gene, dat.CNV)$gm %>% unlist
cdDT <- dat.CNV$cd %>% data.table
cdDT[grepl("01$", sampleID), sampleType := "tumor"]
cdDT[grepl("11$", sampleID), sampleType := "normal"]
cdDT$stage <- cdDT$pathologic_stage %>% revalue(c("Stage I" = "Stage I",
"Stage IA" = "Stage I",
"Stage II" = "Stage II",
"Stage IIA" = "Stage II",
"Stage IIB" = "Stage II",
"Stage IIC" = "Stage II",
"Stage III" = "Stage III",
"Stage IIIA" = "Stage III",
"Stage IIIB" = "Stage III",
"Stage IIIC" = "Stage III",
"Stage IV" = "Stage IV",
"Stage IVA" = "Stage IV",
"Stage IVB" = "Stage IV"))
cdDT[, osTime := X_OS]
cdDT[, osEvent := X_EVENT]
cdDT$MS <- cdDT$CDE_ID_3226963 %>% factor(levels = c("MSS", "MSI-L", "MSI-H"))
cdSubDT <- cdDT[, .(sampleID, sampleType, osTime, osEvent, MS, stage)]
datDT <- cdSubDT
setkey(datDT, "sampleID")
suppressWarnings(datDT[names(gene.CNV), CNV := gene.CNV %>% as.numeric])
suppressWarnings(datDT[names(gene.mRNA), mRNA := gene.mRNA %>% as.numeric])
write_tsv(datDT, "tmp/SRF6_sub.xls")
#=================================================
# CNV by phenotype
#=================================================
pdf("./tmp/CNV_phenotype.pdf")
par(bty = "L", lwd = 3, cex = 1.5)
## MS statue
datSubDT <- datDT[stage != "", .(CNV, MS)] %>% na.omit
x <- datSubDT$MS
y <- as.numeric(datSubDT$CNV)
fit <- scatterBar(x = x, y = y, xlab = "Micro-Satellite", ylab = expression("log2(CN) - 1"), main = "CNV by MicroSatellite",
pch = 20, col = rgb(0.3,0.5,1,0.5), lwd = 2, cex = 1.5)
axis(1, lwd = 2, labels = F)
axis(2, lwd = 2, labels = F)
abline(h = 0, col = rgb(1, 0.4, 0, 0.7), lwd = 4)
datDT[, .(median = median(CNV, na.rm = T), mean = mean(CNV, na.rm =T), sd = sd(CNV, na.rm=T), n = length(CNV)), by = MS]
aov(datDT$CNV ~ datDT$MS) %>% summary
t.test(datDT[MS == "MSS", CNV], datDT[MS == "MSI-L", CNV]) # p > 0.1
t.test(datDT[MS == "MSS", CNV], datDT[MS == "MSI-H", CNV]) # p < 0.001
t.test(datDT[MS == "MSI-L", CNV], datDT[MS == "MSI-H", CNV]) # p < 0.001
## stage
datSubDT <- datDT[stage != "" & MS != "MSI-H", .(CNV, stage)] %>% na.omit
x <- datSubDT$stage %>% factor(levels = c("Stage I", "Stage II", "Stage III", "Stage IV"))
y <- as.numeric(datSubDT$CNV)
scatterBar(x = x, y = y, xlab = "Stage", ylab = "log2(CN) - 1",
main = "CNV by Stage", pch = 20, col = rgb(0.3,0.5,1,0.5),
lwd = 2, cex = 1.5)
axis(1, lwd = 3, labels = F)
axis(2, lwd = 3, labels = F)
abline(h = 2, col = rgb(1, 0.4, 0, 0.7), lwd = 4)
datDT[, .(median = median(CNV), mean= mean(CNV, na.rm =T), sd=sd(CNV, na.rm=T), n = length(CNV)), by = stage]
aov(datSubDT$CNV ~ factor(datSubDT$stage)) %>% summary
## CNV survival
# datSubDT <- datDT[stage != "" & MS != "MSI-H", .(CNV, osTime, osEvent)] %>% na.omit
datSubDT <- datDT[stage != "", .(CNV, osTime, osEvent)] %>% na.omit
library(survival)
y <- as.numeric(datSubDT$CNV) > 0.3
fit <- with(datSubDT, survfit(Surv(time = osTime, event = osEvent) ~ y))
plot(fit, xlab = "Days", ylab = "Overall Survival", lty = 1:2, lwd = 3, main = "CNV")
axis(1, lwd = 2, labels = F)
axis(2, lwd = 2, labels = F)
legend("topright", legend = c("normal", "amplification"), lty = 1:2)
with(datSubDT, survdiff(Surv(time = osTime, event = osEvent) ~ y)) # p > 0.1
y <- as.numeric(datSubDT$CNV)
with(datSubDT, coxph(Surv(time = osTime, event = osEvent) ~ y)) # p > 0.1
## mRNA survival
datSubDT <- datDT[stage != "" & MS != "MSI-H", .(mRNA, osTime, osEvent)] %>% na.omit
library(survival)
y <- as.numeric(datSubDT$mRNA)
y <- cut(y, quantile(y, c(0, 0.50, 1)))
fit <- with(datSubDT, survfit(Surv(time = osTime, event = osEvent) ~ y))
plot(fit, xlab = "Days", ylab = "Overall Survival", lty = 1:2, lwd = 3, main = "mRNA")
axis(1, lwd = 2, labels = F)
axis(2, lwd = 2, labels = F)
legend("topright", legend = c("low expression, n=164", "high expression, n=165"), lty = 1:2)
fit <- with(datSubDT, survdiff(Surv(time = osTime, event = osEvent) ~ y)) # p > 0.1
y <- as.numeric(datSubDT$mRNA)
with(datSubDT, coxph(Surv(time = osTime, event = osEvent) ~ y)) # p > 0.1
## mRNA
datSubDT <- datDT[MS != "MSI-H"]
with(datSubDT, plot(CNV, mRNA, pch = 20, col = rgb(0.3, 0.5, 1, 0.5), cex = 1.5,
xlab = "log2(CN) - 1", ylab = "mRNA expression"))
fit <- with(datSubDT, lm(mRNA ~ CNV))
abline(fit, col = rgb(1, 0.4, 0, 0.7))
axis(1, lwd = 2, labels = F)
axis(2, lwd = 2, labels = F)
summary(fit) # p < 0.001; adjusted R^2 = 0.3438
suppressMessages(dev.off())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment