|
--- |
|
title: "THBS1 in clinical Colon cancer" |
|
author: "Wenjie Sun" |
|
date: 2022-3-5 |
|
output: |
|
html_document: |
|
toc: true |
|
theme: united |
|
number_sections: true |
|
always_allow_html: yes |
|
--- |
|
|
|
|
|
```{r echo=F} |
|
library(knitr) |
|
opts_chunk$set(echo=T, TOC=T, warning = F, dev = 'pdf') |
|
``` |
|
|
|
```{r} |
|
library(data.table) |
|
library(ggplot2) |
|
library(plyr) |
|
library(magrittr) |
|
library(readr) |
|
library(stringr) |
|
theme0 <- theme_bw() + theme( |
|
text = element_text(size = 15), |
|
line = element_line(size = 1), |
|
axis.line = element_line(size = 1), |
|
axis.ticks.length = unit(3, units = "mm"), axis.text.x = element_text( |
|
margin = margin(t = 2, unit = "mm") |
|
, angle = 60, vjust = 1, size = 15, hjust = 1), |
|
axis.text.y = element_text(margin = margin(r = 3, l = 5, unit = "mm")), |
|
legend.position = "right", |
|
) |
|
``` |
|
|
|
# Download data |
|
|
|
```{bash, eval=F} |
|
curl "http://api.smallysun.com/sv_datatable?gene=THBS1&tumor=COAD&cd=sampletype&area=exon&gene_sort=THBS1" | gzcat > tcga_m_THBS1.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=CTGF&tumor=COAD&cd=sampletype&area=exon&gene_sort=CTGF" | gzcat > tcga_m_CTGF.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=WWC2&tumor=COAD&cd=sampletype&area=exon&gene_sort=WWC2" | gzcat > tcga_m_WWC2.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=LGR5&tumor=COAD&cd=sampletype&area=exon&gene_sort=LGR5" | gzcat > tcga_m_LGR5.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=CYR61&tumor=COAD&cd=sampletype&area=exon&gene_sort=CYR61" | gzcat > tcga_m_CYR61.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=GAPDH&tumor=COAD&cd=sampletype&area=exon&gene_sort=GAPDH" | gzcat > tcga_m_GAPDH.tsv |
|
``` |
|
|
|
```{bash, eval=F} |
|
curl "http://api.smallysun.com/sv_datatable?gene=CDH1&tumor=COAD&cd=sampletype&area=exon&gene_sort=CDH1" | gzcat > tcga_m_CDH1.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=SNAI1&tumor=COAD&cd=sampletype&area=exon&gene_sort=SNAI1" | gzcat > tcga_m_SNAI1.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=B2M&tumor=COAD&cd=sampletype&area=exon&gene_sort=B2M" | gzcat > tcga_m_B2M.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=ACTB&tumor=COAD&cd=sampletype&area=exon&gene_sort=ACTB" | gzcat > tcga_m_ACTB.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=GAPDH&tumor=COAD&cd=sampletype&area=exon&gene_sort=GAPDH" | gzcat > tcga_m_GAPDH.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=KRAS&tumor=COAD&cd=sampletype&area=exon&gene_sort=KRAS" | gzcat > tcga_m_KRAS.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=APC&tumor=COAD&cd=sampletype&area=exon&gene_sort=APC" | gzcat > tcga_m_APC.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=TP53&tumor=COAD&cd=sampletype&area=exon&gene_sort=TP53" | gzcat > tcga_m_TP53.tsv |
|
``` |
|
|
|
```{bash, eval=F} |
|
curl "http://api.smallysun.com/sv_datatable?gene=AXIN2&tumor=COAD&cd=sampletype&area=exon&gene_sort=AXIN2" | gzcat > tcga_m_AXIN2 |
|
curl "http://api.smallysun.com/sv_datatable?gene=CD44&tumor=COAD&cd=sampletype&area=exon&gene_sort=CD44" | gzcat > tcga_m_CD44.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=SP5&tumor=COAD&cd=sampletype&area=exon&gene_sort=SP5" | gzcat > tcga_m_SP5.tsv |
|
``` |
|
|
|
```{bash, eval=T} |
|
curl "http://api.smallysun.com/sv_datatable?gene=MGST2&tumor=COAD&cd=sampletype&area=exon&gene_sort=MGST2" | gzcat > tcga_m_MGST2.tsv |
|
curl "http://api.smallysun.com/sv_datatable?gene=MYC&tumor=COAD&cd=sampletype&area=exon&gene_sort=MYC" | gzcat > tcga_m_MYC.tsv |
|
``` |
|
|
|
# THBS1 with Yap target genes |
|
|
|
Custom the PerformanceAnalytics function |
|
|
|
```{r} |
|
m_cor = function (R, histogram = TRUE, method = c("pearson", "kendall", |
|
"spearman"), ...) |
|
{ |
|
x = checkData(R, method = "matrix") |
|
if (missing(method)) |
|
method = method[1] |
|
cormeth <- method |
|
panel.cor <- function(x, y, digits = 7, prefix = "", use = "pairwise.complete.obs", |
|
method = cormeth, cex.cor, ...) { |
|
usr <- par("usr") |
|
on.exit(par(usr)) |
|
par(usr = c(0, 1, 0, 1)) |
|
r <- cor(x, y, use = use, method = method) |
|
txt <- format(c(r, 0.123456789), digits = digits)[1] |
|
txt <- paste(prefix, txt, sep = "") |
|
if (missing(cex.cor)) |
|
cex <- 0.8/strwidth(txt) |
|
test <- cor.test(as.numeric(x), as.numeric(y), method = method) |
|
Signif <- symnum(test$p.value, corr = FALSE, na = FALSE, |
|
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", |
|
"**", "*", ".", " ")) |
|
text(0.5, 0.5, txt, cex = cex) |
|
# text(0.8, 0.8, Signif, cex = cex, col = 2) |
|
text(0.5, 0.8, paste0("p=", format(test$p.value, digits = 5)), cex = cex / 3, col = 1) |
|
} |
|
f <- function(t) { |
|
dnorm(t, mean = mean(x), sd = sd.xts(x)) |
|
} |
|
dotargs <- list(...) |
|
dotargs$method <- NULL |
|
rm(method) |
|
hist.panel = function(x, ... = NULL) { |
|
par(new = TRUE) |
|
hist(x, col = "light gray", probability = TRUE, axes = FALSE, |
|
main = "", breaks = "FD") |
|
lines(density(x, na.rm = TRUE), col = "red", lwd = 1) |
|
rug(x) |
|
} |
|
if (histogram) |
|
pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor, |
|
diag.panel = hist.panel) |
|
else pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor) |
|
} |
|
``` |
|
|
|
TCGA Legacy |
|
|
|
```{r} |
|
fs = dir("./", 'tcga_m', full=T) |
|
d_l = fs %>% lapply(fread) |
|
d_wide = Reduce(function(x, y) { |
|
names(x) %>% print |
|
names(y[, c(1, 11), with=F]) %>% print |
|
merge(x, y[, c(1, 11), with=F], by = "sampleID") |
|
}, d_l[-1], d_l[[1]][, c(1, 11), with=F] |
|
) |
|
names(d_wide)[-1] %<>% sub("gene_", "", .) |
|
``` |
|
|
|
```{r} |
|
library("PerformanceAnalytics") |
|
gene_v = c( |
|
"THBS1" |
|
, "CTGF" |
|
, "WWC2" |
|
, "CYR61" |
|
, 'LGR5' |
|
, 'SP5' |
|
) |
|
d = d_wide[grepl("-01.$", sampleID)][, gene_v, with=F] %>% data.frame |
|
d = log2(d + 1) |
|
m_cor(d, histogram=F, pch=19, method = "spearman", size = 0.2, cex.cor = 0.2) |
|
``` |
|
|
|
Figure for reviewer |
|
|
|
```{r} |
|
library("PerformanceAnalytics") |
|
gene_v = c( |
|
"THBS1" |
|
, "MYC" |
|
, "B2M" |
|
, "ACTB" |
|
, 'GAPDH' |
|
, 'APC' |
|
, 'TP53' |
|
, 'SNAI1' |
|
, 'CDH1' |
|
) |
|
d = d_wide[grepl("-01.$", sampleID), gene_v, with=F] %>% data.frame |
|
d = log2(d + 1) |
|
m_cor(d, histogram=F, pch=19, method = "spearman", size = 0.2, cex.cor = 0.2) |
|
|
|
gene_v = c( |
|
"THBS1" |
|
, "CTGF" |
|
, "B2M" |
|
, "ACTB" |
|
, 'GAPDH' |
|
, 'SP5' |
|
, 'AXIN2' |
|
, 'CD44' |
|
, 'MGST2' |
|
) |
|
d = d_wide[grepl("-01.$", sampleID), gene_v, with=F] %>% data.frame |
|
d = log2(d + 1) |
|
m_cor(d, histogram=F, pch=19, method = "spearman", size = 0.2, cex.cor = 0.2) |
|
``` |