Skip to content

Instantly share code, notes, and snippets.

@wenjie1991
Last active May 9, 2022 09:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wenjie1991/6ff60b3edd5f61d0bd2ebe4f9404e46e to your computer and use it in GitHub Desktop.
Save wenjie1991/6ff60b3edd5f61d0bd2ebe4f9404e46e to your computer and use it in GitHub Desktop.
THBS1 in Colon cancer

This script is used for cancer gene expression analysis of paper: Paracrine signalling between intestinal epithelial and tumour cells induces a regenerative programme eLife2022;11:e76541 DOI: https://doi.org/10.7554/eLife.76541

Licence

TCGA_expression.Rmd - TCGA gene expression analysis Written in 2022 by Wenjie SUN sunwjie@gmail.com

To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see http://creativecommons.org/publicdomain/zero/1.0/.

---
title: "THBS1 in clinical Colon cancer"
author: "Wenjie Sun"
date: 2022-3-5
output:
html_document:
toc: true
theme: united
number_sections: true
always_allow_html: yes
---
```{r echo=F}
library(knitr)
opts_chunk$set(echo=T, TOC=T, warning = F, dev = 'pdf')
```
```{r}
library(data.table)
library(ggplot2)
library(plyr)
library(magrittr)
library(readr)
library(stringr)
theme0 <- theme_bw() + theme(
text = element_text(size = 15),
line = element_line(size = 1),
axis.line = element_line(size = 1),
axis.ticks.length = unit(3, units = "mm"), axis.text.x = element_text(
margin = margin(t = 2, unit = "mm")
, angle = 60, vjust = 1, size = 15, hjust = 1),
axis.text.y = element_text(margin = margin(r = 3, l = 5, unit = "mm")),
legend.position = "right",
)
```
# Download data
```{bash, eval=F}
curl "http://api.smallysun.com/sv_datatable?gene=THBS1&tumor=COAD&cd=sampletype&area=exon&gene_sort=THBS1" | gzcat > tcga_m_THBS1.tsv
curl "http://api.smallysun.com/sv_datatable?gene=CTGF&tumor=COAD&cd=sampletype&area=exon&gene_sort=CTGF" | gzcat > tcga_m_CTGF.tsv
curl "http://api.smallysun.com/sv_datatable?gene=WWC2&tumor=COAD&cd=sampletype&area=exon&gene_sort=WWC2" | gzcat > tcga_m_WWC2.tsv
curl "http://api.smallysun.com/sv_datatable?gene=LGR5&tumor=COAD&cd=sampletype&area=exon&gene_sort=LGR5" | gzcat > tcga_m_LGR5.tsv
curl "http://api.smallysun.com/sv_datatable?gene=CYR61&tumor=COAD&cd=sampletype&area=exon&gene_sort=CYR61" | gzcat > tcga_m_CYR61.tsv
curl "http://api.smallysun.com/sv_datatable?gene=GAPDH&tumor=COAD&cd=sampletype&area=exon&gene_sort=GAPDH" | gzcat > tcga_m_GAPDH.tsv
```
```{bash, eval=F}
curl "http://api.smallysun.com/sv_datatable?gene=CDH1&tumor=COAD&cd=sampletype&area=exon&gene_sort=CDH1" | gzcat > tcga_m_CDH1.tsv
curl "http://api.smallysun.com/sv_datatable?gene=SNAI1&tumor=COAD&cd=sampletype&area=exon&gene_sort=SNAI1" | gzcat > tcga_m_SNAI1.tsv
curl "http://api.smallysun.com/sv_datatable?gene=B2M&tumor=COAD&cd=sampletype&area=exon&gene_sort=B2M" | gzcat > tcga_m_B2M.tsv
curl "http://api.smallysun.com/sv_datatable?gene=ACTB&tumor=COAD&cd=sampletype&area=exon&gene_sort=ACTB" | gzcat > tcga_m_ACTB.tsv
curl "http://api.smallysun.com/sv_datatable?gene=GAPDH&tumor=COAD&cd=sampletype&area=exon&gene_sort=GAPDH" | gzcat > tcga_m_GAPDH.tsv
curl "http://api.smallysun.com/sv_datatable?gene=KRAS&tumor=COAD&cd=sampletype&area=exon&gene_sort=KRAS" | gzcat > tcga_m_KRAS.tsv
curl "http://api.smallysun.com/sv_datatable?gene=APC&tumor=COAD&cd=sampletype&area=exon&gene_sort=APC" | gzcat > tcga_m_APC.tsv
curl "http://api.smallysun.com/sv_datatable?gene=TP53&tumor=COAD&cd=sampletype&area=exon&gene_sort=TP53" | gzcat > tcga_m_TP53.tsv
```
```{bash, eval=F}
curl "http://api.smallysun.com/sv_datatable?gene=AXIN2&tumor=COAD&cd=sampletype&area=exon&gene_sort=AXIN2" | gzcat > tcga_m_AXIN2
curl "http://api.smallysun.com/sv_datatable?gene=CD44&tumor=COAD&cd=sampletype&area=exon&gene_sort=CD44" | gzcat > tcga_m_CD44.tsv
curl "http://api.smallysun.com/sv_datatable?gene=SP5&tumor=COAD&cd=sampletype&area=exon&gene_sort=SP5" | gzcat > tcga_m_SP5.tsv
```
```{bash, eval=T}
curl "http://api.smallysun.com/sv_datatable?gene=MGST2&tumor=COAD&cd=sampletype&area=exon&gene_sort=MGST2" | gzcat > tcga_m_MGST2.tsv
curl "http://api.smallysun.com/sv_datatable?gene=MYC&tumor=COAD&cd=sampletype&area=exon&gene_sort=MYC" | gzcat > tcga_m_MYC.tsv
```
# THBS1 with Yap target genes
Custom the PerformanceAnalytics function
```{r}
m_cor = function (R, histogram = TRUE, method = c("pearson", "kendall",
"spearman"), ...)
{
x = checkData(R, method = "matrix")
if (missing(method))
method = method[1]
cormeth <- method
panel.cor <- function(x, y, digits = 7, prefix = "", use = "pairwise.complete.obs",
method = cormeth, cex.cor, ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y, use = use, method = method)
txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- paste(prefix, txt, sep = "")
if (missing(cex.cor))
cex <- 0.8/strwidth(txt)
test <- cor.test(as.numeric(x), as.numeric(y), method = method)
Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***",
"**", "*", ".", " "))
text(0.5, 0.5, txt, cex = cex)
# text(0.8, 0.8, Signif, cex = cex, col = 2)
text(0.5, 0.8, paste0("p=", format(test$p.value, digits = 5)), cex = cex / 3, col = 1)
}
f <- function(t) {
dnorm(t, mean = mean(x), sd = sd.xts(x))
}
dotargs <- list(...)
dotargs$method <- NULL
rm(method)
hist.panel = function(x, ... = NULL) {
par(new = TRUE)
hist(x, col = "light gray", probability = TRUE, axes = FALSE,
main = "", breaks = "FD")
lines(density(x, na.rm = TRUE), col = "red", lwd = 1)
rug(x)
}
if (histogram)
pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor,
diag.panel = hist.panel)
else pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor)
}
```
TCGA Legacy
```{r}
fs = dir("./", 'tcga_m', full=T)
d_l = fs %>% lapply(fread)
d_wide = Reduce(function(x, y) {
names(x) %>% print
names(y[, c(1, 11), with=F]) %>% print
merge(x, y[, c(1, 11), with=F], by = "sampleID")
}, d_l[-1], d_l[[1]][, c(1, 11), with=F]
)
names(d_wide)[-1] %<>% sub("gene_", "", .)
```
```{r}
library("PerformanceAnalytics")
gene_v = c(
"THBS1"
, "CTGF"
, "WWC2"
, "CYR61"
, 'LGR5'
, 'SP5'
)
d = d_wide[grepl("-01.$", sampleID)][, gene_v, with=F] %>% data.frame
d = log2(d + 1)
m_cor(d, histogram=F, pch=19, method = "spearman", size = 0.2, cex.cor = 0.2)
```
Figure for reviewer
```{r}
library("PerformanceAnalytics")
gene_v = c(
"THBS1"
, "MYC"
, "B2M"
, "ACTB"
, 'GAPDH'
, 'APC'
, 'TP53'
, 'SNAI1'
, 'CDH1'
)
d = d_wide[grepl("-01.$", sampleID), gene_v, with=F] %>% data.frame
d = log2(d + 1)
m_cor(d, histogram=F, pch=19, method = "spearman", size = 0.2, cex.cor = 0.2)
gene_v = c(
"THBS1"
, "CTGF"
, "B2M"
, "ACTB"
, 'GAPDH'
, 'SP5'
, 'AXIN2'
, 'CD44'
, 'MGST2'
)
d = d_wide[grepl("-01.$", sampleID), gene_v, with=F] %>% data.frame
d = log2(d + 1)
m_cor(d, histogram=F, pch=19, method = "spearman", size = 0.2, cex.cor = 0.2)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment