MrHedmad/ANOVA_merged.R Secret

## README.md

      
    Raw
  

              README.md
            
          
    Interpreting these files

The versions.txt contains package and interpreter versions for both Python and R installed at the moment of the analysis.
Formatting of data_sources.txt

The data_sources.txt file follows a specific formatting:
[<filename>] :: `<url_or_pipeline>`
    On: <Date>
    Desc: <description>
    Preprocessing: `<command>`

<filename> is the filename that the scripts expect to find in the ./data/ folder of the analysis. The <url_or_pipeline> is the URL or pipeline that produced the file. If it is a pipeline, it is prefixed with pipe:<name_of_pipeline>, and a file named name_of_pipeline.pipe is included with more information on the pipeline. The Desc: <description> line is optional. The preprocessing command is assumed to be run on the file while in the ./data/ folder; e.g. the file might need to be unzipped before the analysis.
The ./data/ folder is assumed to be shallow.
The files with the actual analyses are ANOVA_merged.R and point_mutations.Rmd

  
## ANOVA_merged.R
# The anova analysis for RAP1A
# Made hastily once we deemed Deseq2 not the best for the job
# Therefore, it is sloppy and bad. Sorry to whomever has to read this.

library(tidyverse)
library(cowplot)    # Plot grids
library(ggsignif)   # Significance bars on the plots
library(PMCMRplus)  # Dunn test

# The expression of the two genes. The `sample` column is later turned into
# row names (gene names); every other column is one sample.
data <- read_tsv("./data/TTG_filtered_data")

# Metadata for TCGA (clinical table) and GTEx (phenotype table). The script
# reads `tumor_id`, `submitter_id`, `figo_stage` and the `ajcc_*` columns from
# the former, and `Sample` / `_primary_site` from the latter.
clinical_data <- read_csv("./data/clean_clinical_metadata.csv")
gtex_metadata <- read_tsv("./data/GTEX_phenotype")

#### << Functions >> ###

#' Derive the two disease-status labellings from the clinical metadata
#'
#' @param clinical_data Clinical table. The columns `tumor_id`,
#'   `submitter_id`, `figo_stage`, `ajcc_clinical_m`, `ajcc_pathologic_m`,
#'   `ajcc_clinical_n`, `ajcc_pathologic_n` and `ajcc_clinical_stage` are read.
#' @return The `tumor_id` and `submitter_id` columns plus two factors:
#'   `status_metastatic` (levels "metastatic", "non_metastatic") and
#'   `status_limph` (levels "limph_nodes+metastatic", "primary_tumor").
preprocess_metadata <- function(clinical_data) {
  n_patients <- length(clinical_data$tumor_id)

  # FIGO stage III or IV counts as spread disease; a missing stage does not.
  figo <- as.character(clinical_data$figo_stage)
  advanced_figo <- startsWith(figo, "Stage III") | startsWith(figo, "Stage IV")
  advanced_figo[is.na(advanced_figo)] <- FALSE

  # The "negative" levels listed here were picked by inspecting the data
  # manually; any other non-missing value counts as positive.
  distant_spread <-
    !(clinical_data$ajcc_clinical_m %in% c(NA, "M0", "MX")) |
    !(clinical_data$ajcc_pathologic_m %in% c(NA, "cM0 (i+)", "M0", "MX")) |
    clinical_data$ajcc_clinical_stage %in%
      c("Stage IV", "Stage IVA", "Stage IVB", "Stage IVC") |
    advanced_figo
  nodal_spread <-
    !(clinical_data$ajcc_clinical_n %in% c(NA, "N0", "NX")) |
    !(clinical_data$ajcc_pathologic_n %in%
      c(NA, "N0", "N0 (i-)", "N0 (i+)", "N0 (mol+)", "NX"))

  # "status_metastatic": distant spread only.
  metastatic_label <- replace(
    rep("non_metastatic", n_patients), distant_spread, "metastatic"
  )
  # "status_limph": distant spread or lymph-node involvement.
  limph_label <- replace(
    rep("primary_tumor", n_patients),
    distant_spread | nodal_spread,
    "limph_nodes+metastatic"
  )

  # Keep only the identifier columns and attach the two labellings.
  metadata <- clinical_data[, c("tumor_id", "submitter_id")]
  metadata$status_metastatic <- factor(
    metastatic_label, c("metastatic", "non_metastatic")
  )
  metadata$status_limph <- factor(
    limph_label, c("limph_nodes+metastatic", "primary_tumor")
  )
  metadata
}

#' Cut a TCGA sample ID down to the patient ID (its first 12 characters)
#'
#' @param x Character vector of IDs; shorter strings are returned unchanged.
trunc_to_pid <- function(x) {
  substr(x, 1, 12)
}

#' Look up the metadata of one sample from its ID
#'
#' Reads the global tables `gtex_metadata` and `simple_clinical_data`.
#'
#' @param x A single sample ID (a column name of the expression table).
#' @return A list of four fields: the sample ID, its source (GTEx
#'   `_primary_site` or TCGA `tumor_id`), its `status_metastatic` label and its
#'   `status_limph` label. GTEx samples get "healthy" for both labels.
#'   `NULL` when the ID starts with neither "GTEX" nor "TCGA", or when it has
#'   no metadata entry (a "Cannot find" line is printed in the latter case).
get_metadata_from_id <- function(x) {
  if (startsWith(x, "GTEX")) {
    # match() yields the first hit only, so duplicated or NA IDs in the table
    # can never produce more than one value per field.
    hit <- match(x, gtex_metadata$Sample)
    if (is.na(hit)) {
      print(paste("Cannot find", x))
      return(NULL)
    }
    return(list(x, gtex_metadata$`_primary_site`[hit], "healthy", "healthy"))
  }

  if (startsWith(x, "TCGA")) {
    # Clinical data is keyed by patient, not by sample.
    hit <- match(trunc_to_pid(x), simple_clinical_data$submitter_id)
    if (is.na(hit)) {
      print(paste("Cannot find", x))
      return(NULL)
    }
    return(list(
      x,
      simple_clinical_data$tumor_id[hit],
      as.character(simple_clinical_data$status_metastatic[hit]),
      as.character(simple_clinical_data$status_limph[hit])
    ))
  }

  # Any other column name (e.g. the gene-name column) carries no metadata.
  NULL
}

#' Reducer that stacks the results of `get_metadata_from_id`, one row each.
#'
#' @param x The accumulated result: on the first step a list of fields,
#'   afterwards the matrix built by the previous steps.
#' @param y The next list of fields to append.
#' @return A matrix with one more row than `x` had.
bind <- function(x, y) {
  # On the first reduction step the accumulator is still a list of fields.
  if (typeof(x) == "list") {
    x <- unlist(x)
  }
  y <- unlist(y)

  rbind(x, y)
}

#' Difference between the median expression of two status groups
#'
#' @param data Table with an `exprs` column and a `status` column.
#' @param x,y Status labels; the result is median(group x) - median(group y).
calc_median_diff <- function(data, x, y) {
  median_of <- function(label) {
    median(data$exprs[data$status == label])
  }
  median_of(x) - median_of(y)
}

#' Median difference for a comparison written as "groupA-groupB"
#'
#' @param data Table with an `exprs` column and a `status` column.
#' @param formula Comparison label; the two group names are separated by "-".
#' @return median(groupA) - median(groupB), via `calc_median_diff`.
get_diff <- function(data, formula) {
  groups <- str_split(formula, "-") |> unlist()
  minuend <- groups[1]
  subtrahend <- groups[2]
  calc_median_diff(data, minuend, subtrahend)
}

#' Compare expression between the status groups of one cohort
#'
#' Prints the group sizes and a per-group normality check, then chooses the
#' test family: if any group is both small (fewer than 30 samples) and
#' non-normal (p < 0.05), a Kruskal-Wallis test with Dunn post-hoc
#' (Bonferroni-adjusted) is run; otherwise a one-way ANOVA with Tukey HSD.
#' The parametric branch also draws diagnostic plots on the current device.
#'
#' @param data Data frame with the columns `exprs`, `source` and `status`.
#' @return A data frame with one row per pairwise comparison (row names of the
#'   form "groupA-groupB") holding at least `diff` and `p.adj`; the Tukey
#'   branch additionally has `lwr` and `upr`. In the Dunn branch `diff` is the
#'   difference of the group medians.
analyze <- function(data) {
  # Numerosity
  table(data$source, data$status) |> print()

  # Normality of the data, tested separately in every (non-empty) status group.
  # Each group is compared with the normal CDF ("pnorm") fitted to that group.
  # Mean and sd are estimated from the same data, so the test is conservative.
  print("NORMALITY")
  exprs_by_status <- split(data$exprs, data$status, drop = TRUE)
  ks.pvals <- vapply(exprs_by_status, function(values) {
    values <- values[!is.na(values)]
    spread <- if (length(values) > 1) sd(values) else NA_real_
    if (is.na(spread) || spread == 0) {
      # No normal distribution can be fitted: count the group as non-normal.
      return(0)
    }
    ks_res <- ks.test(values, "pnorm", mean = mean(values), sd = spread)
    print(ks_res)
    ks_res$p.value
  }, numeric(1))
  # Sizes come from the same split, so they always line up with the p-values.
  group_sizes <- lengths(exprs_by_status)

  if (any(ks.pvals < 0.05 & group_sizes < 30)) {
    print("Running in non-parametric mode.")
    # Omnibus test, printed like the ANOVA summary of the parametric branch.
    kruskal.test(exprs ~ status, data = data) |> print()
    dunn_res <- kwAllPairsDunnTest(exprs ~ status, data = data, p.adjust.method = "bonferroni")

    # Only the lower triangle (with diagonal) of the p-value matrix is filled;
    # label every entry as "<row group>-<column group>".
    m <- dunn_res$p.value
    filled <- lower.tri(m, diag = TRUE)
    dunn_rnms <- paste0(rownames(m)[row(m)[filled]], "-", colnames(m)[col(m)[filled]])
    dunn_rest <- data.frame(
      `p.adj` = m[filled],
      row.names = dunn_rnms
    )

    # Calculate differences in the median
    dunn_rest$diff <- vapply(dunn_rnms, get_diff, numeric(1), data = data)

    print(as.data.frame(dunn_rest))

    return(as.data.frame(dunn_rest))
  }

  # Make anova model
  anov.mod <- aov(exprs ~ status, data = data)

  # Variance of the data (residuals vs fitted)
  print("VARIANCE")
  plot(anov.mod, 1)

  # summary of anova
  summary(anov.mod) |> print()

  # Post test
  tukey_res <- TukeyHSD(anov.mod)
  tukey_res |> print()
  tukey_res |> plot()

  # Use the same column name for the adjusted p-value as the Dunn branch.
  colnames(tukey_res$status) <- c("diff", "lwr", "upr", "p.adj")
  as.data.frame(tukey_res$status)
}

# Named helpers so these steps can be used inside pipes
# Turn the stacked per-sample metadata (one row per sample, four columns) into
# a data frame with named columns.
to_frame <- \(x) {
  # A reduction over a single sample hands that sample back un-stacked (a plain
  # list of four fields, no dimensions); promote it to a one-row matrix.
  if (is.null(dim(x))) {
    x <- matrix(unlist(x), nrow = 1)
  }
  data.frame(sample_id = x[, 1], source = x[, 2], status_metastatic = x[, 3], status_limph = x[, 4])
}
# Drop the NULL entries of a list. vapply() keeps the mask logical even when
# the list is empty.
omit_null <- \(x) {
  x[!vapply(x, is.null, logical(1))]
}
# Plain-name alias of purrr's reduce; it is used as a pipe step when the
# sample metadata is assembled.
prreduce <- purrr::reduce

#' Build the three-column table that `analyze()` and `bplot()` work on
#'
#' @param x Table with `exprs`, `source` and the column named by `stat_type`.
#' @param stat_type Name of the status column to use (a string).
#' @return A data frame with `exprs`, `source` and `status` (a factor).
prep_for_analysis <- function(x, stat_type) {
  # `[[` always yields the column itself, whatever kind of table `x` is;
  # `x[, stat_type]` only does so for base data frames.
  data.frame(exprs = x$exprs, source = x$source, status = as.factor(x[[stat_type]]))
}

#### <<< END Functions >>> ###

simple_clinical_data <- preprocess_metadata(clinical_data)

# The `TARGET` samples are not needed, so their columns are left out up front.
all_colnames <- colnames(data)
intr_colnames <- all_colnames[!startsWith(all_colnames, "TARGET")]

# Per-sample metadata: look every ID up, drop the IDs without metadata, stack
# the hits into one table and turn both status columns into factors.
metadata <- intr_colnames |>
  map(get_metadata_from_id) |>
  omit_null() |>
  prreduce(bind) |>
  to_frame() |>
  mutate(across(c(status_metastatic, status_limph), as.factor)) |>
  remove_rownames()

# Gene names become the row names so a gene can be picked by name.
data <- data |>
  remove_rownames() |>
  column_to_rownames("sample")

# Ready for the analysis: one single-row table per gene.
RAP1 <- data["RAP1A", , drop = FALSE]
TRPM8 <- data["TRPM8", , drop = FALSE]

# Reshape to one row per sample with a single `exprs` column.
RAP1r <- data.frame(exprs = unlist(RAP1[1, ]), row.names = colnames(RAP1))
TRPMr <- data.frame(exprs = unlist(TRPM8[1, ]), row.names = colnames(TRPM8))

# Merge with metadata on the sample ID (the row names of the expression
# tables), then move the sample ID back into the row names.
TRPMrm <- merge(TRPMr, metadata, by.x = 0, by.y = "sample_id") |>
  remove_rownames() |>
  column_to_rownames("Row.names")

RAP1rm <- merge(RAP1r, metadata, by.x = 0, by.y = "sample_id") |>
  remove_rownames() |>
  column_to_rownames("Row.names")

results <- list()

# The `source` values (GTEx tissues plus the TCGA project) forming each cohort.
breast_sources <- c("Breast", "TCGA-BRCA")
uterus_sources <- c("Cervix Uteri", "Uterus", "TCGA-UCEC")
prostate_sources <- c("Prostate", "TCGA-PRAD")

# Restrict one gene's table to a cohort and analyse it under one labelling.
run_cohort <- function(gene_data, sources, stat_type) {
  gene_data |>
    filter(source %in% sources) |>
    prep_for_analysis(stat_type) |>
    analyze()
}

# Running on the "meta/non meta" labelling
# Run these one at a time to check for assumptions (plots + messages)
results$RBRCA_meta <- run_cohort(RAP1rm, breast_sources, "status_metastatic")
results$RUCEC_meta <- run_cohort(RAP1rm, uterus_sources, "status_metastatic")
results$RPRAD_meta <- run_cohort(RAP1rm, prostate_sources, "status_metastatic")

results$TBRCA_meta <- run_cohort(TRPMrm, breast_sources, "status_metastatic")
results$TUCEC_meta <- run_cohort(TRPMrm, uterus_sources, "status_metastatic")
results$TPRAD_meta <- run_cohort(TRPMrm, prostate_sources, "status_metastatic")

# Running on the "limph/non limph" labelling
results$RBRCA_limph <- run_cohort(RAP1rm, breast_sources, "status_limph")
results$RUCEC_limph <- run_cohort(RAP1rm, uterus_sources, "status_limph")
results$RPRAD_limph <- run_cohort(RAP1rm, prostate_sources, "status_limph")

results$TBRCA_limph <- run_cohort(TRPMrm, breast_sources, "status_limph")
results$TUCEC_limph <- run_cohort(TRPMrm, uterus_sources, "status_limph")
results$TPRAD_limph <- run_cohort(TRPMrm, prostate_sources, "status_limph")


### <<< Plotting and tabulating results >>> ###
# Add significance
#' Translate p-values into significance marks
#'
#' @param pval Numeric vector of p-values (any length).
#' @return Character vector of the same length: "***" below 0.001, "**" below
#'   0.01, "*" below 0.05, "-" otherwise, and `NA` for a missing p-value.
sig <- function(pval) {
  marks <- c("***", "**", "*", "-")
  # findInterval() counts how many thresholds are <= pval (0 to 3).
  marks[findInterval(pval, c(0.001, 0.01, 0.05)) + 1L]
}

#' Append a `sig` column of significance marks to a result table
#'
#' @param x Data frame with a `p.adj` column.
#' @return `x` with an extra character column `sig`, one mark per row.
add_sig <- function(x) {
  # vapply() keeps the column a character vector even for an empty table.
  x$sig <- vapply(x$`p.adj`, sig, character(1))
  x
}

# Same tables as `results`, each with the extra `sig` column of marks.
results2 <- lapply(results, add_sig)

# See a summary
#' Print the `diff`, `p.adj` and `sig` columns of every table in `results2`
#'
#' Reads the global list `results2`. Output is printed with 3 significant
#' digits; the previous `digits` option is restored on exit.
see_res <- function() {
  oo <- options(digits = 3)
  on.exit(options(oo), add = TRUE)

  shown_cols <- c("diff", "p.adj", "sig")
  # Result names are built as <gene prefix><cohort>_<labelling>.
  genes <- c(R = "RAP1A", T = "TRPM8")
  cohorts <- c("PRAD", "BRCA", "UCEC")

  show_labelling <- function(suffix, label, row_order = NULL) {
    for (cohort in cohorts) {
      for (prefix in names(genes)) {
        print(paste(genes[[prefix]], "-", cohort, "-", label))
        res <- results2[[paste0(prefix, cohort, "_", suffix)]]
        if (is.null(row_order)) {
          print(res[, shown_cols])
        } else {
          print(res[row_order, shown_cols])
        }
        cat("\n")
      }
    }
  }

  show_labelling("meta", "META")
  cat("---------------\n")

  # The limph tables are shown with their first two rows swapped.
  # NOTE(review): presumably so the comparisons appear in the same order as
  # in the meta tables - confirm.
  show_labelling("limph", "LIMPH", row_order = c(2, 1, 3))
}

# Print the tabulated results to the console.
see_res()

# For some reason this is needed or the plot saving later fails.
# NOTE(review): presumably the diagnostic plots drawn by analyze() leave
# graphics devices open, and closing them all gives the figure export a clean
# device list - confirm.
graphics.off()


### PLOTS
#' Violin + box plot of expression per status group
#'
#' @param data Table with `exprs` and a factor `status`, in either labelling.
#' @param title Plot title.
#' @param extra.colours Fill colours of the two non-healthy groups, in display
#'   order; the healthy group is always light gray.
#' @param comps Logical mask selecting which of the three candidate
#'   comparisons get a significance bar. The candidates are listed in a
#'   different order for the two labellings, so the same mask selects
#'   different pairs.
#' @param addn Whether to print the number of samples under each group.
#' @return A ggplot object.
#' NOTE(review): the significance levels drawn here are computed by
#' geom_signif() itself (its default test), not taken from analyze() - confirm
#' that this is intended.
bplot <- function(
  data, title,
  extra.colours = c("#048ab3", "lightblue"), comps = c(TRUE, TRUE, TRUE), addn = TRUE
) {
  # Sample-count label for one group, placed at the lowest expression value of
  # the whole data set so all counts share one baseline.
  count_label <- function(values) {
    c(y = min(data$exprs), label = length(values))
  }

  # Display order of the groups and the candidate comparisons depend on the
  # labelling the data carries.
  if ("non_metastatic" %in% levels(data$status)) {
    status_order <- c("healthy", "non_metastatic", "metastatic")
    candidate_pairs <- list(
      c("healthy", "metastatic"),
      c("healthy", "non_metastatic"),
      c("non_metastatic", "metastatic")
    )
  } else {
    status_order <- c("healthy", "primary_tumor", "limph_nodes+metastatic")
    candidate_pairs <- list(
      c("healthy", "primary_tumor"),
      c("healthy", "limph_nodes+metastatic"),
      c("primary_tumor", "limph_nodes+metastatic")
    )
  }
  data$status <- factor(data$status, levels = status_order)

  violin <- ggplot(data = data, aes(x = status, y = exprs, fill = status)) +
    scale_fill_manual(values = c("lightgray", extra.colours)) +
    geom_violin() +
    geom_boxplot(
      width = 0.1,
      outlier.color = "#00ff1a", outlier.alpha = 0.5,
      outlier.size = 0.8, outlier.shape = 4
    ) +
    ggtitle(title) +
    ylab("Normalized Expression (log2)") +
    xlab("Status") +
    theme_bw() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 10),
      axis.title = element_text(size = 8)
    ) +
    geom_signif(
      comparisons = candidate_pairs[comps],
      map_signif_level = TRUE,
      step_increase = 0.1,
      vjust = 0.5
    )

  if (!addn) {
    return(violin)
  }

  # Group sizes as text, nudged below the lowest data point.
  violin +
    stat_summary(
      fun.data = count_label, geom = "text",
      position = position_nudge(y = - 0.5), size = 2.5,
      colour = "black"
    )
}


#' Draw the six cohort plots for one labelling and save them as one figure
#'
#' Reads the global tables `RAP1rm` and `TRPMrm`. Writes
#' `./results/<code>_results.pdf`, `.tiff` and `.png`. The PNG used to be
#' written to the `.tiff` file name, clobbering the TIFF output.
#'
#' @param stat_type Name of the status column to plot ("status_metastatic" or
#'   "status_limph").
#' @param code Prefix of the output file names.
#' @return `NULL`, invisibly; called for its files.
regenerate_plots <- function(stat_type, code) {
  print(stat_type)
  print(code)

  prostate <- c("Prostate", "TCGA-PRAD")
  breast <- c("Breast", "TCGA-BRCA")
  uterus <- c("Cervix Uteri", "Uterus", "TCGA-UCEC")

  # One gene's table restricted to a cohort, in the shape bplot() expects.
  cohort_data <- function(gene_data, sources) {
    gene_data |>
      filter(source %in% sources) |>
      prep_for_analysis(stat_type)
  }

  # The logical masks choose which significance bars each panel shows.
  plots <- list(
    RPRO = bplot(cohort_data(RAP1rm, prostate), "RAP1A - Prostate Dataset", c("#c40000", "#e83e00"), c(FALSE, FALSE, FALSE)),
    TPRO = bplot(cohort_data(TRPMrm, prostate), "TRPM8 - Prostate Dataset", c("#c40000", "#e83e00"), c(FALSE, TRUE, FALSE)),
    RBRE = bplot(cohort_data(RAP1rm, breast), "RAP1A - Breast Dataset", comps = c(FALSE, FALSE, FALSE)),
    TBRE = bplot(cohort_data(TRPMrm, breast), "TRPM8 - Breast Dataset", comps = c(FALSE, TRUE, FALSE)),
    RUTE = bplot(cohort_data(RAP1rm, uterus), "RAP1A - Uterus Dataset", c("#8f3199", "#bf41cc"), c(TRUE, TRUE, FALSE)),
    TUTE = bplot(cohort_data(TRPMrm, uterus), "TRPM8 - Uterus Dataset", c("#8f3199", "#bf41cc"), c(TRUE, TRUE, FALSE))
  )

  # Open one device, draw the 3x2 grid on it and close it again. Doing this
  # per device does not depend on which other devices happen to be open.
  save_with <- function(open_device) {
    open_device()
    on.exit(dev.off(), add = TRUE)
    plot_grid(
      plots$RPRO, plots$TPRO,
      plots$RBRE, plots$TBRE,
      plots$RUTE, plots$TUTE,
      ncol = 2
    ) |> print()
  }

  out_base <- paste0('./results/', code, '_results')
  save_with(\() pdf(file = paste0(out_base, '.pdf'), height = 11, width = 9))
  save_with(\() tiff(file = paste0(out_base, '.tiff'), height = 6600, width = 5400, res = 600))
  save_with(\() png(file = paste0(out_base, '.png'), height = 6600, width = 5400, res = 600))

  invisible(NULL)
}

#' Regenerate the figure files for both labellings
regenerate_all_plots <- function() {
  # Status column -> prefix of the output file names.
  outputs <- c(status_limph = "LIMPH", status_metastatic = "NOLIMPH")
  for (stat_type in names(outputs)) {
    regenerate_plots(stat_type, outputs[[stat_type]])
  }
}

# Write the figure files for both labellings.
regenerate_all_plots()

## EXTRAS ##
# See the TRPM8 zero counts. Not exactly zero, but less than 1

#' Print how many expression values of one group satisfy a predicate
#'
#' Written to be used with `group_walk()`.
#'
#' @param x Rows of one group; only `exprs` is read.
#' @param y Key of the group; `status` labels the printed line.
#' @param fun Vectorised predicate applied to the expression values.
#' @return The printed line ("<status>: <count> - <percent>%"), invisibly.
count_n <- function(x, y, fun) {
  values <- x$exprs
  matching <- sum(fun(values))
  percent <- (matching / length(values)) * 100
  summary_line <- paste0(y$status[1], ": ", matching, " - ", round(percent, 2), "%")
  print(summary_line)
}

# Share of samples with TRPM8 expression below 1, per status group.
# The merged tables have `status_metastatic` / `status_limph` but no `status`
# column, so they go through prep_for_analysis() before grouping.
# NOTE(review): the metastatic labelling is assumed here; switch to
# "status_limph" if that grouping was intended.
TRPMrm |> filter(source == "Cervix Uteri" | source == "Uterus" | source == "TCGA-UCEC") |>
  prep_for_analysis("status_metastatic") |>
  group_by(status) |> group_walk(count_n, fun = \(x) {x < 1})

TRPMrm |> filter(source == "Breast" | source == "TCGA-BRCA") |>
  prep_for_analysis("status_metastatic") |>
  group_by(status) |> group_walk(count_n, fun = \(x) {x < 1})

## data_sources.txt

[GTEX_phenotype] :: https://toil-xena-hub.s3.us-east-1.amazonaws.com/download/GTEX_phenotype.gz
    On: 22.02.24-17:52:46
    Desc: The metadata for the GTEX patients in TcgaTargetGtex_gene_expected_count
    Prep: gunzip -k ${in}
    Hash: 32449abd7951f44b469e1a103acce0ff


[clean_clinical_metadata.csv] :: TCGA_clean_clinical_data.pipe
    On: 22.02.25-10:43:34
    Desc: All (cleaned) clinical metadata from the TCGA consortium.
    Prep:
    Hash: 7dfd9078f6fc69d96d2187e3cb24b0b7


[GDC-PANCAN.mutect2_snv.tsv] :: https://gdc-hub.s3.us-east-1.amazonaws.com/download/GDC-PANCAN.mutect2_snv.tsv.gz
    On: 22.02.25-10:46:21
    Desc: All mutations from the TCGA project detected with Mutect2
    Prep: gunzip -k ${in}
    Hash: acfbcfa6ff059f35eb76306e4b1ec933


[TTG_filtered_data] :: https://toil-xena-hub.s3.us-east-1.amazonaws.com/download/TcgaTargetGtex_RSEM_Hugo_norm_count.gz
    On: 22.03.04-11:41:55
    Desc: Just the two genes of interest from the Whole normalized counts of TCGA/TARGET/GTEX dataset from Xena
    Prep: gunzip -c ${in} | awk "/^RAP1A/ || /^TRPM8/ || NR==1 p" > ./data/TTG_filtered_data
    Hash: a4b479b80f98e26a93f10b22729a000f

## point_mutations.Rmd
---
title: "RAP1A and TRPM8 point mutations"
output: html_notebook
---

We are looking for mutations in TRPM8 in positions `Y240` and `E207`, and in RAP1A in positions `K31` and `Y32`.

```{r}
library(tidyverse)

# TCGA clinical table; `tumor_id` and `submitter_id` are the columns used below.
clinical_data <- read.csv("./data/clean_clinical_metadata.csv")
# Mutation calls, one row per mutation.
all_mutations <- read_tsv("./data/GDC-PANCAN.mutect2_snv.tsv")
```

We add the annotations for the tumor type to the mutation database.

```{r}
# The patient ID is the first 12 characters of the sample ID. str_trunc() is
# vectorised, so it is applied to the whole column at once.
all_mutations$patient_id <- str_trunc(all_mutations$Sample_ID, width = 12, side = "right", ellipsis = "")

# merge() keeps only the mutations whose patient appears in the clinical table.
all_mutations <- merge(all_mutations, clinical_data[,c("tumor_id", "submitter_id")], by.x = "patient_id", by.y = "submitter_id")
```

We start by keeping only putative missense mutations.

```{r}
# Missense variants of the two genes of interest only.
filtered_mutations <- all_mutations |>
  filter(gene %in% c("RAP1A", "TRPM8"), effect == "missense_variant")
```

See the numerosities:

```{r}
# Missense counts per gene, also shown relative to a fixed per-gene
# denominator (184 for RAP1A, 1104 for TRPM8).
# NOTE(review): these look like the protein lengths in residues - confirm.
n_rap1_missense <- sum(filtered_mutations$gene == "RAP1A", na.rm = TRUE)
n_trpm8_missense <- sum(filtered_mutations$gene == "TRPM8", na.rm = TRUE)

cat(paste0(
  "Unique patients: ", length(unique(all_mutations$patient_id)), "\n",
  "Samples: ", length(unique(all_mutations$Sample_ID)), "\n",
  "Mutations: ", nrow(all_mutations), "\n",
  "RAP1 miss mutations (pancan): ", n_rap1_missense, "\n",
  "TRPM8 miss mutations (pancan): ", n_trpm8_missense, "\n",
  "RAP1 mm (pancan) norm: ", round((n_rap1_missense / 184) * 100, 2), "%\n",
  "TRPM8 mm (pancan) norm: ", round((n_trpm8_missense / 1104) * 100, 2), "%\n"
))
```

We look for the mutations of interest:

```{r}
# TRPM8 mutations whose amino-acid change mentions E207 or Y240.
filtered_mutations |>
  filter(gene == "TRPM8", grepl("E207|Y240", Amino_Acid_Change)) |>
  print()
```

```{r}
# RAP1A mutations whose amino-acid change mentions K31 or Y32.
filtered_mutations |>
  filter(gene == "RAP1A", grepl("K31|Y32", Amino_Acid_Change)) |>
  print()
```

We find just one mutation of interest. Prep the data to look for mutations in a region:

```{r}
# The first run of digits in the amino-acid change is taken as the position.
p <- str_match(filtered_mutations$Amino_Acid_Change, "([0-9]+)")
filtered_mutations$amino_acid_position <- as.numeric(p[, 1])
```

Find mutations around the positions of interest:

```{r}
# Missense mutations of one gene whose position lies inside a closed window.
mutations_in_window <- function(window_gene, window_from, window_to) {
  filtered_mutations |>
    filter(
      gene == window_gene,
      amino_acid_position >= window_from, amino_acid_position <= window_to
    )
}

# TRPM8, five residues either side of Y240
mutations_in_window("TRPM8", 235, 245) |> print()

# TRPM8, five residues either side of E207
mutations_in_window("TRPM8", 202, 212) |> print()

# RAP1A, positions 25-35 (covers K31 and Y32)
mutations_in_window("RAP1A", 25, 35) |> print()
```

## versions.txt
## PYTHON ##
Interpreter: Python 3.10.2
Packages:
	appdirs==1.4.4
	application-utility==1.3.2
	asttokens==2.0.5
	backcall==0.2.0
	black==22.1.0
	btrfsutil==5.16.2
	build==0.7.0
	CacheControl==0.12.6
	ceph==1.0.0
	ceph-volume==1.0.0
	cephfs==2.0.0
	cephfs-shell==0.0.1
	certifi==2021.10.8
	cffi==1.15.0
	chardet==4.0.0
	click==8.0.3
	colorama==0.4.4
	contextlib2==0.6.0.post1
	cryptography==36.0.1
	decorator==5.1.1
	distlib==0.3.4
	distro==1.7.0
	dnspython==2.2.0
	docopt==0.6.2
	Edmund @ file:///home/hedmad/Files/repos/Edmund/dist/Edmund-0.0.1-py3-none-any.whl
	executing==0.8.2
	filelock==3.4.1
	guake==3.8.6.dev0
	gufw==21.4.0
	html5lib==1.1
	idna==3.3
	importlib-metadata==4.8.1
	ipython==8.0.1
	jedi==0.17.2
	keyutils==0.6
	lightdm-gtk-greeter-settings==1.2.2
	lit==13.0.1.dev0
	manjaro-sdk==0.1
	Markdown==3.3.6
	matplotlib-inline==0.1.3
	menulibre==2.2.3
	more-itertools==8.10.0
	msgpack==1.0.3
	mugshot==0.4.3
	mypy-extensions==0.4.3
	npyscreen==4.10.5
	ordered-set==4.0.2
	packaging==20.9
	pacman-mirrors==4.23.2
	parso==0.7.1
	pathspec==0.9.0
	pbr==5.8.1
	pep517==0.12.0
	pexpect==4.8.0
	pickleshare==0.7.5
	platformdirs==2.4.1
	pluggy==1.0.0
	ply==3.11
	progress==1.6
	prompt-toolkit==3.0.28
	psutil==5.9.0
	ptyprocess==0.7.0
	pure-eval==0.2.2
	pycairo==1.20.1
	pycparser==2.21
	Pygments==2.11.2
	PyGObject==3.42.0
	pyOpenSSL==21.0.0
	pyparsing==3.0.0
	PySide6==6.2.3
	python-jsonrpc-server==0.4.0
	python-language-server==0.36.2
	pyxdg==0.27
	PyYAML==6.0
	rados==2.0.0
	rbd==2.0.0
	requests==2.27.1
	resolvelib==0.5.5
	retrying==1.3.3
	rgw==2.0.0
	shiboken6==6.2.3
	six==1.16.0
	stack-data==0.1.4
	team==1.0
	toml==0.10.2
	tomli==2.0.0
	traitlets==5.1.1
	udiskie==2.4.1
	ufw==0.36.1
	ujson==5.1.0
	urllib3==1.26.8
	virtualenv==20.11.0
	wcwidth==0.2.5
	webencodings==0.5.1
	zipp==3.7.0

## R ##
Interpreter: R version 4.1.2 (2021-11-01) -- "Bird Hippie"
Packages:
	annotate==1.72.0
	AnnotationDbi==1.56.2
	askpass==1.1
	assertthat==0.2.1
	backports==1.4.1
	base64enc==0.1-3
	BH==1.78.0-0
	Biobase==2.54.0
	BiocGenerics==0.40.0
	BiocManager==1.30.16
	BiocParallel==1.28.3
	BiocVersion==3.14.0
	Biostrings==2.62.0
	bit==4.0.4
	bit64==4.0.5
	bitops==1.0-7
	blob==1.2.2
	broom==0.7.12
	BWStest==0.2.2
	cachem==1.0.6
	callr==3.7.0
	cellranger==1.1.0
	cli==3.2.0
	clipr==0.7.1
	colorspace==2.0-3
	cowplot==1.1.1
	cpp11==0.4.2
	crayon==1.5.0
	curl==4.3.2
	data.table==1.14.2
	DBI==1.1.2
	dbplyr==2.1.1
	DelayedArray==0.20.0
	DESeq2==1.34.0
	digest==0.6.29
	dplyr==1.0.8
	dtplyr==1.2.1
	ellipsis==0.3.2
	evaluate==0.15
	fansi==1.0.2
	farver==2.1.0
	fastmap==1.1.0
	forcats==0.5.1
	formatR==1.11
	fs==1.5.2
	futile.logger==1.4.3
	futile.options==1.0.1
	gargle==1.2.0
	genefilter==1.76.0
	geneplotter==1.72.0
	generics==0.1.2
	GenomeInfoDb==1.30.1
	GenomeInfoDbData==1.2.7
	GenomicRanges==1.46.1
	ggplot2==3.3.5
	ggsignif==0.6.3
	glue==1.6.1
	gmp==0.6-4
	googledrive==2.0.0
	googlesheets4==1.0.0
	gridExtra==2.3
	gtable==0.3.0
	haven==2.4.3
	highr==0.9
	hms==1.1.1
	htmltools==0.5.2
	httr==1.4.2
	ids==1.0.1
	IRanges==2.28.0
	isoband==0.2.5
	jquerylib==0.1.4
	jsonlite==1.7.3
	KEGGREST==1.34.0
	knitr==1.37
	kSamples==1.2-9
	labeling==0.4.2
	lambda.r==1.2.4
	lifecycle==1.0.1
	limma==3.50.1
	locfit==1.5-9.4
	logger==0.2.2
	lubridate==1.8.0
	magrittr==2.0.2
	markdown==1.1
	MatrixGenerics==1.6.0
	matrixStats==0.61.0
	memoise==2.0.1
	mime==0.12
	modelr==0.1.8
	multcomp==1.4-18
	multcompView==0.1-8
	munsell==0.5.0
	mvtnorm==1.1-3
	openssl==1.4.6
	pillar==1.7.0
	pkgconfig==2.0.3
	plogr==0.2.0
	plyr==1.8.6
	PMCMRplus==1.9.3
	png==0.1-7
	prettyunits==1.1.1
	processx==3.5.2
	progress==1.2.2
	ps==1.6.0
	purrr==0.3.4
	R6==2.5.1
	rappdirs==0.3.3
	RColorBrewer==1.1-2
	Rcpp==1.0.8
	RcppArmadillo==0.10.8.1.0
	RCurl==1.98-1.6
	readr==2.1.2
	readxl==1.3.1
	rematch==1.0.1
	rematch2==2.1.2
	renv==0.15.2
	reprex==2.0.1
	reshape2==1.4.4
	rlang==1.0.1
	rmarkdown==2.11
	Rmpfr==0.8-7
	RSQLite==2.2.10
	rstudioapi==0.13
	rvest==1.0.2
	S4Vectors==0.32.3
	sandwich==3.0-1
	scales==1.1.1
	selectr==0.4-2
	snow==0.4-4
	stringi==1.7.6
	stringr==1.4.0
	SummarizedExperiment==1.24.0
	SuppDists==1.1-9.7
	sys==3.4
	TH.data==1.1-0
	tibble==3.1.6
	tidyr==1.2.0
	tidyselect==1.1.2
	tidyverse==1.3.1
	tinytex==0.37
	tzdb==0.2.0
	utf8==1.2.2
	uuid==1.0-3
	vctrs==0.3.8
	viridisLite==0.4.0
	vroom==1.5.7
	withr==2.4.3
	xfun==0.29
	XML==3.99-0.8
	xml2==1.3.3
	xtable==1.8-4
	XVector==0.34.0
	yaml==2.3.5
	zlibbioc==1.40.0
	zoo==1.8-9
	base==4.1.2
	boot==1.3-28
	class==7.3-19
	cluster==2.1.2
	codetools==0.2-18
	compiler==4.1.2
	datasets==4.1.2
	foreign==0.8-81
	graphics==4.1.2
	grDevices==4.1.2
	grid==4.1.2
	KernSmooth==2.23-20
	lattice==0.20-45
	MASS==7.3-54
	Matrix==1.3-4
	methods==4.1.2
	mgcv==1.8-38
	nlme==3.1-153
	nnet==7.3-16
	parallel==4.1.2
	rpart==4.1-15
	spatial==7.3-14
	splines==4.1.2
	stats==4.1.2
	stats4==4.1.2
	survival==3.2-13
	tcltk==4.1.2
	tools==4.1.2
	utils==4.1.2
	# The anova analysis for RAP1A
	# Made hastily once we deemed Deseq2 not the best for the job
	# Therefore, it is sloppy and bad. Sorry to whomever has to read this.

	library(tidyverse)
	library(cowplot) # Plot grids
	library(ggsignif) # Significance bars on the plots
	library(PMCMRplus) # Dunn test

	# The expression of the two genes
	data <- read_tsv("./data/TTG_filtered_data")

	# Metadata for TCGA and GTEx
	clinical_data <- read_csv("./data/clean_clinical_metadata.csv")
	gtex_metadata <- read_tsv("./data/GTEX_phenotype")

	#### << Functions >> ###

	#' Make simpler clinical metadata and filtering for the different cohorts
	preprocess_metadata <- function(clinical_data) {
	# Get only the cols of interest
	metadata <- clinical_data[, c("tumor_id", "submitter_id")]

	# Generate a new column "status_metastatic"
	status <- rep("non_metastatic", length(clinical_data$tumor_id))

	figo_stage <- as.character(clinical_data$figo_stage)
	figo_tf <- startsWith(figo_stage, "Stage III") \| startsWith(figo_stage, "Stage IV")
	figo_tf[is.na(figo_tf)] <- FALSE
	# The levels to select were inspected manually.
	status_key <-
	! clinical_data$ajcc_clinical_m %in% c(NA, "M0", "MX") \|
	! clinical_data$ajcc_pathologic_m %in% c(NA, "cM0 (i+)", "M0", "MX") \|
	clinical_data$ajcc_clinical_stage %in% c("Stage IV", "Stage IVA", "Stage IVB", "Stage IVC") \|
	figo_tf

	status[status_key] <- "metastatic"

	metadata$status_metastatic <- factor(status, c("metastatic", "non_metastatic"))

	# Generate a new column "status_limph"
	status <- rep("primary_tumor", length(clinical_data$tumor_id))

	figo_stage <- as.character(clinical_data$figo_stage)
	figo_tf <- startsWith(figo_stage, "Stage III") \| startsWith(figo_stage, "Stage IV")
	figo_tf[is.na(figo_tf)] <- FALSE
	# The levels to select were inspected manually.
	status_key <-
	! clinical_data$ajcc_clinical_m %in% c(NA, "M0", "MX") \|
	! clinical_data$ajcc_pathologic_m %in% c(NA, "cM0 (i+)", "M0", "MX") \|
	! clinical_data$ajcc_clinical_n %in% c(NA, "N0", "NX") \|
	! clinical_data$ajcc_pathologic_n %in% c(NA, "N0", "N0 (i-)", "N0 (i+)", "N0 (mol+)", "NX") \|
	clinical_data$ajcc_clinical_stage %in% c("Stage IV", "Stage IVA", "Stage IVB", "Stage IVC") \|
	figo_tf

	status[status_key] <- "limph_nodes+metastatic"
	metadata$status_limph <- factor(status, c("limph_nodes+metastatic", "primary_tumor"))

	return(metadata)
	}

	#' Trunc the TCGA ID to just the patient id
	trunc_to_pid <- function(x) {
	return(str_trunc(x, 12, side = "right", ellipsis = ""))
	}

	#' Get the metadata from the whole metadata set from the set of IDs.
	get_metadata_from_id <- function(x) {
	if (startsWith(x, "GTEX")) {
	if (! x %in% gtex_metadata$Sample) {print(paste("Cannot find", x)); return(NULL)}
	return(list(x, gtex_metadata$`_primary_site`[gtex_metadata$Sample == x], "healthy", "healthy"))
	}
	if (startsWith(x, "TCGA")) {
	if (! trunc_to_pid(x) %in% simple_clinical_data$submitter_id) {print(paste("Cannot find", x)); return(NULL)}
	return(list(
	x,
	simple_clinical_data$tumor_id[simple_clinical_data$submitter_id == trunc_to_pid(x)],
	as.character(simple_clinical_data$status_metastatic[simple_clinical_data$submitter_id == trunc_to_pid(x)]),
	as.character(simple_clinical_data$status_limph[simple_clinical_data$submitter_id == trunc_to_pid(x)])
	))
	}
	}

	#' Bind the result of `get_metadata_from_id` together.
	bind <- function(x, y) {
	if (typeof(x) == "list") {
	x <- unlist(x)
	}
	y <- unlist(y)

	if (startsWith(y[3], "TCGA")) {print(y)}
	return(rbind(x, y))
	}

	calc_median_diff <- function(data, x, y) {
	# We do x - y
	return(median(data$exprs[data$status == x]) - median(data$exprs[data$status == y]))
	}

	get_diff <- function(data, formula) {
	splif <- unlist(str_split(formula, "-"))
	return(calc_median_diff(data, splif[1], splif[2]))
	}

	#' Analyze some data with an exprs col and a status col
	analyze <- function(data) {
	# Numerosity
	table(data$source, data$status) \|> print()

	# Normality of the data
	print("NORMALITY")
	data[, c("exprs", "status")] \|> group_by(status) \|>
	group_map(\(x, y) {ks.test(x$exprs, "rnorm") \|> print(); ks.test(x$exprs, "rnorm")$p.value}) \|>
	unlist() -> ks.pvals

	if (any(ks.pvals < 0.05 & table(data$status) < 30)) {
	print("Running in non-parametric mode.")
	kru.mod <- kruskal.test(exprs ~ status, data = data)
	dunn_res <- kwAllPairsDunnTest(exprs ~ status, data = data, p.adjust.method = "bonferroni")

	m <- dunn_res$p.value
	dunn_rnms <- paste0(rownames(m)[row(m)[lower.tri(m, diag = TRUE)]], "-", colnames(m)[col(m)[lower.tri(m, diag = TRUE)]])
	dunn_rest <- data.frame(
	`p.adj` = m[lower.tri(m, diag = TRUE)],
	row.names = dunn_rnms
	)

	# Calculate differences in the median
	dunn_rest$diff <- sapply(dunn_rnms, get_diff, data = data)

	print(as.data.frame(dunn_rest))

	return(as.data.frame(dunn_rest))

	} else {
	# Make anova model
	anov.mod <- aov(exprs ~ status, data = data)

	# Variance of the data
	print("VARIANCE")
	plot(anov.mod, 1) \|> print()

	# summary of anova
	summary(anov.mod) \|> print()

	# Post test
	tukey_res <- TukeyHSD(anov.mod)
	tukey_res \|> print()
	tukey_res \|> plot()

	colnames(tukey_res$status) <- c("diff", "lwr", "upr", "p.adj")
	return(as.data.frame(tukey_res$status))
	}

	}

	# Deanonimize for pipes
	to_frame <- \(x) {return(data.frame(sample_id = x[,1], source = x[,2], status_metastatic = x[,3], status_limph = x[,4]))}
	omit_null <- \(x) {return(x[! sapply(x, is.null)])}
	prreduce <- purrr::reduce

	prep_for_analysis <- function(x, stat_type) {
	data.frame(exprs = x$exprs, source = x$source, status = as.factor(x[, stat_type]))
	}

	#### <<< END Functions >>> ###

	simple_clinical_data <- preprocess_metadata(clinical_data)

	all_colnames <- colnames(data)
	# We clear out the `TARGET` Ids - we don't need them
	intr_colnames <- all_colnames[! startsWith(all_colnames, "TARGET")]

	# We make the metadata for our samples
	intr_colnames \|> map(get_metadata_from_id) \|>
	omit_null() \|> prreduce(bind) \|> to_frame() \|>
	mutate(status_metastatic = as.factor(status_metastatic), status_limph = as.factor(status_limph)) \|>
	remove_rownames() -> metadata

	data \|> remove_rownames() \|> column_to_rownames("sample") -> data

	# Ready for the analysis

	RAP1 <- data["RAP1A", , drop=FALSE]
	TRPM8 <- data["TRPM8", , drop=FALSE]

	# Reshape
	RAP1r <- data.frame(exprs = unlist(RAP1[1,]))
	rownames(RAP1r) <- colnames(RAP1)

	TRPMr <- data.frame(exprs = unlist(TRPM8[1,]))
	rownames(TRPMr) <- colnames(TRPM8)

	# Merge with metadata
	merge(TRPMr, metadata, by.x = 0, by.y = "sample_id") \|>
	remove_rownames() \|> column_to_rownames("Row.names") -> TRPMrm

	merge(RAP1r, metadata, by.x = 0, by.y = "sample_id") \|>
	remove_rownames() \|> column_to_rownames("Row.names") -> RAP1rm

	results <- list()

	# Running on the "meta/non meta" labelling
	# Run this one at a time to check for assumptions (plots + messages)
	results$RBRCA_meta <- RAP1rm |>
	  filter(source == "Breast" | source == "TCGA-BRCA") |>
	  prep_for_analysis("status_metastatic") |>
	  analyze()
	results$RUCEC_meta <- RAP1rm |>
	  filter(source == "Cervix Uteri" | source == "Uterus" | source == "TCGA-UCEC") |>
	  prep_for_analysis("status_metastatic") |>
	  analyze()
	results$RPRAD_meta <- RAP1rm |>
	  filter(source == "Prostate" | source == "TCGA-PRAD") |>
	  prep_for_analysis("status_metastatic") |>
	  analyze()

	results$TBRCA_meta <- TRPMrm |>
	  filter(source == "Breast" | source == "TCGA-BRCA") |>
	  prep_for_analysis("status_metastatic") |>
	  analyze()
	results$TUCEC_meta <- TRPMrm |>
	  filter(source == "Cervix Uteri" | source == "Uterus" | source == "TCGA-UCEC") |>
	  prep_for_analysis("status_metastatic") |>
	  analyze()
	results$TPRAD_meta <- TRPMrm |>
	  filter(source == "Prostate" | source == "TCGA-PRAD") |>
	  prep_for_analysis("status_metastatic") |>
	  analyze()

	# Running on the "limph/non limph" labelling
	results$RBRCA_limph <- RAP1rm |>
	  filter(source == "Breast" | source == "TCGA-BRCA") |>
	  prep_for_analysis("status_limph") |>
	  analyze()
	results$RUCEC_limph <- RAP1rm |>
	  filter(source == "Cervix Uteri" | source == "Uterus" | source == "TCGA-UCEC") |>
	  prep_for_analysis("status_limph") |>
	  analyze()
	results$RPRAD_limph <- RAP1rm |>
	  filter(source == "Prostate" | source == "TCGA-PRAD") |>
	  prep_for_analysis("status_limph") |>
	  analyze()

	results$TBRCA_limph <- TRPMrm |>
	  filter(source == "Breast" | source == "TCGA-BRCA") |>
	  prep_for_analysis("status_limph") |>
	  analyze()
	results$TUCEC_limph <- TRPMrm |>
	  filter(source == "Cervix Uteri" | source == "Uterus" | source == "TCGA-UCEC") |>
	  prep_for_analysis("status_limph") |>
	  analyze()
	results$TPRAD_limph <- TRPMrm |>
	  filter(source == "Prostate" | source == "TCGA-PRAD") |>
	  prep_for_analysis("status_limph") |>
	  analyze()


	### <<< Plotting and tabulating results >>> ###
	# Add significance
	#' Map one p-value to a significance label.
	#'
	#' Thresholds follow the usual star convention (the same one
	#' `geom_signif(map_signif_level = TRUE)` uses on the plots):
	#' `< 0.001` -> "***", `< 0.01` -> "**", `< 0.05` -> "*", otherwise "-".
	#'
	#' @param pval A single numeric p-value.
	#' @return A length-one character label; `NA_character_` when `pval` is NA
	#'   (instead of failing inside `if`).
	sig <- function(pval) {
	  if (is.na(pval)) {
	    NA_character_
	  } else if (pval < 0.001) {
	    "***"
	  } else if (pval < 0.01) {
	    "**"
	  } else if (pval < 0.05) {
	    "*"
	  } else {
	    "-"
	  }
	}

	#' Append a `sig` column of significance labels to a result table.
	#'
	#' @param x A table with a `p.adj` column.
	#' @return `x` with an extra character column `sig`, one label per row
	#'   (computed with `sig()`).
	add_sig <- function(x) {
	  # vapply pins the result to character, so an empty table gets an empty
	  # character column rather than the list() that sapply would produce
	  x$sig <- vapply(x$`p.adj`, sig, character(1))
	  x
	}

	# Same list as `results`, with the `sig` label column added to every table
	results2 <- lapply(results, add_sig)

	# See a summary
	#' Print the `diff`, `p.adj` and `sig` columns of every table in `results2`.
	#'
	#' Tables are shown cohort by cohort (PRAD, BRCA, UCEC), RAP1A before TRPM8,
	#' first for the "meta" labelling and then, after a separator line, for the
	#' "limph" labelling (rows reordered as 2, 1, 3). Numbers are printed with
	#' three significant digits; the `digits` option is restored on exit.
	#'
	#' @return `NULL`, invisibly. Called for its printed output.
	see_res <- function() {
	  oo <- options(digits = 3)
	  on.exit(options(oo), add = TRUE)

	  shown_cols <- c("diff", "p.adj", "sig")
	  genes <- c(R = "RAP1A", T = "TRPM8")
	  cohorts <- c("PRAD", "BRCA", "UCEC")

	  # Print the six tables of one labelling; `rows` optionally reorders them
	  show_labelling <- function(suffix, label, rows = NULL) {
	    for (cohort in cohorts) {
	      for (prefix in names(genes)) {
	        print(paste(genes[[prefix]], cohort, label, sep = " - "))
	        res <- results2[[paste0(prefix, cohort, "_", suffix)]]
	        shown <- if (is.null(rows)) {
	          res[, shown_cols]
	        } else {
	          res[rows, shown_cols]
	        }
	        print(shown)
	        cat("\n")
	      }
	    }
	  }

	  show_labelling("meta", "META")
	  cat("---------------\n")
	  show_labelling("limph", "LIMPH", rows = c(2, 1, 3))

	  invisible(NULL)
	}

	# Print the summary tables to the console
	see_res()

	# For some reason this is needed or the plot saving later fails.
	# graphics.off() closes every open graphics device, so the plot saving
	# starts from a clean device list.
	graphics.off()


	### PLOTS
	#' Violin + box plot of expression by status group.
	#'
	#' The group order and the candidate pairwise comparisons depend on the
	#' labelling: if `"non_metastatic"` is among the levels of `data$status` the
	#' healthy / non_metastatic / metastatic scheme is used, otherwise the
	#' healthy / primary_tumor / limph_nodes+metastatic one.
	#'
	#' @param data Data frame with an `exprs` column and a `status` factor.
	#' @param title Plot title.
	#' @param extra.colours Fill colours for the second and third group; the
	#'   first group is always drawn in "lightgray".
	#' @param comps Logical vector selecting which of the three candidate
	#'   comparisons get a significance bar.
	#' @param addn If `TRUE`, print each group's size below its violin.
	#' @return A ggplot object.
	bplot <- function(
	  data, title,
	  extra.colours = c("#048ab3", "lightblue"), comps = c(TRUE, TRUE, TRUE), addn = TRUE
	) {
	  # Choose group order and candidate comparisons from the labelling scheme
	  if ("non_metastatic" %in% levels(data$status)) {
	    group_order <- c("healthy", "non_metastatic", "metastatic")
	    candidate_pairs <- list(
	      c("healthy", "metastatic"),
	      c("healthy", "non_metastatic"),
	      c("non_metastatic", "metastatic")
	    )
	  } else {
	    group_order <- c("healthy", "primary_tumor", "limph_nodes+metastatic")
	    candidate_pairs <- list(
	      c("healthy", "primary_tumor"),
	      c("healthy", "limph_nodes+metastatic"),
	      c("primary_tumor", "limph_nodes+metastatic")
	    )
	  }
	  data$status <- factor(data$status, levels = group_order)

	  # Group-size label, anchored at the lowest expression value of the plot
	  group_size_label <- function(x) {
	    c(y = min(data$exprs), label = length(x))
	  }

	  box_layer <- geom_boxplot(
	    width = 0.1,
	    outlier.color = "#00ff1a", outlier.alpha = 0.5,
	    outlier.size = 0.8, outlier.shape = 4
	  )
	  text_sizes <- theme(
	    legend.position = "none",
	    plot.title = element_text(size = 10),
	    axis.title = element_text(size = 8)
	  )
	  signif_layer <- geom_signif(
	    comparisons = candidate_pairs[comps],
	    map_signif_level = TRUE,
	    step_increase = 0.1,
	    vjust = 0.5
	  )

	  figure <- ggplot(data = data, aes(x = status, y = exprs, fill = status)) +
	    scale_fill_manual(values = c("lightgray", extra.colours)) +
	    geom_violin() +
	    box_layer +
	    labs(title = title, x = "Status", y = "Normalized Expression (log2)") +
	    theme_bw() +
	    text_sizes +
	    signif_layer

	  if (!addn) {
	    return(figure)
	  }

	  figure +
	    stat_summary(
	      fun.data = group_size_label, geom = "text",
	      position = position_nudge(y = -0.5), size = 2.5,
	      colour = "black"
	    )
	}


	#' Build the six-panel figure for one labelling and save it as PDF, TIFF
	#' and PNG under `./results/`.
	#'
	#' Each output file is opened, drawn and closed in turn, so the function no
	#' longer depends on which other graphics devices happen to be open and does
	#' not close devices it did not create. The PNG now goes to
	#' `<code>_results.png` (it previously reused the `.tiff` file name and
	#' overwrote the TIFF).
	#'
	#' @param stat_type Status column passed to `prep_for_analysis()`
	#'   (`"status_limph"` or `"status_metastatic"`).
	#' @param code Prefix of the output file names (`<code>_results.<ext>`).
	#' @return `NULL`, invisibly. Called for the files it writes.
	regenerate_plots <- function(stat_type, code) {
	  print(stat_type)
	  print(code)

	  # `source` values that make up each cohort
	  prostate <- c("Prostate", "TCGA-PRAD")
	  breast <- c("Breast", "TCGA-BRCA")
	  uterus <- c("Cervix Uteri", "Uterus", "TCGA-UCEC")

	  # One panel: restrict to a cohort, pick the labelling, hand over to bplot
	  panel <- function(expr_data, keep_sources, ...) {
	    expr_data |>
	      filter(source %in% keep_sources) |>
	      prep_for_analysis(stat_type) |>
	      bplot(...)
	  }

	  # The `comps` flags are the hand-picked significance bars of each panel
	  plots <- list()
	  plots$RPRO <- panel(
	    RAP1rm, prostate, "RAP1A - Prostate Dataset",
	    c("#c40000", "#e83e00"), c(FALSE, FALSE, FALSE)
	  )
	  plots$TPRO <- panel(
	    TRPMrm, prostate, "TRPM8 - Prostate Dataset",
	    c("#c40000", "#e83e00"), c(FALSE, TRUE, FALSE)
	  )
	  plots$RBRE <- panel(
	    RAP1rm, breast, "RAP1A - Breast Dataset",
	    comps = c(FALSE, FALSE, FALSE)
	  )
	  plots$TBRE <- panel(
	    TRPMrm, breast, "TRPM8 - Breast Dataset",
	    comps = c(FALSE, TRUE, FALSE)
	  )
	  plots$RUTE <- panel(
	    RAP1rm, uterus, "RAP1A - Uterus Dataset",
	    c("#8f3199", "#bf41cc"), c(TRUE, TRUE, FALSE)
	  )
	  plots$TUTE <- panel(
	    TRPMrm, uterus, "TRPM8 - Uterus Dataset",
	    c("#8f3199", "#bf41cc"), c(TRUE, TRUE, FALSE)
	  )

	  # The grid is rebuilt for every device, as the original loop did
	  draw_grid <- function() {
	    plot_grid(
	      plots$RPRO, plots$TPRO,
	      plots$RBRE, plots$TBRE,
	      plots$RUTE, plots$TUTE,
	      ncol = 2
	    ) |> print()
	  }

	  # Open one device, draw, and always close it again (even on error)
	  save_with <- function(open_device) {
	    open_device()
	    on.exit(dev.off(), add = TRUE)
	    draw_grid()
	  }

	  dir.create("./results", showWarnings = FALSE, recursive = TRUE)
	  out_file <- function(ext) paste0("./results/", code, "_results.", ext)

	  save_with(\() pdf(file = out_file("pdf"), height = 11, width = 9))
	  save_with(\() tiff(filename = out_file("tiff"), height = 6600, width = 5400, res = 600))
	  save_with(\() png(filename = out_file("png"), height = 6600, width = 5400, res = 600))

	  invisible(NULL)
	}

	#' Regenerate the saved figures for both labellings.
	#'
	#' Calls `regenerate_plots()` first for the limph labelling (file code
	#' "LIMPH"), then for the metastatic labelling (file code "NOLIMPH").
	regenerate_all_plots <- function() {
	  # file code -> status column, in the order the figures are produced
	  labellings <- c(LIMPH = "status_limph", NOLIMPH = "status_metastatic")
	  for (code in names(labellings)) {
	    regenerate_plots(labellings[[code]], code)
	  }
	}

	# Render and save both figure sets
	regenerate_all_plots()

	## EXTRAS ##
	# See the TRPM8 zero counts. Not exactly zero, but less than 1

	#' Print how many expression values of one group satisfy a predicate.
	#'
	#' Has the `(.x, .y, ...)` shape that `group_walk()` calls: `x` is the
	#' group's data, `y` its key.
	#'
	#' @param x Data with an `exprs` column.
	#' @param y Group key; the first entry of `y$status` is used as the label.
	#' @param fun Vectorised predicate applied to `x$exprs`.
	#' @return The printed string `"<status>: <count> - <percent>%"`, invisibly
	#'   (the value of `print()`).
	count_n <- function(x, y, fun) {
	  values <- x$exprs
	  n_hits <- sum(fun(values))
	  share <- round((n_hits / length(values)) * 100, 2)
	  group_label <- y$status[1]
	  print(paste0(group_label, ": ", n_hits, " - ", share, "%"))
	}

	# `TRPMrm` only carries `status_metastatic` / `status_limph`; the `status`
	# column that `group_by()` and `count_n()` need is created by
	# `prep_for_analysis()`.
	# NOTE(review): the metastatic labelling is assumed here - swap in
	# "status_limph" if the counts were meant for the other labelling.
	TRPMrm |>
	  filter(source == "Cervix Uteri" | source == "Uterus" | source == "TCGA-UCEC") |>
	  prep_for_analysis("status_metastatic") |>
	  group_by(status) |>
	  group_walk(count_n, fun = \(x) {x < 1})

	TRPMrm |>
	  filter(source == "Breast" | source == "TCGA-BRCA") |>
	  prep_for_analysis("status_metastatic") |>
	  group_by(status) |>
	  group_walk(count_n, fun = \(x) {x < 1})

	[GTEX_phenotype] :: https://toil-xena-hub.s3.us-east-1.amazonaws.com/download/GTEX_phenotype.gz
	On: 22.02.24-17:52:46
	Desc: The metadata for the GTEX patients in TcgaTargetGtex_gene_expected_count
	Prep: gunzip -k ${in}
	Hash: 32449abd7951f44b469e1a103acce0ff


	[clean_clinical_metadata.csv] :: TCGA_clean_clinical_data.pipe
	On: 22.02.25-10:43:34
	Desc: All (cleaned) clinical metadata from the TCGA consortium.
	Prep:
	Hash: 7dfd9078f6fc69d96d2187e3cb24b0b7


	[GDC-PANCAN.mutect2_snv.tsv] :: https://gdc-hub.s3.us-east-1.amazonaws.com/download/GDC-PANCAN.mutect2_snv.tsv.gz
	On: 22.02.25-10:46:21
	Desc: All mutations from the TCGA project detected with Mutect2
	Prep: gunzip -k ${in}
	Hash: acfbcfa6ff059f35eb76306e4b1ec933


	[TTG_filtered_data] :: https://toil-xena-hub.s3.us-east-1.amazonaws.com/download/TcgaTargetGtex_RSEM_Hugo_norm_count.gz
	On: 22.03.04-11:41:55
	Desc: Just the two genes of interest from the Whole normalized counts of TCGA/TARGET/GTEX dataset from Xena
	Prep: gunzip -c ${in} | awk "/^RAP1A/ || /^TRPM8/ || NR==1 p" > ./data/TTG_filtered_data
	Hash: a4b479b80f98e26a93f10b22729a000f
	---
	title: "RAP1A and TRPM8 point mutations"
	output: html_notebook
	---

	We are looking for mutations in TRPM8 in positions `Y240` and `E207`, and in RAP1A in positions `K31` and `Y32`.

	```{r}
	library(tidyverse)

	# Clinical metadata (base read.csv) and the tab-separated mutation table
	clinical_data <- read.csv("./data/clean_clinical_metadata.csv")
	all_mutations <- read_tsv("./data/GDC-PANCAN.mutect2_snv.tsv")
	```

	We add the annotations for the tumor type to the mutation database.

	```{r}
	# The first 12 characters of the sample ID are matched against `submitter_id`
	# below. str_trunc() is vectorised, so it is applied to the whole column at
	# once instead of element by element through sapply().
	all_mutations$patient_id <- str_trunc(
	  all_mutations$Sample_ID,
	  width = 12, side = "right", ellipsis = ""
	)

	# Attach the tumor type of each patient
	all_mutations <- merge(
	  all_mutations,
	  clinical_data[, c("tumor_id", "submitter_id")],
	  by.x = "patient_id", by.y = "submitter_id"
	)
	```

	We start by keeping only putative missense mutations.

	```{r}
	# Missense variants of the two genes of interest
	filtered_mutations <- all_mutations |>
	  filter(gene == "RAP1A" | gene == "TRPM8", effect == "missense_variant")
	```

	Summary counts for the dataset:

	```{r}
	# One report line per paste0() call, written out without separators.
	# NOTE(review): 184 and 1104 are presumably the RAP1A and TRPM8 protein
	# lengths (in amino acids) used to normalise the counts - confirm.
	cat(
	  paste0("Unique patients: ", length(unique(all_mutations$patient_id)), "\n"),
	  paste0("Samples: ", length(unique(all_mutations$Sample_ID)), "\n"),
	  paste0("Mutations: ", nrow(all_mutations), "\n"),
	  paste0(
	    "RAP1 miss mutations (pancan): ",
	    nrow(filter(filtered_mutations, gene == "RAP1A")), "\n"
	  ),
	  paste0(
	    "TRPM8 miss mutations (pancan): ",
	    nrow(filter(filtered_mutations, gene == "TRPM8")), "\n"
	  ),
	  paste0(
	    "RAP1 mm (pancan) norm: ",
	    round((nrow(filter(filtered_mutations, gene == "RAP1A")) / 184) * 100, 2), "%\n"
	  ),
	  paste0(
	    "TRPM8 mm (pancan) norm: ",
	    round((nrow(filter(filtered_mutations, gene == "TRPM8")) / 1104) * 100, 2), "%\n"
	  ),
	  sep = ""
	)
	```

	We look for the mutations of interest:

	```{r}
	# TRPM8 changes at E207 or Y240. The residue must be followed by a non-digit
	# (or the end of the string) so that e.g. position 2070 can never match.
	filtered_mutations |>
	  filter(gene == "TRPM8", grepl("(E207|Y240)(\\D|$)", Amino_Acid_Change)) |>
	  print()
	```

	```{r}
	# RAP1A changes at K31 or Y32. The residue must be followed by a non-digit
	# (or the end of the string) so that e.g. position 310 can never match.
	filtered_mutations |>
	  filter(gene == "RAP1A", grepl("(K31|Y32)(\\D|$)", Amino_Acid_Change)) |>
	  print()
	```

	We find just one mutation of interest. Prep the data to look for mutations in a region:

	```{r}
	# Residue position = first run of digits in the amino-acid change string
	# (NA when the string contains no digits)
	filtered_mutations$amino_acid_position <- filtered_mutations$Amino_Acid_Change |>
	  str_extract("[0-9]+") |>
	  as.numeric()
	```

	Find mutations around the positions of interest:

	```{r}
	# TRPM8, Y240 +/- 5 residues (positions 235 to 245, both ends included)
	filtered_mutations |>
	  filter(gene == "TRPM8", between(amino_acid_position, 235, 245)) |>
	  print()

	# TRPM8, E207 +/- 5 residues (positions 202 to 212, both ends included)
	filtered_mutations |>
	  filter(gene == "TRPM8", between(amino_acid_position, 202, 212)) |>
	  print()

	# RAP1A, positions 25 to 35 (both ends included), covering K31 and Y32
	filtered_mutations |>
	  filter(gene == "RAP1A", between(amino_acid_position, 25, 35)) |>
	  print()
	```
	## PYTHON ##
	Interpreter: Python 3.10.2
	Packages:
	appdirs==1.4.4
	application-utility==1.3.2
	asttokens==2.0.5
	backcall==0.2.0
	black==22.1.0
	btrfsutil==5.16.2
	build==0.7.0
	CacheControl==0.12.6
	ceph==1.0.0
	ceph-volume==1.0.0
	cephfs==2.0.0
	cephfs-shell==0.0.1
	certifi==2021.10.8
	cffi==1.15.0
	chardet==4.0.0
	click==8.0.3
	colorama==0.4.4
	contextlib2==0.6.0.post1
	cryptography==36.0.1
	decorator==5.1.1
	distlib==0.3.4
	distro==1.7.0
	dnspython==2.2.0
	docopt==0.6.2
	Edmund @ file:///home/hedmad/Files/repos/Edmund/dist/Edmund-0.0.1-py3-none-any.whl
	executing==0.8.2
	filelock==3.4.1
	guake==3.8.6.dev0
	gufw==21.4.0
	html5lib==1.1
	idna==3.3
	importlib-metadata==4.8.1
	ipython==8.0.1
	jedi==0.17.2
	keyutils==0.6
	lightdm-gtk-greeter-settings==1.2.2
	lit==13.0.1.dev0
	manjaro-sdk==0.1
	Markdown==3.3.6
	matplotlib-inline==0.1.3
	menulibre==2.2.3
	more-itertools==8.10.0
	msgpack==1.0.3
	mugshot==0.4.3
	mypy-extensions==0.4.3
	npyscreen==4.10.5
	ordered-set==4.0.2
	packaging==20.9
	pacman-mirrors==4.23.2
	parso==0.7.1
	pathspec==0.9.0
	pbr==5.8.1
	pep517==0.12.0
	pexpect==4.8.0
	pickleshare==0.7.5
	platformdirs==2.4.1
	pluggy==1.0.0
	ply==3.11
	progress==1.6
	prompt-toolkit==3.0.28
	psutil==5.9.0
	ptyprocess==0.7.0
	pure-eval==0.2.2
	pycairo==1.20.1
	pycparser==2.21
	Pygments==2.11.2
	PyGObject==3.42.0
	pyOpenSSL==21.0.0
	pyparsing==3.0.0
	PySide6==6.2.3
	python-jsonrpc-server==0.4.0
	python-language-server==0.36.2
	pyxdg==0.27
	PyYAML==6.0
	rados==2.0.0
	rbd==2.0.0
	requests==2.27.1
	resolvelib==0.5.5
	retrying==1.3.3
	rgw==2.0.0
	shiboken6==6.2.3
	six==1.16.0
	stack-data==0.1.4
	team==1.0
	toml==0.10.2
	tomli==2.0.0
	traitlets==5.1.1
	udiskie==2.4.1
	ufw==0.36.1
	ujson==5.1.0
	urllib3==1.26.8
	virtualenv==20.11.0
	wcwidth==0.2.5
	webencodings==0.5.1
	zipp==3.7.0

	## R ##
	Interpreter: R version 4.1.2 (2021-11-01) -- "Bird Hippie"
	Packages:
	annotate==1.72.0
	AnnotationDbi==1.56.2
	askpass==1.1
	assertthat==0.2.1
	backports==1.4.1
	base64enc==0.1-3
	BH==1.78.0-0
	Biobase==2.54.0
	BiocGenerics==0.40.0
	BiocManager==1.30.16
	BiocParallel==1.28.3
	BiocVersion==3.14.0
	Biostrings==2.62.0
	bit==4.0.4
	bit64==4.0.5
	bitops==1.0-7
	blob==1.2.2
	broom==0.7.12
	BWStest==0.2.2
	cachem==1.0.6
	callr==3.7.0
	cellranger==1.1.0
	cli==3.2.0
	clipr==0.7.1
	colorspace==2.0-3
	cowplot==1.1.1
	cpp11==0.4.2
	crayon==1.5.0
	curl==4.3.2
	data.table==1.14.2
	DBI==1.1.2
	dbplyr==2.1.1
	DelayedArray==0.20.0
	DESeq2==1.34.0
	digest==0.6.29
	dplyr==1.0.8
	dtplyr==1.2.1
	ellipsis==0.3.2
	evaluate==0.15
	fansi==1.0.2
	farver==2.1.0
	fastmap==1.1.0
	forcats==0.5.1
	formatR==1.11
	fs==1.5.2
	futile.logger==1.4.3
	futile.options==1.0.1
	gargle==1.2.0
	genefilter==1.76.0
	geneplotter==1.72.0
	generics==0.1.2
	GenomeInfoDb==1.30.1
	GenomeInfoDbData==1.2.7
	GenomicRanges==1.46.1
	ggplot2==3.3.5
	ggsignif==0.6.3
	glue==1.6.1
	gmp==0.6-4
	googledrive==2.0.0
	googlesheets4==1.0.0
	gridExtra==2.3
	gtable==0.3.0
	haven==2.4.3
	highr==0.9
	hms==1.1.1
	htmltools==0.5.2
	httr==1.4.2
	ids==1.0.1
	IRanges==2.28.0
	isoband==0.2.5
	jquerylib==0.1.4
	jsonlite==1.7.3
	KEGGREST==1.34.0
	knitr==1.37
	kSamples==1.2-9
	labeling==0.4.2
	lambda.r==1.2.4
	lifecycle==1.0.1
	limma==3.50.1
	locfit==1.5-9.4
	logger==0.2.2
	lubridate==1.8.0
	magrittr==2.0.2
	markdown==1.1
	MatrixGenerics==1.6.0
	matrixStats==0.61.0
	memoise==2.0.1
	mime==0.12
	modelr==0.1.8
	multcomp==1.4-18
	multcompView==0.1-8
	munsell==0.5.0
	mvtnorm==1.1-3
	openssl==1.4.6
	pillar==1.7.0
	pkgconfig==2.0.3
	plogr==0.2.0
	plyr==1.8.6
	PMCMRplus==1.9.3
	png==0.1-7
	prettyunits==1.1.1
	processx==3.5.2
	progress==1.2.2
	ps==1.6.0
	purrr==0.3.4
	R6==2.5.1
	rappdirs==0.3.3
	RColorBrewer==1.1-2
	Rcpp==1.0.8
	RcppArmadillo==0.10.8.1.0
	RCurl==1.98-1.6
	readr==2.1.2
	readxl==1.3.1
	rematch==1.0.1
	rematch2==2.1.2
	renv==0.15.2
	reprex==2.0.1
	reshape2==1.4.4
	rlang==1.0.1
	rmarkdown==2.11
	Rmpfr==0.8-7
	RSQLite==2.2.10
	rstudioapi==0.13
	rvest==1.0.2
	S4Vectors==0.32.3
	sandwich==3.0-1
	scales==1.1.1
	selectr==0.4-2
	snow==0.4-4
	stringi==1.7.6
	stringr==1.4.0
	SummarizedExperiment==1.24.0
	SuppDists==1.1-9.7
	sys==3.4
	TH.data==1.1-0
	tibble==3.1.6
	tidyr==1.2.0
	tidyselect==1.1.2
	tidyverse==1.3.1
	tinytex==0.37
	tzdb==0.2.0
	utf8==1.2.2
	uuid==1.0-3
	vctrs==0.3.8
	viridisLite==0.4.0
	vroom==1.5.7
	withr==2.4.3
	xfun==0.29
	XML==3.99-0.8
	xml2==1.3.3
	xtable==1.8-4
	XVector==0.34.0
	yaml==2.3.5
	zlibbioc==1.40.0
	zoo==1.8-9
	base==4.1.2
	boot==1.3-28
	class==7.3-19
	cluster==2.1.2
	codetools==0.2-18
	compiler==4.1.2
	datasets==4.1.2
	foreign==0.8-81
	graphics==4.1.2
	grDevices==4.1.2
	grid==4.1.2
	KernSmooth==2.23-20
	lattice==0.20-45
	MASS==7.3-54
	Matrix==1.3-4
	methods==4.1.2
	mgcv==1.8-38
	nlme==3.1-153
	nnet==7.3-16
	parallel==4.1.2
	rpart==4.1-15
	spatial==7.3-14
	splines==4.1.2
	stats==4.1.2
	stats4==4.1.2
	survival==3.2-13
	tcltk==4.1.2
	tools==4.1.2
	utils==4.1.2