Created
October 26, 2022 15:08
-
-
Save andreagrioni/a0c42745bf297293b1233866da41a688 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### BiomaRt Biotype Analysis | |
See BioMartR documentation [here](https://cran.r-project.org/web/packages/biomartr/vignettes/Functional_Annotation.html) | |
Load ensembl db annotation. | |
```{r} | |
# load ensembl | |
ensembl <- useEnsembl( | |
biomart = "genes", | |
dataset = "hsapiens_gene_ensembl" | |
) | |
``` | |
retrieve annotation for select proteins. | |
```{r} | |
mapping <- getBM( | |
attributes = c( | |
'ensembl_gene_id', | |
#'ensembl_transcript_id', | |
'hgnc_symbol', | |
"go_id", "namespace_1003", "name_1006"), | |
filters = 'hgnc_symbol', | |
values = my_annotation$EntrezGeneSymbol %>% unique, | |
mart = ensembl | |
) | |
# map | |
mapping | |
``` | |
Sort Biomart Annotation and filter for the three most common terms. | |
```{r} | |
# sort and filter | |
mapping %>% | |
# look for biological processes only | |
dplyr::filter(namespace_1003 == "biological_process") %>% | |
dplyr::group_by(name_1006) %>% | |
dplyr::tally() %>% | |
dplyr::arrange(dplyr::desc(n)) %>% | |
dplyr::slice_head(n=40) %>% | |
dplyr::pull(name_1006) -> my_terms | |
# print out | |
glue::glue("my_terms are:\t{my_terms}") | |
# filter table | |
mapping %>% | |
dplyr::filter(namespace_1003 == "biological_process", name_1006 %in% my_terms) -> placeholder | |
# | |
placeholder %>% | |
dplyr::select(hgnc_symbol, name_1006) %>% | |
dplyr::mutate(value = 1) %>% | |
tidyr::pivot_wider(names_from=name_1006, values_fn=min ) -> my_biomart | |
my_biomart %>% | |
as.data.frame -> my_biomart_df | |
rownames(my_biomart_df) <- my_biomart_df$hgnc_symbol | |
# replace NA with 0 | |
my_biomart_df[is.na(my_biomart_df)] <- 0 | |
``` | |
set color code for heatmap | |
```{r} | |
# set minimum | |
min_val <- -10 | |
# set max | |
max_val <- +10 | |
# Sets the minimum (0), the maximum (15), and the increasing steps (+1) for the color scale | |
# Note: if some of your genes are outside of this range, they will appear white on the heatmap | |
breaksList = seq(min_val, max_val, by = 0.1) | |
# Defines the vector of colors for the legend (it has to be of the same lenght of breaksList) | |
color_code = colorRampPalette(c("blue", "white", "red"))(length(breaksList)) | |
``` | |
make pheatmap | |
```{r, eval=FALSE} | |
# create categorical color vector for terms | |
color_vctr <- c( "N" = "white", "Y" = "red") | |
annoCol <- rep(list(color_vctr), 10) | |
names(annoCol) <- my_terms | |
# draw heatmap | |
pheatmap::pheatmap( | |
my_matrix, | |
scale="row", | |
cluster_rows=T, | |
cluster_cols=T, | |
show_colnames=FALSE, | |
color=color_code, | |
breaks=breaksList, | |
cellwidth=3, | |
cellheight=5, | |
fontsize_row=4, | |
fontsize_col=4, | |
treeheight_row=0.0, | |
treeheight_col=0.0, | |
annotation_row = my_biomart_df %>% dplyr::select(-hgnc_symbol), | |
annotation_col = my_metadata %>% dplyr::select(ARMCD), | |
annotation_colors = annoCol, | |
annotation_names_row=TRUE, | |
annotation_legend=FALSE, | |
# annotation_row=my_annotation %>% dplyr::select(EntrezGeneSymbol), | |
filename=fs::path(output_img, "hm_common.png"), | |
#width = 5, | |
#height = 5 | |
) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment