Skip to content

Instantly share code, notes, and snippets.

@crazyhottommy
Last active January 2, 2017 21:04
Show Gist options
  • Save crazyhottommy/4681a30700b2c0c1ee02cbc875e7c4e9 to your computer and use it in GitHub Desktop.
Save crazyhottommy/4681a30700b2c0c1ee02cbc875e7c4e9 to your computer and use it in GitHub Desktop.
### Define intronic, exonic and intergenic regions
```{r}
library(AnnotationHub)
library(dplyr) ## for %>%
ah = AnnotationHub()
possibleDates(ah)
AnnotationHub::query(ah, c("gtf", "Homo_sapiens", "GRCh37"))
GRCh37.gtf<- ah[['AH10684']]
## subset the gtf files for only protein_coding genes and lincRNAs
# GRCh37.gtf<- GRCh37.gtf[GRCh37.gtf$gene_biotype %in% c("protein_coding", "lincRNA")]
table(GRCh37.gtf$gene_biotype)
## make a txdb
library(GenomicFeatures)
GRCh37.txdb <- makeTxDbFromGRanges(GRCh37.gtf)
## exons
exonsBy(GRCh37.txdb, "gene") %>% unlist() %>% reduce()
exons(GRCh37.txdb) %>% reduce()
## introns
intronsByTranscript(GRCh37.txdb) %>% unlist() %>% reduce()
### get intergenic regions
chrom_grngs <- as(seqinfo(GRCh37.txdb), "GRanges")
collapsed_tx <- reduce(transcripts(GRCh37.txdb))
## or use unstrand()
strand(collapsed_tx) <- "*"
intergenic <- GenomicRanges::setdiff(chrom_grngs, collapsed_tx)
fiveUTRsByTranscript(GRCh37.txdb)
threeUTRsByTranscript(GRCh37.txdb)
```
### CpG islands, shores,
```{r}
library(AnnotationHub)
ah = AnnotationHub()
possibleDates(ah)
head(unique(ah$rdataclass))
cgi<- ah[["AH5086"]]
## follow http://chitka-kalyan.blogspot.com/2014/03/cpg-island-shelves.html
###############################################################
# Extract CpG island shores
###############################################################
# extract the shore defined by 2000 bp upstream of cpg islands
shore1<- trim(flank(cgi, width = 2000, start= TRUE))
# extract the shore defined by 2000 bp downstream of cpg islands
shore2<- trim(flank(cgi, width = 2000, start = FALSE))
# perform intersection and combine the shores where they overlap
shore1_2<- reduce(c(shore1,shore2))
# extract the features (ranges) that are present in shores only and not in CpG islands (ie., shores not overlapping islands)
cpgi_shores<- setdiff(shore1_2, cgi)
cpgi_shores$name<- paste("shore", 1: length(cpgi_shores), sep ="_")
###############################################################
# Extract CpG island shelves
###############################################################
# extract the shore defined by 4000 bp upstream of cpg islands
shelves1<- trim(flank(cgi, 4000))
# extract the shore defined by 2000 bp downstream of cpg islands
shelves2<- trim(flank(cgi, 4000, FALSE))
# perform intersection and combine the shelves where they overlap
shelves1_2<- reduce(c(shelves1,shelves2))
# create a set of ranges consisting CpG Islands, Shores
island_shores<- c(cgi, cpgi_shores)
# extract the features (ranges) that are present in shelves only and not in cpg_islands or shores(ie., shelves not overlapping islands or shores)
cpgi_shelves<- setdiff(shelves1_2, island_shores)
cpgi_shelves$name= paste("shelf", 1: length(cpgi_shelves), sep ="_")
#### use annotatr bioc Package!!
library(annotatr)
## built-in short cut for cpg island, shores and shelves
annots<- 'hg19_cpgs'
cpg.annotations<- build_annotations(genome = 'hg19', annotations = annots)
table(annotations$type)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment