Last active
January 2, 2017 21:04
-
-
Save crazyhottommy/4681a30700b2c0c1ee02cbc875e7c4e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Define intronic, exonic and intergenic regions | |
```{r} | |
library(AnnotationHub) | |
library(dplyr) ## for %>% | |
ah = AnnotationHub() | |
possibleDates(ah) | |
AnnotationHub::query(ah, c("gtf", "Homo_sapiens", "GRCh37")) | |
GRCh37.gtf<- ah[['AH10684']] | |
## subset the gtf files for only protein_coding genes and lincRNAs | |
# GRCh37.gtf<- GRCh37.gtf[GRCh37.gtf$gene_biotype %in% c("protein_coding", "lincRNA")] | |
table(GRCh37.gtf$gene_biotype) | |
## make a txdb | |
library(GenomicFeatures) | |
GRCh37.txdb <- makeTxDbFromGRanges(GRCh37.gtf) | |
## exons | |
exonsBy(GRCh37.txdb, "gene") %>% unlist() %>% reduce() | |
exons(GRCh37.txdb) %>% reduce() | |
## introns | |
intronsByTranscript(GRCh37.txdb) %>% unlist() %>% reduce() | |
### get intergenic regions | |
chrom_grngs <- as(seqinfo(GRCh37.txdb), "GRanges") | |
collapsed_tx <- reduce(transcripts(GRCh37.txdb)) | |
## or use unstrand() | |
strand(collapsed_tx) <- "*" | |
intergenic <- GenomicRanges::setdiff(chrom_grngs, collapsed_tx) | |
fiveUTRsByTranscript(GRCh37.txdb) | |
threeUTRsByTranscript(GRCh37.txdb) | |
``` | |
### CpG islands, shores, | |
```{r} | |
library(AnnotationHub) | |
ah = AnnotationHub() | |
possibleDates(ah) | |
head(unique(ah$rdataclass)) | |
cgi<- ah[["AH5086"]] | |
## follow http://chitka-kalyan.blogspot.com/2014/03/cpg-island-shelves.html | |
############################################################### | |
# Extract CpG island shores | |
############################################################### | |
# extract the shore defined by 2000 bp upstream of cpg islands | |
shore1<- trim(flank(cgi, width = 2000, start= TRUE)) | |
# extract the shore defined by 2000 bp downstream of cpg islands | |
shore2<- trim(flank(cgi, width = 2000, start = FALSE)) | |
# perform intersection and combine the shores where they overlap | |
shore1_2<- reduce(c(shore1,shore2)) | |
# extract the features (ranges) that are present in shores only and not in CpG islands (ie., shores not overlapping islands) | |
cpgi_shores<- setdiff(shore1_2, cgi) | |
cpgi_shores$name<- paste("shore", 1: length(cpgi_shores), sep ="_") | |
############################################################### | |
# Extract CpG island shelves | |
############################################################### | |
# extract the shore defined by 4000 bp upstream of cpg islands | |
shelves1<- trim(flank(cgi, 4000)) | |
# extract the shore defined by 2000 bp downstream of cpg islands | |
shelves2<- trim(flank(cgi, 4000, FALSE)) | |
# perform intersection and combine the shelves where they overlap | |
shelves1_2<- reduce(c(shelves1,shelves2)) | |
# create a set of ranges consisting CpG Islands, Shores | |
island_shores<- c(cgi, cpgi_shores) | |
# extract the features (ranges) that are present in shelves only and not in cpg_islands or shores(ie., shelves not overlapping islands or shores) | |
cpgi_shelves<- setdiff(shelves1_2, island_shores) | |
cpgi_shelves$name= paste("shelf", 1: length(cpgi_shelves), sep ="_") | |
#### use annotatr bioc Package!! | |
library(annotatr) | |
## built-in short cut for cpg island, shores and shelves | |
annots<- 'hg19_cpgs' | |
cpg.annotations<- build_annotations(genome = 'hg19', annotations = annots) | |
table(annotations$type) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment