Last active
August 29, 2015 14:04
-
-
Save stephenturner/297212d4d68a25c6ee43 to your computer and use it in GitHub Desktop.
Annotation examples from ISMB 2014
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basic work flows: from SYMBOL to gene model and sequence | |
# Orig from http://master.bioconductor.org/help/course-materials/2014/ISMB2014/brca1.R | |
library(org.Hs.eg.db) | |
eid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")[["ENTREZID"]] | |
library(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene | |
txid <- select(txdb, eid, "TXNAME", "GENEID")[["TXNAME"]] | |
cds <- cdsBy(txdb, by="tx", use.names=TRUE) | |
brca1cds <- cds[names(cds) %in% txid] | |
library(Gviz) | |
plotTracks(list(GenomeAxisTrack(), AnnotationTrack(brca1cds)) | |
library(BSgenome.Hsapiens.UCSC.hg19) | |
genome <- BSgenome.Hsapiens.UCSC.hg19 | |
tx_seq <- extractTranscriptSeqs(genome, brca1cds) | |
tx_seq |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AnnotationHub work flow: evolutionarily conserved enhancer SNPs near genes on chr17 | |
# Orig: http://master.bioconductor.org/help/course-materials/2014/ISMB2014/enhancer-snps.R | |
## | |
## 1. Retrieve enhancers, SNPs from AnnotationHub, gene coordinates | |
## from TxDb.*; harmonize genome and chromosome names | |
## | |
library(AnnotationHub) | |
hub <- AnnotationHub() | |
hg19 <- hub[metadata(hub)$Genome %in% c("hg19", "GRCh37")] | |
## Enhancers, from ENCODE BroadHmm track | |
## - details of track | |
## http://hgwdev.sdsc.edu/cgi-bin/hgTrackUi?db=hg19&g=wgEncodeBroadHmm | |
## http://www.ncbi.nlm.nih.gov/pubmed/21441907 | |
hmm <- hub$goldenpath.hg18.database.wgEncodeBroadHmm_0.0.1.RData | |
## wrong 'seqlengths' -- 'hg18' in title, but actually liftOver to hg19 | |
library(BSgenome.Hsapiens.UCSC.hg19) | |
seqlevels(hmm, force=TRUE) <- paste0("chr", 1:22) | |
seqlengths(hmm) <- | |
seqlengths(BSgenome.Hsapiens.UCSC.hg19)[names(seqlengths(hmm))] | |
genome(hmm) <- "hg19" | |
enhancers <- hmm[grep("Enhancer", hmm$name)] | |
## SNPs, e.g., from chr1 of CEU population | |
snps <- query(hub, "dbSNP.*17.*CEU")[[1]] | |
## true (length 1) SNPs with single ALT allele | |
snps <- snps[width(snps) == 1 & elementLengths(alt(snps)) == 1] | |
seqlevelsStyle(snps) <- "UCSC" | |
genome(snps) <- "hg19" | |
## gene coordinates | |
library(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
genes <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
genes <- genes[seqnames(genes) == "chr17"] | |
## | |
## 2. Download (large!) conservation track as BED file from UCSC, | |
## query for chromosome 1 using rtracklayer | |
## | |
library(rtracklayer) | |
url <- paste("http://hgdownload.cse.ucsc.edu/goldenPath/hg19", | |
"phastCons100way/hg19.100way.phastCons.bw", sep="/") | |
dest <- "~/ISMB-2014/resources/hg19.100way.phastCons.bw" | |
## download.file(url, dest) | |
bw <- BigWigFile(dest) | |
chr17 <- as(seqinfo(BigWigFile(dest)), "GRanges")["chr17"] | |
score <- import(bw, which=chr17, as="NumericList")[["chr17"]] | |
## | |
## 4. subset SNPs by enhancers | |
## | |
enhancerSnps <- subsetByOverlaps(snps, enhancers) | |
## | |
## 5. Annotate enhancer SNPs with evolutionary conservation score | |
## | |
score <- score[start(enhancerSnps)] | |
## info(enhancerSnps)$score <- score[start(enhancerSnps)] | |
## | |
## 6. find nearest and distanceToNearest genes | |
## | |
hits <- distanceToNearest(enhancerSnps, genes) | |
df <- data.frame( | |
Near=names(genes)[subjectHits(hits)], | |
Distance=mcols(hits)$distance, | |
Score=score, stringsAsFactors=FALSE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment