Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stephenturner/297212d4d68a25c6ee43 to your computer and use it in GitHub Desktop.
Save stephenturner/297212d4d68a25c6ee43 to your computer and use it in GitHub Desktop.
Annotation examples from ISMB 2014
# Basic work flows: from SYMBOL to gene model and sequence
# Orig from http://master.bioconductor.org/help/course-materials/2014/ISMB2014/brca1.R
library(org.Hs.eg.db)
eid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")[["ENTREZID"]]
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
txid <- select(txdb, eid, "TXNAME", "GENEID")[["TXNAME"]]
cds <- cdsBy(txdb, by="tx", use.names=TRUE)
brca1cds <- cds[names(cds) %in% txid]
library(Gviz)
plotTracks(list(GenomeAxisTrack(), AnnotationTrack(brca1cds))
library(BSgenome.Hsapiens.UCSC.hg19)
genome <- BSgenome.Hsapiens.UCSC.hg19
tx_seq <- extractTranscriptSeqs(genome, brca1cds)
tx_seq
# AnnotationHub work flow: evolutionarily conserved enhancer SNPs near genes on chr17
# Orig: http://master.bioconductor.org/help/course-materials/2014/ISMB2014/enhancer-snps.R
##
## 1. Retrieve enhancers, SNPs from AnnotationHub, gene coordinates
## from TxDb.*; harmonize genome and chromosome names
##
library(AnnotationHub)
hub <- AnnotationHub()
hg19 <- hub[metadata(hub)$Genome %in% c("hg19", "GRCh37")]
## Enhancers, from ENCODE BroadHmm track
## - details of track
## http://hgwdev.sdsc.edu/cgi-bin/hgTrackUi?db=hg19&g=wgEncodeBroadHmm
## http://www.ncbi.nlm.nih.gov/pubmed/21441907
hmm <- hub$goldenpath.hg18.database.wgEncodeBroadHmm_0.0.1.RData
## wrong 'seqlengths' -- 'hg18' in title, but actually liftOver to hg19
library(BSgenome.Hsapiens.UCSC.hg19)
seqlevels(hmm, force=TRUE) <- paste0("chr", 1:22)
seqlengths(hmm) <-
seqlengths(BSgenome.Hsapiens.UCSC.hg19)[names(seqlengths(hmm))]
genome(hmm) <- "hg19"
enhancers <- hmm[grep("Enhancer", hmm$name)]
## SNPs, e.g., from chr1 of CEU population
snps <- query(hub, "dbSNP.*17.*CEU")[[1]]
## true (length 1) SNPs with single ALT allele
snps <- snps[width(snps) == 1 & elementLengths(alt(snps)) == 1]
seqlevelsStyle(snps) <- "UCSC"
genome(snps) <- "hg19"
## gene coordinates
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
genes <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
genes <- genes[seqnames(genes) == "chr17"]
##
## 2. Download (large!) conservation track as BED file from UCSC,
## query for chromosome 1 using rtracklayer
##
library(rtracklayer)
url <- paste("http://hgdownload.cse.ucsc.edu/goldenPath/hg19",
"phastCons100way/hg19.100way.phastCons.bw", sep="/")
dest <- "~/ISMB-2014/resources/hg19.100way.phastCons.bw"
## download.file(url, dest)
bw <- BigWigFile(dest)
chr17 <- as(seqinfo(BigWigFile(dest)), "GRanges")["chr17"]
score <- import(bw, which=chr17, as="NumericList")[["chr17"]]
##
## 4. subset SNPs by enhancers
##
enhancerSnps <- subsetByOverlaps(snps, enhancers)
##
## 5. Annotate enhancer SNPs with evolutionary conservation score
##
score <- score[start(enhancerSnps)]
## info(enhancerSnps)$score <- score[start(enhancerSnps)]
##
## 6. find nearest and distanceToNearest genes
##
hits <- distanceToNearest(enhancerSnps, genes)
df <- data.frame(
Near=names(genes)[subjectHits(hits)],
Distance=mcols(hits)$distance,
Score=score, stringsAsFactors=FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment