Skip to content

Instantly share code, notes, and snippets.

@mtmorgan
mtmorgan / cigarAlign.R
Created November 13, 2012 17:32
Represent aligned DNA sequences as a DNAStringSet based on position and CIGAR
library(Rsamtools)
.cigarAlignInput <-
    function(file, param, what)
{
    ## Read gapped alignments from 'file' using 'param', then rename the
    ## metadata column whose name equals 'what' to the literal "what".
    ## Returns the alignments object with the renamed metadata column.
    aln <- readBamGappedAlignments(file, param=param)
    isTarget <- names(mcols(aln)) == what
    names(mcols(aln))[isTarget] <- "what"
    aln
}
@mtmorgan
mtmorgan / global.R
Created December 5, 2012 23:21
shiny AnnotationTable
library(shiny)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(org.Dm.eg.db)
## organism label -> name of the corresponding org.*.eg.db annotation package
db <- c(
    Human = "org.Hs.eg.db",
    Mouse = "org.Mm.eg.db",
    Drosophila = "org.Dm.eg.db"
)
map <- lapply(db, function(elt) tryCatch({
library(elt, quietly=TRUE, character.only=TRUE)
@mtmorgan
mtmorgan / ucscAnnotateGenome.R
Created September 29, 2014 02:02
Retrieve UCSC genomes() and their Latin binomial by scraping UCSC web pages, and translate these to NCBI taxonomy IDs through Entrez E-utilities calls
loadNamespace("rtracklayer")
loadNamespace("XML")
.organismToTaxid <- function(organism=character())
{
## query NCBI for taxonomy ID
.eutils <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils"
## 1. ids
uorganism <- unique(organism[!is.na(organism)])
@mtmorgan
mtmorgan / methods.R
Last active August 29, 2015 14:16
wrap methods() to report S3 and S4 methods for generic or class
## compatibility
## Fallback: define lengths() only when no object of that name is
## already visible; returns the length of each element of 'x' as an
## integer vector.
if (!exists("lengths")) {
    lengths <- function(x) {
        vapply(x, FUN = length, FUN.VALUE = integer(1))
    }
}
##
## methods
##
.S4methodsForClass <-
## Imports: GEOquery, Biobase
## GEO series accession; also used below as a local path name
acc <- "GSE62944"
## fetch the supplementary files only when no file or directory named
## after the accession exists in the current working directory
if (!file.exists(acc))
GEOquery::getGEOSuppFiles(acc)
## NOTE(review): changes the working directory for the rest of the
## session; presumably getGEOSuppFiles() created this directory -- confirm
setwd(acc)
clinvar <- local({
message("clinvar")
fl <- "GSE62944_TCGA_20_420_Clinical_Variables_7706_Samples.txt.gz"
@mtmorgan
mtmorgan / readKallisto.R
Last active December 4, 2017 11:51
read kallisto RNA-seq quantification into R / Bioconductor data structures
.require <-
function(pkg)
{
withCallingHandlers({
suppressPackageStartupMessages({
require(pkg, character.only=TRUE, quietly=TRUE)
})
}, warning=function(w) {
invokeRestart("muffleWarning")
}) || {
@mtmorgan
mtmorgan / gist:bd147505b89e42a151f9
Created June 24, 2015 18:54
dplyr / SummarizedExperiment compatibility layer -- half baked
## dplyr compatibility
as.data.frame.RangedSummarizedExperiment <-
function(x, row.names=NULL, optional=FALSE, ...)
{
colData <- colData(x)
rownames(colData) <- NULL
cbind(as.data.frame(rowRanges(x)[as.vector(row(x))]),
as.data.frame(colData[as.vector(col(x)),, drop=FALSE]),
sapply(assays(x), as.vector))
@mtmorgan
mtmorgan / phred2ASCIIOffset.R
Created March 15, 2016 14:44
Translate integer or ASCII character fastq phred score encodings to integer offsets useful in Rsamtools
.ascii_offset <- function()
{
    ## Named integer vector mapping each printable ASCII character
    ## (codes 33 through 126, i.e. "!" to "~") to its code minus 33,
    ## giving values 0 through 93.
    codes <- 33:126
    symbols <- strsplit(rawToChar(as.raw(codes)), "")[[1]]
    setNames(codes - 33L, symbols)
}
.phred2ascii_int <-
function(x, scheme)
{
## See https://en.wikipedia.org/wiki/FASTQ_format#Encoding
ascii <- .ascii_offset()
switch(scheme, "Illumina 1.8+" = {
## L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)
@mtmorgan
mtmorgan / DisGeNET.R
Created April 7, 2016 09:04
Query DisGeNET disease / gene database from R
#' Query DisGeNET disease / gene database
#'
#' Based on a script by jpinero@imim.es, retrieved from
#' http://www.disgenet.org/ds/DisGeNET/scripts/disgenet.R on 7 April,
#' 2016. This version is meant for interactive use within an R
#' session, and makes a single query to DisGeNET rather than one query
#' for each input symbol.
#'
#' @param input character vector of gene or disease identifiers
#'
@mtmorgan
mtmorgan / grangesToSymbol
Created July 29, 2016 20:50
Extract ranges of genes, and map ranges to corresponding genes
geneRanges <-
    function(db, column="ENTREZID")
{
    ## Extract gene ranges from 'db' together with the identifier column
    ## 'column', then return one range per identifier: each range is
    ## repeated once for every identifier attached to it, and the
    ## flattened identifiers are stored (as character) in the metadata
    ## column named by 'column'.
    annotated <- genes(db, columns=column)
    ids <- mcols(annotated)[[column]]
    ## index of each range, repeated by its number of identifiers
    idx <- rep(seq_along(annotated), elementNROWS(ids))
    expanded <- granges(annotated)[idx]
    mcols(expanded)[[column]] <- as.character(unlist(ids))
    expanded
}