Skip to content

Instantly share code, notes, and snippets.

@mtmorgan
mtmorgan / ucscAnnotateGenome.R
Created September 29, 2014 02:02
Retrieve UCSC genomes() and their Latin binomial names by scraping UCSC web pages, and translate these to NCBI taxonomy IDs through Entrez E-utilities calls
loadNamespace("rtracklayer")
loadNamespace("XML")
.organismToTaxid <- function(organism=character())
{
## query NCBI for taxonomy ID
.eutils <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils"
## 1. ids
uorganism <- unique(organism[!is.na(organism)])
@mtmorgan
mtmorgan / methods.R
Last active August 29, 2015 14:16
wrap methods() to report S3 and S4 methods for generic or class
## compatibility
if (!exists("lengths")) {
    ## Compatibility shim: only defined when no object named `lengths`
    ## is already visible. Returns the length of each element of `x`
    ## as an integer vector (element names are carried over by vapply).
    lengths <- function(x) {
        vapply(x, FUN = length, FUN.VALUE = integer(1))
    }
}
##
## methods
##
.S4methodsForClass <-
## Imports: GEOquery, Biobase
## Accession string; also used below as the path that is tested for
## existence and then entered with setwd()
acc <- "GSE62944"

## Download the supplementary files only when nothing named `acc`
## exists in the current working directory
if (!file.exists(acc)) {
    GEOquery::getGEOSuppFiles(acc)
}

## Make the accession directory the working directory
## NOTE(review): changes global state for the rest of the session
setwd(acc)
clinvar <- local({
message("clinvar")
fl <- "GSE62944_TCGA_20_420_Clinical_Variables_7706_Samples.txt.gz"
@mtmorgan
mtmorgan / cigarAlign.R
Created November 13, 2012 17:32
Represent aligned DNA sequences as a DNAStringSet based on position and CIGAR
library(Rsamtools)
.cigarAlignInput <-
    function(file, param, what)
{
    ## Read alignments from `file`, restricted / annotated by `param`.
    ## NOTE(review): readBamGappedAlignments() may be defunct in current
    ## Bioconductor releases -- confirm against the installed version.
    aln <- readBamGappedAlignments(file, param=param)

    ## Rename the metadata column whose name equals `what` to the
    ## literal name "what"; all other column names are left untouched.
    cols <- names(mcols(aln))
    cols[cols == what] <- "what"
    names(mcols(aln)) <- cols

    aln
}
@mtmorgan
mtmorgan / global.R
Created December 5, 2012 23:21
shiny AnnotationTable
library(shiny)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(org.Dm.eg.db)
## Named character vector: display label -> annotation package name
db <- c(
    Human = "org.Hs.eg.db",
    Mouse = "org.Mm.eg.db",
    Drosophila = "org.Dm.eg.db"
)
map <- lapply(db, function(elt) tryCatch({
library(elt, quietly=TRUE, character.only=TRUE)
@mtmorgan
mtmorgan / phred2ASCIIOffset.R
Created March 15, 2016 14:44
Translate integer or ASCII character fastq phred score encodings to integer offsets useful in Rsamtools
.ascii_offset <- function()
{
    ## ASCII code points 33 ('!') through 126 ('~')
    codes <- 33:126

    ## One single-character string per code point
    chars <- strsplit(rawToChar(as.raw(codes)), "")[[1]]

    ## Integer offsets 0..93, named by the corresponding character
    offsets <- codes - 33L
    names(offsets) <- chars
    offsets
}
.phred2ascii_int <-
function(x, scheme)
{
## See https://en.wikipedia.org/wiki/FASTQ_format#Encoding
ascii <- .ascii_offset()
switch(scheme, "Illumina 1.8+" = {
## L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)
@mtmorgan
mtmorgan / DisGeNET.R
Created April 7, 2016 09:04
Query DisGeNET disease / gene database from R
#' Query DisGeNET disease / gene database
#'
#' Based on a script by jpinero@imim.es, retrieved from
#' http://www.disgenet.org/ds/DisGeNET/scripts/disgenet.R on 7 April,
#' 2016. This version is meant for interactive use within an R
#' session, and makes a single query to DisGeNET rather than one query
#' for each input symbol.
#'
#' @param input character vector of gene or disease identifiers
#'

These functions are in response to StackOverflow questions like [this][], wanting to fill in missing (NA) values with preceding values, optionally by group

fill_down <- function(v) {
    if (length(v) > 1) {
        keep <- c(TRUE, !is.na(v[-1]))
        v[keep][cumsum(keep)]
    } else v
@mtmorgan
mtmorgan / g_range.R
Last active December 10, 2016 15:47
endomorphic overlaps, etc
suppressPackageStartupMessages({
library(tibble)
library(GenomicRanges)
})
## Define S4 class "g_range" as an extension of "GRanges"; `.g_range`
## keeps the generator returned by setClass()
.g_range <- setClass(Class = "g_range", contains = "GRanges")

## Fixed set of column names used with g_range objects
.g_columns <- c("chr", "start", "end", "strand")
setAs("GRanges", "g_range", function(from) {
dna <- getFastaSeq()
## function for finding orfs. Returned as IRanges.
ORFdef <- find_in_frame_ORFs(dna, longestORF = FALSE, minimumLength = 8)
ORFdef <- ORFdef[lengths(ORFdef) > 0]
## Map (mapply) on each five prime leader
uORFs <- Map(
function(granges, tx_name, ORFdef) {
map_granges(ORFdef, granges, tx_name)
},