Martin Morgan mtmorgan

## ucscAnnotateGenome.R
loadNamespace("rtracklayer")
loadNamespace("XML")

.organismToTaxid <- function(organism=character())
{
    ## query NCBI for taxonomy ID
    .eutils <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    ## 1. ids
    uorganism <- unique(organism[!is.na(organism)])

## methods.R
## compatibility

## Define a stand-in `lengths()` only when no object of that name is
## already visible; otherwise the existing definition is left alone.
if (!exists("lengths")) {
    lengths <- function(x) {
        ## one integer per element of `x`: that element's length
        vapply(x, length, FUN.VALUE = integer(1))
    }
}

##
## methods
##

.S4methodsForClass <-

## GSE62944_TCGA_Rusbread_featureCounts.R
## Imports: GEOquery, Biobase

## Accession whose supplementary files this script works with
acc <- "GSE62944"

## Fetch the supplementary files only when nothing named `acc` exists yet
if (!file.exists(acc)) {
    GEOquery::getGEOSuppFiles(acc)
}

## Relative paths used after this point are resolved inside `acc`
setwd(acc)

clinvar <- local({
    message("clinvar")
    fl <- "GSE62944_TCGA_20_420_Clinical_Variables_7706_Samples.txt.gz"

## cigarAlign.R
library(Rsamtools)

## Read alignments from `file` and relabel one metadata column.
##
## file:  passed as the first argument to readBamGappedAlignments()
## param: forwarded as `param=` to readBamGappedAlignments()
## what:  name of the metadata column to relabel; any column of
##        mcols() with this name is renamed to the literal "what"
##
## Returns the alignments object with the renamed metadata column.
##
## NOTE(review): readBamGappedAlignments() looks to have been superseded
## by GenomicAlignments::readGAlignments() in current Bioconductor --
## confirm before running against a recent release.
.cigarAlignInput <-
    function(file, param, what)
{
    aln <- readBamGappedAlignments(file, param=param)
    cols <- names(mcols(aln))
    cols[cols == what] <- "what"
    names(mcols(aln)) <- cols
    aln
}

## global.R
library(shiny)

library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(org.Dm.eg.db)

## Named character vector: each name is an organism label and each value
## is the name of the matching org.*.eg.db annotation package.
db <- c(Human="org.Hs.eg.db", Mouse="org.Mm.eg.db",
        Drosophila="org.Dm.eg.db")
map <- lapply(db, function(elt) tryCatch({
    library(elt, quietly=TRUE, character.only=TRUE)

## phred2ASCIIOffset.R
## Lookup table for the printable ASCII characters with codes 33..126.
## Returns an integer vector 0..93 (character code minus 33) whose names
## are the corresponding single characters, "!" through "~".
.ascii_offset <- function()
{
    codes <- 33:126
    chars <- strsplit(rawToChar(as.raw(codes)), "")[[1]]
    setNames(codes - 33L, chars)
}

.phred2ascii_int <-
    function(x, scheme)
{
    ## See https://en.wikipedia.org/wiki/FASTQ_format#Encoding
    ascii <- .ascii_offset()
    switch(scheme, "Illumina 1.8+" = {
        ## L - Illumina 1.8+ Phred+33,  raw reads typically (0, 41)

## DisGeNET.R
#' Query DisGeNET disease / gene database
#'
#' Based on a script by jpinero@imim.es, retrieved from
#' http://www.disgenet.org/ds/DisGeNET/scripts/disgenet.R on 7 April,
#' 2016. This version is meant for interactive use within an R
#' session, and makes a single query to DisGeNET rather than one query
#' for each input symbol.
#'
#' @param input character vector of gene or disease identifiers
#'

## fill_down.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mtmorgan
                / fill_down.md
            
            
              Last active
              August 21, 2016 21:01
            
          
    These functions are in response to StackOverflow questions like
[this][], wanting to fill in missing NA values with preceding values,
optionally by group.
fill_down <- function(v) {
    if (length(v) > 1) {
        keep <- c(TRUE, !is.na(v[-1]))
        v[keep][cumsum(keep)]
    } else v


## g_range.R
suppressPackageStartupMessages({
    library(tibble)
    library(GenomicRanges)
})

## Define the S4 class "g_range" as an extension of GRanges with no slots
## of its own; the value returned by setClass() is kept as `.g_range`.
.g_range <- setClass("g_range", contains="GRanges")

## The four column names "chr", "start", "end" and "strand".
## NOTE(review): presumably the columns that carry range coordinates in
## the tibble-style view of a g_range -- confirm against the code that
## uses this vector.
.g_columns <- c("chr", "start", "end", "strand")

setAs("GRanges", "g_range", function(from) {

## orf.R
## Obtain the sequence(s) to scan.
## NOTE(review): getFastaSeq() is defined elsewhere; its return type is
## not visible here -- confirm.
dna <- getFastaSeq()
## function for finding orfs. Returned as IRanges.
## Called with longestORF = FALSE and minimumLength = 8.
ORFdef <- find_in_frame_ORFs(dna, longestORF = FALSE, minimumLength = 8)
## Keep only the elements of ORFdef that have non-zero length.
ORFdef <- ORFdef[lengths(ORFdef) > 0]

## Map (mapply) on each five prime leader
uORFs <- Map(
    function(granges, tx_name, ORFdef) {
        map_granges(ORFdef, granges, tx_name)
    },
	loadNamespace("rtracklayer")
	loadNamespace("XML")

	.organismToTaxid <- function(organism=character())
	{
	## query NCBI for taxonomy ID
	.eutils <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils"

	## 1. ids
	uorganism <- unique(organism[!is.na(organism)])
	## compatibility

	## Define a stand-in `lengths()` only when no object of that name is
	## already visible; otherwise the existing definition is left alone.
	if (!exists("lengths")) {
	    lengths <- function(x) {
	        ## one integer per element of `x`: that element's length
	        vapply(x, length, FUN.VALUE = integer(1))
	    }
	}

	##
	## methods
	##

	.S4methodsForClass <-
	## Imports: GEOquery, Biobase

	acc <- "GSE62944"
	if (!file.exists(acc))
	GEOquery::getGEOSuppFiles(acc)
	setwd(acc)

	clinvar <- local({
	message("clinvar")
	fl <- "GSE62944_TCGA_20_420_Clinical_Variables_7706_Samples.txt.gz"
	library(Rsamtools)

	## Read alignments from `file` and relabel one metadata column.
	##
	## file:  passed as the first argument to readBamGappedAlignments()
	## param: forwarded as `param=` to readBamGappedAlignments()
	## what:  name of the metadata column to relabel; any column of
	##        mcols() with this name is renamed to the literal "what"
	##
	## Returns the alignments object with the renamed metadata column.
	.cigarAlignInput <-
	    function(file, param, what)
	{
	    aln <- readBamGappedAlignments(file, param=param)
	    cols <- names(mcols(aln))
	    cols[cols == what] <- "what"
	    names(mcols(aln)) <- cols
	    aln
	}
	library(shiny)

	library(org.Hs.eg.db)
	library(org.Mm.eg.db)
	library(org.Dm.eg.db)

	db <- c(Human="org.Hs.eg.db", Mouse="org.Mm.eg.db",
	Drosophila="org.Dm.eg.db")
	map <- lapply(db, function(elt) tryCatch({
	library(elt, quietly=TRUE, character.only=TRUE)
	## Lookup table for the printable ASCII characters with codes 33..126.
	## Returns an integer vector 0..93 (character code minus 33) whose
	## names are the corresponding single characters, "!" through "~".
	.ascii_offset <- function()
	{
	    codes <- 33:126
	    chars <- strsplit(rawToChar(as.raw(codes)), "")[[1]]
	    setNames(codes - 33L, chars)
	}

	.phred2ascii_int <-
	function(x, scheme)
	{
	## See https://en.wikipedia.org/wiki/FASTQ_format#Encoding
	ascii <- .ascii_offset()
	switch(scheme, "Illumina 1.8+" = {
	## L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)
	#' Query DisGeNET disease / gene database
	#'
	#' Based on a script by jpinero@imim.es, retrieved from
	#' http://www.disgenet.org/ds/DisGeNET/scripts/disgenet.R on 7 April,
	#' 2016. This version is meant for interactive use within an R
	#' session, and makes a single query to DisGeNET rather than one query
	#' for each input symbol.
	#'
	#' @param input character vector of gene or disease identifiers
	#'
	suppressPackageStartupMessages({
	library(tibble)
	library(GenomicRanges)
	})

	.g_range <- setClass("g_range", contains="GRanges")

	.g_columns <- c("chr", "start", "end", "strand")

	setAs("GRanges", "g_range", function(from) {
	dna <- getFastaSeq()
	## function for finding orfs. Returned as IRanges.
	ORFdef <- find_in_frame_ORFs(dna, longestORF = FALSE, minimumLength = 8)
	ORFdef <- ORFdef[lengths(ORFdef) > 0]

	## Map (mapply) on each five prime leader
	uORFs <- Map(
	function(granges, tx_name, ORFdef) {
	map_granges(ORFdef, granges, tx_name)
	},