% Title % Name % Date
List
# dataframetools | |
# A few simple functions for performing simple tasks with data.frames | |
# --- | |
# Includes functions for: | |
# | |
# - reordering data.frames | |
# - identifying invariant or blank columns | |
# - identifying groups of columns that are redundant with each other | |
# - converting all columns of class factor to class character |
#' Modified version of the ggplot2 plotmatrix function that accepts additional | |
#' variables for aesthetic mapping. | |
#' | |
#' @examples | |
#' data(iris) | |
#' iris.vars <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") | |
#' ggpairs(data = iris, facet.vars = iris.vars, | |
#' mapping = aes(color = Species, shape = Species)) | |
ggpairs <- function (data, facet.vars = colnames(data), facet.scale = "free", |
# Code written by brentp in response to BioStars question: | |
# http://www.biostars.org/post/show/6544/ | |
import random | |
import sys | |
def write_random_records(fqa, fqb, N=100000): | |
""" get N random headers from a fastq file without reading the | |
whole thing into memory""" | |
records = sum(1 for _ in open(fqa)) / 4 |
#' Generate data.frame of feature annotations | |
#' | |
#' Use bioconductor annotation packages to create a data.frame of feature/probe | |
#' annotations. | |
#' | |
#' @param chip character string identifying chip model (e.g., "illuminaHumanv2") | |
#' @param features optional character vector of chip features (i.e., probeset ids) | |
#' @param vars character vector of desired annotations. These must match objects | |
#' provided by the annotation package (e.g., "CHR") | |
#' @param duplicate.values how should duplicate values be handled? The default |
#' FTP tree mapper | |
#' Save an FTP site's directory structure as a list. | |
#' @author Aaron Wolen | |
#' | |
#' @examples | |
#' url <- 'ftp://ftp.genboree.org/EpigenomeAtlas/Current-Release/experiment-sample' | |
#' roadmap <- map_ftp(url = url, dirs = "Histone_H2BK120ac", recursive = TRUE) | |
map_ftp <- function(url, dirs, recursive = FALSE) { | |
require(RCurl, quietly = TRUE) |
Name | URL | |
---|---|---|
ENCODE | ftp://encodeftp.cse.ucsc.edu/pipeline/hg19/ | |
ENCODE (test) | http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/ | |
RoadMap | ftp://ftp.genboree.org/EpigenomeAtlas/Current-Release |
library(IRanges) | |
library(GenomicRanges) | |
library(rtracklayer) | |
# Select a BigWig file | |
bw.dir <- "/home/chromatin/roadmap/DNase_hypersensitivity/brain_fetal" | |
bw.file <- dir(bw.dir, full.names = TRUE, pattern = "*.bigWig")[1] | |
# Specify a genomic range | |
selection <- GRanges(seqnames = "chr4", |
install.packages(c("RCurl", "XML")) | |
bioc.v <- tools:::.BioC_version_associated_with_R_version | |
repos <- tools:::.read_repositories(file.path(R.home("etc"), "repositories")) | |
bioc.repo <- repos["BioCsoft",]$URL | |
bioc.repo <- sub("2\\.\\d+", bioc.v, bioc.repo) | |
packages <- c("zlibbioc", "BiocGenerics", "Biobase", "IRanges", | |
"AnnotationDbi", "GenomicRanges", "Biostrings", "Rsamtools", |
clipboard <- function(x, sep.lines = FALSE){ | |
clipboard <- pipe('pbcopy', 'w') | |
if(sep.lines){ | |
x <- unlist(strsplit(as.character(x), split = ",")) | |
x <- sub(" ", "", x) | |
} | |
write.table(x, clipboard, sep = "\t", | |
quote = FALSE, col.names = FALSE, row.names = FALSE) |