Stephen Turner stephenturner

## build-install-bcftools-home.sh
# Compile and install htslib, bcftools, samtools 1.9 to home directory

# Set up dirs where stuff will be installed.
# The expansions are quoted so that a HOME containing spaces or glob
# characters reaches mkdir as a single path instead of being word-split.
mkdir -p "${HOME}/bin/htslib"
mkdir -p "${HOME}/bin/bcftools"
mkdir -p "${HOME}/bin/samtools"

# Make a dir to build
mkdir -p "${HOME}/tmp"

## bedtools_cheatsheet.md

      
              1 file
            
          
              20 forks
            
          
                0 comments
              
            
              79 stars
            
          
                ilevantis
                / bedtools_cheatsheet.md
            
            
              Last active
              March 5, 2025 00:43
            
              
                Bedtools cheatsheet
              
          
    Bedtools Cheatsheet

General:


Tools
Description


flank
Create new intervals from the flanks of existing intervals.


slop
Adjust the size of intervals.


shift
Adjust the position of intervals.


subtract
Remove intervals based on overlaps between two files.


## msigdf_clusterprofiler.R
## devtools::install_github("stephenturner/msigdf")
library(msigdf)
library(dplyr)
library(clusterProfiler)

# Rows of msigdf.human whose collection is "c2", reduced to the geneset and
# entrez columns and converted to a plain data.frame.
# The dplyr verbs are namespace-qualified so that packages attached after
# dplyr cannot mask filter()/select().
c2 <- msigdf.human %>%
  dplyr::filter(collection == "c2") %>%
  dplyr::select(geneset, entrez) %>%
  as.data.frame()

# geneList is an example dataset shipped with DOSE. Naming the package makes
# the lookup independent of whether clusterProfiler attaches DOSE or only
# imports it (data() without `package` only searches attached packages).
data(geneList, package = "DOSE")
# Names of the first 100 entries of geneList.
de <- names(geneList)[1:100]

## install-gcc48-linuxbrew-centos6.md

      
              1 file
            
          
              36 forks
            
          
                25 comments
              
            
              125 stars
            
          
                stephenturner
                / install-gcc48-linuxbrew-centos6.md
            
            
              Last active
              January 8, 2025 06:27
            
              
                Installing gcc 4.8 and Linuxbrew on CentOS 6
              
          
    Installing gcc 4.8 and Linuxbrew on CentOS 6

The GCC distributed with CentOS 6 is 4.4.7, which is pretty outdated. I'd like to use gcc 4.8+. Also, when trying to install Linuxbrew you run into a dependency loop where Homebrew's gcc depends on zlib, which depends on gcc. Here's how I solved the problem.
Note: Requires sudo privileges.
Resources:


http://superuser.com/a/676337/88393: Forum response on using CERN's open Scientific Linux distribution of RHEL's developer toolset.
http://linux.web.cern.ch/linux/devtoolset/: CERN's developer toolset installation instructions.


## r-fcsn-in-wild-search.md

      
              1 file
            
          
              0 forks
            
          
                1 comment
              
            
              15 stars
            
          
                jennybc
                / r-fcsn-in-wild-search.md
            
            
              Last active
              January 6, 2020 08:09
            
              
                Search for "natural" usage of a function across all CRAN packages
              
          
    What if a function in a package has no examples? Or is poorly exampled? Wouldn't it be nice to find functioning instances of it "in the wild"?
Via Twitter, Noam Ross taught me a clever way to do such searches on GitHub. Put this into the GitHub search box to see people using the llply() function from plyr:
"llply" user:cran language:R
Or just click here.

  
## missing.R
# A quick function to save a PBM (http://en.wikipedia.org/wiki/Netpbm_format)
# image, to visualize *a lot* of missing data pretty quickly (outside of R).
#
# Arguments:
#   x:    object on which dim(), ncol(), nrow() and is.na() are called
#         (presumably a matrix or data frame -- confirm against callers).
#   file: passed to file() and opened for writing in text mode.

writeMissingPBM <- function(x, file) {
  # NOTE(review): dims is not used in the lines visible here.
  dims <- dim(x)
  # Overwrite every cell in place with 1 where the value is NA and 0 otherwise;
  # assigning through x[] keeps the shape of x.
  x[] <- as.integer(is.na(x))
  con <- file(file, open="wt")
  # Header: the "P1" line, then "<ncol> <nrow>" on the next line.
  writeLines(sprintf("P1\n%d %d", ncol(x), nrow(x)), con)
  # Body: one line per row of x, values separated by single spaces, with no
  # row names, column names or quoting.
  write.table(x, file=con, sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE)
  close(con)

## README.md

      
              3 files
            
          
              1 fork
            
          
                0 comments
              
            
              4 stars
            
          
                seandavi
                / README.md
            
            
              Last active
              December 28, 2023 02:46
            
              
                snpEff on the NIH Biowulf cluster
              
          
    Usage

To use these scripts:

Clone this repository: git clone https://gist.github.com/95a4b2ab3b90f6f0bfd9.git snpEffScript
cd snpEffScript
Make appropriate changes to setup.sh
Call snpEff.sh like so:


## cell-line-workflow.sh
# Space-separated list of sample IDs; the loop below iterates over it.
export SAMPLES="2484-AJ-0001 2484-AJ-0002 2484-AJ-0003"

######################################
# Make FASTQ
######################################
# Project root directory and the name used for this step's jobs.
export OVHOME=/home/arq5x/cphg-home/cphg-quinlan/projects/ov-cell-lines
export STEPNAME=ovc-fastq
# One iteration per whitespace-separated entry of $SAMPLES.
for sample in `echo $SAMPLES`
do
# qsub command prefix stored in QSUB; $STEPNAME is expanded here, when the
# string is assigned. NOTE(review): the meaning of the individual qsub flags
# (queue, resources, mail settings) depends on the scheduler -- confirm
# against the cluster's qsub documentation.
export QSUB="qsub -W group_list=cphg_arq5x -q arq5xlab -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu";

## CologneR.R
# Attach dependencies with library() so that a missing package stops the
# script immediately; require() would only warn and return FALSE.
library(reshape2)

# data.table commit (1048)
library(data.table)
# Startup message recorded when the package was attached:
# data.table 1.8.11  For help type: help("data.table")

# Fix the RNG seed so the generated data is reproducible.
set.seed(1)
N <- 2e7 # size of DT

## idat2lumibatch.R
# Read a set of iDAT files and collect per-file values from their Quants
# tables. Only the first part of the function is visible here, so the return
# value is not documented.
#
# Arguments:
#   filenames: character vector of iDAT filenames; each is passed to readIDAT().
idat2lumibatch <- function(filenames) {
  # filenames is a character vector of iDAT filenames
  require(illuminaio)
  require(lumi)
  # One readIDAT() result per input file, in the order given.
  idatlist = lapply(filenames,readIDAT)
  # Quants$MeanBinData of every file, combined by sapply() (presumably one
  # column per file when all files have the same length -- confirm).
  exprs = sapply(idatlist,function(x) {
    return(x$Quants$MeanBinData)})
  # Per file: DevBinData divided by the square root of NumGoodBeadsBinData.
  se.exprs = sapply(idatlist,function(x) {
    return(x$Quants$DevBinData/sqrt(x$Quants$NumGoodBeadsBinData))})
  beadNum = sapply(idatlist,function(x) {
	# Compile and install htslib, bcftools, samtools 1.9 to home directory

	# Set up dirs where stuff will be installed
	mkdir -p ${HOME}/bin/htslib
	mkdir -p ${HOME}/bin/bcftools
	mkdir -p ${HOME}/bin/samtools

	# Make a dir to build
	mkdir -p ${HOME}/tmp
Tools	Description
flank	Create new intervals from the flanks of existing intervals.
slop	Adjust the size of intervals.
shift	Adjust the position of intervals.
subtract	Remove intervals based on overlaps b/w two files.
	## devtools::install_github("stephenturner/msigdf")
	library(msigdf)
	library(dplyr)
	library(clusterProfiler)

	c2 <- msigdf.human %>%
	filter(collection == "c2") %>% select(geneset, entrez) %>% as.data.frame

	data(geneList)
	de <- names(geneList)[1:100]
	# A quick function to save a PBM (http://en.wikipedia.org/wiki/Netpbm_format)
	# visualize a lot of missing data pretty quickly (outside of R).

	writeMissingPBM <- function(x, file) {
	dims <- dim(x)
	x[] <- as.integer(is.na(x))
	con <- file(file, open="wt")
	writeLines(sprintf("P1\n%d %d", ncol(x), nrow(x)), con)
	write.table(x, file=con, sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE)
	close(con)
	export SAMPLES="2484-AJ-0001 2484-AJ-0002 2484-AJ-0003"

	######################################
	# Make FASTQ
	######################################
	export OVHOME=/home/arq5x/cphg-home/cphg-quinlan/projects/ov-cell-lines
	export STEPNAME=ovc-fastq
	for sample in `echo $SAMPLES`
	do
	export QSUB="qsub -W group_list=cphg_arq5x -q arq5xlab -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu";
	require(reshape2)

	# data.table commit (1048)
	require(data.table)
	# Loading required package: data.table
	# data.table 1.8.11 For help type: help("data.table")

	set.seed(1)
	N <- 2e7 # size of DT
	idat2lumibatch <- function(filenames) {
	# filenames is a character vector of iDAT filenames
	require(illuminaio)
	require(lumi)
	idatlist = lapply(filenames,readIDAT)
	exprs = sapply(idatlist,function(x) {
	return(x$Quants$MeanBinData)})
	se.exprs = sapply(idatlist,function(x) {
	return(x$Quants$DevBinData/sqrt(x$Quants$NumGoodBeadsBinData))})
	beadNum = sapply(idatlist,function(x) {