Michael Silverstein michaelsilverstein

## run_python_batchjob.sh
#!/bin/bash -l

# USAGE: bash run_python_batchjob.qsub [PYTHON FILE]

# Make job script
if [ -z "$1" ]
then
        echo "ERROR: MUST PROVIDE INPUT PATH"
        echo "USAGE: bash run_python_batchjob.qsub [PYTHON FILE]"
        exit

## stackedbarplot.py
def stackedbarplot(data, stack_order=None, palette=None, **barplot_kws):
    """
    Create a stacked barplot
    Inputs:
    | data <pd.DataFrame>: A wideform dataframe where the index is the variable to stack, the columns are different samples (x-axis), and the cells the counts (y-axis)
    | stack_order <array-like>: The order for bars to be stacked (Default: given order)
    | palette <array-like>: The colors to use for each value of `stack_order` (Default: husl)
    | barplot_kws: Arguments to pass to sns.barplot()

    Author: Michael Silverstein

## deinterleave_dir.sh
#!/bin/bash

# Uses this incredible script, deinterleave_fastq.sh, and associated comments: https://gist.github.com/nathanhaigh/3521724

# Usage: deinterleave_dir.sh indir outdir

# Deinterleave entire directory of compressed .fastq.gz files from `indir` and re-compresses mates with
# _R1.fastq.gz and _R2.fastq.gz suffixes to `outdir`

# Download deinterleave_fastq.sh

## zebra.py
import matplotlib.pyplot as plt

def zebra(ax=None, orient='v', color='gray', alpha=.3, zorder=0, **kwargs):
    """
    Stripe figure - Color every other x position with `fill_between()`
    If no ax provided, use current ax
    Input:
    | ax: Axes handle
    | orient: 'v' for vertical stripes or 'h' for horizontal stripes
    | Any other argument accepted by `ax.fill_between{x}()`

## permutation_test.py
import numpy as np
def permutation_test(a, b, n=1000):
    """
    Two-sided permutation test
    Input:
    | {a, b}: 1xm arrays of data
    | n: Number of permutations
    Output:
    | p: Two-sided pvalue: mean of |mean(a) - mean(b)| > |mean(perm(a)) - mean(perm((b)))|_i for i permutations


## explode_list.py
import pandas as pd
# Example records: a label paired with a list of descriptors (or None when it has none)
data = [
    ['T1', ['Round', 'Red']],
    ['T2', ['Round', 'Green']],
    ['T3', None],
    ['T4', ['Square']],
]
# Build the frame first, then promote the `thing` column to the index
df = pd.DataFrame(data, columns=['thing', 'desc'])
df = df.set_index('thing')
# "Explode" list: turn each list into a Series so every list position gets its own column
exploded = df['desc'].apply(pd.Series)

## Vectorized_residuals.m
%% Search parameter space
% Same candidate values on both axes; meshgrid(v) is equivalent to meshgrid(v,v)
[xx,yy] = meshgrid(-5:.1:9);

% Evaluate the residual for every candidate pair in one matrix product
% Flatten both grids into columns so that each row of X is one set of parameters
X = [xx(:), yy(:)];
% Each column of A*X' corresponds to one candidate pair (A and b come from the workspace)
res = b - A*X';
r = [];
% Calculate norm^2 of residuals from each guess

## b_vector.m

b =

1.0e+03 *

    5.6000    2.5200    2.2400    1.9600    1.6800    1.4000    1.1200
    2.8000         0         0         0         0         0         0
    2.8000         0         0         0         0         0         0
    2.8000         0         0         0         0         0         0
    2.8000         0         0         0         0         0         0

## pairplot.py
def pairplot(df,strain,hue=None,view='show'):
    """
    Draw a seaborn pairplot of the rows of `df` that belong to one strain
    Inputs:
    | df <pd.DataFrame>: Must have `concentration`, `condition` and `strain` columns. NOTE: `condition` is overwritten in place when its first value is not a string
    | strain: Value of `df.strain` selecting the rows to plot; also used as the title
    | hue: Passed to sns.pairplot() to color the plot, e.g. 'condition' (Default: None, no coloring)
    | view: Not used by the code visible here
    Relies on `sns`, `plt` and a `code` mapping (e.g. {0:'Wm',1:'Wc',2:'Wmc'}) provided by the enclosing module
    """
    #Pairplot http://seaborn.pydata.org/generated/seaborn.pairplot.html
    # Parenthesized single-argument print behaves the same on Python 2 and Python 3
    print('Generating pairplot for %s...'%strain)
    cols = list(df.columns)
    cols.remove('concentration') #No need to clutter
    # Only translate condition codes to names when the column does not hold strings yet
    if not isinstance(df.condition.iloc[0], str): #Apply condition name
        df.condition = df.condition.apply(lambda x: code[x])# code={0:'Wm',1:'Wc',2:'Wmc'}
    sns.pairplot(df[cols][df.strain==strain],hue=hue)
    # NOTE(review): plt.title() acts on the current axes, which may be a single subplot of the grid - confirm
    plt.title(strain)
	#!/bin/bash -l

	# USAGE: bash run_python_batchjob.qsub [PYTHON FILE]

	# Make job script
	if [ -z "$1" ]
	then
	echo "ERROR: MUST PROVIDE INPUT PATH"
	echo "USAGE: bash run_python_batchjob.qsub [PYTHON FILE]"
	exit
	def stackedbarplot(data, stack_order=None, palette=None, **barplot_kws):
	"""
	Create a stacked barplot
	Inputs:
	\| data <pd.DataFrame>: A wideform dataframe where the index is the variable to stack, the columns are different samples (x-axis), and the cells the counts (y-axis)
	\| stack_order <array-like>: The order for bars to be stacked (Default: given order)
	\| palette <array-like>: The colors to use for each value of `stack_order` (Default: husl)
	\| barplot_kws: Arguments to pass to sns.barplot()

	Author: Michael Silverstein
	#!/bin/bash

	# Uses this incredible script, deinterleave_fastq.sh, and associated comments: https://gist.github.com/nathanhaigh/3521724

	# Usage: deinterleave_dir.sh indir outdir

	# Deinterleave entire directory of compressed .fastq.gz files from `indir` and re-compresses mates with
	# _R1.fastq.gz and _R2.fastq.gz suffixes to `outdir`

	# Download deinterleave_fastq.sh
	import matplotlib.pyplot as plt

	def zebra(ax=None, orient='v', color='gray', alpha=.3, zorder=0, **kwargs):
	"""
	Stripe figure - Color every other x position with `fill_between()`
	If no ax provided, use current ax
	Input:
	\| ax: Axes handle
	\| orient: 'v' for vertical stripes or 'h' for horizontal stripes
	\| Any other argument accepted by `ax.fill_between{x}()`
	import numpy as np
	def permutation_test(a, b, n=1000):
	"""
	Two-sided permutation test
	Input:
	\| {a, b}: 1xm arrays of data
	\| n: Number of permutations
	Output:
	\| p: Two-sided pvalue: mean of \|mean(a) - mean(b)\| > \|mean(perm(a)) - mean(perm((b)))\|_i for i permutations
	import pandas as pd
	# Data
	data = [['T1', ['Round', 'Red']],
	['T2', ['Round', 'Green']],
	['T3', None],
	['T4', ['Square']]]
	# Make dataframe and set index
	df = pd.DataFrame(data, columns=['thing', 'desc']).set_index('thing')
	# "Explode" list
	exploded = df.desc.apply(pd.Series)
	%%Search parameter space
	[xx,yy] = meshgrid([-5:.1:9],[-5:.1:9]);

	% Calculate residual for every pair of parameters
	%Setup matrix where each row is a single set of parameters
	X = [xx(:),yy(:)];
	%Calculate residual of each pair of guesses of parameters
	res = b-A*X';
	r = [];
	%Calculate norm^2 of residuals from each guess

	b =

	1.0e+03 *

	5.6000 2.5200 2.2400 1.9600 1.6800 1.4000 1.1200
	2.8000 0 0 0 0 0 0
	2.8000 0 0 0 0 0 0
	2.8000 0 0 0 0 0 0
	2.8000 0 0 0 0 0 0
	def pairplot(df,strain,hue=None,view='show'):
	#Pairplot http://seaborn.pydata.org/generated/seaborn.pairplot.html
	#hue: color plot by 'condition' (on). Default None (off)
	print 'Generating pairplot for %s...'%strain
	cols = list(df.columns)
	cols.remove('concentration') #No need to clutter
	if type(df.condition.iloc[0]) != str: #Apply condition name
	df.condition = df.condition.apply(lambda x: code[x])# code={0:'Wm',1:'Wc',2:'Wmc'}
	sns.pairplot(df[cols][df.strain==strain],hue=hue)
	plt.title(strain)