Skip to content

Instantly share code, notes, and snippets.

# Field names for an annotated bed-tool record, in order
# (presumably used as column labels -- confirm at the call sites).
annotated_bedtool_header = [
    "chrom",
    "start",
    "stop",
    "name",
    "score",
    "strand",
    "annotation",
    "gene_id",
]

# Field names for a "full" record, in order
# (presumably used as column labels -- confirm at the call sites).
full_header = [
    "chrom",
    "start",
    "stop",
    "full_name",
    "ip_reads",
    "input_reads",
    "p_val",
    "chisq",
    "test_type",
    "enrichment",
    "log10_p_val",
    "log2_fold_change",
]
def get_full_from_annotated(fn):
    """Derive the ".full" file name that corresponds to an annotated file name.

    The last three dot-separated components of ``fn`` are discarded and the
    fixed suffix ``.full.compressed2.bed.full`` is appended to what remains.
    When ``fn`` has three or fewer components nothing remains, so the result
    is just the suffix.
    """
    pieces = fn.split(".")
    # Trim the trailing three components in place (all of them if fewer exist).
    del pieces[-3:]
    return "{0}{1}".format(".".join(pieces), ".full.compressed2.bed.full")
# Opening of an entropy calculation for a single row: the row's IP and input
# read counts are turned into fractions of the supplied totals.
#   row               -- object exposing `ip_reads` and `input_reads` attributes
#   total_ip_reads    -- denominator for the IP fraction
#   total_input_reads -- denominator for the input fraction
# NOTE(review): only these first statements are visible in this excerpt (no
# value is returned yet) and the body has lost its indentation -- confirm
# against the complete gist before relying on this function.
def calculate_entropy(row, total_ip_reads, total_input_reads):
# float() forces true division even when both operands are integers.
p_ip = float(row.ip_reads) / total_ip_reads
p_input = float(row.input_reads) / total_input_reads
# Lay out one subplot per `uID` group of `merged_data` on a grid that is
# `num_cols` panels wide, saved as "increase_in_enriched_regions.svg".
# NOTE(review): this excerpt ends inside the loop body; the plotting calls that
# follow `add_subplot` are not visible here.
total_datasets = len(merged_data.groupby(level=0))
num_cols = 4
# Floor division keeps the row count an integer on Python 3 as well as
# Python 2, and ties it to `num_cols` instead of a repeated literal 4.
# The "+ 1" leaves room for a final, partially filled row.
num_rows = (total_datasets // num_cols) + 1
count = 0
with dataviz.Figure(os.path.join(img_dir, "increase_in_enriched_regions.svg"), figsize=(4 * num_cols, 4 * num_rows)) as fig:
    for uID, df in merged_data.groupby(level='uID'):
        # Subplot positions are 1-based, so advance the counter first.
        count += 1
        ax = fig.add_subplot(num_rows, num_cols, count)
# Helper for assembling a PBS array-job script (the template below carries
# `#PBS` directives).
# NOTE(review): this excerpt stops inside the script template string, so the
# rest of the class is not visible; the same excerpt appears three times
# verbatim in this file.
class ArrayJob():
def __init__(self):
# Shell line that evaluates the entry of the `cmd` array selected by
# $PBS_ARRAYID.
self._epilogue = "eval ${cmd[$PBS_ARRAYID]}"
# Starts the script header text; `{0}` and `{3}` are placeholders, presumably
# filled via str.format from the arguments -- TODO confirm in the full source.
def _prologue(self, name, count, run_dir, ppn=1, walltime=8):
return """#!/bin/bash
#PBS -N {0}
#PBS -l nodes=1:ppn={3}
#PBS -o {0}.out
#PBS -e {0}.err
# Helper for assembling a PBS array-job script (the template below carries
# `#PBS` directives).
# NOTE(review): this excerpt stops inside the script template string, so the
# rest of the class is not visible; the same excerpt appears three times
# verbatim in this file.
class ArrayJob():
def __init__(self):
# Shell line that evaluates the entry of the `cmd` array selected by
# $PBS_ARRAYID.
self._epilogue = "eval ${cmd[$PBS_ARRAYID]}"
# Starts the script header text; `{0}` and `{3}` are placeholders, presumably
# filled via str.format from the arguments -- TODO confirm in the full source.
def _prologue(self, name, count, run_dir, ppn=1, walltime=8):
return """#!/bin/bash
#PBS -N {0}
#PBS -l nodes=1:ppn={3}
#PBS -o {0}.out
#PBS -e {0}.err
# Helper for assembling a PBS array-job script (the template below carries
# `#PBS` directives).
# NOTE(review): this excerpt stops inside the script template string, so the
# rest of the class is not visible; the same excerpt appears three times
# verbatim in this file.
class ArrayJob():
def __init__(self):
# Shell line that evaluates the entry of the `cmd` array selected by
# $PBS_ARRAYID.
self._epilogue = "eval ${cmd[$PBS_ARRAYID]}"
# Starts the script header text; `{0}` and `{3}` are placeholders, presumably
# filled via str.format from the arguments -- TODO confirm in the full source.
def _prologue(self, name, count, run_dir, ppn=1, walltime=8):
return """#!/bin/bash
#PBS -N {0}
#PBS -l nodes=1:ppn={3}
#PBS -o {0}.out
#PBS -e {0}.err
@gpratt
gpratt / plot_pdf.py
Created October 29, 2015 16:45
Plots a PDF
# Sets up the bin edges for a binned distribution of `data` (the gist is
# titled "Plots a PDF").
#   data -- values convertible to a float array
#   bins -- number of equal-width intervals between the padded min and max
# NOTE(review): the excerpt ends at the loop header, so what is accumulated per
# bin and what is returned cannot be seen here.
def pdf(data, bins=50):
data = np.array(data, dtype=float)
# Widen the range by a small epsilon on each side of the observed extremes.
minimum = np.min(data) - .000001
maximum = np.max(data) + .000001
# bins + 1 evenly spaced edges; xs drops the last edge (left edges) and ys
# drops the first (right edges), so zip(xs, ys) walks consecutive intervals.
pos = np.linspace(minimum, maximum, bins + 1)
xs = np.linspace(minimum, maximum, bins + 1)[:-1]
ys = np.linspace(minimum, maximum, bins + 1)[1:]
# np.ndarray allocates without initialising its contents; only element 0 is
# set explicitly in the visible code.
pdf = np.ndarray(shape=(bins + 1, 1))
pdf[0] = 0
for i, (x, y) in enumerate(zip(xs, ys)):
import pandas as pd
# Load reference tables from delimited text files; each is indexed by one of
# its own columns (index_col).
mouse_gene_id_names = pd.read_table("/nas3/gpratt/Dropbox/TAF15/Data/mouse_integration/mouse_gene_id_to_names.txt", index_col=0)
human_mouse_genes = pd.read_table("/nas3/gpratt/projects/taf15/mouse_human_genes.txt", index_col=2)
# For read_excel the second positional argument selects the worksheet by name.
known_rbps = pd.read_excel("nrg3813-s3.xls", "RBP table", index_col=2)
known_tfs = pd.read_excel("nrg3813-s4.xls", "human TFs", index_col=1)
# Mirror each table's index into a regular `gene_id` column.
# NOTE(review): presumably the index holds gene identifiers -- confirm against
# the spreadsheets.
known_tfs['gene_id'] = known_tfs.index
known_rbps['gene_id'] = known_rbps.index
@gpratt
gpratt / flip_hex_value.py
Last active August 29, 2015 14:24
flip_hex_value
def flip_hex_value(hex_value):
    """Return the bitwise complement of a hexadecimal string.

    Every bit of every hex digit is inverted, including leading zero bits, so
    ``"#262626"`` becomes ``"#d9d9d9"`` and ``"#000000"`` becomes
    ``"#ffffff"``.

    Args:
        hex_value: Hex digits, optionally preceded by ``#``.

    Returns:
        A lowercase hex string with the same number of digits as the input,
        carrying a single leading ``#`` when the input started with one.

    Raises:
        ValueError: If nothing parseable as base-16 remains after removing the
            leading ``#`` characters (for example an empty string).
    """
    prefix = "#" if hex_value.startswith("#") else ""
    digits = hex_value.lstrip("#")
    value = int(digits, 16)
    # One nibble per hex digit: XOR against a full-width mask so leading zero
    # bits are flipped as well (going through bin() silently dropped them).
    mask = (1 << (4 * len(digits))) - 1
    return prefix + "{0:0>{width}x}".format(value ^ mask, width=len(digits))


flip_hex_value("#262626")
@gpratt
gpratt / count_to_rpkm_function.py
Created April 24, 2015 20:53
count_to_rpkm_function
def counts_to_rpkm(featureCountsTable):
    """Convert a table of read counts to RPKM values.

    Args:
        featureCountsTable: DataFrame with a ``Length`` column; every column
            from position 5 onward is treated as one sample's read counts.

    Returns:
        DataFrame with the same rows and the count columns only, where each
        value is ``count * 10**9 / (column total) / (row Length)``.
    """
    # Positional slice: `.ix` was removed from pandas, `.iloc` is the
    # position-based equivalent for this integer slice.
    counts = featureCountsTable.iloc[:, 5:]
    lengths = featureCountsTable['Length']
    # Per-sample totals, aligned to the count columns in the division below.
    mapped_reads = counts.sum()
    return (counts * pow(10, 9)).div(mapped_reads, axis=1).div(lengths, axis=0)
@gpratt
gpratt / go_enrichment_plotter.py
Created February 20, 2015 21:36
go_enrichment_plotter
def plot_go_enrichment(df, filter_value=None, **kwargs):
new_index = []
for index, description in izip(df.index, df['GO Term Description']):
new_index.append(list(index[:-1]) + [description])
df.index = pd.MultiIndex.from_tuples(new_index)
go_matrix = df['Bonferroni-corrected Hypergeometric p-Value'].apply(lambda x: -1 * np.log10(x))
go_matrix = go_matrix.unstack(range(len(go_matrix.index.levels) - 1))
go_matrix = go_matrix.fillna(0)