This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Column labels, in order, for two tabular layouts used by the functions in this
# snippet. NOTE(review): judging by the names, the first describes annotated
# BED-style records and the second a per-region enrichment table — confirm
# against the code that applies them.
annotated_bedtool_header = [
    "chrom", "start", "stop", "name", "score", "strand", "annotation", "gene_id",
]

full_header = [
    "chrom", "start", "stop", "full_name", "ip_reads", "input_reads",
    "p_val", "chisq", "test_type",
    "enrichment", "log10_p_val", "log2_fold_change",
]
def get_full_from_annotated(fn):
    """Derive the ".full.compressed2.bed.full" file name from *fn*.

    The last three dot-separated components of *fn* are dropped and the
    fixed suffix ``.full.compressed2.bed.full`` is appended to what remains.

    Args:
        fn: File name (or path) as a string.

    Returns:
        The rewritten name. When *fn* has three or fewer dot-separated
        components nothing remains of the stem, so the result is just the
        suffix itself.
    """
    components = fn.split(".")
    # Dropping the last three components strips the trailing extensions.
    stripped_fn = ".".join(components[:-3])
    return stripped_fn + ".full.compressed2.bed.full"
def calculate_entropy(row, total_ip_reads, total_input_reads): | |
p_ip = float(row.ip_reads) / total_ip_reads | |
p_input = float(row.input_reads) / total_input_reads |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Size a fixed-width subplot grid: one panel per group of the first index
# level of merged_data.
total_datasets = len(merged_data.groupby(level=0))
num_cols = 4
# Floor division keeps num_rows an int on Python 3 as well as Python 2; the
# subplot grid dimensions passed to add_subplot must be integers. The "+ 1"
# leaves room for a partially filled last row (and yields one spare row when
# total_datasets is an exact multiple of num_cols, as before).
num_rows = (total_datasets // num_cols) + 1
# Running 1-based subplot position, incremented once per plotted dataset.
count = 0
with dataviz.Figure(os.path.join(img_dir, "increase_in_enriched_regions.svg"), figsize=(4* num_cols, 4*num_rows)) as fig: | |
for uID, df in merged_data.groupby(level='uID'): | |
count += 1 | |
ax = fig.add_subplot(num_rows, num_cols, count) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ArrayJob(): | |
def __init__(self): | |
self._epilogue = "eval ${cmd[$PBS_ARRAYID]}" | |
def _prologue(self, name, count, run_dir, ppn=1, walltime=8): | |
return """#!/bin/bash | |
#PBS -N {0} | |
#PBS -l nodes=1:ppn={3} | |
#PBS -o {0}.out | |
#PBS -e {0}.err |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ArrayJob(): | |
def __init__(self): | |
self._epilogue = "eval ${cmd[$PBS_ARRAYID]}" | |
def _prologue(self, name, count, run_dir, ppn=1, walltime=8): | |
return """#!/bin/bash | |
#PBS -N {0} | |
#PBS -l nodes=1:ppn={3} | |
#PBS -o {0}.out | |
#PBS -e {0}.err |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ArrayJob(): | |
def __init__(self): | |
self._epilogue = "eval ${cmd[$PBS_ARRAYID]}" | |
def _prologue(self, name, count, run_dir, ppn=1, walltime=8): | |
return """#!/bin/bash | |
#PBS -N {0} | |
#PBS -l nodes=1:ppn={3} | |
#PBS -o {0}.out | |
#PBS -e {0}.err |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pdf(data, bins=50): | |
data = np.array(data, dtype=float) | |
minimum = np.min(data) - .000001 | |
maximum = np.max(data) + .000001 | |
pos = np.linspace(minimum, maximum, bins + 1) | |
xs = np.linspace(minimum, maximum, bins + 1)[:-1] | |
ys = np.linspace(minimum, maximum, bins + 1)[1:] | |
pdf = np.ndarray(shape=(bins + 1, 1)) | |
pdf[0] = 0 | |
for i, (x, y) in enumerate(zip(xs, ys)): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# NOTE(review): the two tables below are read from absolute, machine-specific
# paths; the two workbooks are resolved relative to the working directory.

# Delimited text tables; the first uses its first column as the index, the
# second uses its third column.
mouse_gene_id_names = pd.read_table(
    "/nas3/gpratt/Dropbox/TAF15/Data/mouse_integration/mouse_gene_id_to_names.txt",
    index_col=0,
)
human_mouse_genes = pd.read_table(
    "/nas3/gpratt/projects/taf15/mouse_human_genes.txt",
    index_col=2,
)

# Excel sheets "RBP table" and "human TFs", indexed by their third and second
# columns respectively.
known_rbps = pd.read_excel("nrg3813-s3.xls", "RBP table", index_col=2)
known_tfs = pd.read_excel("nrg3813-s4.xls", "human TFs", index_col=1)

# Expose each table's index as a regular 'gene_id' column as well.
known_tfs['gene_id'] = known_tfs.index
known_rbps['gene_id'] = known_rbps.index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def flip_hex_value(hex_value):
    """Return the bitwise complement of a hexadecimal string.

    Every bit of the value is inverted across the full width of the input
    (four bits per hex digit), so leading zero digits are flipped too:
    ``"#262626"`` becomes ``"#d9d9d9"`` and ``"000000"`` becomes ``"ffffff"``.

    Args:
        hex_value: Hex digits, optionally preceded by ``"#"``.

    Returns:
        Lower-case hex string with the same number of digits as the input,
        prefixed with ``"#"`` when the input started with one.

    Raises:
        ValueError: If the digits cannot be parsed as base-16 (including an
            empty string).
    """
    prefix = "#" if hex_value.startswith("#") else ""
    digits = hex_value.lstrip("#")
    # XOR against an all-ones mask of the full digit width. Inverting the
    # output of bin() instead would skip the leading zero bits that bin()
    # omits, producing a wrong complement for values such as "262626".
    mask = (1 << (4 * len(digits))) - 1
    flipped = int(digits, 16) ^ mask
    return prefix + "{0:0>{width}x}".format(flipped, width=len(digits))


# Example invocation kept from the original snippet.
flip_hex_value("#262626")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def counts_to_rpkm(featureCountsTable):
    """Convert a table of read counts to RPKM.

    Each count is scaled as ``count * 1e9 / (column total * Length)``.

    Args:
        featureCountsTable: DataFrame whose columns from position 5 onward
            hold per-sample counts and which has a ``'Length'`` column.
            NOTE(review): presumably featureCounts output with the gene id
            as the index, leaving five annotation columns before the
            counts — confirm against the caller.

    Returns:
        DataFrame with the same index and the count columns only, holding
        RPKM values.
    """
    # Positional slice: everything from the sixth column on is a sample.
    # (.iloc replaces the .ix indexer, which was removed in pandas 1.0.)
    counts = featureCountsTable.iloc[:, 5:]
    lengths = featureCountsTable['Length']
    # Column-wise totals, one per sample.
    mapped_reads = counts.sum()
    # Scale with a float so large integer counts cannot overflow the column
    # dtype before the divisions are applied.
    scaled = counts * 1e9
    per_million_reads = scaled.div(mapped_reads, axis=1)
    return per_million_reads.div(lengths, axis=0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_go_enrichment(df, filter_value=None, **kwargs): | |
new_index = [] | |
for index, description in izip(df.index, df['GO Term Description']): | |
new_index.append(list(index[:-1]) + [description]) | |
df.index = pd.MultiIndex.from_tuples(new_index) | |
go_matrix = df['Bonferroni-corrected Hypergeometric p-Value'].apply(lambda x: -1 * np.log10(x)) | |
go_matrix = go_matrix.unstack(range(len(go_matrix.index.levels) - 1)) | |
go_matrix = go_matrix.fillna(0) |
Newer | Older