Last active
October 21, 2015 07:56
-
-
Save afrendeiro/5d2191290677e3bd79dd to your computer and use it in GitHub Desktop.
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from argparse import ArgumentParser | |
import pyBigWig | |
import numpy as np | |
import multiprocessing | |
import parmap | |
""" | |
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files. | |
""" | |
def add_args(parser): | |
parser.add_argument(dest="input_file", | |
help="Tab-delimited file with the path of a bigWig file per line.") | |
parser.add_argument("-o", "--output-prefix", dest="output_prefix", default="quantilized", | |
help="Prefix of output files. Default=quantilized_{quantile}.bigWig") | |
parser.add_argument("-m", "--no-mean", dest="mean", action="store_false", default=True, | |
help="Should we not produce data for the mean signal? Default=False") | |
parser.add_argument("-q", "--quantiles", dest="quantiles", nargs='+', type=int, default=[5, 25, 75, 95], | |
help="Which quantiles should we produce data for? Default=5,25,75,95") | |
return parser | |
def get_quantilized_signal(chrom, bigwigs, quantiles, mean_signal=True): | |
# get handles of input bigwigs | |
bigwig_handles = [pyBigWig.open(bigwig) for bigwig in bigwigs] | |
# get handles for output bedgraphs | |
if mean_signal: | |
bedgraph_handle_mean = open("%s_mean.bedgraph" % chrom, "w") | |
bedgraph_handles_quantiles = [open("%s_%i_quantile.bedgraph" % (chrom, quantile), "w") for quantile in quantiles] | |
# get size of this chromosome | |
size = bigwig_handles[0].chroms()[chrom] | |
i = 0 | |
while i < size: | |
# retrieve values from bigwigs | |
values = [handle.values(chrom, i, i + 1)[0] for handle in bigwig_handles] | |
if mean_signal: | |
# compute mean | |
mean = np.nan_to_num(np.mean(values)) | |
# write to disk | |
bedgraph_handle_mean.write("\t".join([chrom, str(i), str(i + 1), str(mean)]) + "\n") | |
for j, quantile in enumerate(quantiles): | |
# compute quantiles | |
quantilized = np.nan_to_num(np.percentile(values, quantile)) | |
# write to disk | |
bedgraph_handles_quantiles[j].write("\t".join([chrom, str(i), str(i + 1), str(quantilized)]) + "\n") | |
# increment | |
i += 1 | |
# close input file handles | |
[handle.close() for handle in bigwig_handles] | |
# close output file handles | |
if mean_signal: | |
bedgraph_handle_mean.close() | |
[handle.close() for handle in bedgraph_handles_quantiles] | |
def main(): | |
# Parse command-line arguments | |
parser = ArgumentParser( | |
prog="quantilize_bigwigs.py", | |
description="Produce bigWig files with the quantiles/mean of signal across a number of bigWig files." | |
) | |
parser = add_args(parser) | |
args = parser.parse_args() | |
# Parse input file | |
with open(args.input_file, "r") as handle: | |
inputs = handle.readlines() | |
inputs = [line.strip() for line in inputs] | |
# get bigwig chromosomes of the first sample (assuming they'd be equal for all samples) | |
bw = pyBigWig.open(inputs[0].bigwig) | |
chromosomes = bw.chroms().keys() | |
bw.close() | |
# filter out "random" chromosomes | |
chromosomes = [chrom for chrom in chromosomes if len(chrom) < 6] | |
# filter out mitochondrial | |
chromosomes.pop(chromosomes.index("chrM")) | |
if len(args.quantiles) > 0 or args.mean: | |
# get signal for each chromosome in parallel | |
parmap.map(get_quantilized_signal, chromosomes, inputs, args.quantiles, args.mean) | |
# concatenate bedgraph files in order | |
for quantile in args.quantile: | |
output_bedgraph = args.output_prefix + "_%i" % quantile + ".bedgraph" | |
# convert to bigwig | |
for quantile in args.quantile: | |
output_bigwig = args.output_prefix + "_%i" % quantile + ".bigWig" | |
else: | |
# no data to produce, end | |
print("Nothing to do. Select quantiles or turn -no--mean flag off.") | |
if __name__ == '__main__': | |
try: | |
sys.exit(main()) | |
except KeyboardInterrupt: | |
print("Program canceled by user!") | |
sys.exit(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
numpy | |
parmap | |
-e git://github.com/dpryan79/pyBigWig.git#egg=pyBigWig |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment