Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
import sys
from argparse import ArgumentParser
import pyBigWig
import numpy as np
import multiprocessing
import parmap
"""
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
"""
def add_args(parser):
parser.add_argument(dest="input_file",
help="Tab-delimited file with the path of a bigWig file per line.")
parser.add_argument("-o", "--output-prefix", dest="output_prefix", default="quantilized",
help="Prefix of output files. Default=quantilized_{quantile}.bigWig")
parser.add_argument("-m", "--no-mean", dest="mean", action="store_false", default=True,
help="Should we not produce data for the mean signal? Default=False")
parser.add_argument("-q", "--quantiles", dest="quantiles", nargs='+', type=int, default=[5, 25, 75, 95],
help="Which quantiles should we produce data for? Default=5,25,75,95")
return parser
def get_quantilized_signal(chrom, bigwigs, quantiles, mean_signal=True):
# get handles of input bigwigs
bigwig_handles = [pyBigWig.open(bigwig) for bigwig in bigwigs]
# get handles for output bedgraphs
if mean_signal:
bedgraph_handle_mean = open("%s_mean.bedgraph" % chrom, "w")
bedgraph_handles_quantiles = [open("%s_%i_quantile.bedgraph" % (chrom, quantile), "w") for quantile in quantiles]
# get size of this chromosome
size = bigwig_handles[0].chroms()[chrom]
i = 0
while i < size:
# retrieve values from bigwigs
values = [handle.values(chrom, i, i + 1)[0] for handle in bigwig_handles]
if mean_signal:
# compute mean
mean = np.nan_to_num(np.mean(values))
# write to disk
bedgraph_handle_mean.write("\t".join([chrom, str(i), str(i + 1), str(mean)]) + "\n")
for j, quantile in enumerate(quantiles):
# compute quantiles
quantilized = np.nan_to_num(np.percentile(values, quantile))
# write to disk
bedgraph_handles_quantiles[j].write("\t".join([chrom, str(i), str(i + 1), str(quantilized)]) + "\n")
# increment
i += 1
# close input file handles
[handle.close() for handle in bigwig_handles]
# close output file handles
if mean_signal:
bedgraph_handle_mean.close()
[handle.close() for handle in bedgraph_handles_quantiles]
def main():
# Parse command-line arguments
parser = ArgumentParser(
prog="quantilize_bigwigs.py",
description="Produce bigWig files with the quantiles/mean of signal across a number of bigWig files."
)
parser = add_args(parser)
args = parser.parse_args()
# Parse input file
with open(args.input_file, "r") as handle:
inputs = handle.readlines()
inputs = [line.strip() for line in inputs]
# get bigwig chromosomes of the first sample (assuming they'd be equal for all samples)
bw = pyBigWig.open(inputs[0].bigwig)
chromosomes = bw.chroms().keys()
bw.close()
# filter out "random" chromosomes
chromosomes = [chrom for chrom in chromosomes if len(chrom) < 6]
# filter out mitochondrial
chromosomes.pop(chromosomes.index("chrM"))
if len(args.quantiles) > 0 or args.mean:
# get signal for each chromosome in parallel
parmap.map(get_quantilized_signal, chromosomes, inputs, args.quantiles, args.mean)
# concatenate bedgraph files in order
for quantile in args.quantile:
output_bedgraph = args.output_prefix + "_%i" % quantile + ".bedgraph"
# convert to bigwig
for quantile in args.quantile:
output_bigwig = args.output_prefix + "_%i" % quantile + ".bigWig"
else:
# no data to produce, end
print("Nothing to do. Select quantiles or turn -no--mean flag off.")
if __name__ == '__main__':
try:
sys.exit(main())
except KeyboardInterrupt:
print("Program canceled by user!")
sys.exit(1)
numpy
parmap
-e git://github.com/dpryan79/pyBigWig.git#egg=pyBigWig
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment