opplatek/deeptools-heatmap-fast.sh

## deeptools-heatmap-fast.sh
#!/bin/bash
#
# Speed up deepTools computeMatrix by splitting the reference BED into smaller
# chunks, computing one matrix per chunk and then merging the per-chunk
# matrices back together.
#
# Reads ref.bed and input.bw from the current directory; writes ref.matrix.gz
# and ref.png there. Intermediate files are named ref.chunks<rnd>* and
# tmp.<rnd>, where <rnd> is a random number picked per run.
#

# Stop at the first failing step so a broken chunk or a failed merge is never
# silently plotted; unset variables and failing pipeline stages are errors too.
set -euo pipefail
# An unmatched glob expands to nothing instead of the literal pattern.
shopt -s nullglob

positions=5000 # number of BED lines per chunk
threads=12     # passed to computeMatrix --numberOfProcessors

rnd=$RANDOM
prefix="ref.chunks${rnd}"

# Safety net: remove the intermediate files of this run on every exit path.
cleanup() {
  rm -f -- "${prefix}"* "tmp.${rnd}"
}
trap cleanup EXIT

# split reference into chunks by number of lines
split -l "$positions" ref.bed "$prefix"

# List the chunks once, before any *.gz output sharing the prefix exists.
chunks=("${prefix}"*)
if ((${#chunks[@]} == 0)); then
  printf 'error: ref.bed produced no chunks (is it empty?)\n' >&2
  exit 1
fi

for chunk in "${chunks[@]}"; do
  # Overwrite the name column (4) of the BED with the part of the chunk file
  # name after its last dot, to avoid problems with deepTools naming that
  # might happen if the reference position names are not unique.
  # The rewritten chunk always has six columns; columns 5 and 6 come out
  # empty when the input line has fewer fields.
  name=${chunk##*/}
  name=${name##*.}

  awk -v name="$name" 'BEGIN {FS = "\t"; OFS = "\t"} {print $1,$2,$3,name,$5,$6}' \
    "$chunk" > "tmp.${rnd}"
  mv -- "tmp.${rnd}" "$chunk"
done

# calculate matrix for each chunk
for chunk in "${chunks[@]}"; do
  computeMatrix reference-point \
    --referencePoint TSS \
    -R "$chunk" \
    -S input.bw \
    -b 500 -a 500 \
    --skipZeros \
    --missingDataAsZero \
    --binSize 10 \
    --averageTypeBins median \
    --numberOfProcessors "$threads" \
    --outFileName "${chunk}.gz"
done

# merge the chunks back to one file
matrices=("${prefix}"*.gz)
if ((${#matrices[@]} == 0)); then
  printf 'error: computeMatrix produced no per-chunk matrices\n' >&2
  exit 1
fi
computeMatrixOperations rbind -m "${matrices[@]}" -o ref.matrix.gz
# Only reached when the merge succeeded (set -e aborts otherwise).
rm -- "${matrices[@]}"

# make heatmaps
plotHeatmap \
  -m ref.matrix.gz \
  --sortUsing mean \
  --averageTypeSummaryPlot mean \
  --missingDataColor "#440154" \
  --colorMap viridis \
  --zMax 100 \
  --linesAtTickMarks \
  --refPointLabel "TSS" \
  --heatmapHeight 20 \
  --heatmapWidth 10 \
  --dpi 300 \
  --outFileName ref.png

# remove the BED chunks
rm -- "${chunks[@]}"
	#!/bin/bash
	#
	# Speed up deepTools computeMatrix by splitting the reference BED into smaller
	# chunks, computing one matrix per chunk and then merging the per-chunk
	# matrices back together.
	#
	# Reads ref.bed and input.bw from the current directory; writes ref.matrix.gz
	# and ref.png there. Intermediate files are named ref.chunks<rnd>* and
	# tmp.<rnd>, where <rnd> is a random number picked per run.
	#

	# Stop at the first failing step so a broken chunk or a failed merge is never
	# silently plotted; unset variables and failing pipeline stages are errors too.
	set -euo pipefail
	# An unmatched glob expands to nothing instead of the literal pattern.
	shopt -s nullglob

	positions=5000 # number of BED lines per chunk
	threads=12     # passed to computeMatrix --numberOfProcessors

	rnd=$RANDOM
	prefix="ref.chunks${rnd}"

	# Safety net: remove the intermediate files of this run on every exit path.
	cleanup() {
		rm -f -- "${prefix}"* "tmp.${rnd}"
	}
	trap cleanup EXIT

	# split reference into chunks by number of lines
	split -l "$positions" ref.bed "$prefix"

	# List the chunks once, before any *.gz output sharing the prefix exists.
	chunks=("${prefix}"*)
	if ((${#chunks[@]} == 0)); then
		printf 'error: ref.bed produced no chunks (is it empty?)\n' >&2
		exit 1
	fi

	for chunk in "${chunks[@]}"; do
		# Overwrite the name column (4) of the BED with the part of the chunk file
		# name after its last dot, to avoid problems with deepTools naming that
		# might happen if the reference position names are not unique.
		# The rewritten chunk always has six columns; columns 5 and 6 come out
		# empty when the input line has fewer fields.
		name=${chunk##*/}
		name=${name##*.}

		# awk reads the chunk directly; the result replaces the chunk in place.
		awk -v name="$name" 'BEGIN {FS = "\t"; OFS = "\t"} {print $1,$2,$3,name,$5,$6}' \
			"$chunk" > "tmp.${rnd}"
		mv -- "tmp.${rnd}" "$chunk"
	done

	# calculate matrix for each chunk
	for chunk in "${chunks[@]}"; do
		computeMatrix reference-point \
			--referencePoint TSS \
			-R "$chunk" \
			-S input.bw \
			-b 500 -a 500 \
			--skipZeros \
			--missingDataAsZero \
			--binSize 10 \
			--averageTypeBins median \
			--numberOfProcessors "$threads" \
			--outFileName "${chunk}.gz"
	done

	# merge the chunks back to one file; the glob picks up every per-chunk
	# matrix written by the loop above (<chunk>.gz)
	matrices=("${prefix}"*.gz)
	if ((${#matrices[@]} == 0)); then
		printf 'error: computeMatrix produced no per-chunk matrices\n' >&2
		exit 1
	fi
	computeMatrixOperations rbind -m "${matrices[@]}" -o ref.matrix.gz
	# Only reached when the merge succeeded (set -e aborts otherwise).
	rm -- "${matrices[@]}"

	# make heatmaps
	plotHeatmap \
		-m ref.matrix.gz \
		--sortUsing mean \
		--averageTypeSummaryPlot mean \
		--missingDataColor "#440154" \
		--colorMap viridis \
		--zMax 100 \
		--linesAtTickMarks \
		--refPointLabel "TSS" \
		--heatmapHeight 20 \
		--heatmapWidth 10 \
		--dpi 300 \
		--outFileName ref.png

	# remove the BED chunks
	rm -- "${chunks[@]}"