Skip to content

Instantly share code, notes, and snippets.

@odinokov
Last active April 5, 2023 03:32
Show Gist options
  • Save odinokov/2612dc6131b79c8686d94989ae460719 to your computer and use it in GitHub Desktop.
Save odinokov/2612dc6131b79c8686d94989ae460719 to your computer and use it in GitHub Desktop.
counts size of cfDNA fragments in a specific region
#!/bin/bash
set -euo pipefail
# Define software dependencies
declare -ra deps=("samtools" "bedtools")
# Check that required software dependencies are installed
for dep in "${deps[@]}"
do
command -v "$dep" >/dev/null 2>&1 || { echo >&2 "Error: $dep is required but not installed. Aborting."; exit 1; }
done
cfdna_size_count() {
local FILE="$1"
local GENOMIC_REGION="$2"
# Check if the input file exists and is a BAM file
if ! [[ -f "${FILE}" && "${FILE}" =~ \.bam$ ]]; then
echo "Error: Input file '${FILE}' not found or is not a BAM file"
return 1
fi
# Check if GENOMIC_REGION has a valid format as chr:start-end
if ! [[ $GENOMIC_REGION =~ ^[[:alnum:]]+:[[:digit:]]+-[[:digit:]]+$ ]]; then
echo "Error: Invalid genomic region format. Please provide in the format as chr:start-end"
return 1
fi
GENOMIC_REGION_NAME=$(echo $GENOMIC_REGION | sed 's/:/_/; s/-/_/')
# Run the samtools command and pipe to bedtools bamtobed and awk
samtools view -b -h -q 5 -f 3 -F 3852 -G 48 --incl-flags 48 "${FILE}" "$GENOMIC_REGION" \
| bedtools bamtobed -bedpe -mate1 -i /dev/stdin 2>/dev/null \
| awk -F'\t' -v OFS="\t" '{if ($1!=$4) next; if ($9=="+") {s=$2;e=$6} else {s=$5;e=$3} if (e>s) print $1,s,e,$7,$8,$9}' \
| awk '{dups[$3-$2]++} END{for (num in dups) {print num,dups[num]}}' \
| sort -V \
| tee "${FILE}.${GENOMIC_REGION_NAME}.size_count.out" >/dev/null
# Check if the output is empty
if [ ! -s "${FILE}.${GENOMIC_REGION_NAME}.size_count.out" ]; then
echo "Error: Output file is empty"
rm -f "${FILE}.out"
return 1
fi
return 0
}
# Run cfdna_size_count function
cfdna_size_count "$@"
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment