Skip to content

Instantly share code, notes, and snippets.

@holtgrewe
Created July 29, 2016 07:03
Show Gist options
  • Save holtgrewe/faeede09894f4bc066b29ff3330947e7 to your computer and use it in GitHub Desktop.
Save holtgrewe/faeede09894f4bc066b29ff3330947e7 to your computer and use it in GitHub Desktop.
Extract region from BAM file, keeping only paired reads, output sorted by query name, suited for remapping
#!/bin/bash
set -euo pipefail
BAM=
OUT=
print_help()
{
>&2 echo "USAGE: extract_bam.sh -i IN.bam -o OUT_PREFIX {REGIONS}+"
}
while getopts "i:o:" opt; do
case $opt in
i)
BAM=$OPTARG
;;
o)
OUT=$OPTARG
;;
h)
print_help
exit 1
;;
:)
>&2 echo "Option -$OPTARG requires an argument"
print_help
exit 1
;;
*)
>&2 echo "Invalid option: -$OPTARG"
print_help
exit 1
;;
esac
done
shift $(($OPTIND - 1))
REGIONS=$*
if [[ -z "${BAM}" ]]; then
>&2 echo "No BAM file given"
print_help
exit 1
fi
if [[ -z "${OUT}" ]]; then
>&2 echo "No OUT file given"
print_help
exit 1
fi
if [[ -z "${REGIONS}" ]]; then
>&2 echo "No REGIONS file given"
print_help
exit 1
fi
>2& echo "Processing regions $REGIONS"
module purge
module load SAMtools/1.3.1-foss-2015a
export TMPDIR=$(mktemp -d)
trap "rm -rf ${TMPDIR}" EXIT KILL TERM INT HUP
samtools view -f 1 -F 3840 -o ${TMPDIR}/extracted.bam -b ${BAM} ${REGIONS}
samtools sort -n -o ${TMPDIR}/sorted.bam ${TMPDIR}/extracted.bam
samtools view -h ${TMPDIR}/extracted.bam \
| awk -F $'\t' \
'BEGIN { OFS=FS; prev1=0; }
/^@/ { print $0; }
!/^@/ {
if ($1 == prev1) {
if (and($2, 64)) {
print $0;
print prev0;
} else {
print prev0;
print $0;
}
} else {
prev0 = $0;
prev1 = $1;
}
}' \
| samtools bam2fq \
-1 >(gzip -c > ${OUT}_1.fq.gz) \
-2 >(gzip -c > ${OUT}_2.fq.gz) \
/dev/stdin
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment