Last active
February 26, 2022 21:10
-
-
Save RandyHarr/7ccfaa250844a7374325d857f30841ef to your computer and use it in GitHub Desktop.
For fixing FTDNA version 1 BAM files that incorrectly include a space in the QNAME field
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Fixes FTDNA BAM version 1 files so they can be processed by standard
# bioinformatic tools (version 1 files incorrectly contain a space in the
# QNAME field).
# Applies only to Bigy files (not needed for Bigy2 or Bigy3).
#
# This is handled behind the scenes (automagically) by WGS Extract (in the
# next release). This is simply a stand-alone, simple-scenario script for
# demonstration purposes.
#
# Usage: <this script> FTDNA_BAM_zip_file | file.bam
#   *.zip  the single BAM inside the archive is fixed and indexed, and the
#          archive is updated in place with the fixed BAM and its .bai index.
#   *.bam  the BAM is fixed in place and a .bai index is written next to it.
#
# Relies on samtools, wget and python; zip and unzip are additionally needed
# for zip input.
#
# Exit status: 0 on success, 1 on any error. The original input is only
# replaced after the whole conversion pipeline has succeeded.

# A failure in any stage of the conversion pipeline must be detected;
# otherwise a truncated result would silently replace the user's BAM.
set -o pipefail
shopt -s nullglob

# The raw-content URL is required: the github.com/.../blob/... URL returns an
# HTML page rather than the Python source.
readonly FIX_SCRIPT_URL='https://raw.githubusercontent.com/WGSExtract/WGSExtract-Dev/master/program/fixFTDNAbam.py'

# Print a diagnostic on stderr and abort with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $# -ne 1 ]]; then
  die "Usage: $0 FTDNA_BAM_zip_file or file.bam"
elif [[ ! -f "$1" ]]; then
  die "$1 is not a file"
fi

ext=${1##*.}
case "$ext" in
  zip)
    required=(samtools wget python zip unzip)
    ;;
  bam)
    required=(samtools wget python)
    ;;
  *)
    die "Unknown file type: $1"
    ;;
esac

for tool in "${required[@]}"; do
  command -v "$tool" >/dev/null || die "Required tool not found: $tool"
done

# Resolve the input to an absolute path so it stays valid after changing
# directory, whether it was given as a relative or an absolute path.
input_dir=$(cd -- "$(dirname -- "$1")" && pwd) \
  || die "Cannot resolve the directory of $1"
input="$input_dir/$(basename -- "$1")"

# Scratch space lives under the current directory (BAM files can be far
# larger than a typical /tmp) and is removed on every exit path.
workdir=$(mktemp -d "$PWD/temp_for_BAM.XXXXXX") \
  || die "Cannot create a temporary directory in $PWD"
cleanup() {
  rm -rf -- "${workdir:?}"
}
trap cleanup EXIT

fix_script="$workdir/fixFTDNAbam.py"
fixed_bam="$workdir/fixed.bam.tmp"
extract_dir="$workdir/extract"

if [[ "$ext" == zip ]]; then
  mkdir -- "$extract_dir" || die "Cannot create $extract_dir"
  unzip "$input" -d "$extract_dir" || die "Failed to unzip $1"
  # Expand the glob into an array; a quoted "*.bam" would stay literal.
  bams=("$extract_dir"/*.bam)
  [[ ${#bams[@]} -eq 1 ]] \
    || die "Expected exactly one .bam file in $1, found ${#bams[@]}"
  bam=${bams[0]}
else
  bam=$input
fi

wget -q -O "$fix_script" "$FIX_SCRIPT_URL" \
  || die "Failed to download $FIX_SCRIPT_URL"

# Decode to SAM text (with header), repair the records, and re-encode as
# real BAM. Piping SAM through bgzip would only yield gzip-compressed SAM.
# NOTE(review): assumes fixFTDNAbam.py reads SAM on stdin and writes SAM on
# stdout, and runs under whatever interpreter `python` resolves to -- confirm.
samtools view -h "$bam" \
  | python "$fix_script" \
  | samtools view -b -o "$fixed_bam" - \
  || die "Failed to fix $bam; the original file was left untouched"

mv -- "$fixed_bam" "$bam" || die "Failed to replace $bam"
samtools index "$bam" || die "Failed to index $bam"

if [[ "$ext" == zip ]]; then
  bam_name=${bam##*/}
  # Run zip from the extraction directory so entries are stored without a
  # path prefix. Plain `zip` replaces existing entries AND adds new ones;
  # `zip -f` (freshen) would never add the newly created .bai index.
  (cd -- "$extract_dir" && zip "$input" "$bam_name" "$bam_name.bai") \
    || die "Failed to update $1"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment