Created
July 1, 2011 03:54
-
-
Save mjclark/1057839 to your computer and use it in GitHub Desktop.
Calculate mean heterozygous allele balance from VCF4 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Automatically calculates the mean heterozygous allele balance for variants
# in a VCF4 file.
#
# Allele balance must be pre-calculated and defined for each variant in
# advance and given the INFO ID flag "AB" (default for GATK, etc.).
#
# Usage:   sh mean_allele_balance.sh <input.vcf>
# Output:  four lines on stdout: a label and the mean AB for every record
#          whose FILTER does not include "LowQual", then a label and the
#          mean AB for every record whose FILTER is exactly "PASS".
#          "NA" is printed in place of a mean when no record qualifies.
# Exit:    0 on success, 2 on a usage error, 1 if the input is unreadable.
#
# Written for POSIX sh (no bashisms), since the usage line invokes it via sh.

usage="usage: sh mean_allele_balance.sh <input.vcf>"

#######################################
# Print the mean of the INFO "AB" values over the selected records.
# Arguments:
#   $1 - selection mode: "pass" keeps records whose FILTER column is exactly
#        PASS; any other value keeps records whose FILTER column does not
#        list LowQual.
#   $2 - path to a tab-delimited VCF file.
# Outputs:
#   The mean on stdout, or "NA" when no selected record carries an AB value.
#######################################
mean_ab() {
  # The file is fed on stdin so that a path containing "=" is never
  # mistaken by awk for a command-line variable assignment.
  awk -F '\t' -v mode="$1" '
    /^#/ { next }                       # skip meta-information and header
    {
      # Select on the FILTER column ($7) only, so that "PASS" or "LowQual"
      # appearing in an ID, INFO or sample column cannot affect selection.
      if (mode == "pass") {
        if ($7 != "PASS") next
      } else {
        nfilt = split($7, filt, ";")    # FILTER may hold several ;-joined codes
        for (i = 1; i <= nfilt; i++) {
          if (filt[i] == "LowQual") next
        }
      }

      # Locate the AB key wherever it sits in INFO ($8); it is not
      # guaranteed to be the first key.
      ninfo = split($8, info, ";")
      for (i = 1; i <= ninfo; i++) {
        if (substr(info[i], 1, 3) == "AB=") {
          sum += substr(info[i], 4)
          n++
          break
        }
      }
    }
    END {
      # Avoid a division-by-zero abort when nothing matched.
      if (n > 0) print sum / n
      else print "NA"
    }
  ' < "$2"
}

#######################################
# Validate the command line, compute both means and print the report.
# Arguments:
#   $1 - path to the input VCF file.
# Returns:
#   0 on success, 2 on wrong argument count, 1 if the file is unreadable.
#######################################
main() {
  if [ "$#" -ne 1 ]; then
    printf '%s\n' "$usage" >&2
    return 2
  fi

  #input
  VCF=$1
  if [ ! -f "$VCF" ] || [ ! -r "$VCF" ]; then
    printf 'error: cannot read input file: %s\n' "$VCF" >&2
    return 1
  fi

  #calculations
  noLowQual=$(mean_ab nolowqual "$VCF") || return
  PassOnly=$(mean_ab pass "$VCF") || return

  #output
  echo "Mean allele balance for all non-\"LowQual\" variants:"
  echo "$noLowQual"
  echo "Mean allele balance for all \"PASS\" variants:"
  echo "$PassOnly"
}

# main is the last command, so its return status becomes the exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment