Skip to content

Instantly share code, notes, and snippets.

@indraniel
Last active July 6, 2017 21:29
Show Gist options
  • Save indraniel/f4c608bd7362d86f08131a086cab3253 to your computer and use it in GitHub Desktop.
Save indraniel/f4c608bd7362d86f08131a086cab3253 to your computer and use it in GitHub Desktop.
example to pass bash associative arrays to functions
#!/bin/bash
# Example usage: bash get-passing-line-in-vcf.sh /gscmnt/gc2758/analysis/ccdg_gatk_callsets/eocad_aa_2017.05.18/post-vqsr/12-concat-vcfs

# --- external programs (absolute paths) ---
# SORT is invoked with -N by main; TABIX is defined but not used in this file.
SORT="/gscmnt/gc2802/halllab/idas/software/local/bin/lh3-sort"
TABIX="/gscmnt/gc2802/halllab/idas/software/local/bin/tabix"
BCFTOOLS="/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4"

# --- reference data ---
# Tab-delimited FASTA index; columns 1 and 2 are read as sequence name and length.
B38_FAI="/gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.fa.fai"
#######################################
# Serialise the first 24 entries of the reference index as the body of a bash
# associative-array literal (sequence name -> sequence length).
# Bash cannot return an array from a function, so the array is printed in
# `declare -p` form with the leading "declare -A <name>=" removed; the caller
# rebuilds it with:  eval "declare -A lengths=$(get_chrom_lengths)"
# Globals:   B38_FAI (read) - path to a tab-delimited .fai index
# Arguments: none
# Outputs:   stdout - e.g. ([chr1]="248956422" [chr2]="242193529" )
#            stderr - diagnostic when the index cannot be read
# Returns:   0 on success, 1 when B38_FAI is not a readable file
#######################################
function get_chrom_lengths {
  # Initialise explicitly: an unassigned `local -A` is printed by declare -p
  # without any "=(...)" part, which would corrupt the caller's eval.
  local -A lengths=()
  local chrom size _rest decl

  if [[ ! -r "${B38_FAI}" ]]; then
    printf 'get_chrom_lengths: cannot read index file: %s\n' "${B38_FAI}" >&2
    return 1
  fi

  # Only the first two columns are needed; the remainder lands in _rest.
  # NOTE(review): 24 presumably covers the primary chromosomes - confirm.
  while IFS=$'\t' read -r chrom size _rest; do
    [[ -n "${chrom}" ]] || continue   # skip blank lines
    lengths["${chrom}"]=${size}
  done < <(head -n 24 "${B38_FAI}")

  # Drop everything up to and including the first "=" ("declare -A lengths=").
  decl=$(declare -p lengths)
  printf '%s\n' "${decl#*=}"
}
#######################################
# Count the records in a region whose "POS<TAB>FILTER" line contains "PASS".
# Globals:   BCFTOOLS (read) - command run as "<BCFTOOLS> query ..."
# Arguments: $1 - region string passed to -r (built by callers as chrom:start-end)
#            $2 - path to the VCF file to query
# Outputs:   stdout - a single unpadded integer (0 when nothing matches)
# Returns:   exit status of the counting stage (0 even when the count is 0)
#######################################
function count_passing_variants {
  local region=$1
  local vcf=$2

  # Quoted so that regions/paths containing spaces reach the tool as one
  # argument each. A single awk replaces `grep PASS | wc -l`; it matches the
  # same lines (substring "PASS" anywhere on the line) and always prints a
  # plain number without the padding some wc implementations add.
  "${BCFTOOLS}" query -r "${region}" -f '%POS\t%FILTER\n' "${vcf}" \
    | awk '/PASS/ { n++ } END { print n + 0 }'
}
#######################################
# Print the last record in a region whose "POS<TAB>FILTER" line contains
# "PASS" (i.e. the final matching line of the query output).
# Globals:   BCFTOOLS (read) - command run as "<BCFTOOLS> query ..."
# Arguments: $1 - region string passed to -r (built by callers as chrom:start-end)
#            $2 - path to the VCF file to query
# Outputs:   stdout - one "POS<TAB>FILTER" line, or nothing when no line matches
# Returns:   exit status of the final pipeline stage (tail)
#######################################
function get_final_passing_variants {
  local region=$1
  local vcf=$2

  # Every expansion is quoted so a path or region containing whitespace is
  # passed through as a single argument.
  "${BCFTOOLS}" query -r "${region}" -f '%POS\t%FILTER\n' "${vcf}" \
    | grep PASS \
    | tail -n 1
}
#######################################
# Report the last PASS record of a chromosome by scanning backwards from its
# end. The window starts as the final 10000 positions and its start is moved
# back by 10000 at a time (the end stays fixed) until count_passing_variants
# reports a non-zero count; the last matching line of that window is printed.
# Arguments: $1 - chromosome / sequence name
#            $2 - sequence length (non-negative integer), used as window end
#            $3 - path to the VCF file to query
# Outputs:   stdout - "chrom : <name> | last-position : <POS<TAB>FILTER>"
#            stderr - diagnostics on invalid length or when nothing is found
# Returns:   0 on success, 1 when the whole sequence has no passing record,
#            2 when the length argument is not an integer
#######################################
function get_last_entry {
  local chrom=$1
  local max=$2
  local vcf=$3
  local -r step=10000
  # region is local here so it no longer leaks into the caller's scope.
  local start region count out

  if [[ ! "${max}" =~ ^[0-9]+$ ]]; then
    printf 'get_last_entry: invalid length "%s" for %s\n' "${max}" "${chrom}" >&2
    return 2
  fi

  # Clamp the window start at 1 so short sequences never yield a negative
  # coordinate in the region string.
  start=$(( max - step ))
  if (( start < 1 )); then
    start=1
  fi
  region="${chrom}:${start}-${max}"
  count=$(count_passing_variants "${region}" "${vcf}")

  while [[ "${count}" == 0 ]]; do
    # The window already covers the whole sequence: stop instead of walking
    # into negative coordinates forever.
    if (( start <= 1 )); then
      printf 'get_last_entry: no passing variants found on %s in %s\n' \
        "${chrom}" "${vcf}" >&2
      return 1
    fi
    start=$(( start - step ))
    if (( start < 1 )); then
      start=1
    fi
    region="${chrom}:${start}-${max}"
    count=$(count_passing_variants "${region}" "${vcf}")
  done

  out=$(get_final_passing_variants "${region}" "${vcf}")
  printf "chrom : %s | last-position : %s\n" "${chrom}" "${out}"
}
#######################################
# Print the last passing record of every chromosome found in the length
# table, in the order produced by "${SORT} -N".
# For each chromosome <c> the VCF queried is
#   <vcfdir>/<c>/concatenated.c<c>.vcf.gz
# NOTE(review): the literal "c" before the chromosome name in the file name is
# kept exactly as in the original - confirm it matches the on-disk layout.
# Globals:   SORT (read) - sort command invoked with -N
# Arguments: $1 - directory holding one sub-directory per chromosome
# Outputs:   stdout - one line per chromosome from get_last_entry
#            stderr - usage / diagnostics
# Returns:   2 on missing argument, 1 when no chromosome lengths are
#            available, otherwise the last non-zero status returned by
#            get_last_entry (0 if every call succeeded)
#######################################
function main {
  local vcfdir=${1:-}
  local lengths_string chrom vcf
  local -a chroms=()
  local rc=0

  if [[ -z "${vcfdir}" ]]; then
    printf 'usage: %s <vcf-directory>\n' "${0##*/}" >&2
    return 2
  fi

  lengths_string=$(get_chrom_lengths) || return
  if [[ -z "${lengths_string}" ]]; then
    printf 'main: could not obtain chromosome lengths\n' >&2
    return 1
  fi

  # get_chrom_lengths hands back the array in `declare -p` form; eval is the
  # way to rebuild it. The input comes from this script's own helper, not
  # from the user. `declare` inside a function makes `lengths` local.
  eval "declare -A lengths=${lengths_string}"
  if (( ${#lengths[@]} == 0 )); then
    printf 'main: chromosome length table is empty\n' >&2
    return 1
  fi

  # One key per line through the sorter, read back without word-splitting.
  mapfile -t chroms < <(printf '%s\n' "${!lengths[@]}" | "${SORT}" -N)

  for chrom in "${chroms[@]}"; do
    vcf="${vcfdir}/${chrom}/concatenated.c${chrom}.vcf.gz"
    # Keep going on failure so the remaining chromosomes are still reported.
    get_last_entry "${chrom}" "${lengths[${chrom}]}" "${vcf}" || rc=$?
  done

  return "${rc}"
}
# Entry point: forward every command-line argument to main unchanged.
main "$@";
# Implementation note: `declare -p NAME` displays the attributes and value of
# NAME; get_chrom_lengths prints that text and main evals it to rebuild the array.
# Background on returning arrays from bash functions without globals:
# https://stackoverflow.com/questions/10582763/how-to-return-an-array-in-bash-without-using-globals
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment