Last active
July 6, 2017 21:29
-
-
Save indraniel/f4c608bd7362d86f08131a086cab3253 to your computer and use it in GitHub Desktop.
example to pass bash associative arrays to functions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# For each sequence listed at the top of the GRCh38 .fai index, print the last
# PASS record found in that sequence's concatenated VCF.
#
# Example usage: bash get-passing-line-in-vcf.sh /gscmnt/gc2758/analysis/ccdg_gatk_callsets/eocad_aa_2017.05.18/post-vqsr/12-concat-vcfs

# programs
BCFTOOLS=/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4
# NOTE(review): TABIX is not referenced by any function visible in this file.
TABIX=/gscmnt/gc2802/halllab/idas/software/local/bin/tabix
SORT=/gscmnt/gc2802/halllab/idas/software/local/bin/lh3-sort

# data
B38_FAI=/gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.fa.fai
#######################################
# Read sequence names and lengths from the first 24 lines of the .fai index
# and emit them as the body of an associative-array assignment.
# Globals:   B38_FAI (read) - path to the tab-separated .fai index
# Arguments: none
# Outputs:   stdout: the `declare -p` rendering of the array with everything
#            up to and including the first `=` removed, e.g.
#            ([chr1]="1000" [chr2]="2000" ); the caller re-creates the array
#            with: eval "declare -A lengths=<output>"
# Returns:   0 on success, 1 if the index file cannot be read
#######################################
function get_chrom_lengths {
  local -A lengths=()   # initialised so `declare -p` always prints `=(...)`
  local chrom size decl

  if [[ ! -r ${B38_FAI:-} ]]; then
    printf 'get_chrom_lengths: cannot read index file "%s"\n' "${B38_FAI:-}" >&2
    return 1
  fi

  while IFS=$'\t' read -r chrom size; do
    # An empty key is not a valid associative-array subscript.
    [[ -n ${chrom} ]] || continue
    lengths[${chrom}]=${size}
  done < <(head -n 24 -- "${B38_FAI}" | cut -f 1,2)

  # Drop the leading `declare -A lengths=` using parameter expansion.
  decl=$(declare -p lengths)
  printf '%s\n' "${decl#*=}"
}
#######################################
# Count the records in a region whose FILTER column is exactly PASS.
# Globals:   BCFTOOLS (read) - bcftools executable
# Arguments: $1 - region passed to `bcftools query -r`
#            $2 - path to the VCF
# Outputs:   stdout: the count as a plain integer (0 when nothing matches)
# Returns:   exit status of the bcftools command
#######################################
function count_passing_variants {
  local region=$1
  local vcf=$2

  # Compare the FILTER field as a whole so that filter names that merely
  # contain the letters "PASS" are not counted.
  "${BCFTOOLS}" query -r "${region}" -f '%POS\t%FILTER\n' "${vcf}" \
    | awk -F'\t' '$2 == "PASS" { n++ } END { print n + 0 }'
  return "${PIPESTATUS[0]}"
}
#######################################
# Print the last record in a region whose FILTER column is exactly PASS.
# Globals:   BCFTOOLS (read) - bcftools executable
# Arguments: $1 - region passed to `bcftools query -r`
#            $2 - path to the VCF
# Outputs:   stdout: one "<POS><TAB><FILTER>" line, or nothing if no record
#            in the region passes
# Returns:   exit status of the bcftools command
#######################################
function get_final_passing_variants {
  local region=$1
  local vcf=$2

  # Whole-field comparison: filter names that merely contain "PASS" are skipped.
  "${BCFTOOLS}" query -r "${region}" -f '%POS\t%FILTER\n' "${vcf}" \
    | awk -F'\t' '$2 == "PASS" { last = $0; found = 1 } END { if (found) print last }'
  return "${PIPESTATUS[0]}"
}
#######################################
# Find the last PASS record of a chromosome by querying a window that ends at
# the chromosome's last base and grows backwards in 10000-base steps until it
# contains at least one PASS record.
# Arguments: $1 - chromosome name
#            $2 - chromosome length (non-negative integer)
#            $3 - path to the VCF
# Outputs:   stdout: "chrom : <name> | last-position : <record>"
#            stderr: diagnostics when the length is invalid or nothing passes
# Returns:   0 when a PASS record was reported, 1 otherwise
#######################################
function get_last_entry {
  local chrom=$1
  local max=$2
  local vcf=$3
  local step=10000
  local start region count out

  if [[ ! ${max} =~ ^[0-9]+$ ]]; then
    printf 'get_last_entry: invalid length "%s" for %s\n' "${max}" "${chrom}" >&2
    return 1
  fi

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  start=$(( 10#${max} ))
  while :; do
    start=$(( start - step ))
    # Never ask for coordinates below the first base.
    if (( start < 1 )); then
      start=1
    fi
    region=${chrom}:${start}-${max}
    count=$(count_passing_variants "${region}" "${vcf}")
    if (( ${count:-0} > 0 )); then
      break
    fi
    # The window already spans the whole chromosome: stop instead of looping
    # forever when there is no PASS record or the VCF cannot be queried.
    if (( start == 1 )); then
      printf 'get_last_entry: no PASS record found for %s in %s\n' \
        "${chrom}" "${vcf}" >&2
      return 1
    fi
  done

  out=$(get_final_passing_variants "${region}" "${vcf}")
  printf "chrom : %s | last-position : %s\n" "${chrom}" "${out}"
}
#######################################
# Report the last PASS record for every chromosome returned by
# get_chrom_lengths, in the order produced by `${SORT} -N`.
# Globals:   SORT (read) - sort executable invoked with -N
# Arguments: $1 - directory holding <chrom>/concatenated.c<chrom>.vcf.gz
# Outputs:   stdout: one line per chromosome, written by get_last_entry
#            stderr: usage and diagnostics
# Returns:   0 if every chromosome was reported, 2 on a missing argument,
#            otherwise non-zero
#######################################
function main {
  local vcfdir=${1:-}
  local lengths_string c vcf
  local -a chroms=()
  local rc=0

  if [[ -z ${vcfdir} ]]; then
    printf 'usage: %s <vcf-directory>\n' "${0##*/}" >&2
    return 2
  fi

  lengths_string=$(get_chrom_lengths) || return
  # Sanity-check the serialized array before handing it to eval.
  if [[ ${lengths_string} != *\(*\)* ]]; then
    printf 'main: could not obtain chromosome lengths\n' >&2
    return 1
  fi
  # NOTE(review): eval re-creates the array from get_chrom_lengths' output;
  # the keys originate from the index file, so that file must be trusted.
  eval "declare -A lengths=${lengths_string}"
  if (( ${#lengths[@]} == 0 )); then
    printf 'main: chromosome length table is empty\n' >&2
    return 1
  fi

  mapfile -t chroms < <(printf '%s\n' "${!lengths[@]}" | "${SORT}" -N)
  if (( ${#chroms[@]} == 0 )); then
    printf 'main: sorting the chromosome names produced no output\n' >&2
    return 1
  fi

  for c in "${chroms[@]}"; do
    # NOTE(review): the file name is a literal "c" followed by the full key
    # (e.g. concatenated.cchr1.vcf.gz) - confirm against the on-disk layout.
    vcf=${vcfdir}/${c}/concatenated.c${c}.vcf.gz
    # Keep going after a failure, but remember it for the exit status.
    get_last_entry "${c}" "${lengths[${c}]}" "${vcf}" || rc=$?
  done
  return "${rc}"
}
# Script entry point: forward all command-line arguments to main.
main "$@"
# `declare -p NAME` displays the attributes and value of each NAME.
# Background on returning an array from a function without using globals:
# https://stackoverflow.com/questions/10582763/how-to-return-an-array-in-bash-without-using-globals
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment