Created
May 12, 2018 15:36
-
-
Save indraniel/c0462ec0a025da797e9d01a04055156c to your computer and use it in GitHub Desktop.
bash template to reheader a VCF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -eo pipefail | |
# http://stackoverflow.com/questions/9893667/is-there-a-way-to-write-a-bash-function-which-aborts-the-whole-execution-no-mat | |
trap "exit 1" TERM | |
export TOP_PID=$$ | |
BGZIP=/gscmnt/gc2802/halllab/idas/software/local/bin/bgzip | |
TABIX=/gscmnt/gc2802/halllab/idas/software/local/bin/tabix | |
BCFTOOLS=/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4 | |
function join_by { local IFS="$1"; shift; echo "$*"; } | |
function die { | |
local timestamp=$(date +"%Y-%m-%d %T") | |
echo "[ ${timestamp} ] ERROR: $@" >&2 | |
kill -s TERM ${TOP_PID} | |
} | |
function log { | |
local timestamp=$(date +"%Y-%m-%d %T") | |
echo "---> [ ${timestamp} ] $@" >&2 | |
} | |
function run_cmd { | |
local cmd=$1 | |
log "EXEC: ${cmd}" | |
eval "${cmd}" | |
if [[ $? -ne 0 ]]; then | |
die "[err] Problem running command: ${cmd} !" | |
exit 1; | |
fi | |
} | |
function tabix_and_finalize_vcf { | |
local tmpvcf=$1 | |
local finalvcf=$2 | |
local cmd=" | |
${TABIX} -p vcf -f ${tmpvcf} \ | |
&& mv ${tmpvcf}.tbi ${finalvcf}.tbi \ | |
&& mv ${tmpvcf} ${finalvcf} | |
" | |
run_cmd "${cmd}" | |
} | |
function vcf_reheader { | |
local header_vcf=$1 | |
local content_vcf=$2 | |
log "(vcf_reheader) calling bcftools subprocess" | |
local cmd=" | |
cat <(${BCFTOOLS} view -h ${header_vcf} | head -n -1) \ | |
<(${BCFTOOLS} view -h ${content_vcf} | tail -n 1) \ | |
<(${BCFTOOLS} view -H ${content_vcf}) | |
" | |
run_cmd "${cmd}" | |
} | |
function update_vcf { | |
local header_vcf=$1 | |
local input_vcf=$2 | |
local output_vcf=$3 | |
if [[ -e "${output_vcf}" ]]; then | |
log "shortcutting update_vcf" | |
return 0; | |
fi | |
local tmpvcf=${output_vcf}.tmp | |
local cmd1=" | |
vcf_reheader ${header_vcf} ${input_vcf} \ | |
| ${BGZIP} -c \ | |
> ${tmpvcf} | |
" | |
run_cmd "${cmd1}" | |
tabix_and_finalize_vcf ${tmpvcf} ${output_vcf} | |
} | |
function main { | |
local header_vcf=/gscmnt/gc2758/analysis/ccdg_gatk_callsets/t1d_2017.03.31/post-vqsr/7-vep-cadd-annotation/22:20000001-30000000/annotated.vep.cadd.c22:20000001-30000000.vcf.gz | |
local input_vcf=$1 | |
local output_vcf=$2 | |
update_vcf $header_vcf $input_vcf $output_vcf | |
} | |
INPUT_VCF=$1 | |
OUTPUT_VCF=$2 | |
main $INPUT_VCF $OUTPUT_VCF ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment