Skip to content

Instantly share code, notes, and snippets.

@indraniel
Created May 12, 2018 15:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save indraniel/36188480442bf6d9c4467fb6d3f1b558 to your computer and use it in GitHub Desktop.
Save indraniel/36188480442bf6d9c4467fb6d3f1b558 to your computer and use it in GitHub Desktop.
bash template to reheader a VCF
#!/bin/bash
set -eo pipefail
# http://stackoverflow.com/questions/9893667/is-there-a-way-to-write-a-bash-function-which-aborts-the-whole-execution-no-mat
trap "exit 1" TERM
export TOP_PID=$$
BGZIP=/gscmnt/gc2802/halllab/idas/software/local/bin/bgzip
TABIX=/gscmnt/gc2802/halllab/idas/software/local/bin/tabix
BCFTOOLS=/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4
function join_by { local IFS="$1"; shift; echo "$*"; }
function die {
local timestamp=$(date +"%Y-%m-%d %T")
echo "[ ${timestamp} ] ERROR: $@" >&2
kill -s TERM ${TOP_PID}
}
function log {
local timestamp=$(date +"%Y-%m-%d %T")
echo "---> [ ${timestamp} ] $@" >&2
}
function run_cmd {
local cmd=$1
log "EXEC: ${cmd}"
eval "${cmd}"
if [[ $? -ne 0 ]]; then
die "[err] Problem running command: ${cmd} !"
exit 1;
fi
}
function tabix_and_finalize_vcf {
local tmpvcf=$1
local finalvcf=$2
local cmd="
${TABIX} -p vcf -f ${tmpvcf} \
&& mv ${tmpvcf}.tbi ${finalvcf}.tbi \
&& mv ${tmpvcf} ${finalvcf}
"
run_cmd "${cmd}"
}
function vcf_reheader {
local header_vcf=$1
local content_vcf=$2
log "(vcf_reheader) calling bcftools subprocess"
local cmd="
cat <(${BCFTOOLS} view -h ${header_vcf} | head -n -1) \
<(${BCFTOOLS} view -h ${content_vcf} | tail -n 1) \
<(${BCFTOOLS} view -H ${content_vcf})
"
run_cmd "${cmd}"
}
function update_vcf {
local header_vcf=$1
local input_vcf=$2
local output_vcf=$3
if [[ -e "${output_vcf}" ]]; then
log "shortcutting update_vcf"
return 0;
fi
local tmpvcf=${output_vcf}.tmp
local cmd1="
vcf_reheader ${header_vcf} ${input_vcf} \
| ${BGZIP} -c \
> ${tmpvcf}
"
run_cmd "${cmd1}"
tabix_and_finalize_vcf ${tmpvcf} ${output_vcf}
}
function main {
local header_vcf=/gscmnt/gc2758/analysis/ccdg_gatk_callsets/t1d_2017.03.31/post-vqsr/7-vep-cadd-annotation/22:20000001-30000000/annotated.vep.cadd.c22:20000001-30000000.vcf.gz
local input_vcf=$1
local output_vcf=$2
update_vcf $header_vcf $input_vcf $output_vcf
}
INPUT_VCF=$1
OUTPUT_VCF=$2
main $INPUT_VCF $OUTPUT_VCF ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment