Created
May 12, 2018 15:37
-
-
Save indraniel/36188480442bf6d9c4467fb6d3f1b558 to your computer and use it in GitHub Desktop.
bash template to reheader a VCF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -eo pipefail | |
# http://stackoverflow.com/questions/9893667/is-there-a-way-to-write-a-bash-function-which-aborts-the-whole-execution-no-mat | |
trap "exit 1" TERM | |
export TOP_PID=$$ | |
BGZIP=/gscmnt/gc2802/halllab/idas/software/local/bin/bgzip | |
TABIX=/gscmnt/gc2802/halllab/idas/software/local/bin/tabix | |
BCFTOOLS=/gscmnt/gc2802/halllab/idas/software/local/bin/bcftools1.4 | |
function join_by { local IFS="$1"; shift; echo "$*"; } | |
function die { | |
local timestamp=$(date +"%Y-%m-%d %T") | |
echo "[ ${timestamp} ] ERROR: $@" >&2 | |
kill -s TERM ${TOP_PID} | |
} | |
function log { | |
local timestamp=$(date +"%Y-%m-%d %T") | |
echo "---> [ ${timestamp} ] $@" >&2 | |
} | |
function run_cmd { | |
local cmd=$1 | |
log "EXEC: ${cmd}" | |
eval "${cmd}" | |
if [[ $? -ne 0 ]]; then | |
die "[err] Problem running command: ${cmd} !" | |
exit 1; | |
fi | |
} | |
function tabix_and_finalize_vcf { | |
local tmpvcf=$1 | |
local finalvcf=$2 | |
local cmd=" | |
${TABIX} -p vcf -f ${tmpvcf} \ | |
&& mv ${tmpvcf}.tbi ${finalvcf}.tbi \ | |
&& mv ${tmpvcf} ${finalvcf} | |
" | |
run_cmd "${cmd}" | |
} | |
function vcf_reheader { | |
local header_vcf=$1 | |
local content_vcf=$2 | |
log "(vcf_reheader) calling bcftools subprocess" | |
local cmd=" | |
cat <(${BCFTOOLS} view -h ${header_vcf} | head -n -1) \ | |
<(${BCFTOOLS} view -h ${content_vcf} | tail -n 1) \ | |
<(${BCFTOOLS} view -H ${content_vcf}) | |
" | |
run_cmd "${cmd}" | |
} | |
function update_vcf { | |
local header_vcf=$1 | |
local input_vcf=$2 | |
local output_vcf=$3 | |
if [[ -e "${output_vcf}" ]]; then | |
log "shortcutting update_vcf" | |
return 0; | |
fi | |
local tmpvcf=${output_vcf}.tmp | |
local cmd1=" | |
vcf_reheader ${header_vcf} ${input_vcf} \ | |
| ${BGZIP} -c \ | |
> ${tmpvcf} | |
" | |
run_cmd "${cmd1}" | |
tabix_and_finalize_vcf ${tmpvcf} ${output_vcf} | |
} | |
function main { | |
local header_vcf=/gscmnt/gc2758/analysis/ccdg_gatk_callsets/t1d_2017.03.31/post-vqsr/7-vep-cadd-annotation/22:20000001-30000000/annotated.vep.cadd.c22:20000001-30000000.vcf.gz | |
local input_vcf=$1 | |
local output_vcf=$2 | |
update_vcf $header_vcf $input_vcf $output_vcf | |
} | |
INPUT_VCF=$1 | |
OUTPUT_VCF=$2 | |
main $INPUT_VCF $OUTPUT_VCF ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment