Skip to content

Instantly share code, notes, and snippets.

@sxv
Last active August 10, 2016 20:01
Show Gist options
  • Save sxv/fefc9bb48e6c15815851 to your computer and use it in GitHub Desktop.
Save sxv/fefc9bb48e6c15815851 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
for f in *.vcf; do \
gawk 'BEGIN{FS="\t"; OFS="\t"; } # separator=tab
/CHROM/{
## for(i=0; i<NF; i++){ if($i=="FORMAT"){ samples=(NF-i) } } # todo
if($(NF-2)~/FORMAT/){
samples=2; # two samples?
if($(NF)~/[nN]$/ || $(NF)~/normal$/){ swap=1 } # is order t/n?
}
}
{ if(swap>0){ # swap t/n -> n/t
t=$(NF-1);
$(NF-1)=$NF;
$NF=t
}
if($1~/#CHROM/ || $7~/PASS/){ print; next } # filter out REJECTs
if($(NF-samples)~/^GT:AD/){ # freebayes
split($(NF),a,":");
depth=a[4];
if(depth>=30){ print };
}
if($(NF-samples)~/^GT:AO/){ # vardict
split($(NF),a,":");
depth=a[3];
if(depth>=30){ print }
}
}' $f > "0.$f" ;
done
# Annotate with annovar
for f in 0.*vcf; do
perl /media/data3tb_2/annovar/table_annovar.pl \
-vcfinput $f \
/media/data3tb_2/annovar/humandb/ \
-buildver hg19 \
-protocol refGene,ensGene,gerp++gt2,clinvar_20150330,popfreq_all_20150413,cosmic70,snp129,snp132,snp138,avsift,caddgt20 \
-operation g,g,f,f,f,f,f,f,f,f,f \
-otherinfo \
; done
# Insert the case name as column 1 (shift all columns right)
# Combine all NOT "synonymous" and (exonic or splicing) variants into a new master file
gawk 'BEGIN{FS="\t"; OFS="\t"};
{ if(NR==1){ print } }
{ if($9~/^syn/){ next } }
{ if($18>0.1){ next } } # remove if popfreq_all > 10%
{ split(FILENAME,a,".vcf"); split(a[1],b,"."); $0=b[2]"\t"$0 }
{ if($7~/^exonic/ || $7~/^splicing/){ print } }
' *multianno.txt > all.n0.exo.txt
python ./genesort.py > all.final.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment