Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save timothymillar/121ddb643e134d06e36a113126e988a0 to your computer and use it in GitHub Desktop.
Save timothymillar/121ddb643e134d06e36a113126e988a0 to your computer and use it in GitHub Desktop.
Example VCFs with fields of length G
##fileformat=VCFv4.3
##fileDate=20210420
##source=mchap v0.4.2
##phasing=None
##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --genotype-likelihoods"
##randomseed=11
##contig=<ID=CHR1,length=60>
##contig=<ID=CHR2,length=60>
##contig=<ID=CHR3,length=60>
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Combined depth across samples">
##INFO=<ID=RCOUNT,Number=1,Type=Integer,Description="Total number of observed reads across all samples">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position on CHROM">
##INFO=<ID=NVAR,Number=1,Type=Integer,Description="Number of input variants within assembly locus">
##INFO=<ID=SNVPOS,Number=.,Type=Integer,Description="Relative (1-based) positions of SNVs within haplotypes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=PHQ,Number=1,Type=Integer,Description="Phenotype quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=RCOUNT,Number=1,Type=Integer,Description="Total count of read pairs within haplotype interval">
##FORMAT=<ID=RCALLS,Number=1,Type=Integer,Description="Total count of read base calls matching a known variant">
##FORMAT=<ID=MEC,Number=1,Type=Integer,Description="Minimum error correction">
##FORMAT=<ID=KMERCOV,Number=3,Type=Float,Description="Minimum proportion of read-SNV 1-, 2-, and 3-mers found in genotype at any position.">
##FORMAT=<ID=GPM,Number=1,Type=Float,Description="Genotype posterior mode probability">
##FORMAT=<ID=PHPM,Number=1,Type=Float,Description="Phenotype posterior mode probability">
##FORMAT=<ID=MCI,Number=1,Type=Integer,Description="Replicate Markov-chain incongruence, 0 = none, 1 = incongruence, 2 = putative CNV">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 SAMPLE3
CHR1 6 CHR1_05_25 AAAAAAAAAAAAAAAAAAAA AAAAAAAAAAGAAAAAATAA,ACAAAAAAAAGAAAAAACAA . . AN=3;AC=3,2;NS=3;DP=159;RCOUNT=240;END=25;NVAR=3;SNVPOS=2,11,18 GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GL 0/0/1/2:12:60:13:20:40:0:1,1,1:0.941:1:0:-98.588,-39.531,-38.582,-40.008,-120.497,-38.327,-9.031,-10.536,-91.769,-37.98,-10.536,-91.848,-39.758,-93.677,-142.405 0/0/1/1:60:60:133:200:400:0:1,1,1:1:1:0:-876.34,-72.701,-60.208,-72.701,-876.34,-693.832,-90.31,-90.311,-893.832,-699.4,-120.413,-918.484,-722.459,-960.628,-1752.679 0/0/0/2:10:22:13:20:40:0:1,1,1:0.896:0.994:0:-54.771,-37.145,-38.582,-42.394,-164.314,-4.885,-7.526,-12.041,-135.211,-6.021,-10.536,-134.762,-9.656,-135.688,-164.314
CHR1 31 CHR1_30_50 AAAAAAAAAAAAAAAAAAAA . . . AN=1;AC=.;NS=3;DP=.;RCOUNT=288;END=50;NVAR=0;SNVPOS=. GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GL 0/0/0/0:60:60:.:24:0:0:.,.,.:1:1:0:0 0/0/0/0:60:60:.:240:0:0:.,.,.:1:1:0:0 0/0/0/0:60:60:.:24:0:0:.,.,.:1:1:0:0
CHR2 11 CHR2_10_30 AAAAAAAAAAAAAAAAAAAA AAAAAAAAAGAAAAAAAAAA,AAAAAAAAATAAAAAAAAAA,AAAATAAAAGAAAAAAAAAA . . AN=4;AC=3,2,1;NS=3;DP=168;RCOUNT=288;END=30;NVAR=2;SNVPOS=5,10 GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GL 0/0/0/2:7:14:14:24:28:0:1,1,.:0.812:0.962:0:-21.909,-23.408,-25.521,-29.133,-87.634,-3.908,-6.021,-9.633,-68.134,-4.817,-8.429,-66.93,-7.725,-66.225,-65.726,-23.783,-25.896,-29.508,-89.133,-6.021,-9.633,-69.258,-8.429,-68.054,-67.35,-26.424,-30.036,-91.246,-9.633,-70.843,-69.639,-30.939,-94.859,-73.552,-153.359 0/0/1/2:60:60:140:240:280:0:1,1,.:1:1:0:-438.171,-253.163,-255.209,-272.248,-657.255,-253.163,-72.248,-84.289,-467.25,-255.209,-84.289,-462.253,-272.248,-467.25,-657.255,-403.163,-267.988,-281.279,-672.248,-218.5,-93.32,-480.029,-230.541,-478.78,-620.998,-419.523,-295.592,-693.379,-239.572,-499.626,-635.597,-450.875,-729.502,-663.939,-1314.509 0/1/1/3:8:21:14:24:28:0:1,1,.:0.828:0.992:0:-82.157,-24.156,-21.248,-20.339,-38.34,-82.657,-24.86,-22.452,-39.839,-83.361,-26.064,-41.952,-84.565,-45.565,-104.065,-36.975,-7.526,-6.396,-24.839,-37.679,-8.73,-26.271,-38.883,-28.605,-58.758,-35.873,-7.299,-25.521,-37.077,-27.702,-57.481,-36.021,-27.702,-57.327,-71.203
CHR3 21 CHR3_20_40 AAAAAAAAAAAAAAAAAAAA . . . AN=1;AC=.;NS=3;DP=.;RCOUNT=0;END=40;NVAR=0;SNVPOS=. GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GL 0/0/0/0:60:60:.:0:0:0:.,.,.:1:1:0:0 0/0/0/0:60:60:.:0:0:0:.,.,.:1:1:0:0 0/0/0/0:60:60:.:0:0:0:.,.,.:1:1:0:0
##fileformat=VCFv4.3
##fileDate=20210420
##source=mchap v0.4.2
##phasing=None
##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --genotype-posteriors"
##randomseed=11
##contig=<ID=CHR1,length=60>
##contig=<ID=CHR2,length=60>
##contig=<ID=CHR3,length=60>
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Combined depth across samples">
##INFO=<ID=RCOUNT,Number=1,Type=Integer,Description="Total number of observed reads across all samples">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position on CHROM">
##INFO=<ID=NVAR,Number=1,Type=Integer,Description="Number of input variants within assembly locus">
##INFO=<ID=SNVPOS,Number=.,Type=Integer,Description="Relative (1-based) positions of SNVs within haplotypes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=PHQ,Number=1,Type=Integer,Description="Phenotype quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=RCOUNT,Number=1,Type=Integer,Description="Total count of read pairs within haplotype interval">
##FORMAT=<ID=RCALLS,Number=1,Type=Integer,Description="Total count of read base calls matching a known variant">
##FORMAT=<ID=MEC,Number=1,Type=Integer,Description="Minimum error correction">
##FORMAT=<ID=KMERCOV,Number=3,Type=Float,Description="Minimum proportion of read-SNV 1-, 2-, and 3-mers found in genotype at any position.">
##FORMAT=<ID=GPM,Number=1,Type=Float,Description="Genotype posterior mode probability">
##FORMAT=<ID=PHPM,Number=1,Type=Float,Description="Phenotype posterior mode probability">
##FORMAT=<ID=MCI,Number=1,Type=Integer,Description="Replicate Markov-chain incongruence, 0 = none, 1 = incongruence, 2 = putative CNV">
##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype posterior probabilities">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 SAMPLE3
CHR1 6 CHR1_05_25 AAAAAAAAAAAAAAAAAAAA AAAAAAAAAAGAAAAAATAA,ACAAAAAAAAGAAAAAACAA . . AN=3;AC=3,2;NS=3;DP=159;RCOUNT=240;END=25;NVAR=3;SNVPOS=2,11,18 GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GP 0/0/1/2:12:60:13:20:40:0:1,1,1:0.941:1:0:0,0,0,0,0,0,0.941,0.029,0,0,0.029,0,0,0,0 0/0/1/1:60:60:133:200:400:0:1,1,1:1:1:0:0,0,1,0,0,0,0,0,0,0,0,0,0,0,0 0/0/0/2:10:22:13:20:40:0:1,1,1:0.896:0.994:0:0,0,0,0,0,0.896,0.006,0,0,0.098,0,0,0,0,0
CHR1 31 CHR1_30_50 AAAAAAAAAAAAAAAAAAAA . . . AN=1;AC=.;NS=3;DP=.;RCOUNT=288;END=50;NVAR=0;SNVPOS=. GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GP 0/0/0/0:60:60:.:24:0:0:.,.,.:1:1:0:1 0/0/0/0:60:60:.:240:0:0:.,.,.:1:1:0:1 0/0/0/0:60:60:.:24:0:0:.,.,.:1:1:0:1
CHR2 11 CHR2_10_30 AAAAAAAAAAAAAAAAAAAA AAAAAAAAAGAAAAAAAAAA,AAAAAAAAATAAAAAAAAAA,AAAATAAAAGAAAAAAAAAA . . AN=4;AC=3,2,1;NS=3;DP=168;RCOUNT=288;END=30;NVAR=2;SNVPOS=5,10 GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GP 0/0/0/2:7:14:14:24:28:0:1,1,.:0.812:0.962:0:0,0,0,0,0,0.812,0.019,0,0,0.15,0,0,0,0,0,0,0,0,0,0.019,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 0/0/1/2:60:60:140:240:280:0:1,1,.:1:1:0:0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 0/1/1/3:8:21:14:24:28:0:1,1,.:0.828:0.992:0:0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.061,0.828,0,0,0.008,0,0,0,0,0,0.103,0,0,0,0,0,0,0,0
CHR3 21 CHR3_20_40 AAAAAAAAAAAAAAAAAAAA . . . AN=1;AC=.;NS=3;DP=.;RCOUNT=0;END=40;NVAR=0;SNVPOS=. GT:GQ:PHQ:DP:RCOUNT:RCALLS:MEC:KMERCOV:GPM:PHPM:MCI:GP 0/0/0/0:60:60:.:0:0:0:.,.,.:1:1:0:1 0/0/0/0:60:60:.:0:0:0:.,.,.:1:1:0:1 0/0/0/0:60:60:.:0:0:0:.,.,.:1:1:0:1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment