Skip to content

Instantly share code, notes, and snippets.

@fstrozzi
Created February 10, 2016 11:57
Show Gist options
  • Save fstrozzi/de6730183d5110fc9de5 to your computer and use it in GitHub Desktop.
Save fstrozzi/de6730183d5110fc9de5 to your computer and use it in GitHub Desktop.
PyVCF example using Cython and BGZIP compressed output
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
import cyvcf
import Bio.bgzf
vcf_input = sys.argv[1]
vcf_name = os.path.basename(vcf_input).split(".gz")[0]
bgzip_output = Bio.bgzf.BgzfWriter(filename="{0}.filtered.gz".format(vcf_name))
vcf_reader = cyvcf.Reader(filename=(vcf_input))
vcf_writer = cyvcf.Writer(bgzip_output,vcf_reader)
for record in vcf_reader:
if record.call_rate <= 0.95:
continue
if not record.is_snp:
continue
if record.num_het / float(len(record.samples)) > 0.02:
continue
samples_cov = filter(lambda x: x["DP"] >= 8,record.samples)
if len(samples_cov) / float(len(record.samples)) <= 0.5:
continue
genotypes = map(lambda x: x.gt_type, record.samples)
print "Coord: {0} {1} {2}: {3}".format(record.CHROM,record.POS,genotypes,(record.num_hom_alt*2 + record.num_het) / float((len(record.samples)-record.num_unknown)*2))
if (record.num_hom_alt*2 + record.num_het) / float((len(record.samples)-record.num_unknown)*2) < 0.05:
continue
if record.QUAL < 30:
continue
vcf_writer.write_record(record)
vcf_output.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment