Skip to content

Instantly share code, notes, and snippets.

@danielecook
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielecook/332fe0103f745206bc50 to your computer and use it in GitHub Desktop.
Save danielecook/332fe0103f745206bc50 to your computer and use it in GitHub Desktop.
A lightweight wrapper for bcftools written in python (a work in progress)
import os, subprocess, uuid, re
import vcf.filters
class bcf(file):
def __init__(self, file):
# Start by storing basic information about the vcf/bcf
self.file = file
self.ops = []
self.header = subprocess.Popen("bcftools view -h %s" % self.file, shell=True, stdout=subprocess.PIPE).communicate()[0]
print self.header
# Samples
self.samples = filter(len,subprocess.Popen("bcftools query -l %s" % self.file, shell=True, stdout=subprocess.PIPE).communicate()[0].split("\n"))
# Meta Data
self.metadata = re.compile(r'''^##(?P<key>[^<#]+?)=(?P<val>[^<#]+$)''', re.M).findall(self.header)
# Contigs
self.contigs = [x.split(",") for x in re.compile(r'''^##contig=<(?P<data>.*)>''', re.M).findall(self.header)]
self.contigs = [{x.split("=")[0]:x.split("=")[1] for x in f} for f in self.contigs]
# Thanks to pyVCF for these patterns.
# Info
self.info_set = re.compile(r'''\#\#INFO=<
ID=(?P<id>[^,]+),
Number=(?P<number>-?\d+|\.|[AG]),
Type=(?P<type>Integer|Float|Flag|Character|String),
Description="(?P<desc>[^"]*)"
>''', re.VERBOSE).findall(self.header)
# Filter
self.filter_set = re.compile(r'''\#\#FILTER=<
ID=(?P<id>[^,]+),
Description="(?P<desc>[^"]*)"
>''', re.VERBOSE).findall(self.header)
self.format_pattern = re.compile(r'''\#\#FORMAT=<
ID=(?P<id>.+),
Number=(?P<number>-?\d+|\.|[AG]),
Type=(?P<type>.+),
Description="(?P<desc>.*)"
>''', re.VERBOSE).findall(self.header)
# Parse Header
def filename(self):
return self.file
def meta(self,term=None):
# Return all meta-data; term can be used to get a specific term
if term == None:
return self.metadata
else:
return [x[1] for x in self.metadata if x[0] == term][0]
def region(self,chrom,start,end):
print "bcftools filter -H -r %s:%s-%s %s" % (chrom, start, end, self.file)
self.ops += ["bcftools view -r %s:%s-%s %s" % (chrom, start, end, self.file)]
return self
def include(self,depth):
self.ops += ["bcftools filter --include 'DP<%s'" % (depth)]
return self
def out(self):
print self.ops
print ' | '.join(self.ops)
return (len,subprocess.Popen(' | '.join(self.ops + ["bcftools view -H"]), shell=True, stdout=subprocess.PIPE).communicate()[0].split("\n"))
x = bcf("vcf/mmp.vcf.gz")
print dir(x)
print x.meta(term="fileformat")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment