Last active
August 29, 2015 14:03
-
-
Save danielecook/332fe0103f745206bc50 to your computer and use it in GitHub Desktop.
A lightweight wrapper for bcftools written in python (a work in progress)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, subprocess, uuid, re | |
import vcf.filters | |
class bcf(file): | |
def __init__(self, file): | |
# Start by storing basic information about the vcf/bcf | |
self.file = file | |
self.ops = [] | |
self.header = subprocess.Popen("bcftools view -h %s" % self.file, shell=True, stdout=subprocess.PIPE).communicate()[0] | |
print self.header | |
# Samples | |
self.samples = filter(len,subprocess.Popen("bcftools query -l %s" % self.file, shell=True, stdout=subprocess.PIPE).communicate()[0].split("\n")) | |
# Meta Data | |
self.metadata = re.compile(r'''^##(?P<key>[^<#]+?)=(?P<val>[^<#]+$)''', re.M).findall(self.header) | |
# Contigs | |
self.contigs = [x.split(",") for x in re.compile(r'''^##contig=<(?P<data>.*)>''', re.M).findall(self.header)] | |
self.contigs = [{x.split("=")[0]:x.split("=")[1] for x in f} for f in self.contigs] | |
# Thanks to pyVCF for these patterns. | |
# Info | |
self.info_set = re.compile(r'''\#\#INFO=< | |
ID=(?P<id>[^,]+), | |
Number=(?P<number>-?\d+|\.|[AG]), | |
Type=(?P<type>Integer|Float|Flag|Character|String), | |
Description="(?P<desc>[^"]*)" | |
>''', re.VERBOSE).findall(self.header) | |
# Filter | |
self.filter_set = re.compile(r'''\#\#FILTER=< | |
ID=(?P<id>[^,]+), | |
Description="(?P<desc>[^"]*)" | |
>''', re.VERBOSE).findall(self.header) | |
self.format_pattern = re.compile(r'''\#\#FORMAT=< | |
ID=(?P<id>.+), | |
Number=(?P<number>-?\d+|\.|[AG]), | |
Type=(?P<type>.+), | |
Description="(?P<desc>.*)" | |
>''', re.VERBOSE).findall(self.header) | |
# Parse Header | |
def filename(self): | |
return self.file | |
def meta(self,term=None): | |
# Return all meta-data; term can be used to get a specific term | |
if term == None: | |
return self.metadata | |
else: | |
return [x[1] for x in self.metadata if x[0] == term][0] | |
def region(self,chrom,start,end): | |
print "bcftools filter -H -r %s:%s-%s %s" % (chrom, start, end, self.file) | |
self.ops += ["bcftools view -r %s:%s-%s %s" % (chrom, start, end, self.file)] | |
return self | |
def include(self,depth): | |
self.ops += ["bcftools filter --include 'DP<%s'" % (depth)] | |
return self | |
def out(self): | |
print self.ops | |
print ' | '.join(self.ops) | |
return (len,subprocess.Popen(' | '.join(self.ops + ["bcftools view -H"]), shell=True, stdout=subprocess.PIPE).communicate()[0].split("\n")) | |
x = bcf("vcf/mmp.vcf.gz") | |
print dir(x) | |
print x.meta(term="fileformat") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment