Skip to content

Instantly share code, notes, and snippets.

@0x3bfc
Created January 14, 2016 15:47
Show Gist options
  • Save 0x3bfc/4257ea181c155d7e489b to your computer and use it in GitHub Desktop.
Save 0x3bfc/4257ea181c155d7e489b to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import os
import sys
import subprocess
"""
The list_dirs function below is adapted from this Stack Overflow question:
http://stackoverflow.com/questions/23091077/get-all-nested-directories-in-a-folder-python
"""
def list_dirs(path):
    """Yield every leaf directory (one with no sub-directories) below *path*.

    The tree is walked bottom-up, so each directory is visited before its
    parent. A directory that itself contains sub-directories is recorded in
    ``has_children`` when it is visited; when its parent is processed later,
    recorded children are skipped and only the leaves are yielded.

    Args:
        path: Root directory to scan; may be relative or absolute.

    Yields:
        Paths built as ``os.path.join(root, name)`` for each leaf directory.
        Nothing is yielded when *path* has no sub-directories at all.
    """
    has_children = set()
    for root, dirs, _files in os.walk(path, topdown=False):
        if not dirs:
            continue
        # Recording only ``root`` is sufficient: every ancestor records itself
        # when its own turn comes in the bottom-up walk. The previous
        # ``while parent: parent = os.path.dirname(parent)`` climb never
        # terminated for absolute paths, because dirname("/") is "/".
        has_children.add(root)
        for name in dirs:
            candidate = os.path.join(root, name)
            if candidate not in has_children:
                yield candidate
def execute(command, option=None):
    """Run *command* through the shell and return its captured output.

    Args:
        command: Shell command line (executed with ``shell=True``).
        option: Kept for backward compatibility. ``'wait'`` used to request an
            explicit wait; the call now always waits for the process to exit.

    Returns:
        The command's stdout, a newline, then its stderr, as the byte string
        type returned by the pipes (``str`` on Python 2, ``bytes`` on
        Python 3).
    """
    pipe = subprocess.PIPE
    proc = subprocess.Popen(command, stdout=pipe, stderr=pipe, shell=True)
    # communicate() drains both pipes concurrently and then reaps the process.
    # Calling wait() first, or reading stdout to EOF before touching stderr,
    # can deadlock once the child fills the other pipe's OS buffer.
    out, err = proc.communicate()
    return out + b"\n" + err
def vcf_merge(base_dir, output):
    """Merge every ``.vcf`` file in the leaf directories of *base_dir*.

    Steps:
      1. Create a temporary ``merge_dir`` next to this script.
      2. For each directory yielded by ``list_dirs(base_dir)``, compress each
         ``*.vcf`` file with ``bgzip`` (this replaces the file with
         ``<name>.gz`` in place), index it with ``tabix`` and copy both the
         archive and its index into ``merge_dir`` under a
         ``<directory number>-<name>`` prefix so equal file names from
         different directories do not collide.
      3. Run ``vcf-merge`` over the collected archives, compress and index the
         result, and move ``<output>.gz`` / ``<output>.gz.tbi`` into the
         script's directory.
      4. Remove ``merge_dir``.

    Args:
        base_dir: Directory whose leaf sub-directories hold the VCF files.
        output: File name for the merged VCF (created inside ``merge_dir``
            before being compressed and moved).

    Requires the external ``bgzip``, ``tabix`` and ``vcf-merge`` programs.
    """
    # Local import so the block needs no new file-level import;
    # ``pipes.quote`` is the Python 2 spelling of ``shlex.quote``.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    merge_dir = os.path.join(cur_dir, "merge_dir")
    execute("mkdir -p %s" % quote(merge_dir))

    # The counter advances once per directory, matching the original numbering.
    for index, cdir in enumerate(list_dirs(base_dir), 1):
        # list the directory as-is: prefixing './' broke absolute paths.
        for filename in os.listdir(cdir):
            if not filename.endswith('.vcf'):
                continue
            archive = os.path.join(cdir, filename) + '.gz'
            target = os.path.join(merge_dir, '%s-%s.gz' % (index, filename))
            # Every path is shell-quoted so spaces or metacharacters in
            # directory/file names cannot break (or inject into) the command.
            execute('bgzip %s && tabix -p vcf %s && cp %s %s && cp %s %s' % (
                quote(os.path.join(cdir, filename)),
                quote(archive),
                quote(archive), quote(target),
                quote(archive + '.tbi'), quote(target + '.tbi'),
            ))

    # Sorted so the sample/column order of the merged file is reproducible.
    archives = sorted(
        name for name in os.listdir(merge_dir) if name.endswith('.gz')
    )
    if not archives:
        # Nothing to merge: avoid running vcf-merge with no inputs and
        # producing a bogus compressed output.
        execute("rm -rf %s" % quote(merge_dir))
        sys.stderr.write("No .vcf files found under %s\n" % base_dir)
        return

    # merge all vcf files into one large vcf file
    execute("cd %s && vcf-merge %s > %s" % (
        quote(merge_dir),
        ' '.join(quote(name) for name in archives),
        quote(output),
    ))
    merged_gz = output + '.gz'
    execute("cd %s && bgzip %s && tabix -p vcf %s && mv %s ../ && mv %s ../" % (
        quote(merge_dir),
        quote(output),
        quote(merged_gz),
        quote(merged_gz),
        quote(merged_gz + '.tbi'),
    ))
    # remove tmp merge directory
    execute("rm -rf %s" % quote(merge_dir))
def help():
    """Return the prerequisites note and usage text for the command line."""
    text_lines = (
        "",
        "Note:",
        "- Make sure that you have installed vcftools",
        "$ sudo apt-get install vcftools",
        "- Please note that you have to move your current directory",
        "to the directory of vcf files",
        "Usage:",
        "./vcf-merger . <output file>",
        "",
    )
    # Leading/trailing empty entries reproduce the blank first and last lines.
    return "\n".join(text_lines)
if __name__ == "__main__":
    # Command line: <script> <base_dir> [<output file>]
    args = sys.argv
    if len(args) < 2:
        # No directory given: show the usage text and exit normally.
        print(help())
        sys.exit(0)

    base_dir = args[1]
    # The output name is optional; previously a missing second argument raised
    # IndexError, so the "merged.vcf" default could never take effect.
    output = args[2] if len(args) > 2 else "merged.vcf"
    try:
        # call vcf merge
        vcf_merge(base_dir, output)
    except Exception as err:
        # Report the real failure instead of hiding it behind the help text
        # (the former bare ``except:`` also swallowed KeyboardInterrupt).
        sys.stderr.write("vcf merge failed: %s\n" % err)
        sys.exit(-1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment