Skip to content

Instantly share code, notes, and snippets.

@raonyguimaraes
Created September 12, 2017 12:50
Show Gist options
  • Save raonyguimaraes/796fd7ee0c4c541ace3e1ec40dc71361 to your computer and use it in GitHub Desktop.
Save raonyguimaraes/796fd7ee0c4c541ace3e1ec40dc71361 to your computer and use it in GitHub Desktop.
Parallel VCF Validation
from subprocess import run
from multiprocessing import Pool
# Genomic regions, each written as `contig:start-end`, that are mapped over
# validate() by the worker pool in the __main__ section. Chromosomes 1-22, X
# and Y are covered by one or more consecutive 50,000,000-position windows;
# MT, the GL000*.1 contigs and NC_007605 each get a single 1-50000000 window.
regions = ['1:1-50000000', '1:50000001-100000000', '1:100000001-150000000', '1:150000001-200000000', '1:200000001-250000000', '2:1-50000000', '2:50000001-100000000', '2:100000001-150000000', '2:150000001-200000000', '2:200000001-250000000', '3:1-50000000', '3:50000001-100000000', '3:100000001-150000000', '3:150000001-200000000', '4:1-50000000', '4:50000001-100000000', '4:100000001-150000000', '4:150000001-200000000', '5:1-50000000', '5:50000001-100000000', '5:100000001-150000000', '5:150000001-200000000', '6:1-50000000', '6:50000001-100000000', '6:100000001-150000000', '6:150000001-200000000', '7:1-50000000', '7:50000001-100000000', '7:100000001-150000000', '7:150000001-200000000', '8:1-50000000', '8:50000001-100000000', '8:100000001-150000000', '9:1-50000000', '9:50000001-100000000', '9:100000001-150000000', '10:1-50000000', '10:50000001-100000000', '10:100000001-150000000', '11:1-50000000', '11:50000001-100000000', '11:100000001-150000000', '12:1-50000000', '12:50000001-100000000', '12:100000001-150000000', '13:1-50000000', '13:50000001-100000000', '13:100000001-150000000', '14:1-50000000', '14:50000001-100000000', '14:100000001-150000000', '15:1-50000000', '15:50000001-100000000', '15:100000001-150000000', '16:1-50000000', '16:50000001-100000000', '17:1-50000000', '17:50000001-100000000', '18:1-50000000', '18:50000001-100000000', '19:1-50000000', '19:50000001-100000000', '20:1-50000000', '20:50000001-100000000', '21:1-50000000', '22:1-50000000', '22:50000001-100000000', 'X:1-50000000', 'X:50000001-100000000', 'X:100000001-150000000', 'X:150000001-200000000', 'Y:1-50000000', 'Y:50000001-100000000', 'MT:1-50000000', 'GL000207.1:1-50000000', 'GL000226.1:1-50000000', 'GL000229.1:1-50000000', 'GL000231.1:1-50000000', 'GL000210.1:1-50000000', 'GL000239.1:1-50000000', 'GL000235.1:1-50000000', 'GL000201.1:1-50000000', 'GL000247.1:1-50000000', 'GL000245.1:1-50000000', 'GL000197.1:1-50000000', 'GL000203.1:1-50000000', 'GL000246.1:1-50000000', 'GL000249.1:1-50000000', 
'GL000196.1:1-50000000', 'GL000248.1:1-50000000', 'GL000244.1:1-50000000', 'GL000238.1:1-50000000', 'GL000202.1:1-50000000', 'GL000234.1:1-50000000', 'GL000232.1:1-50000000', 'GL000206.1:1-50000000', 'GL000240.1:1-50000000', 'GL000236.1:1-50000000', 'GL000241.1:1-50000000', 'GL000243.1:1-50000000', 'GL000242.1:1-50000000', 'GL000230.1:1-50000000', 'GL000237.1:1-50000000', 'GL000233.1:1-50000000', 'GL000204.1:1-50000000', 'GL000198.1:1-50000000', 'GL000208.1:1-50000000', 'GL000191.1:1-50000000', 'GL000227.1:1-50000000', 'GL000228.1:1-50000000', 'GL000214.1:1-50000000', 'GL000221.1:1-50000000', 'GL000209.1:1-50000000', 'GL000218.1:1-50000000', 'GL000220.1:1-50000000', 'GL000213.1:1-50000000', 'GL000211.1:1-50000000', 'GL000199.1:1-50000000', 'GL000217.1:1-50000000', 'GL000216.1:1-50000000', 'GL000215.1:1-50000000', 'GL000205.1:1-50000000', 'GL000219.1:1-50000000', 'GL000224.1:1-50000000', 'GL000223.1:1-50000000', 'GL000195.1:1-50000000', 'GL000212.1:1-50000000', 'GL000222.1:1-50000000', 'GL000200.1:1-50000000', 'GL000193.1:1-50000000', 'GL000194.1:1-50000000', 'GL000225.1:1-50000000', 'GL000192.1:1-50000000', 'NC_007605:1-50000000']
def validate(region, vcf='sample.vcf.gz'):
    """Validate one genomic region of a VCF file with ``vcf-validator``.

    Runs ``bcftools view <vcf> <region>`` in a shell, pipes its output into
    ``vcf-validator`` and redirects the validator's standard output to
    ``<region>.validation.txt`` in the current working directory.

    Args:
        region: Region string handed to ``bcftools view`` (for example
            ``'1:1-50000000'``); also used as the prefix of the report file.
        vcf: Path of the VCF file to read. Defaults to ``'sample.vcf.gz'``,
            the file name the script originally hard-coded.

    Returns:
        None. The exit status of the shell pipeline is not inspected.
    """
    # Imported here so the function does not rely on a new file-level import.
    from shlex import quote

    # Quote every interpolated value: the command goes through a shell, so a
    # path or region containing spaces or metacharacters would otherwise be
    # split or interpreted. Plain `contig:start-end` strings pass unchanged.
    command = 'bcftools view %s %s | vcf-validator > %s' % (
        quote(vcf),
        quote(region),
        quote(region + '.validation.txt'),
    )
    run(command, shell=True)
if __name__ == '__main__':
    # Fan the regions out over four worker processes; map() blocks until
    # every region has been handled and returns the results in input order.
    with Pool(4) as p:
        # Prints the list of per-region return values of validate().
        print(p.map(validate, regions))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment