Skip to content

Instantly share code, notes, and snippets.

@raonyguimaraes
Last active February 12, 2018 18:45
Show Gist options
  • Save raonyguimaraes/37a58d8f43053dff857ed4bcff031088 to your computer and use it in GitHub Desktop.
Save raonyguimaraes/37a58d8f43053dff857ed4bcff031088 to your computer and use it in GitHub Desktop.
from subprocess import call, run
from multiprocessing import Pool
# Shell command that mirrors the 1000 Genomes 20130502 release directory
# (wget recursive mode, recursion depth limited to 1).
command = 'wget -r -l 1 ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/'

# Directory the VCFs and individuals.txt are read from.
# NOTE(review): presumably this is where wget deposits the mirror when the
# script is started from /storage3/1000genomes -- confirm before running.
path = '/storage3/1000genomes/ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502'


def read_individuals(ind_path):
    """Return the sample IDs listed one per line in *ind_path*.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing empty line cannot turn into a bcftools command with an empty
    sample name. The file is closed before returning.
    """
    with open(ind_path) as ind_file:
        stripped = (line.strip() for line in ind_file)
        return [name for name in stripped if name]


def build_vcf_paths(release_dir):
    """Map each chromosome label to its genotype VCF under *release_dir*.

    Keys are the ints 1..22 followed by the strings 'X', 'Y' and 'MT',
    in that order.
    """
    vcfs = {
        i: '{}/ALL.chr{}.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz'.format(release_dir, i)
        for i in range(1, 23)
    }
    vcfs['X'] = '{}/ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b.20130502.genotypes.vcf.gz'.format(release_dir)
    vcfs['Y'] = '{}/ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz'.format(release_dir)
    vcfs['MT'] = '{}/ALL.chrMT.phase3_callmom-v0_4.20130502.genotypes.vcf.gz'.format(release_dir)
    return vcfs


def build_commands(individuals, vcfs):
    """Return one ``bcftools view`` command per (individual, chromosome).

    Each command selects a single sample (``-s``) from one chromosome VCF
    and writes ``samples/<individual>.<chromosome>.vcf.gz``. Commands are
    ordered by individual first, then by the iteration order of *vcfs*.
    """
    commands = []
    for individual in individuals:
        for key, vcf in vcfs.items():
            commands.append(
                'bcftools view -s {} -O z -o samples/{}.{}.vcf.gz {}'.format(
                    individual, individual, key, vcf
                )
            )
    return commands


def extract_samples(command):
    """Run *command* through the shell and return its exit status.

    Returning the status (instead of ``None``) lets the caller of
    ``Pool.map`` see which commands failed.
    """
    return run(command, shell=True).returncode


def main():
    """Download the release, then split every VCF into per-sample files."""
    import os  # local import: only needed to create the output directory

    # Best-effort download, as before: the exit status is not checked.
    run(command, shell=True)

    individuals = read_individuals('{}/individuals.txt'.format(path))
    commands = build_commands(individuals, build_vcf_paths(path))

    # The bcftools commands write into ./samples; make sure it exists.
    os.makedirs('samples', exist_ok=True)

    with Pool(8) as p:
        exit_codes = p.map(extract_samples, commands)

    failed = [cmd for cmd, code in zip(commands, exit_codes) if code != 0]
    for cmd in failed:
        print('FAILED: {}'.format(cmd))
    print('{} of {} commands failed'.format(len(failed), len(commands)))


# Guard the entry point: with the spawn/forkserver start methods every Pool
# worker imports this module, and unguarded top-level code would re-run the
# download and try to start another Pool inside each worker.
if __name__ == '__main__':
    main()
def concat_sample_vcfs(command):
    """Print *command* to standard output and return ``None``.

    NOTE(review): despite the name, nothing is executed or concatenated
    here; this looks like a placeholder for a step that would merge the
    per-chromosome sample VCFs -- confirm the intended behavior.
    """
    print(command)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment