Last active
February 12, 2018 18:45
-
-
Save raonyguimaraes/37a58d8f43053dff857ed4bcff031088 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from multiprocessing import Pool
from subprocess import call, run
# Recursively fetch (depth 1) the 20130502 release directory from the
# 1000 Genomes FTP server. wget's exit status is not checked.
command = 'wget -r -l 1 ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/'
run(command, shell=True)

# Directory the per-chromosome VCFs and the sample list are read from.
# NOTE(review): wget mirrors relative to the current working directory, so
# this presumably expects the script to be run from /storage3/1000genomes.
path = '/storage3/1000genomes/ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502'

# Sample names, one per line. The file is closed as soon as it has been read,
# and blank lines are skipped so no empty name is passed to `bcftools view -s`.
with open('{}/individuals.txt'.format(path)) as ind_file:
    individuals = [line.strip() for line in ind_file if line.strip()]

# Chromosomes 1..22; X, Y and MT are added below because their file names
# use different suffixes.
lista = list(range(1, 23))

# Map chromosome key (int 1..22, or 'X' / 'Y' / 'MT') -> path of its VCF.
vcfs = {
    i: '{}/ALL.chr{}.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz'.format(path, i)
    for i in lista
}
vcfs['X'] = '{}/ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b.20130502.genotypes.vcf.gz'.format(path)
vcfs['Y'] = '{}/ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz'.format(path)
vcfs['MT'] = '{}/ALL.chrMT.phase3_callmom-v0_4.20130502.genotypes.vcf.gz'.format(path)

# Every command below writes into samples/, so make sure it exists.
os.makedirs('samples', exist_ok=True)

# One bcftools invocation per (sample, chromosome) pair, grouped by sample.
commands = [
    'bcftools view -s {} -O z -o samples/{}.{}.vcf.gz {}'.format(individual, individual, key, vcf)
    for individual in individuals
    for key, vcf in vcfs.items()
]
def extract_samples(command):
    """Run one shell command and report how it exited.

    Args:
        command: Command line as a single string; it is executed through
            the shell.

    Returns:
        The command's exit code (0 means success), so that a caller mapping
        this function over many commands can tell which ones failed instead
        of receiving only ``None``.
    """
    completed = run(command, shell=True)
    return completed.returncode
# Only start the pool when the file is executed as a script. Under the
# "spawn" start method each worker re-imports this module, and without the
# guard every worker would try to create a pool of its own.
if __name__ == '__main__':
    # Run the commands on 8 worker processes; map() blocks until all of them
    # are done and returns one result per command, in input order.
    with Pool(8) as p:
        print(p.map(extract_samples, commands))
def concat_sample_vcfs(command):
    """Print ``command`` to standard output.

    The command is only displayed, not executed.

    Args:
        command: Value to print.
    """
    print(command)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment