Last active
August 24, 2018 16:27
-
-
Save brianhill11/600df79015a1ff0964a2c820f6e276fb to your computer and use it in GitHub Desktop.
Finds haploid genotype calls and turns them into homozygous diploid calls
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/python | |
from pysam import VariantFile | |
import sys | |
import os | |
import argparse | |
parser = argparse.ArgumentParser() | |
bcf_in = VariantFile(sys.argv[1], 'r') | |
bcf_out = VariantFile(sys.argv[2], 'w', header=bcf_in.header) | |
samples = bcf_in.header.samples | |
chroms_to_fix = ['X', 'Y', 'M', 'MT', 'chrX', 'chrY', 'chrM', 'chrMT'] | |
num_records_fixed = 0 | |
for record in bcf_in.fetch(): | |
if record.chrom in chroms_to_fix: | |
for sample in samples: | |
# get GT value | |
GT = record.samples[sample]['GT'] | |
# if we have haploid value (i.e. GT doesn't look like X/X) | |
if len(GT) < 2: | |
# turn to diploid value | |
record.samples[sample]['GT'] = (GT[0], GT[0]) | |
#print record.samples[sample]['GT'] | |
num_records_fixed += 1 | |
bcf_out.write(record) | |
else: | |
bcf_out.write(record) | |
bcf_in.close() | |
bcf_out.close() | |
print "Fixed", num_records_fixed, "records" | |
exit(0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment