Skip to content

Instantly share code, notes, and snippets.

@brianhill11
Last active August 24, 2018 16:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brianhill11/600df79015a1ff0964a2c820f6e276fb to your computer and use it in GitHub Desktop.
Save brianhill11/600df79015a1ff0964a2c820f6e276fb to your computer and use it in GitHub Desktop.
Finds haploid genotype calls and turns them into homozygous diploid calls
#/usr/bin/python
from pysam import VariantFile
import sys
import os
import argparse
parser = argparse.ArgumentParser()
bcf_in = VariantFile(sys.argv[1], 'r')
bcf_out = VariantFile(sys.argv[2], 'w', header=bcf_in.header)
samples = bcf_in.header.samples
chroms_to_fix = ['X', 'Y', 'M', 'MT', 'chrX', 'chrY', 'chrM', 'chrMT']
num_records_fixed = 0
for record in bcf_in.fetch():
if record.chrom in chroms_to_fix:
for sample in samples:
# get GT value
GT = record.samples[sample]['GT']
# if we have haploid value (i.e. GT doesn't look like X/X)
if len(GT) < 2:
# turn to diploid value
record.samples[sample]['GT'] = (GT[0], GT[0])
#print record.samples[sample]['GT']
num_records_fixed += 1
bcf_out.write(record)
else:
bcf_out.write(record)
bcf_in.close()
bcf_out.close()
print "Fixed", num_records_fixed, "records"
exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment