Skip to content

Instantly share code, notes, and snippets.

@arundurvasula
Created April 26, 2016 09:21
Show Gist options
  • Save arundurvasula/3e3241ab4ae8877781654a4af4f86d4f to your computer and use it in GitHub Desktop.
Save arundurvasula/3e3241ab4ae8877781654a4af4f86d4f to your computer and use it in GitHub Desktop.
import gzip
import csv
import argparse
import sys
def extract_chrom_x(vcf_in, out_name, gzipped=False):
    """Write the chromosome 23 records of a VCF to ``out_name``, relabelled ``X``.

    Behaviour, line by line of the input:

    * ``##`` meta-information lines are dropped.
    * The ``#CHROM`` column-header line is copied, preceded by a single
      ``##fileformat=VCFv4.0`` line.
    * Records whose first column is exactly ``'23'`` are copied with that
      column replaced by ``'X'``; all other columns are left untouched.
    * Every other line (any other chromosome, blank lines) is skipped.

    Args:
        vcf_in: Path of the tab-separated input VCF.
        out_name: Path of the output file; it is created or overwritten.
        gzipped: When true, ``vcf_in`` is read through ``gzip``.
    """
    opener = gzip.open if gzipped else open
    # Text mode is explicit because gzip.open defaults to binary, which
    # csv.reader rejects on Python 3.  newline='' is what the csv module
    # asks for on both the reading and the writing side.
    with opener(vcf_in, 'rt', newline='') as src, \
            open(out_name, 'w', newline='') as dst:
        reader = csv.reader(src, delimiter='\t')
        writer = csv.writer(dst, delimiter='\t', lineterminator="\n")
        for row in reader:
            if not row:
                # Blank line (typically a trailing newline): nothing to copy.
                continue
            first = row[0]
            # Only the start of the line decides whether it is a header, so
            # a record that merely contains '##' in some field is kept.
            if first.startswith('##'):
                continue
            if first.startswith('#CHROM'):
                writer.writerow(["##fileformat=VCFv4.0"])
                writer.writerow(row)
                continue
            # Chromosome 24 (Y) is deliberately not handled: that branch was
            # disabled in the original script.
            if first != '23':
                continue
            writer.writerow(['X'] + row[1:])


def main(argv=None):
    """Parse the command line and run :func:`extract_chrom_x`.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    # NOTE(review): the description and --vcf help talk about a
    # haploid-to-diploid conversion, but this script only keeps chromosome
    # 23 records and renames them to X.  Text kept as-is; confirm wording.
    parser = argparse.ArgumentParser(description="A script that makes a haploid vcf diploid. Expects a whole genome VCF.")
    parser.add_argument("-v", "--vcf", action="store", required=True, help="Input VCF file. Should be a multisample, whole genome VCF from Shore with haploid calls.")
    parser.add_argument("-o", "--out", action="store", required=True, help="Output filename")
    parser.add_argument("-g", "--gzip", action="store_true", required=False, help="Set if the VCF is gzipped.")
    args = parser.parse_args(argv)
    extract_chrom_x(args.vcf, args.out, args.gzip)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment