Skip to content

Instantly share code, notes, and snippets.

@lorenapr92
Last active April 12, 2018 16:40
Show Gist options
  • Save lorenapr92/a0adb0ad326a006f77ee5a1403be04bf to your computer and use it in GitHub Desktop.
Save lorenapr92/a0adb0ad326a006f77ee5a1403be04bf to your computer and use it in GitHub Desktop.
# Extracts the SNPs within the CNTNAP2 gene on chromosome 7 from the 1000 Genomes Project phase 3 VCF (all subjects, genome build 37) and writes them to a new VCF file.
#Jennifer Owen
#Stephanie Perez Robles
#Lisa Dimusto
import sys
# Path of the input VCF handed to ParseV at the bottom of this script
# (per the header comment: 1000 Genomes phase 3, chromosome 7 genotypes).
infile = 'ALL.chr7.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf'
class ParseV:
    """Extract the VCF records for one genomic region from a VCF file.

    On construction the input file is streamed line by line:

    * ``##`` meta-information lines are skipped (they are not kept, so
      they are not reproduced by :meth:`outfile`),
    * the ``#CHROM ...`` column header line is split on tabs and stored in
      ``samplestuff``,
    * data lines on chromosome ``chrom`` whose POS lies in
      ``start``..``end`` (inclusive) are appended to ``List``.

    Each entry of ``List`` is
    ``[CHROM, ID, POS, REF, ALT, QUAL, FILTER, INFO, FORMAT, samples]``.
    Every item is a string except ``samples``, which is the list of the
    remaining per-sample columns.  Note that ID precedes POS in this
    layout; :meth:`outfile` restores the file's column order on writing.
    """

    def __init__(self, infile, List=None, samplestuff=None, *,
                 chrom='7', start=146116035, end=148420998):
        """Read ``infile`` and collect the records inside the region.

        Args:
            infile: Path of the VCF file to read.
            List: Accepted for backward compatibility and ignored; the
                ``List`` attribute always starts out empty.
            samplestuff: Accepted for backward compatibility and ignored;
                the ``samplestuff`` attribute always starts out empty.
            chrom: Exact value of the CHROM column to keep.
            start: Lowest POS to keep (inclusive).  The default is the
                lower bound this script uses for its CNTNAP2 extraction.
            end: Highest POS to keep (inclusive).  The default is the
                upper bound this script uses for its CNTNAP2 extraction.

        Raises:
            OSError: If ``infile`` cannot be opened.
            IndexError: If a record inside the region has fewer than the
                nine fixed columns.
        """
        self.infile = infile
        # Fresh per-instance lists, so no state is shared between instances.
        self.List = []
        self.samplestuff = []

        # Open the path that was passed in, and iterate over the handle
        # itself so the (potentially very large) file is never held in
        # memory all at once.
        with open(infile, 'r') as handle:
            for line in handle:
                line = line.rstrip()
                if line.startswith('##'):
                    continue
                if line.startswith('#'):
                    self.samplestuff = line.split('\t')
                    continue

                fields = line.split()
                if len(fields) < 2 or fields[0] != chrom:
                    continue

                # Compare positions numerically: comparing the digit
                # strings would wrongly accept short positions such as
                # '147', which sorts between the two bounds.
                try:
                    position = int(fields[1])
                except ValueError:
                    continue  # not a valid POS, so it cannot be in range
                if not start <= position <= end:
                    continue

                self.List.append([
                    fields[0],   # CHROM
                    fields[2],   # ID
                    fields[1],   # POS (kept as the original string)
                    fields[3],   # REF
                    fields[4],   # ALT
                    fields[5],   # QUAL
                    fields[6],   # FILTER
                    fields[7],   # INFO
                    fields[8],   # FORMAT
                    fields[9:],  # per-sample genotype columns
                ])

    def outfile(self, path='chr7CATNAP.vcf'):
        """Write the header line and the collected records to ``path``.

        The output is tab-delimited: first the column header line (when
        one was found in the input), then one line per record with the
        columns in the order CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO,
        FORMAT followed by the sample columns.

        Args:
            path: Destination file; it is overwritten if it exists.
        """
        # The context manager guarantees the file is flushed and closed.
        with open(path, 'w') as out:
            if self.samplestuff:
                out.write('\t'.join(map(str, self.samplestuff)) + '\n')
            for record in self.List:
                chrom, snp_id, position = record[:3]
                # Entries store ID before POS; swap them back for output.
                columns = ([chrom, position, snp_id]
                           + list(record[3:9])
                           + list(record[9]))
                out.write('\t'.join(map(str, columns)) + '\n')
if __name__ == '__main__':
    # Run the extraction only when this file is executed as a script, so
    # importing the module does not trigger a scan of the input VCF.
    tired = ParseV(infile)
    tired.outfile()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment