Skip to content

Instantly share code, notes, and snippets.

@lorenapr92
Created April 3, 2018 15:13
Show Gist options
  • Save lorenapr92/ce695fe80127b4b4a40a6d4521099571 to your computer and use it in GitHub Desktop.
Save lorenapr92/ce695fe80127b4b4a40a6d4521099571 to your computer and use it in GitHub Desktop.
Python code for parsing VCF files. It can be used to select VCF records by their POS value, for example to pull out the region of a specific gene.
import sys
# Path of the VCF file that the script passes to ParseV when it runs.
infile = 'chr7first300.vcf'
class ParseV:
    """Collect the records of a VCF file that fall inside a fixed region.

    The file is read once, in ``__init__``.  Meta-information lines
    (``##...``) are skipped, the column header line (``#CHROM ...``) is
    stored in ``samplestuff``, and every data line whose chromosome equals
    ``CHROM`` and whose position lies in ``[POS_START, POS_END]`` is stored
    in ``List``.

    Attributes:
        infile: Path of the VCF file that was parsed.
        samplestuff: The tab-separated fields of the ``#`` header line
            (empty list if the file has no such line).
        List: One entry per selected record, laid out as
            ``[CHROM, ID, POS, REF, ALT, QUAL, FILTER, INFO, FORMAT,
            samples]`` where every value is the raw string from the file
            and ``samples`` is the list of all columns after FORMAT.
    """

    # Chromosome and inclusive position window a record must match to be
    # kept.  Subclasses may override these to select another region.
    CHROM = '7'
    POS_START = 16692
    POS_END = 16742

    # Destination used by outfile() when no path is given.
    DEFAULT_OUTFILE = 'chr7foxp2.out'

    def __init__(self, infile, List=None, samplestuff=None):
        """Parse *infile* and keep the header plus the in-region records.

        Args:
            infile: Path of the VCF file to read.
            List: Accepted for backward compatibility; ignored.  The
                instance always starts from a fresh, empty list.
            samplestuff: Accepted for backward compatibility; ignored.

        Raises:
            OSError: If *infile* cannot be opened.
            IndexError: If a selected record has fewer than nine columns.
        """
        self.infile = infile
        # Fresh per-instance lists, so no state is shared between instances.
        self.List = []
        self.samplestuff = []

        # Open the path that was actually passed in (not a hard-coded name)
        # and stream it line by line instead of loading the whole file.
        with open(self.infile, 'r') as handle:
            for line in handle:
                line = line.rstrip()
                if line.startswith('##'):
                    continue
                if line.startswith('#'):
                    self.samplestuff = line.split('\t')
                    continue

                fields = line.split()
                # Compare the CHROM column itself so that contigs which
                # merely start with the same characters are not selected.
                if not fields or fields[0] != self.CHROM:
                    continue

                # Positions must be compared as numbers: as strings, '167'
                # or '1670000' would sort between '16692' and '16742'.
                try:
                    position = int(fields[1])
                except ValueError:
                    # A non-numeric POS cannot be inside the window.
                    continue
                if not self.POS_START <= position <= self.POS_END:
                    continue

                self.List.append([
                    fields[0],   # CHROM
                    fields[2],   # ID
                    fields[1],   # POS (kept as the original string)
                    fields[3],   # REF
                    fields[4],   # ALT
                    fields[5],   # QUAL
                    fields[6],   # FILTER
                    fields[7],   # INFO
                    fields[8],   # FORMAT
                    fields[9:],  # every per-sample column
                ])

    def outfile(self, path=None):
        """Write the header fields and the selected records to a file.

        The output is the ``str()`` of ``samplestuff``, a newline, and the
        ``str()`` of ``List``.

        Args:
            path: Destination file; defaults to ``DEFAULT_OUTFILE``.

        Raises:
            OSError: If the destination cannot be opened for writing.
        """
        if path is None:
            path = self.DEFAULT_OUTFILE
        # The context manager guarantees the file is flushed and closed.
        with open(path, 'w') as out:
            out.write(str(self.samplestuff) + '\n' + str(self.List))
# Build the parser (all reading and filtering happens in ParseV.__init__),
# then dump the collected header fields and records via outfile().
tired = ParseV(infile)
tired.outfile()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment