seandavi/liftoverData.py

## liftoverData.py
import sys
import os
import subprocess
import argparse
import tempfile
import csv

#liftoverChain = sys.argv[2]
#fname = sys.argv[1]

parser = argparse.ArgumentParser()
parser.add_argument('-l','--liftover',default="liftOver",
                    help="The location of the liftOver executable")
parser.add_argument('-s','--suffix',default="lifted",
                    help="Suffix to add to the original filename when making lifted-over file")
parser.add_argument("-d","--dyeswap",action='store_true',default=False)
parser.add_argument('liftoverfile',
                    help="Name of the unzipped liftover chain file")
parser.add_argument('fefile',
                    help="Name of the Agilent FE file to lift over")

opts = parser.parse_args()

def returnBed(splitLine):
    probeId=splitLine[probeIdCol]
    try:
        tmp1 = splitLine[sysNameCol].split(':')
        tmp2 = [int(x) for x in tmp1[1].split('-')]
        return((tmp1[0],tmp2[0],tmp2[1],probeId))
    except IndexError:
        return None

linedict = {}
headerlines = []

liftoverinfile = tempfile.NamedTemporaryFile(delete=False)
sysNameCol=None
probeIdCol=None
logRatioCol=None
with open(opts.fefile,'r') as f:
    for skip in range(10):
        headerlines.append(f.next())
    lastheader=headerlines[-1].strip().split("\t")
    for i in range(len(lastheader)):
        if(lastheader[i] == 'SystematicName'):
            sysNameCol=i
        if(lastheader[i] == 'ProbeUID'):
            probeIdCol=i
        if(lastheader[i] == 'LogRatio'):
            logRatioCol=i
    with open(liftoverinfile.name,'w') as liftin:
        for line in f:
            if(line.startswith("DATA")):
                sline = line.strip().split("\t")
                if(opts.dyeswap):
                    sline[logRatioCol]=str(-1*float(sline[logRatioCol]))
                linedict[sline[probeIdCol]]=sline
                bedline = returnBed(sline)
                if(bedline is not None):
                    liftin.write("\t".join([str(x) for x in bedline]) + "\n")
## Do liftover
liftoveroutfile = tempfile.NamedTemporaryFile(delete=False)
liftoverunmappedfile = tempfile.NamedTemporaryFile(delete=False)
subprocess.call(['liftOver',liftoverinfile.name,opts.liftoverfile,
                 liftoveroutfile.name,liftoverunmappedfile.name])

## Write out new file with lifted-over coordinates (and ignore all lines that
## did not map.
with open(os.path.basename(opts.fefile)+"."+opts.suffix,"w") as outfile:
    outfile.writelines(headerlines)
    with open(liftoveroutfile.name) as liftout:
        for row in liftout:
            row=row.strip().split("\t")
            lineinfo = linedict[row[3]]
            lineinfo[probeIdCol]="%s:%s-%s" % (tuple(row[0:3]))
            outfile.write("\t".join(lineinfo) + "\n")
os.unlink(liftoverinfile.name)
os.unlink(liftoveroutfile.name)
os.unlink(liftoverunmappedfile.name)
	import sys
	import os
	import subprocess
	import argparse
	import tempfile
	import csv

	#liftoverChain = sys.argv[2]
	#fname = sys.argv[1]

	parser = argparse.ArgumentParser()
	parser.add_argument('-l','--liftover',default="liftOver",
	help="The location of the liftOver executable")
	parser.add_argument('-s','--suffix',default="lifted",
	help="Suffix to add to the original filename when making lifted-over file")
	parser.add_argument("-d","--dyeswap",action='store_true',default=False)
	parser.add_argument('liftoverfile',
	help="Name of the unzipped liftover chain file")
	parser.add_argument('fefile',
	help="Name of the Agilent FE file to lift over")

	opts = parser.parse_args()

	def returnBed(splitLine):
	probeId=splitLine[probeIdCol]
	try:
	tmp1 = splitLine[sysNameCol].split(':')
	tmp2 = [int(x) for x in tmp1[1].split('-')]
	return((tmp1[0],tmp2[0],tmp2[1],probeId))
	except IndexError:
	return None

	linedict = {}
	headerlines = []

	liftoverinfile = tempfile.NamedTemporaryFile(delete=False)
	sysNameCol=None
	probeIdCol=None
	logRatioCol=None
	with open(opts.fefile,'r') as f:
	for skip in range(10):
	headerlines.append(f.next())
	lastheader=headerlines[-1].strip().split("\t")
	for i in range(len(lastheader)):
	if(lastheader[i] == 'SystematicName'):
	sysNameCol=i
	if(lastheader[i] == 'ProbeUID'):
	probeIdCol=i
	if(lastheader[i] == 'LogRatio'):
	logRatioCol=i
	with open(liftoverinfile.name,'w') as liftin:
	for line in f:
	if(line.startswith("DATA")):
	sline = line.strip().split("\t")
	if(opts.dyeswap):
	sline[logRatioCol]=str(-1*float(sline[logRatioCol]))
	linedict[sline[probeIdCol]]=sline
	bedline = returnBed(sline)
	if(bedline is not None):
	liftin.write("\t".join([str(x) for x in bedline]) + "\n")
	## Do liftover
	liftoveroutfile = tempfile.NamedTemporaryFile(delete=False)
	liftoverunmappedfile = tempfile.NamedTemporaryFile(delete=False)
	subprocess.call(['liftOver',liftoverinfile.name,opts.liftoverfile,
	liftoveroutfile.name,liftoverunmappedfile.name])

	## Write out new file with lifted-over coordinates (and ignore all lines that
	## did not map.
	with open(os.path.basename(opts.fefile)+"."+opts.suffix,"w") as outfile:
	outfile.writelines(headerlines)
	with open(liftoveroutfile.name) as liftout:
	for row in liftout:
	row=row.strip().split("\t")
	lineinfo = linedict[row[3]]
	lineinfo[probeIdCol]="%s:%s-%s" % (tuple(row[0:3]))
	outfile.write("\t".join(lineinfo) + "\n")
	os.unlink(liftoverinfile.name)
	os.unlink(liftoveroutfile.name)
	os.unlink(liftoverunmappedfile.name)