uludag/read-hg19-regions.py

## read-hg19-regions.py
#!/usr/bin/python3

# Example script that queries UCSC DAS servers to retrieve 50 bases of sequence regions
# specified in an input file (format: chr5 + 12345)

import csv
from urllib.request import FancyURLopener

urlOpener = FancyURLopener()


# TODO: how we specify strand in DAS URL queries?
def retrieveandsave(i, chrm, start, strand):
    if(strand == '+'):
        url = "http://genome.ucsc.edu/cgi-bin/das/hg19/dna?segment="
        end = int(start) + 49
        curl = url + chrm + ":" + start + "," + str(end)
        # print(curl)
        print(">hg19 " + chrm + ":" + start + "-" + str(end))
        with urlOpener.open(curl) as dasxml:
            i = 0
            for line in dasxml:
                i += 1
                if i == 6:
                    print("%s" % line.decode('utf-8').strip())

    return None


def read_hg19_coords(infile):
    print("Reading %s " % infile)
    i = 0
    with open(infile, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        for row in reader:
            i += 1
            chrm = str(row[0]).strip()
            # input file we had mixed delimiters
            start = row[1].split('\t')[1].strip()
            strand = row[1].split('\t')[0].strip()
            retrieveandsave(i, chrm, start, strand)

    print("-- %d hg19 coordinate lines processed" % i)

    return None


def main(*args_):
    opts = set(args_)

    read_hg19_coords(opts.pop())


args = (
    "./hg19-regions-test.csv"
)

main(args)
	#!/usr/bin/python3

	# Example script that queries UCSC DAS servers to retrieve 50 bases of sequence regions
	# specified in an input file (format: chr5 + 12345)

	import csv
	from urllib.request import FancyURLopener

	urlOpener = FancyURLopener()


	# TODO: how we specify strand in DAS URL queries?
	def retrieveandsave(i, chrm, start, strand):
	if(strand == '+'):
	url = "http://genome.ucsc.edu/cgi-bin/das/hg19/dna?segment="
	end = int(start) + 49
	curl = url + chrm + ":" + start + "," + str(end)
	# print(curl)
	print(">hg19 " + chrm + ":" + start + "-" + str(end))
	with urlOpener.open(curl) as dasxml:
	i = 0
	for line in dasxml:
	i += 1
	if i == 6:
	print("%s" % line.decode('utf-8').strip())

	return None


	def read_hg19_coords(infile):
	print("Reading %s " % infile)
	i = 0
	with open(infile, newline='') as csvfile:
	reader = csv.reader(csvfile, delimiter=' ')
	for row in reader:
	i += 1
	chrm = str(row[0]).strip()
	# input file we had mixed delimiters
	start = row[1].split('\t')[1].strip()
	strand = row[1].split('\t')[0].strip()
	retrieveandsave(i, chrm, start, strand)

	print("-- %d hg19 coordinate lines processed" % i)

	return None


	def main(*args_):
	opts = set(args_)

	read_hg19_coords(opts.pop())


	args = (
	"./hg19-regions-test.csv"
	)

	main(args)