Skip to content

Instantly share code, notes, and snippets.

@uludag
Last active August 1, 2016 06:28
Show Gist options
  • Save uludag/18b883ee9834601752a4a739f26f70a3 to your computer and use it in GitHub Desktop.
Save uludag/18b883ee9834601752a4a739f26f70a3 to your computer and use it in GitHub Desktop.
Example script that queries UCSC DAS servers to retrieve 50 bases of sequence regions specified in an input file
#!/usr/bin/python3
# Example script that queries UCSC DAS servers to retrieve 50 bases of sequence regions
# specified in an input file (format: chr5 + 12345)
import csv
from urllib.request import FancyURLopener
urlOpener = FancyURLopener()
# TODO: how we specify strand in DAS URL queries?
def retrieveandsave(i, chrm, start, strand):
if(strand == '+'):
url = "http://genome.ucsc.edu/cgi-bin/das/hg19/dna?segment="
end = int(start) + 49
curl = url + chrm + ":" + start + "," + str(end)
# print(curl)
print(">hg19 " + chrm + ":" + start + "-" + str(end))
with urlOpener.open(curl) as dasxml:
i = 0
for line in dasxml:
i += 1
if i == 6:
print("%s" % line.decode('utf-8').strip())
return None
def read_hg19_coords(infile):
print("Reading %s " % infile)
i = 0
with open(infile, newline='') as csvfile:
reader = csv.reader(csvfile, delimiter=' ')
for row in reader:
i += 1
chrm = str(row[0]).strip()
# input file we had mixed delimiters
start = row[1].split('\t')[1].strip()
strand = row[1].split('\t')[0].strip()
retrieveandsave(i, chrm, start, strand)
print("-- %d hg19 coordinate lines processed" % i)
return None
def main(*args_):
opts = set(args_)
read_hg19_coords(opts.pop())
args = (
"./hg19-regions-test.csv"
)
main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment