Skip to content

Instantly share code, notes, and snippets.

@chapmanb
Created November 16, 2010 20:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chapmanb/702489 to your computer and use it in GitHub Desktop.
Save chapmanb/702489 to your computer and use it in GitHub Desktop.
"""Retrieve sequences from a MAF file for a genomic coordinate.
Usage:
maf_retrieve_region.py <maf file> <organism> <chromosome> <start> <end>
"""
import os
import sys
import subprocess
from bx.align import maf
def main(in_file, org, chrom, start, end):
index_file = in_file + ".index"
if not os.path.exists(index_file):
build_index(in_file)
region_name = "%s.%s" % (org, chrom)
start = int(start)
end = int(end)
index = maf.Indexed(in_file, index_file)
for align in index.get(region_name, start, end):
region_align = align.slice_by_component(region_name, start, end)
seqs_by_org = dict()
for component in region_align.components:
seqs_by_org[component.src] = component.text
print seqs_by_org
def build_index(in_file):
cl = ["maf_build_index.py", in_file]
subprocess.check_call(cl)
if __name__ == "__main__":
main(*sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment