lennax/CoordinateMapper.py

## CoordinateMapper.py
# CoordinateMapper -- map between genomic, cds, and protein coordinates
# AUTHOR: Reece Hart <reecehart@gmail.com>
# Modifications: Lenna X. Peterson <arklenna@gmail.com>
# LICENSE: BioPython

# Examples:
# AB026906.1:g.7872G>T
# AB026906.1:c.274G>T
# BA...:p.Asp92Tyr
#
# All refer to congruent variants. A coordinate mapper is needed to
# translate between at least these three coordinate frames.  The mapper
# should deal with specialized syntax for splicing and UTR (e.g., 88+1,
# 89-2, -14, *46). In addition, care should be taken to ensure consistent 0-
# or 1-based numbering (0 internally, as with Python/BioPython and
# Perl/BioPerl).
#
# g   -----------00000000000-----1111111----------22222222222*222-----
#                s0        e0    s1    e1         s2            e2
#                \         \     |     |          /             /
#                 +--+      +--+ |     | +-------+     +-------+
#                     \         \|     |/             /
# c                   00000000000111111122222222222*222
#                               c0     c1             c2
#                     aaabbbcccdddeeefffggghhhiiijj*kkk
# p                     A  B  C  D  E  F  G  H  I  J  K
#                       p0 p1 p2 ...                  pn
#
#
# TODO:
# * g2c returns index + extended syntax
# * c2g accepts index + extended syntax

from functools import wraps
from math import floor
import warnings

import MapPositions  # required for pos_factory decorator
from MapPositions import CDSPositionError, ProteinPositionError
from MapPositions import GenomePosition, CDSPosition, ProteinPosition

# FIXME change to absolute import once CompoundLocation is merged to master
import SeqFeature
from SeqFeature import FeatureLocation


class CoordinateMapper(object):

    def __init__(self, selist=None, seqrecord=None):
        # Note: seqrecord parameter will take SeqRecord or SeqFeature
        if seqrecord is not None:
            self._exons = self._CDS_from_Seq(seqrecord)
        else:
            self._exons = self._exons_from_list(selist)

    @staticmethod
    def _CDS_from_Seq(seq):
        if hasattr(seq, 'features'):
            # generator
            cdsf = (f for f in seq.features if f.type == 'CDS').next()
            return cdsf.location
        if hasattr(seq, 'location'):
            if seq.type != 'CDS':
                # XXX should this be a fatal error?
                warnings.warn("Provided SeqFeature should be CDS",
                              RuntimeWarning)
            return seq.location

    @staticmethod
    def _exons_from_list(selist):
        return sum([FeatureLocation(*pair) for pair in selist])

    @property  # read-only
    def exons(self):
        return self._exons

    @property  # read-only
    def exon_list(self):
        return list(self.exons)

    def pos_factory(pos_type):
        """
        Convert string or int pos to appropriate Position object
        Argument to decorator: position type (Genome, CDS, Protein)
        """
        def wrapper(fn):
            @wraps(fn)
            def make_pos(self, pos, dialect=None):
                # retrieve Position object
                _obj = getattr(MapPositions, pos_type + "Position")
                # if pos is not already Position object, make it one
                if not isinstance(pos, _obj):
                    # no dialect: use default constructor
                    if dialect is None:
                        pos = _obj(pos)
                    # use dialect alternate constructor
                    else:
                        _init = getattr(_obj, "from_%s" % dialect.lower())
                        pos = _init(pos)
                # call function with new pos
                return fn(self, pos, dialect)
            return make_pos
        return wrapper

    @pos_factory("Genome")
    def g2c(self, gpos, dialect=None):
        "Convert integer from genomic to CDS coordinates"

        gpos = int(gpos)
        fmts = CDSPosition.fmt_dict

        def _simple_g2c(g):
            return CDSPosition(self.exon_list.index(g))

        # within exon
        if gpos in self.exons:
            return _simple_g2c(gpos)
        # before CDS
        if gpos < self.exons.start:
            return CDSPosition(fmts['post-CDS'] % (gpos - self.exons.start))
        # after CDS
        if gpos >= self.exons.end:
            return CDSPosition(fmts['pre-CDS'] % (gpos - self.exons.end + 1))
        # intron
        # set start of first intron
        prev_end = self.exons.parts[0].end
        for part in self.exons.parts[1:]:
            # not in this intron
            if gpos > part.start:
                prev_end = part.end
                continue
            len_intron = part.start - prev_end
            # second half (exclusive) of intron
            #       offset     >     middle of intron
            if gpos - prev_end > floor(len_intron / 2.0):
                anchor = _simple_g2c(part.start)
                offset = gpos - part.start
                assert offset < 0
            # first half (inclusive) of intron
            else:
                anchor = _simple_g2c(prev_end - 1)
                offset = gpos - prev_end + 1
                assert offset > 0
            assert self.check_intron(anchor, offset)
            return CDSPosition(fmts['intron'] % (anchor, offset))

        assert False  # function should return for every integer

    # TODO verify that values of offset are sane
    def check_intron(self, anchor, offset):
        """
        Verify that CDS-relative intron position is valid with given exons.
        """
        for exon in self.exons.parts:
            start = int(self.g2c(exon.start))
            if anchor == start:
                if offset > 0:
                    raise CDSPositionError("Invalid intron: offset from exon start must be negative.")
                return True
            end = int(self.g2c(exon.end - 1))
            if anchor == end:
                if offset < 0:
                    raise CDSPositionError("Invalid intron: offset from exon end must be positive.")
                return True
        raise CDSPositionError("Invalid intron: anchor must be start or end of an exon.")

    @pos_factory("CDS")
    def c2g(self, cpos, dialect=None):
        "Convert from CDS to genomic coordinates"

        if cpos.pos_type == "pre-CDS":
            return GenomePosition(self.exons.start + cpos.offset)
        elif cpos.pos_type == "post-CDS":
            return GenomePosition(self.exons.end - 1 + cpos.offset)

        g_anchor = self.exon_list[cpos.anchor]
        if cpos.pos_type == "exon":
            return GenomePosition(g_anchor)
        elif cpos.pos_type == "intron":
            offset = cpos.offset
            if self.check_intron(cpos.anchor, offset):
                return GenomePosition(g_anchor + offset)

        assert False  # all positions should be one of the 4 types

    @pos_factory("CDS")
    def c2p(self, cpos, dialect=None):
        """Convert from CDS to protein coordinates"""
        try:
            cpos = int(cpos)
        except TypeError:
            raise CDSPositionError("'%s' does not correspond to a protein"
                                   % repr(cpos))
        return ProteinPosition(int(cpos / 3.0))

    @pos_factory("Genome")
    def g2p(self, gpos, dialect=None):
        "Convert integer from genomic to protein coordinates"
        return self.c2p(self.g2c(gpos))

    @pos_factory("Protein")
    def p2c(self, ppos, dialect=None):
        "Convert integer from protein coordinate to CDS closed range"
        try:
            ppos = int(ppos)
        except TypeError:
            return None
        if ppos < 0:
            raise ProteinPositionError("'%s' should not be negative")
        # FIXME is CDS guaranteed to have len % 3 == 0?
        first_base = (ppos) * 3
        last_base = first_base + 2
        if last_base > len(self.exons):
            raise ProteinPositionError("'%s' is too large")
        return (CDSPosition(first_base), CDSPosition(last_base))

    @pos_factory("Protein")
    def p2g(self, ppos, dialect=None):
        "Convert integer from protein to genomic coordinates"
        #first, last = self.p2c(ppos)
        #return (self.c2g(first), self.c2g(last))
        return tuple(self.c2g(x) for x in self.p2c(ppos))


if __name__ == '__main__':
    # The following exons are from AB026906.1.
    # test case: g.7872 -> c.274 -> p.92
    # N.B. These are python counting coordinates (0-based)
    exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]

    def test_list(g_range):
        cm = CoordinateMapper(exons)
        for g1 in g_range:
            print g1,
            c1 = cm.g2c(g1)
            print c1,
            p1 = cm.c2p(c1)
            print p1,
            if p1:
                c2 = cm.p2c(p1)[0]
            else:
                c2 = c1
            print ' | ', c2,
            g2 = cm.c2g(c2)
            print g2

        print cm.g2p(7872)
        print cm.p2g(92)

    def test_simple():
        simple_exons = SeqFeature.SeqFeature(sum([FeatureLocation(2, 4), FeatureLocation(8, 11), FeatureLocation(16, 18)]), type="CDS")
        cm = CoordinateMapper(seqrecord=simple_exons)
        print cm.exons
        print list(cm.exons)
        print range(len(cm.exons))
        for i in range(len(cm.exons)):
            print "%3s" % cm.c2g(i),
        print
        for i in xrange(20):
            print "%3d" % i,
        print
        for i in xrange(20):
            print "%3s" % cm.g2c(i),
        print

    r1 = (7870, 7871, 7872, 7873, 7874)
    r2 = (5807, 5808, 5809,
          6871, 6872, 6873, 6874, 6875,
          7766, 7767, 7768, 7769,
          13784, 13785, 13786)
    test_list(r1)
    #test_list(r2)
    print
    test_simple()

## MapPositions.py
from copy import copy
import re


class InvalidPositionError(ValueError):
    "Exception for bad coordinates"


class GenomePositionError(InvalidPositionError):
    "Exception for bad genome coordinates"


class CDSPositionError(InvalidPositionError):
    "Exception for bad CDS coordinates"


class ProteinPositionError(InvalidPositionError):
    "Exception for bad protein coordinates"


class MapPosition(object):
    def __init__(self, pos, index=None, **kwargs):
        self.index = index
        if pos and self.index:
            pos -= index
        self.pos = pos

    # TODO classmethod from, like to

    @classmethod
    def from_hgvs(cls, hgvs_pos, strand=None):
        return cls(hgvs_pos, index=1, strand=strand, post_fmt="*%d")

    @classmethod
    def from_genbank(cls, gbk_pos, strand=None):
        return cls(gbk_pos, index=1, strand=strand)

    def to(self, dialect):
        if dialect is None:
            return self.to_str()
        try:
            to_func = getattr(self, "to_" + dialect.lower())
        except AttributeError:
            raise ValueError("Dialect '%s' not valid" % dialect)
        return to_func()

    def to_hgvs(self):
        if self.pos or self.pos == 0:
            return self.pos + 1
        return None

    def to_genbank(self):
        if self.pos or self.pos == 0:
            return self.pos + 1
        return None

    def to_str(self):
        return self.pos

    def __str__(self):
        return str(self.to_str())

    def __int__(self):
        return self.pos

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, self.to_str())

    def __eq__(self, other):
        if isinstance(other, int):
            return self.pos == other
        return self.pos == other.pos


class GenomePosition(MapPosition):
    def __init__(self, gpos, index=None, strand=None, **kwargs):
        # FIXME if index is string, error may be raised
        if gpos < (index or 0):
            raise GenomePositionError("Genome position cannot be negative.")
        # call superclass constructor
        MapPosition.__init__(self, gpos, index)
        self.strand = strand


class ProteinPosition(MapPosition):
    def __init__(self, ppos, index=None, **kwargs):
        # call superclass constructor
        MapPosition.__init__(self, ppos, index)


class CDSPosition(MapPosition):
    def __init__(self, cpos, index=None, strand=None,
                 pre_fmt=None, post_fmt=None):
        # Dispatch types and return anchor, offset
        if isinstance(cpos, int):
            anchor, offset = self.parse_int(cpos)
        elif isinstance(cpos, str):
            anchor, offset = self.parse_str(cpos, pre_fmt, post_fmt)
        else:
            raise CDSPositionError("'%s' is of unknown type" % cpos)
        # Set instance anchor and offset
        # call superclass constructor
        MapPosition.__init__(self, anchor, index)
        self._offset = offset
        self.validate()

    @classmethod
    def from_anchor(cls, anchor=None, offset=None):
        if not anchor:
            pos = cls("%+d" % offset)
        elif anchor < 0:
            raise CDSPositionError("Anchor cannot be negative.")
        else:
            pos = cls(anchor)
            pos.offset = offset
        return pos

    @property
    def offset(self):
        return self._offset

    @offset.setter
    def offset(self, val):
        "Validate new offset, then update"
        self.validate(offset=val)
        self._offset = val

    @property
    def anchor(self):
        return self.pos

    @anchor.setter
    def anchor(self, val):
        "Validate new anchor, then update pos"
        self.validate(anchor=val)
        self.pos = val

    def validate(self, anchor=None, offset=None):
        "Check whether anchor and offset yield a valid position"
        anchor = anchor or self.anchor
        offset = offset or self.offset
        if offset == 0:
            raise CDSPositionError("Offset may not be 0. For no offset, use None.")
        if not anchor and anchor != 0 and not offset:
            raise CDSPositionError("At least one of pos or offset must be defined")
        if anchor and anchor < 0:
            raise CDSPositionError("CDS anchor may not be negative.")
        return True

    @property
    def pos_type(self):
        "Type of CDS position, dynamically determined from values"
        # inside CDS
        if self.pos or self.pos == 0:
            if not self.offset:
                return "exon"
            return "intron"
        # outside CDS
        elif self.offset > 0:
            return "post-CDS"
        return "pre-CDS"
        assert False  # all integers should return

    @property
    def sub_dict(self):
        if self.pos_type == 'intron':
            return {'pos': self.pos, 'offset': self.offset}
        if self.pos_type == 'exon':
            return {'pos': self.pos}
        if self.pos_type == 'post-CDS' or self.pos_type == 'pre-CDS':
            return {'offset': self.offset}

    fmt_dict = {
        'exon': "%d",
        'intron': "%d%+d",
        'post-CDS': "%+d",
        'pre-CDS': "%+d",
    }

    @staticmethod
    def _shift_index(pos_dict, idx):
        if 'pos' in pos_dict and idx:
            pos_dict['pos'] += idx
        return pos_dict

    def _make_str(self, val_dict=None, fmt_dict=None):
        # set default dicts if parameter dicts are false
        fmt_dict = fmt_dict or self.fmt_dict
        val_dict = val_dict or self.sub_dict
        return fmt_dict[self.pos_type] % tuple(val_dict.values())

    @staticmethod
    def parse_int(cpos):
        "parse int to anchor, offset tuple"
        # treat negative int as offset
        if cpos < 0:
            return (None, cpos)
        # treat positive int as anchor
        return (cpos, None)

    @staticmethod
    def parse_str(cpos, pre_fmt, post_fmt):
        "parse string to anchor, offset tuple"
        delimiters = "\+\-"
        if post_fmt and "*" in post_fmt:
            delimiters += "\*"
        # parenth causes split pattern to be kept
        delim_rx = re.compile("([%s])" % delimiters)
        parsed = delim_rx.split(cpos, 1)
        if len(parsed) == 1:  # may be int
            return CDSPosition.parse_int(int(cpos))
        # 1 split is normally length 2 but delimiter is also kept
        elif len(parsed) != 3:
            raise CDSPositionError("String '%s' not parseable for this position. Please provide int or CDSPosition object." % cpos)
        if parsed[0] == "":
            anchor = None
        else:
            anchor = int(parsed[0])
        if post_fmt and parsed[1] == "*" == post_fmt[0]:
            parsed[1] = "+"
        offset = int("".join(parsed[1:]))
        return (anchor, offset)

    def to_hgvs(self):
        fmt_dict = copy(self.fmt_dict)
        fmt_dict['post-CDS'] = "*%d"
        sub_dict = self._shift_index(self.sub_dict, 1)
        return self._make_str(sub_dict, fmt_dict)

    def to_genbank(self):
        sub_dict = self._shift_index(self.sub_dict, 1)
        return self._make_str(sub_dict)

    def to_str(self):
        return self._make_str()

    def __int__(self):
        if self.pos_type == "exon":
            return MapPosition.__int__(self)
        return NotImplemented

    def __eq__(self, other):
        if isinstance(other, int):
            return MapPosition.__eq__(self, other)
        return MapPosition.__eq__(self, other) and self.offset == other.offset


if __name__ == "__main__":

    #def make_hgvs(foo):
        #return str(foo.to_hgvs())
    #MapPosition.__str__ = make_hgvs

    def print_pos(pos_obj):
        print "object:", pos_obj
        print "repr:", repr(pos_obj)
        print "HGVS:", pos_obj.to_hgvs()
        print

    g = GenomePosition.from_hgvs(6)
    print_pos(g)

    test_g = GenomePosition(5)
    #test_c = CDSPosition("6+1")
    #test_c = CDSPosition.from_hgvs("6+1")
    test_c = CDSPosition.from_hgvs("*1")
    #test_c = CDSPosition(6)
    #test_c = CDSPosition(-1)
    print_pos(test_g)
    print test_c.pos_type
    print_pos(test_c)

## testCoordinateMapper.py
#!/usr/bin/python

from functools import wraps
import unittest

from CoordinateMapper import CoordinateMapper
from MapPositions import GenomePositionError
from MapPositions import ProteinPositionError
from MapPositions import CDSPosition, CDSPositionError
from SeqFeature import FeatureLocation, SeqFeature


exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
cmap = CoordinateMapper(exons)
g_exons = xrange(7868, 7875)  # len 7
c_exons = xrange(270, 279)  # len 9
p_exons = xrange(90, 93)   # len 3
p_exons_trip = [p for p in p_exons for n in range(3)]  # len 9
c_exon_prs = ((270, 272), (273, 275), (276, 278))  # len 3
g_exon_prs = ((7868, 7870), (7871, 7873), (7874, 7876))  # len 3

g_introns = {None: (5860, 5861, 6308, 6309, 6755, 6756),
             'HGVS': (5861, 5862, 6309, 6310, 6756, 6757)}
c_introns = {None: ('51+1', '51+2', '51+449', '52-448', '52-2', '52-1'),
             'HGVS': ('52+1', '52+2', '52+449', '53-448', '53-2', '53-1')}
c_intron_tups = ((51, 1), (51, 2), (51, 449), (52, -448), (52, -2), (52, -1))

g_outside = {None: (0, 13785, 14000), 'HGVS': (1, 13786, 14001)}
c_outside = {None: ('-5808', '+1', '+216'), 'HGVS': ('-5808', '*1', '*216')}
c_outside_tups = ((None, -5808), (None, 1), (None, 216))


def two_dialects(fn):
    @wraps(fn)
    def call_tests(self):
        orig_dialect = self.dialect
        for dialect in (None, 'HGVS'):
            self.dialect = dialect
            fn(self)
        self.dialect = orig_dialect
    return call_tests


class TestCDSPosition(unittest.TestCase):
    """Test that CDSPosition works properly"""

    def setUp(self):
        self.dialect = None

    @two_dialects
    def testGoodIntron(self):
        """CDSPosition should match good intron values"""
        for c_args, c in zip(c_intron_tups, c_introns[self.dialect]):
            actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
            self.assertEqual(actual, c)

    @two_dialects
    def testGoodOutside(self):
        """CDSPosition should match good outside-CDS values"""
        for c_args, c in zip(c_outside_tups, c_outside[self.dialect]):
            actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
            self.assertEqual(actual, c)

    def testEqual(self):
        """CDSPosition should test equal with same args"""
        for args in c_intron_tups:
            CPos = CDSPosition.from_anchor
            self.assertEqual(CPos(*args), CPos(*args))
        self.assertEqual(str(CPos(*args)), str(CPos(*args)))

    #def testEqualDialects(self):
        #for c_pos in c_outside:
            #self.assertEqual(CDSPosition(c_pos), CDSPosition.from_hgvs(c_pos))
            #self.assertEqual(c_pos, CDSPosition.from_hgvs(c_pos).to_hgvs())

    def testBadAnchor(self):
        """CDSPosition should fail with negative CDS anchor"""
        self.assertRaises(CDSPositionError, CDSPosition.from_anchor, -1, 1)

    def testBadOffset(self):
        """CDSPosition should fail with zero offset"""
        self.assertRaises(CDSPositionError, CDSPosition.from_anchor, None, 0)


class TestCoordinateMapper(unittest.TestCase):
    """Test that CoordinateMapper works properly"""

    def testListInit(self):
        """CoordinateMapper should take list of exon pairs"""
        CoordinateMapper(exons)

    # TODO
    def testSeqRecordInit(self):
        pass

    def testSeqFeatureInit(self):
        """CoordinateMapper should take a CDS SeqFeature"""
        sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons)),
                        type="CDS")
        CoordinateMapper(seqrecord=sf)

    # FIXME Maybe RuntimeWarning printed to stderr isn't captured?
    #def testNotCDS(self):
        #sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons)))
        #self.assertRaises(RuntimeWarning, CoordinateMapper._CDS_from_Seq, sf)

    def testEmptyInit(self):
        """CoordinateMapper should fail with no arguments"""
        self.assertRaises(Exception, CoordinateMapper)


# TODO HGVS and GenBank coords
# TODO 0 and 1 index should both work
# \*\d+  # XXX only allowed if * is in Config.post_CDS_fmt
# TODO in HGVS, base 0 doesn't exist


class Testg2c(unittest.TestCase):
    """Test success of good input and failure of bad input to g2c"""

    def setUp(self):
        self.dialect = None

    # what it should do

    # any integer should return
    def testGoodExons(self):
        """g2c should work for exon positions"""
        for arg, expected in zip(g_exons, c_exons):
            self.assertEqual(cmap.g2c(arg), expected)

    @two_dialects
    def testGoodIntronsStr(self):
        """g2c should work for intron positions (string)"""
        dia = self.dialect
        for arg, expect in zip(g_introns[dia], c_introns[dia]):
            actual = cmap.g2c(arg, dia).to(dia)
            self.assertEqual(actual, expect)

    @two_dialects
    def testGoodIntrons(self):
        """g2c should work for intron positions (CDSPosition)"""
        for arg, tup in zip(g_introns[self.dialect], c_intron_tups):
            actual = cmap.g2c(arg, self.dialect)
            expect = CDSPosition.from_anchor(*tup)
            self.assertEqual(actual, expect)
            self.assertEqual(str(actual), str(expect))

    @two_dialects
    def testGoodOutsideStr(self):
        """g2c should work for outside positions (string)"""
        dia = self.dialect
        for arg, expected in zip(g_outside[dia], c_outside[dia]):
            actual = cmap.g2c(arg, dia).to(dia)
            self.assertEqual(actual, expected)

    def testGoodOutside(self):
        """g2c should work for outside positions (CDSPosition)"""
        for arg, tup in zip(g_outside[self.dialect], c_outside_tups):
            actual = cmap.g2c(arg)
            expect = CDSPosition.from_anchor(*tup)
            self.assertEqual(actual, expect)
            self.assertEqual(str(actual), str(expect))

    # what it shouldn't do

    # should it handle non-exact positions?

    def testBadArg(self):
        """g2c should fail on string, float, None, or negative"""
        bad = ("string", None, 3.14, )
        for arg in bad:
            self.assertRaises(Exception, cmap.g2c, arg)
        self.assertRaises(GenomePositionError, cmap.g2c, -5)


class Testc2p(unittest.TestCase):
    """Test success of good input and failure of bad input to c2p"""

    # what it should do

    # integer within length of exon should return correct protein
    def testGoodExons(self):
        """c2p should work for exon positions"""
        for arg, expect in zip(c_exons, p_exons_trip):
            self.assertEqual(cmap.c2p(arg), expect)

    # FIXME should CDSPosition return None or raise error?
    def testBadNotProtein(self):
        """c2p should fail for CDSPosition or str"""
        bad = ("string", CDSPosition.from_anchor(7, -5))
        for arg in bad:
            self.assertRaises(ValueError, cmap.c2p, arg)


class Testp2c(unittest.TestCase):
    """Test success of good input and failure of bad input to p2c"""

    # what it should do

    # integer within length of protein should return correct range
    def testGoodExons(self):
        """p2c should work for exon positions"""
        #for arg, expect in p2c_exons.iteritems():
        for arg, expect in zip(p_exons, c_exon_prs):
            self.assertEqual(cmap.p2c(arg), expect)

    def testBadTooLarge(self):
        """p2c should fail for positions longer than the max length"""
        self.assertRaises(ProteinPositionError, cmap.p2c, 500)

    def testBadNegative(self):
        """p2c should fail for negative protein coordinate"""
        self.assertRaises(ProteinPositionError, cmap.p2c, -50)


class Testc2g(unittest.TestCase):
    """Test success of good input and failure of bad input to c2g"""

    def setUp(self):
        self.dialect = None

    # what it should do

    # integer within length of exon should return correct value
    def testGoodExons(self):
        """c2g should work for exon positions"""
        for arg, expected in zip(c_exons, g_exons):
            self.assertEqual(cmap.c2g(arg), expected)

    # CDSPosition object should return correct value
    @two_dialects
    def testGoodIntrons(self):
        """c2g should work for introns (CDSPosition)"""
        dia = self.dialect
        for c_args, g in zip(c_intron_tups, g_introns[dia]):
            actual = cmap.c2g(CDSPosition.from_anchor(*c_args), dia)
            self.assertEqual(actual.to(dia), g)

    # FIXME doesn't work with HGVS
    #@two_dialects
    def testGoodOutside(self):
        """c2g should work for outside (CDSPosition)"""
        for args, expect in zip(c_outside_tups, g_outside[self.dialect]):
            self.assertEqual(cmap.c2g(CDSPosition.from_anchor(*args)),
                             expect)

    # simple string should return correct value
    #    \d*[+-]\d+
    @two_dialects
    def testGoodIntronsStr(self):
        """c2g should work for intron positions (string)"""
        dia = self.dialect
        # c_str from dialect to *dialect* matches *dialect* g_str
        for c_str, g in zip(c_introns[dia], g_introns[dia]):
            self.assertEqual(cmap.c2g(c_str, dia).to(dia), g)

        # c_str from dialect as *default* matches *default* g_str
        for c_str, g in zip(c_introns[dia], g_introns[None]):
            self.assertEqual(cmap.c2g(c_str, dia), g)

    @two_dialects
    def testGoodOutsideStr(self):
        """c2g should work for outside positions (string)"""
        dia = self.dialect
        for expected, arg in zip(g_outside[dia], c_outside[dia]):
            self.assertEqual(cmap.c2g(arg, dia).to(dia), expected)

    def testNegativeInt(self):
        """c2g should treat negative integer as pre-CDS"""
        # XXX negative integers are treated as pre-CDS
        self.assertEqual(cmap.c2g(-2), 5806)

    # what it shouldn't do

    # bad positions in form 123+4 where 123 is not end of an exon
    # or where sign is wrong
    @two_dialects
    def testBadNotEnd(self):
        """c2g should fail on invalid introns"""
        bad_introns = {None: ("160+5",  # 160 is exonic
                       "51-6",  # 51 is end
                       "52+10",  # 52 is start
        ), 'HGVS': ("160+5", "52-6", "53+10")}
        for arg in bad_introns[self.dialect]:
            self.assertRaises(CDSPositionError, cmap.c2g, arg, self.dialect)

    # TODO test both dialects
    def testBadString(self):
        """c2g should fail on arbitrary string"""
        # for non-HGVS, treat * as meaningless
        #assert CMConfig.post_CDS_fmt[0] != "*"
        bad_vals = ["xxx", "4-5'", "*10"]
        for arg in bad_vals:
            self.assertRaises(ValueError, cmap.c2g, arg)

    # integers outside length of exon should raise IndexError
    def testBadTooLarge(self):
        """c2g should fail on values beyond the exon"""
        num = 400
        assert num > len(cmap.exons)
        self.assertRaises(IndexError, cmap.c2g, num)
        self.assertRaises(Exception, cmap.c2g, "%d%d" % (num, -12))


class Testp2g(unittest.TestCase):

    def testGoodStr(self):
        """p2g should return expected ranges"""
        for p_arg, gpos in zip(p_exons, g_exon_prs):
            self.assertEqual(cmap.p2g(p_arg), gpos)


class Testg2p(unittest.TestCase):

    def testGoodStr(self):
        """g2p should return correct position for exons"""
        for g_arg, ppos in zip(g_exons, p_exons_trip):
            self.assertEqual(cmap.g2p(g_arg), ppos)


if __name__ == "__main__":
    #unittest.main()
    suite0 = unittest.TestLoader().loadTestsFromTestCase(TestCDSPosition)
    suite1 = unittest.TestLoader().loadTestsFromTestCase(TestCoordinateMapper)
    suite2 = unittest.TestLoader().loadTestsFromTestCase(Testc2g)
    suite3 = unittest.TestLoader().loadTestsFromTestCase(Testg2c)
    suite4 = unittest.TestLoader().loadTestsFromTestCase(Testc2p)
    suite5 = unittest.TestLoader().loadTestsFromTestCase(Testp2c)
    suite6 = unittest.TestLoader().loadTestsFromTestCase(Testp2g)
    suite7 = unittest.TestLoader().loadTestsFromTestCase(Testg2p)
    all_suites = [
        suite0,
        suite1,
        suite2,
        suite3,
        suite4,
        suite5,
        suite6,
        suite7,
    ]
    all = unittest.TestSuite(all_suites)
    unittest.TextTestRunner(verbosity=3).run(all)
	# CoordinateMapper -- map between genomic, cds, and protein coordinates
	# AUTHOR: Reece Hart <reecehart@gmail.com>
	# Modifications: Lenna X. Peterson <arklenna@gmail.com>
	# LICENSE: BioPython

	# Examples:
	# AB026906.1:g.7872G>T
	# AB026906.1:c.274G>T
	# BA...:p.Asp92Tyr
	#
	# All refer to congruent variants. A coordinate mapper is needed to
	# translate between at least these three coordinate frames. The mapper
	# should deal with specialized syntax for splicing and UTR (e.g., 88+1,
	# 89-2, -14, *46). In addition, care should be taken to ensure consistent 0-
	# or 1-based numbering (0 internally, as with Python/BioPython and
	# Perl/BioPerl).
	#
	# g -----------00000000000-----1111111----------22222222222*222-----
	# s0 e0 s1 e1 s2 e2
	# \ \ \| \| / /
	# +--+ +--+ \| \| +-------+ +-------+
	# \ \\| \|/ /
	# c 00000000000111111122222222222*222
	# c0 c1 c2
	# aaabbbcccdddeeefffggghhhiiijj*kkk
	# p A B C D E F G H I J K
	# p0 p1 p2 ... pn
	#
	#
	# TODO:
	# * g2c returns index + extended syntax
	# * c2g accepts index + extended syntax

	from functools import wraps
	from math import floor
	import warnings

	import MapPositions # required for pos_factory decorator
	from MapPositions import CDSPositionError, ProteinPositionError
	from MapPositions import GenomePosition, CDSPosition, ProteinPosition

	# FIXME change to absolute import once CompoundLocation is merged to master
	import SeqFeature
	from SeqFeature import FeatureLocation


	class CoordinateMapper(object):

	def __init__(self, selist=None, seqrecord=None):
	# Note: seqrecord parameter will take SeqRecord or SeqFeature
	if seqrecord is not None:
	self._exons = self._CDS_from_Seq(seqrecord)
	else:
	self._exons = self._exons_from_list(selist)

	@staticmethod
	def _CDS_from_Seq(seq):
	if hasattr(seq, 'features'):
	# generator
	cdsf = (f for f in seq.features if f.type == 'CDS').next()
	return cdsf.location
	if hasattr(seq, 'location'):
	if seq.type != 'CDS':
	# XXX should this be a fatal error?
	warnings.warn("Provided SeqFeature should be CDS",
	RuntimeWarning)
	return seq.location

	@staticmethod
	def _exons_from_list(selist):
	return sum([FeatureLocation(*pair) for pair in selist])

	@property # read-only
	def exons(self):
	return self._exons

	@property # read-only
	def exon_list(self):
	return list(self.exons)

	def pos_factory(pos_type):
	"""
	Convert string or int pos to appropriate Position object
	Argument to decorator: position type (Genome, CDS, Protein)
	"""
	def wrapper(fn):
	@wraps(fn)
	def make_pos(self, pos, dialect=None):
	# retrieve Position object
	_obj = getattr(MapPositions, pos_type + "Position")
	# if pos is not already Position object, make it one
	if not isinstance(pos, _obj):
	# no dialect: use default constructor
	if dialect is None:
	pos = _obj(pos)
	# use dialect alternate constructor
	else:
	_init = getattr(_obj, "from_%s" % dialect.lower())
	pos = _init(pos)
	# call function with new pos
	return fn(self, pos, dialect)
	return make_pos
	return wrapper

	@pos_factory("Genome")
	def g2c(self, gpos, dialect=None):
	"Convert integer from genomic to CDS coordinates"

	gpos = int(gpos)
	fmts = CDSPosition.fmt_dict

	def _simple_g2c(g):
	return CDSPosition(self.exon_list.index(g))

	# within exon
	if gpos in self.exons:
	return _simple_g2c(gpos)
	# before CDS
	if gpos < self.exons.start:
	return CDSPosition(fmts['post-CDS'] % (gpos - self.exons.start))
	# after CDS
	if gpos >= self.exons.end:
	return CDSPosition(fmts['pre-CDS'] % (gpos - self.exons.end + 1))
	# intron
	# set start of first intron
	prev_end = self.exons.parts[0].end
	for part in self.exons.parts[1:]:
	# not in this intron
	if gpos > part.start:
	prev_end = part.end
	continue
	len_intron = part.start - prev_end
	# second half (exclusive) of intron
	# offset > middle of intron
	if gpos - prev_end > floor(len_intron / 2.0):
	anchor = _simple_g2c(part.start)
	offset = gpos - part.start
	assert offset < 0
	# first half (inclusive) of intron
	else:
	anchor = _simple_g2c(prev_end - 1)
	offset = gpos - prev_end + 1
	assert offset > 0
	assert self.check_intron(anchor, offset)
	return CDSPosition(fmts['intron'] % (anchor, offset))

	assert False # function should return for every integer

	# TODO verify that values of offset are sane
	def check_intron(self, anchor, offset):
	"""
	Verify that CDS-relative intron position is valid with given exons.
	"""
	for exon in self.exons.parts:
	start = int(self.g2c(exon.start))
	if anchor == start:
	if offset > 0:
	raise CDSPositionError("Invalid intron: offset from exon start must be negative.")
	return True
	end = int(self.g2c(exon.end - 1))
	if anchor == end:
	if offset < 0:
	raise CDSPositionError("Invalid intron: offset from exon end must be positive.")
	return True
	raise CDSPositionError("Invalid intron: anchor must be start or end of an exon.")

	@pos_factory("CDS")
	def c2g(self, cpos, dialect=None):
	"Convert from CDS to genomic coordinates"

	if cpos.pos_type == "pre-CDS":
	return GenomePosition(self.exons.start + cpos.offset)
	elif cpos.pos_type == "post-CDS":
	return GenomePosition(self.exons.end - 1 + cpos.offset)

	g_anchor = self.exon_list[cpos.anchor]
	if cpos.pos_type == "exon":
	return GenomePosition(g_anchor)
	elif cpos.pos_type == "intron":
	offset = cpos.offset
	if self.check_intron(cpos.anchor, offset):
	return GenomePosition(g_anchor + offset)

	assert False # all positions should be one of the 4 types

	@pos_factory("CDS")
	def c2p(self, cpos, dialect=None):
	"""Convert from CDS to protein coordinates"""
	try:
	cpos = int(cpos)
	except TypeError:
	raise CDSPositionError("'%s' does not correspond to a protein"
	% repr(cpos))
	return ProteinPosition(int(cpos / 3.0))

	@pos_factory("Genome")
	def g2p(self, gpos, dialect=None):
	"Convert integer from genomic to protein coordinates"
	return self.c2p(self.g2c(gpos))

	@pos_factory("Protein")
	def p2c(self, ppos, dialect=None):
	"Convert integer from protein coordinate to CDS closed range"
	try:
	ppos = int(ppos)
	except TypeError:
	return None
	if ppos < 0:
	raise ProteinPositionError("'%s' should not be negative")
	# FIXME is CDS guaranteed to have len % 3 == 0?
	first_base = (ppos) * 3
	last_base = first_base + 2
	if last_base > len(self.exons):
	raise ProteinPositionError("'%s' is too large")
	return (CDSPosition(first_base), CDSPosition(last_base))

	@pos_factory("Protein")
	def p2g(self, ppos, dialect=None):
	"Convert integer from protein to genomic coordinates"
	#first, last = self.p2c(ppos)
	#return (self.c2g(first), self.c2g(last))
	return tuple(self.c2g(x) for x in self.p2c(ppos))


	if __name__ == '__main__':
	# The following exons are from AB026906.1.
	# test case: g.7872 -> c.274 -> p.92
	# N.B. These are python counting coordinates (0-based)
	exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]

	def test_list(g_range):
	cm = CoordinateMapper(exons)
	for g1 in g_range:
	print g1,
	c1 = cm.g2c(g1)
	print c1,
	p1 = cm.c2p(c1)
	print p1,
	if p1:
	c2 = cm.p2c(p1)[0]
	else:
	c2 = c1
	print ' \| ', c2,
	g2 = cm.c2g(c2)
	print g2

	print cm.g2p(7872)
	print cm.p2g(92)

	def test_simple():
	simple_exons = SeqFeature.SeqFeature(sum([FeatureLocation(2, 4), FeatureLocation(8, 11), FeatureLocation(16, 18)]), type="CDS")
	cm = CoordinateMapper(seqrecord=simple_exons)
	print cm.exons
	print list(cm.exons)
	print range(len(cm.exons))
	for i in range(len(cm.exons)):
	print "%3s" % cm.c2g(i),
	print
	for i in xrange(20):
	print "%3d" % i,
	print
	for i in xrange(20):
	print "%3s" % cm.g2c(i),
	print

	r1 = (7870, 7871, 7872, 7873, 7874)
	r2 = (5807, 5808, 5809,
	6871, 6872, 6873, 6874, 6875,
	7766, 7767, 7768, 7769,
	13784, 13785, 13786)
	test_list(r1)
	#test_list(r2)
	print
	test_simple()
	from copy import copy
	import re


	class InvalidPositionError(ValueError):
	"Exception for bad coordinates"


	class GenomePositionError(InvalidPositionError):
	"Exception for bad genome coordinates"


	class CDSPositionError(InvalidPositionError):
	"Exception for bad CDS coordinates"


	class ProteinPositionError(InvalidPositionError):
	"Exception for bad protein coordinates"


	class MapPosition(object):
	def __init__(self, pos, index=None, **kwargs):
	self.index = index
	if pos and self.index:
	pos -= index
	self.pos = pos

	# TODO classmethod from, like to

	@classmethod
	def from_hgvs(cls, hgvs_pos, strand=None):
	return cls(hgvs_pos, index=1, strand=strand, post_fmt="*%d")

	@classmethod
	def from_genbank(cls, gbk_pos, strand=None):
	return cls(gbk_pos, index=1, strand=strand)

	def to(self, dialect):
	if dialect is None:
	return self.to_str()
	try:
	to_func = getattr(self, "to_" + dialect.lower())
	except AttributeError:
	raise ValueError("Dialect '%s' not valid" % dialect)
	return to_func()

	def to_hgvs(self):
	if self.pos or self.pos == 0:
	return self.pos + 1
	return None

	def to_genbank(self):
	if self.pos or self.pos == 0:
	return self.pos + 1
	return None

	def to_str(self):
	return self.pos

	def __str__(self):
	return str(self.to_str())

	def __int__(self):
	return self.pos

	def __repr__(self):
	return "%s(%s)" % (self.__class__.__name__, self.to_str())

	def __eq__(self, other):
	if isinstance(other, int):
	return self.pos == other
	return self.pos == other.pos


	class GenomePosition(MapPosition):
	def __init__(self, gpos, index=None, strand=None, **kwargs):
	# FIXME if index is string, error may be raised
	if gpos < (index or 0):
	raise GenomePositionError("Genome position cannot be negative.")
	# call superclass constructor
	MapPosition.__init__(self, gpos, index)
	self.strand = strand


	class ProteinPosition(MapPosition):
	def __init__(self, ppos, index=None, **kwargs):
	# call superclass constructor
	MapPosition.__init__(self, ppos, index)


	class CDSPosition(MapPosition):
	def __init__(self, cpos, index=None, strand=None,
	pre_fmt=None, post_fmt=None):
	# Dispatch types and return anchor, offset
	if isinstance(cpos, int):
	anchor, offset = self.parse_int(cpos)
	elif isinstance(cpos, str):
	anchor, offset = self.parse_str(cpos, pre_fmt, post_fmt)
	else:
	raise CDSPositionError("'%s' is of unknown type" % cpos)
	# Set instance anchor and offset
	# call superclass constructor
	MapPosition.__init__(self, anchor, index)
	self._offset = offset
	self.validate()

	@classmethod
	def from_anchor(cls, anchor=None, offset=None):
	if not anchor:
	pos = cls("%+d" % offset)
	elif anchor < 0:
	raise CDSPositionError("Anchor cannot be negative.")
	else:
	pos = cls(anchor)
	pos.offset = offset
	return pos

	@property
	def offset(self):
	return self._offset

	@offset.setter
	def offset(self, val):
	"Validate new offset, then update"
	self.validate(offset=val)
	self._offset = val

	@property
	def anchor(self):
	return self.pos

	@anchor.setter
	def anchor(self, val):
	"Validate new anchor, then update pos"
	self.validate(anchor=val)
	self.pos = val

	def validate(self, anchor=None, offset=None):
	"Check whether anchor and offset yield a valid position"
	anchor = anchor or self.anchor
	offset = offset or self.offset
	if offset == 0:
	raise CDSPositionError("Offset may not be 0. For no offset, use None.")
	if not anchor and anchor != 0 and not offset:
	raise CDSPositionError("At least one of pos or offset must be defined")
	if anchor and anchor < 0:
	raise CDSPositionError("CDS anchor may not be negative.")
	return True

	@property
	def pos_type(self):
	"Type of CDS position, dynamically determined from values"
	# inside CDS
	if self.pos or self.pos == 0:
	if not self.offset:
	return "exon"
	return "intron"
	# outside CDS
	elif self.offset > 0:
	return "post-CDS"
	return "pre-CDS"
	assert False # all integers should return

	@property
	def sub_dict(self):
	if self.pos_type == 'intron':
	return {'pos': self.pos, 'offset': self.offset}
	if self.pos_type == 'exon':
	return {'pos': self.pos}
	if self.pos_type == 'post-CDS' or self.pos_type == 'pre-CDS':
	return {'offset': self.offset}

	fmt_dict = {
	'exon': "%d",
	'intron': "%d%+d",
	'post-CDS': "%+d",
	'pre-CDS': "%+d",
	}

	@staticmethod
	def _shift_index(pos_dict, idx):
	if 'pos' in pos_dict and idx:
	pos_dict['pos'] += idx
	return pos_dict

	def _make_str(self, val_dict=None, fmt_dict=None):
	# set default dicts if parameter dicts are false
	fmt_dict = fmt_dict or self.fmt_dict
	val_dict = val_dict or self.sub_dict
	return fmt_dict[self.pos_type] % tuple(val_dict.values())

	@staticmethod
	def parse_int(cpos):
	"parse int to anchor, offset tuple"
	# treat negative int as offset
	if cpos < 0:
	return (None, cpos)
	# treat positive int as anchor
	return (cpos, None)

	@staticmethod
	def parse_str(cpos, pre_fmt, post_fmt):
	"parse string to anchor, offset tuple"
	delimiters = "\+\-"
	if post_fmt and "*" in post_fmt:
	delimiters += "\*"
	# parenth causes split pattern to be kept
	delim_rx = re.compile("([%s])" % delimiters)
	parsed = delim_rx.split(cpos, 1)
	if len(parsed) == 1: # may be int
	return CDSPosition.parse_int(int(cpos))
	# 1 split is normally length 2 but delimiter is also kept
	elif len(parsed) != 3:
	raise CDSPositionError("String '%s' not parseable for this position. Please provide int or CDSPosition object." % cpos)
	if parsed[0] == "":
	anchor = None
	else:
	anchor = int(parsed[0])
	if post_fmt and parsed[1] == "*" == post_fmt[0]:
	parsed[1] = "+"
	offset = int("".join(parsed[1:]))
	return (anchor, offset)

	def to_hgvs(self):
	fmt_dict = copy(self.fmt_dict)
	fmt_dict['post-CDS'] = "*%d"
	sub_dict = self._shift_index(self.sub_dict, 1)
	return self._make_str(sub_dict, fmt_dict)

	def to_genbank(self):
	sub_dict = self._shift_index(self.sub_dict, 1)
	return self._make_str(sub_dict)

	def to_str(self):
	return self._make_str()

	def __int__(self):
	if self.pos_type == "exon":
	return MapPosition.__int__(self)
	return NotImplemented

	def __eq__(self, other):
	if isinstance(other, int):
	return MapPosition.__eq__(self, other)
	return MapPosition.__eq__(self, other) and self.offset == other.offset


	if __name__ == "__main__":

	#def make_hgvs(foo):
	#return str(foo.to_hgvs())
	#MapPosition.__str__ = make_hgvs

	def print_pos(pos_obj):
	print "object:", pos_obj
	print "repr:", repr(pos_obj)
	print "HGVS:", pos_obj.to_hgvs()
	print

	g = GenomePosition.from_hgvs(6)
	print_pos(g)

	test_g = GenomePosition(5)
	#test_c = CDSPosition("6+1")
	#test_c = CDSPosition.from_hgvs("6+1")
	test_c = CDSPosition.from_hgvs("*1")
	#test_c = CDSPosition(6)
	#test_c = CDSPosition(-1)
	print_pos(test_g)
	print test_c.pos_type
	print_pos(test_c)
	#!/usr/bin/python

	from functools import wraps
	import unittest

	from CoordinateMapper import CoordinateMapper
	from MapPositions import GenomePositionError
	from MapPositions import ProteinPositionError
	from MapPositions import CDSPosition, CDSPositionError
	from SeqFeature import FeatureLocation, SeqFeature


	exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
	cmap = CoordinateMapper(exons)
	g_exons = xrange(7868, 7875) # len 7
	c_exons = xrange(270, 279) # len 9
	p_exons = xrange(90, 93) # len 3
	p_exons_trip = [p for p in p_exons for n in range(3)] # len 9
	c_exon_prs = ((270, 272), (273, 275), (276, 278)) # len 3
	g_exon_prs = ((7868, 7870), (7871, 7873), (7874, 7876)) # len 3

	g_introns = {None: (5860, 5861, 6308, 6309, 6755, 6756),
	'HGVS': (5861, 5862, 6309, 6310, 6756, 6757)}
	c_introns = {None: ('51+1', '51+2', '51+449', '52-448', '52-2', '52-1'),
	'HGVS': ('52+1', '52+2', '52+449', '53-448', '53-2', '53-1')}
	c_intron_tups = ((51, 1), (51, 2), (51, 449), (52, -448), (52, -2), (52, -1))

	g_outside = {None: (0, 13785, 14000), 'HGVS': (1, 13786, 14001)}
	c_outside = {None: ('-5808', '+1', '+216'), 'HGVS': ('-5808', '1', '216')}
	c_outside_tups = ((None, -5808), (None, 1), (None, 216))


	def two_dialects(fn):
	@wraps(fn)
	def call_tests(self):
	orig_dialect = self.dialect
	for dialect in (None, 'HGVS'):
	self.dialect = dialect
	fn(self)
	self.dialect = orig_dialect
	return call_tests


	class TestCDSPosition(unittest.TestCase):
	"""Test that CDSPosition works properly"""

	def setUp(self):
	self.dialect = None

	@two_dialects
	def testGoodIntron(self):
	"""CDSPosition should match good intron values"""
	for c_args, c in zip(c_intron_tups, c_introns[self.dialect]):
	actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
	self.assertEqual(actual, c)

	@two_dialects
	def testGoodOutside(self):
	"""CDSPosition should match good outside-CDS values"""
	for c_args, c in zip(c_outside_tups, c_outside[self.dialect]):
	actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
	self.assertEqual(actual, c)

	def testEqual(self):
	"""CDSPosition should test equal with same args"""
	for args in c_intron_tups:
	CPos = CDSPosition.from_anchor
	self.assertEqual(CPos(args), CPos(args))
	self.assertEqual(str(CPos(args)), str(CPos(args)))

	#def testEqualDialects(self):
	#for c_pos in c_outside:
	#self.assertEqual(CDSPosition(c_pos), CDSPosition.from_hgvs(c_pos))
	#self.assertEqual(c_pos, CDSPosition.from_hgvs(c_pos).to_hgvs())

	def testBadAnchor(self):
	"""CDSPosition should fail with negative CDS anchor"""
	self.assertRaises(CDSPositionError, CDSPosition.from_anchor, -1, 1)

	def testBadOffset(self):
	"""CDSPosition should fail with zero offset"""
	self.assertRaises(CDSPositionError, CDSPosition.from_anchor, None, 0)


	class TestCoordinateMapper(unittest.TestCase):
	"""Test that CoordinateMapper works properly"""

	def testListInit(self):
	"""CoordinateMapper should take list of exon pairs"""
	CoordinateMapper(exons)

	# TODO
	def testSeqRecordInit(self):
	pass

	def testSeqFeatureInit(self):
	"""CoordinateMapper should take a CDS SeqFeature"""
	sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons)),
	type="CDS")
	CoordinateMapper(seqrecord=sf)

	# FIXME Maybe RuntimeWarning printed to stderr isn't captured?
	#def testNotCDS(self):
	#sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons)))
	#self.assertRaises(RuntimeWarning, CoordinateMapper._CDS_from_Seq, sf)

	def testEmptyInit(self):
	"""CoordinateMapper should fail with no arguments"""
	self.assertRaises(Exception, CoordinateMapper)


	# TODO HGVS and GenBank coords
	# TODO 0 and 1 index should both work
	# \\d+ # XXX only allowed if is in Config.post_CDS_fmt
	# TODO in HGVS, base 0 doesn't exist


	class Testg2c(unittest.TestCase):
	"""Test success of good input and failure of bad input to g2c"""

	def setUp(self):
	self.dialect = None

	# what it should do

	# any integer should return
	def testGoodExons(self):
	"""g2c should work for exon positions"""
	for arg, expected in zip(g_exons, c_exons):
	self.assertEqual(cmap.g2c(arg), expected)

	@two_dialects
	def testGoodIntronsStr(self):
	"""g2c should work for intron positions (string)"""
	dia = self.dialect
	for arg, expect in zip(g_introns[dia], c_introns[dia]):
	actual = cmap.g2c(arg, dia).to(dia)
	self.assertEqual(actual, expect)

	@two_dialects
	def testGoodIntrons(self):
	"""g2c should work for intron positions (CDSPosition)"""
	for arg, tup in zip(g_introns[self.dialect], c_intron_tups):
	actual = cmap.g2c(arg, self.dialect)
	expect = CDSPosition.from_anchor(*tup)
	self.assertEqual(actual, expect)
	self.assertEqual(str(actual), str(expect))

	@two_dialects
	def testGoodOutsideStr(self):
	"""g2c should work for outside positions (string)"""
	dia = self.dialect
	for arg, expected in zip(g_outside[dia], c_outside[dia]):
	actual = cmap.g2c(arg, dia).to(dia)
	self.assertEqual(actual, expected)

	def testGoodOutside(self):
	"""g2c should work for outside positions (CDSPosition)"""
	for arg, tup in zip(g_outside[self.dialect], c_outside_tups):
	actual = cmap.g2c(arg)
	expect = CDSPosition.from_anchor(*tup)
	self.assertEqual(actual, expect)
	self.assertEqual(str(actual), str(expect))

	# what it shouldn't do

	# should it handle non-exact positions?

	def testBadArg(self):
	"""g2c should fail on string, float, None, or negative"""
	bad = ("string", None, 3.14, )
	for arg in bad:
	self.assertRaises(Exception, cmap.g2c, arg)
	self.assertRaises(GenomePositionError, cmap.g2c, -5)


	class Testc2p(unittest.TestCase):
	"""Test success of good input and failure of bad input to c2p"""

	# what it should do

	# integer within length of exon should return correct protein
	def testGoodExons(self):
	"""c2p should work for exon positions"""
	for arg, expect in zip(c_exons, p_exons_trip):
	self.assertEqual(cmap.c2p(arg), expect)

	# FIXME should CDSPosition return None or raise error?
	def testBadNotProtein(self):
	"""c2p should fail for CDSPosition or str"""
	bad = ("string", CDSPosition.from_anchor(7, -5))
	for arg in bad:
	self.assertRaises(ValueError, cmap.c2p, arg)


	class Testp2c(unittest.TestCase):
	"""Test success of good input and failure of bad input to p2c"""

	# what it should do

	# integer within length of protein should return correct range
	def testGoodExons(self):
	"""p2c should work for exon positions"""
	#for arg, expect in p2c_exons.iteritems():
	for arg, expect in zip(p_exons, c_exon_prs):
	self.assertEqual(cmap.p2c(arg), expect)

	def testBadTooLarge(self):
	"""p2c should fail for positions longer than the max length"""
	self.assertRaises(ProteinPositionError, cmap.p2c, 500)

	def testBadNegative(self):
	"""p2c should fail for negative protein coordinate"""
	self.assertRaises(ProteinPositionError, cmap.p2c, -50)


	class Testc2g(unittest.TestCase):
	"""Test success of good input and failure of bad input to c2g"""

	def setUp(self):
	self.dialect = None

	# what it should do

	# integer within length of exon should return correct value
	def testGoodExons(self):
	"""c2g should work for exon positions"""
	for arg, expected in zip(c_exons, g_exons):
	self.assertEqual(cmap.c2g(arg), expected)

	# CDSPosition object should return correct value
	@two_dialects
	def testGoodIntrons(self):
	"""c2g should work for introns (CDSPosition)"""
	dia = self.dialect
	for c_args, g in zip(c_intron_tups, g_introns[dia]):
	actual = cmap.c2g(CDSPosition.from_anchor(*c_args), dia)
	self.assertEqual(actual.to(dia), g)

	# FIXME doesn't work with HGVS
	#@two_dialects
	def testGoodOutside(self):
	"""c2g should work for outside (CDSPosition)"""
	for args, expect in zip(c_outside_tups, g_outside[self.dialect]):
	self.assertEqual(cmap.c2g(CDSPosition.from_anchor(*args)),
	expect)

	# simple string should return correct value
	# \d*[+-]\d+
	@two_dialects
	def testGoodIntronsStr(self):
	"""c2g should work for intron positions (string)"""
	dia = self.dialect
	# c_str from dialect to dialect matches dialect g_str
	for c_str, g in zip(c_introns[dia], g_introns[dia]):
	self.assertEqual(cmap.c2g(c_str, dia).to(dia), g)

	# c_str from dialect as default matches default g_str
	for c_str, g in zip(c_introns[dia], g_introns[None]):
	self.assertEqual(cmap.c2g(c_str, dia), g)

	@two_dialects
	def testGoodOutsideStr(self):
	"""c2g should work for outside positions (string)"""
	dia = self.dialect
	for expected, arg in zip(g_outside[dia], c_outside[dia]):
	self.assertEqual(cmap.c2g(arg, dia).to(dia), expected)

	def testNegativeInt(self):
	"""c2g should treat negative integer as pre-CDS"""
	# XXX negative integers are treated as pre-CDS
	self.assertEqual(cmap.c2g(-2), 5806)

	# what it shouldn't do

	# bad positions in form 123+4 where 123 is not end of an exon
	# or where sign is wrong
	@two_dialects
	def testBadNotEnd(self):
	"""c2g should fail on invalid introns"""
	bad_introns = {None: ("160+5", # 160 is exonic
	"51-6", # 51 is end
	"52+10", # 52 is start
	), 'HGVS': ("160+5", "52-6", "53+10")}
	for arg in bad_introns[self.dialect]:
	self.assertRaises(CDSPositionError, cmap.c2g, arg, self.dialect)

	# TODO test both dialects
	def testBadString(self):
	"""c2g should fail on arbitrary string"""
	# for non-HGVS, treat * as meaningless
	#assert CMConfig.post_CDS_fmt[0] != "*"
	bad_vals = ["xxx", "4-5'", "*10"]
	for arg in bad_vals:
	self.assertRaises(ValueError, cmap.c2g, arg)

	# integers outside length of exon should raise IndexError
	def testBadTooLarge(self):
	"""c2g should fail on values beyond the exon"""
	num = 400
	assert num > len(cmap.exons)
	self.assertRaises(IndexError, cmap.c2g, num)
	self.assertRaises(Exception, cmap.c2g, "%d%d" % (num, -12))


	class Testp2g(unittest.TestCase):

	def testGoodStr(self):
	"""p2g should return expected ranges"""
	for p_arg, gpos in zip(p_exons, g_exon_prs):
	self.assertEqual(cmap.p2g(p_arg), gpos)


	class Testg2p(unittest.TestCase):

	def testGoodStr(self):
	"""g2p should return correct position for exons"""
	for g_arg, ppos in zip(g_exons, p_exons_trip):
	self.assertEqual(cmap.g2p(g_arg), ppos)


	if __name__ == "__main__":
	#unittest.main()
	suite0 = unittest.TestLoader().loadTestsFromTestCase(TestCDSPosition)
	suite1 = unittest.TestLoader().loadTestsFromTestCase(TestCoordinateMapper)
	suite2 = unittest.TestLoader().loadTestsFromTestCase(Testc2g)
	suite3 = unittest.TestLoader().loadTestsFromTestCase(Testg2c)
	suite4 = unittest.TestLoader().loadTestsFromTestCase(Testc2p)
	suite5 = unittest.TestLoader().loadTestsFromTestCase(Testp2c)
	suite6 = unittest.TestLoader().loadTestsFromTestCase(Testp2g)
	suite7 = unittest.TestLoader().loadTestsFromTestCase(Testg2p)
	all_suites = [
	suite0,
	suite1,
	suite2,
	suite3,
	suite4,
	suite5,
	suite6,
	suite7,
	]
	all = unittest.TestSuite(all_suites)
	unittest.TextTestRunner(verbosity=3).run(all)