Skip to content

Instantly share code, notes, and snippets.

@lennax
Last active August 11, 2021 16:14
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lennax/3172753 to your computer and use it in GitHub Desktop.
Save lennax/3172753 to your computer and use it in GitHub Desktop.
# CoordinateMapper -- map between genomic, cds, and protein coordinates
# AUTHOR: Reece Hart <reecehart@gmail.com>
# Modifications: Lenna X. Peterson <arklenna@gmail.com>
# LICENSE: BioPython
# Examples:
# AB026906.1:g.7872G>T
# AB026906.1:c.274G>T
# BA...:p.Asp92Tyr
#
# All refer to congruent variants. A coordinate mapper is needed to
# translate between at least these three coordinate frames. The mapper
# should deal with specialized syntax for splicing and UTR (e.g., 88+1,
# 89-2, -14, *46). In addition, care should be taken to ensure consistent 0-
# or 1-based numbering (0 internally, as with Python/BioPython and
# Perl/BioPerl).
#
# g -----------00000000000-----1111111----------22222222222*222-----
# s0 e0 s1 e1 s2 e2
# \ \ | | / /
# +--+ +--+ | | +-------+ +-------+
# \ \| |/ /
# c 00000000000111111122222222222*222
# c0 c1 c2
# aaabbbcccdddeeefffggghhhiiijj*kkk
# p A B C D E F G H I J K
# p0 p1 p2 ... pn
#
#
# TODO:
# * g2c returns index + extended syntax
# * c2g accepts index + extended syntax
from functools import wraps
from math import floor
import warnings
import MapPositions # required for pos_factory decorator
from MapPositions import CDSPositionError, ProteinPositionError
from MapPositions import GenomePosition, CDSPosition, ProteinPosition
# FIXME change to absolute import once CompoundLocation is merged to master
import SeqFeature
from SeqFeature import FeatureLocation
class CoordinateMapper(object):
def __init__(self, selist=None, seqrecord=None):
# Note: seqrecord parameter will take SeqRecord or SeqFeature
if seqrecord is not None:
self._exons = self._CDS_from_Seq(seqrecord)
else:
self._exons = self._exons_from_list(selist)
@staticmethod
def _CDS_from_Seq(seq):
if hasattr(seq, 'features'):
# generator
cdsf = (f for f in seq.features if f.type == 'CDS').next()
return cdsf.location
if hasattr(seq, 'location'):
if seq.type != 'CDS':
# XXX should this be a fatal error?
warnings.warn("Provided SeqFeature should be CDS",
RuntimeWarning)
return seq.location
@staticmethod
def _exons_from_list(selist):
return sum([FeatureLocation(*pair) for pair in selist])
@property # read-only
def exons(self):
return self._exons
@property # read-only
def exon_list(self):
return list(self.exons)
def pos_factory(pos_type):
"""
Convert string or int pos to appropriate Position object
Argument to decorator: position type (Genome, CDS, Protein)
"""
def wrapper(fn):
@wraps(fn)
def make_pos(self, pos, dialect=None):
# retrieve Position object
_obj = getattr(MapPositions, pos_type + "Position")
# if pos is not already Position object, make it one
if not isinstance(pos, _obj):
# no dialect: use default constructor
if dialect is None:
pos = _obj(pos)
# use dialect alternate constructor
else:
_init = getattr(_obj, "from_%s" % dialect.lower())
pos = _init(pos)
# call function with new pos
return fn(self, pos, dialect)
return make_pos
return wrapper
@pos_factory("Genome")
def g2c(self, gpos, dialect=None):
"Convert integer from genomic to CDS coordinates"
gpos = int(gpos)
fmts = CDSPosition.fmt_dict
def _simple_g2c(g):
return CDSPosition(self.exon_list.index(g))
# within exon
if gpos in self.exons:
return _simple_g2c(gpos)
# before CDS
if gpos < self.exons.start:
return CDSPosition(fmts['post-CDS'] % (gpos - self.exons.start))
# after CDS
if gpos >= self.exons.end:
return CDSPosition(fmts['pre-CDS'] % (gpos - self.exons.end + 1))
# intron
# set start of first intron
prev_end = self.exons.parts[0].end
for part in self.exons.parts[1:]:
# not in this intron
if gpos > part.start:
prev_end = part.end
continue
len_intron = part.start - prev_end
# second half (exclusive) of intron
# offset > middle of intron
if gpos - prev_end > floor(len_intron / 2.0):
anchor = _simple_g2c(part.start)
offset = gpos - part.start
assert offset < 0
# first half (inclusive) of intron
else:
anchor = _simple_g2c(prev_end - 1)
offset = gpos - prev_end + 1
assert offset > 0
assert self.check_intron(anchor, offset)
return CDSPosition(fmts['intron'] % (anchor, offset))
assert False # function should return for every integer
# TODO verify that values of offset are sane
def check_intron(self, anchor, offset):
"""
Verify that CDS-relative intron position is valid with given exons.
"""
for exon in self.exons.parts:
start = int(self.g2c(exon.start))
if anchor == start:
if offset > 0:
raise CDSPositionError("Invalid intron: offset from exon start must be negative.")
return True
end = int(self.g2c(exon.end - 1))
if anchor == end:
if offset < 0:
raise CDSPositionError("Invalid intron: offset from exon end must be positive.")
return True
raise CDSPositionError("Invalid intron: anchor must be start or end of an exon.")
@pos_factory("CDS")
def c2g(self, cpos, dialect=None):
"Convert from CDS to genomic coordinates"
if cpos.pos_type == "pre-CDS":
return GenomePosition(self.exons.start + cpos.offset)
elif cpos.pos_type == "post-CDS":
return GenomePosition(self.exons.end - 1 + cpos.offset)
g_anchor = self.exon_list[cpos.anchor]
if cpos.pos_type == "exon":
return GenomePosition(g_anchor)
elif cpos.pos_type == "intron":
offset = cpos.offset
if self.check_intron(cpos.anchor, offset):
return GenomePosition(g_anchor + offset)
assert False # all positions should be one of the 4 types
@pos_factory("CDS")
def c2p(self, cpos, dialect=None):
"""Convert from CDS to protein coordinates"""
try:
cpos = int(cpos)
except TypeError:
raise CDSPositionError("'%s' does not correspond to a protein"
% repr(cpos))
return ProteinPosition(int(cpos / 3.0))
@pos_factory("Genome")
def g2p(self, gpos, dialect=None):
"Convert integer from genomic to protein coordinates"
return self.c2p(self.g2c(gpos))
@pos_factory("Protein")
def p2c(self, ppos, dialect=None):
"Convert integer from protein coordinate to CDS closed range"
try:
ppos = int(ppos)
except TypeError:
return None
if ppos < 0:
raise ProteinPositionError("'%s' should not be negative")
# FIXME is CDS guaranteed to have len % 3 == 0?
first_base = (ppos) * 3
last_base = first_base + 2
if last_base > len(self.exons):
raise ProteinPositionError("'%s' is too large")
return (CDSPosition(first_base), CDSPosition(last_base))
@pos_factory("Protein")
def p2g(self, ppos, dialect=None):
"Convert integer from protein to genomic coordinates"
#first, last = self.p2c(ppos)
#return (self.c2g(first), self.c2g(last))
return tuple(self.c2g(x) for x in self.p2c(ppos))
if __name__ == '__main__':
# The following exons are from AB026906.1.
# test case: g.7872 -> c.274 -> p.92
# N.B. These are python counting coordinates (0-based)
exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
def test_list(g_range):
cm = CoordinateMapper(exons)
for g1 in g_range:
print g1,
c1 = cm.g2c(g1)
print c1,
p1 = cm.c2p(c1)
print p1,
if p1:
c2 = cm.p2c(p1)[0]
else:
c2 = c1
print ' | ', c2,
g2 = cm.c2g(c2)
print g2
print cm.g2p(7872)
print cm.p2g(92)
def test_simple():
simple_exons = SeqFeature.SeqFeature(sum([FeatureLocation(2, 4), FeatureLocation(8, 11), FeatureLocation(16, 18)]), type="CDS")
cm = CoordinateMapper(seqrecord=simple_exons)
print cm.exons
print list(cm.exons)
print range(len(cm.exons))
for i in range(len(cm.exons)):
print "%3s" % cm.c2g(i),
print
for i in xrange(20):
print "%3d" % i,
print
for i in xrange(20):
print "%3s" % cm.g2c(i),
print
r1 = (7870, 7871, 7872, 7873, 7874)
r2 = (5807, 5808, 5809,
6871, 6872, 6873, 6874, 6875,
7766, 7767, 7768, 7769,
13784, 13785, 13786)
test_list(r1)
#test_list(r2)
print
test_simple()
from copy import copy
import re
class InvalidPositionError(ValueError):
"Exception for bad coordinates"
class GenomePositionError(InvalidPositionError):
"Exception for bad genome coordinates"
class CDSPositionError(InvalidPositionError):
"Exception for bad CDS coordinates"
class ProteinPositionError(InvalidPositionError):
"Exception for bad protein coordinates"
class MapPosition(object):
def __init__(self, pos, index=None, **kwargs):
self.index = index
if pos and self.index:
pos -= index
self.pos = pos
# TODO classmethod from, like to
@classmethod
def from_hgvs(cls, hgvs_pos, strand=None):
return cls(hgvs_pos, index=1, strand=strand, post_fmt="*%d")
@classmethod
def from_genbank(cls, gbk_pos, strand=None):
return cls(gbk_pos, index=1, strand=strand)
def to(self, dialect):
if dialect is None:
return self.to_str()
try:
to_func = getattr(self, "to_" + dialect.lower())
except AttributeError:
raise ValueError("Dialect '%s' not valid" % dialect)
return to_func()
def to_hgvs(self):
if self.pos or self.pos == 0:
return self.pos + 1
return None
def to_genbank(self):
if self.pos or self.pos == 0:
return self.pos + 1
return None
def to_str(self):
return self.pos
def __str__(self):
return str(self.to_str())
def __int__(self):
return self.pos
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, self.to_str())
def __eq__(self, other):
if isinstance(other, int):
return self.pos == other
return self.pos == other.pos
class GenomePosition(MapPosition):
def __init__(self, gpos, index=None, strand=None, **kwargs):
# FIXME if index is string, error may be raised
if gpos < (index or 0):
raise GenomePositionError("Genome position cannot be negative.")
# call superclass constructor
MapPosition.__init__(self, gpos, index)
self.strand = strand
class ProteinPosition(MapPosition):
def __init__(self, ppos, index=None, **kwargs):
# call superclass constructor
MapPosition.__init__(self, ppos, index)
class CDSPosition(MapPosition):
def __init__(self, cpos, index=None, strand=None,
pre_fmt=None, post_fmt=None):
# Dispatch types and return anchor, offset
if isinstance(cpos, int):
anchor, offset = self.parse_int(cpos)
elif isinstance(cpos, str):
anchor, offset = self.parse_str(cpos, pre_fmt, post_fmt)
else:
raise CDSPositionError("'%s' is of unknown type" % cpos)
# Set instance anchor and offset
# call superclass constructor
MapPosition.__init__(self, anchor, index)
self._offset = offset
self.validate()
@classmethod
def from_anchor(cls, anchor=None, offset=None):
if not anchor:
pos = cls("%+d" % offset)
elif anchor < 0:
raise CDSPositionError("Anchor cannot be negative.")
else:
pos = cls(anchor)
pos.offset = offset
return pos
@property
def offset(self):
return self._offset
@offset.setter
def offset(self, val):
"Validate new offset, then update"
self.validate(offset=val)
self._offset = val
@property
def anchor(self):
return self.pos
@anchor.setter
def anchor(self, val):
"Validate new anchor, then update pos"
self.validate(anchor=val)
self.pos = val
def validate(self, anchor=None, offset=None):
"Check whether anchor and offset yield a valid position"
anchor = anchor or self.anchor
offset = offset or self.offset
if offset == 0:
raise CDSPositionError("Offset may not be 0. For no offset, use None.")
if not anchor and anchor != 0 and not offset:
raise CDSPositionError("At least one of pos or offset must be defined")
if anchor and anchor < 0:
raise CDSPositionError("CDS anchor may not be negative.")
return True
@property
def pos_type(self):
"Type of CDS position, dynamically determined from values"
# inside CDS
if self.pos or self.pos == 0:
if not self.offset:
return "exon"
return "intron"
# outside CDS
elif self.offset > 0:
return "post-CDS"
return "pre-CDS"
assert False # all integers should return
@property
def sub_dict(self):
if self.pos_type == 'intron':
return {'pos': self.pos, 'offset': self.offset}
if self.pos_type == 'exon':
return {'pos': self.pos}
if self.pos_type == 'post-CDS' or self.pos_type == 'pre-CDS':
return {'offset': self.offset}
fmt_dict = {
'exon': "%d",
'intron': "%d%+d",
'post-CDS': "%+d",
'pre-CDS': "%+d",
}
@staticmethod
def _shift_index(pos_dict, idx):
if 'pos' in pos_dict and idx:
pos_dict['pos'] += idx
return pos_dict
def _make_str(self, val_dict=None, fmt_dict=None):
# set default dicts if parameter dicts are false
fmt_dict = fmt_dict or self.fmt_dict
val_dict = val_dict or self.sub_dict
return fmt_dict[self.pos_type] % tuple(val_dict.values())
@staticmethod
def parse_int(cpos):
"parse int to anchor, offset tuple"
# treat negative int as offset
if cpos < 0:
return (None, cpos)
# treat positive int as anchor
return (cpos, None)
@staticmethod
def parse_str(cpos, pre_fmt, post_fmt):
"parse string to anchor, offset tuple"
delimiters = "\+\-"
if post_fmt and "*" in post_fmt:
delimiters += "\*"
# parenth causes split pattern to be kept
delim_rx = re.compile("([%s])" % delimiters)
parsed = delim_rx.split(cpos, 1)
if len(parsed) == 1: # may be int
return CDSPosition.parse_int(int(cpos))
# 1 split is normally length 2 but delimiter is also kept
elif len(parsed) != 3:
raise CDSPositionError("String '%s' not parseable for this position. Please provide int or CDSPosition object." % cpos)
if parsed[0] == "":
anchor = None
else:
anchor = int(parsed[0])
if post_fmt and parsed[1] == "*" == post_fmt[0]:
parsed[1] = "+"
offset = int("".join(parsed[1:]))
return (anchor, offset)
def to_hgvs(self):
fmt_dict = copy(self.fmt_dict)
fmt_dict['post-CDS'] = "*%d"
sub_dict = self._shift_index(self.sub_dict, 1)
return self._make_str(sub_dict, fmt_dict)
def to_genbank(self):
sub_dict = self._shift_index(self.sub_dict, 1)
return self._make_str(sub_dict)
def to_str(self):
return self._make_str()
def __int__(self):
if self.pos_type == "exon":
return MapPosition.__int__(self)
return NotImplemented
def __eq__(self, other):
if isinstance(other, int):
return MapPosition.__eq__(self, other)
return MapPosition.__eq__(self, other) and self.offset == other.offset
if __name__ == "__main__":
#def make_hgvs(foo):
#return str(foo.to_hgvs())
#MapPosition.__str__ = make_hgvs
def print_pos(pos_obj):
print "object:", pos_obj
print "repr:", repr(pos_obj)
print "HGVS:", pos_obj.to_hgvs()
print
g = GenomePosition.from_hgvs(6)
print_pos(g)
test_g = GenomePosition(5)
#test_c = CDSPosition("6+1")
#test_c = CDSPosition.from_hgvs("6+1")
test_c = CDSPosition.from_hgvs("*1")
#test_c = CDSPosition(6)
#test_c = CDSPosition(-1)
print_pos(test_g)
print test_c.pos_type
print_pos(test_c)
#!/usr/bin/python
from functools import wraps
import unittest
from CoordinateMapper import CoordinateMapper
from MapPositions import GenomePositionError
from MapPositions import ProteinPositionError
from MapPositions import CDSPosition, CDSPositionError
from SeqFeature import FeatureLocation, SeqFeature
exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
cmap = CoordinateMapper(exons)
g_exons = xrange(7868, 7875) # len 7
c_exons = xrange(270, 279) # len 9
p_exons = xrange(90, 93) # len 3
p_exons_trip = [p for p in p_exons for n in range(3)] # len 9
c_exon_prs = ((270, 272), (273, 275), (276, 278)) # len 3
g_exon_prs = ((7868, 7870), (7871, 7873), (7874, 7876)) # len 3
g_introns = {None: (5860, 5861, 6308, 6309, 6755, 6756),
'HGVS': (5861, 5862, 6309, 6310, 6756, 6757)}
c_introns = {None: ('51+1', '51+2', '51+449', '52-448', '52-2', '52-1'),
'HGVS': ('52+1', '52+2', '52+449', '53-448', '53-2', '53-1')}
c_intron_tups = ((51, 1), (51, 2), (51, 449), (52, -448), (52, -2), (52, -1))
g_outside = {None: (0, 13785, 14000), 'HGVS': (1, 13786, 14001)}
c_outside = {None: ('-5808', '+1', '+216'), 'HGVS': ('-5808', '*1', '*216')}
c_outside_tups = ((None, -5808), (None, 1), (None, 216))
def two_dialects(fn):
@wraps(fn)
def call_tests(self):
orig_dialect = self.dialect
for dialect in (None, 'HGVS'):
self.dialect = dialect
fn(self)
self.dialect = orig_dialect
return call_tests
class TestCDSPosition(unittest.TestCase):
"""Test that CDSPosition works properly"""
def setUp(self):
self.dialect = None
@two_dialects
def testGoodIntron(self):
"""CDSPosition should match good intron values"""
for c_args, c in zip(c_intron_tups, c_introns[self.dialect]):
actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
self.assertEqual(actual, c)
@two_dialects
def testGoodOutside(self):
"""CDSPosition should match good outside-CDS values"""
for c_args, c in zip(c_outside_tups, c_outside[self.dialect]):
actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
self.assertEqual(actual, c)
def testEqual(self):
"""CDSPosition should test equal with same args"""
for args in c_intron_tups:
CPos = CDSPosition.from_anchor
self.assertEqual(CPos(*args), CPos(*args))
self.assertEqual(str(CPos(*args)), str(CPos(*args)))
#def testEqualDialects(self):
#for c_pos in c_outside:
#self.assertEqual(CDSPosition(c_pos), CDSPosition.from_hgvs(c_pos))
#self.assertEqual(c_pos, CDSPosition.from_hgvs(c_pos).to_hgvs())
def testBadAnchor(self):
"""CDSPosition should fail with negative CDS anchor"""
self.assertRaises(CDSPositionError, CDSPosition.from_anchor, -1, 1)
def testBadOffset(self):
"""CDSPosition should fail with zero offset"""
self.assertRaises(CDSPositionError, CDSPosition.from_anchor, None, 0)
class TestCoordinateMapper(unittest.TestCase):
"""Test that CoordinateMapper works properly"""
def testListInit(self):
"""CoordinateMapper should take list of exon pairs"""
CoordinateMapper(exons)
# TODO
def testSeqRecordInit(self):
pass
def testSeqFeatureInit(self):
"""CoordinateMapper should take a CDS SeqFeature"""
sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons)),
type="CDS")
CoordinateMapper(seqrecord=sf)
# FIXME Maybe RuntimeWarning printed to stderr isn't captured?
#def testNotCDS(self):
#sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons)))
#self.assertRaises(RuntimeWarning, CoordinateMapper._CDS_from_Seq, sf)
def testEmptyInit(self):
"""CoordinateMapper should fail with no arguments"""
self.assertRaises(Exception, CoordinateMapper)
# TODO HGVS and GenBank coords
# TODO 0 and 1 index should both work
# \*\d+ # XXX only allowed if * is in Config.post_CDS_fmt
# TODO in HGVS, base 0 doesn't exist
class Testg2c(unittest.TestCase):
"""Test success of good input and failure of bad input to g2c"""
def setUp(self):
self.dialect = None
# what it should do
# any integer should return
def testGoodExons(self):
"""g2c should work for exon positions"""
for arg, expected in zip(g_exons, c_exons):
self.assertEqual(cmap.g2c(arg), expected)
@two_dialects
def testGoodIntronsStr(self):
"""g2c should work for intron positions (string)"""
dia = self.dialect
for arg, expect in zip(g_introns[dia], c_introns[dia]):
actual = cmap.g2c(arg, dia).to(dia)
self.assertEqual(actual, expect)
@two_dialects
def testGoodIntrons(self):
"""g2c should work for intron positions (CDSPosition)"""
for arg, tup in zip(g_introns[self.dialect], c_intron_tups):
actual = cmap.g2c(arg, self.dialect)
expect = CDSPosition.from_anchor(*tup)
self.assertEqual(actual, expect)
self.assertEqual(str(actual), str(expect))
@two_dialects
def testGoodOutsideStr(self):
"""g2c should work for outside positions (string)"""
dia = self.dialect
for arg, expected in zip(g_outside[dia], c_outside[dia]):
actual = cmap.g2c(arg, dia).to(dia)
self.assertEqual(actual, expected)
def testGoodOutside(self):
"""g2c should work for outside positions (CDSPosition)"""
for arg, tup in zip(g_outside[self.dialect], c_outside_tups):
actual = cmap.g2c(arg)
expect = CDSPosition.from_anchor(*tup)
self.assertEqual(actual, expect)
self.assertEqual(str(actual), str(expect))
# what it shouldn't do
# should it handle non-exact positions?
def testBadArg(self):
"""g2c should fail on string, float, None, or negative"""
bad = ("string", None, 3.14, )
for arg in bad:
self.assertRaises(Exception, cmap.g2c, arg)
self.assertRaises(GenomePositionError, cmap.g2c, -5)
class Testc2p(unittest.TestCase):
"""Test success of good input and failure of bad input to c2p"""
# what it should do
# integer within length of exon should return correct protein
def testGoodExons(self):
"""c2p should work for exon positions"""
for arg, expect in zip(c_exons, p_exons_trip):
self.assertEqual(cmap.c2p(arg), expect)
# FIXME should CDSPosition return None or raise error?
def testBadNotProtein(self):
"""c2p should fail for CDSPosition or str"""
bad = ("string", CDSPosition.from_anchor(7, -5))
for arg in bad:
self.assertRaises(ValueError, cmap.c2p, arg)
class Testp2c(unittest.TestCase):
"""Test success of good input and failure of bad input to p2c"""
# what it should do
# integer within length of protein should return correct range
def testGoodExons(self):
"""p2c should work for exon positions"""
#for arg, expect in p2c_exons.iteritems():
for arg, expect in zip(p_exons, c_exon_prs):
self.assertEqual(cmap.p2c(arg), expect)
def testBadTooLarge(self):
"""p2c should fail for positions longer than the max length"""
self.assertRaises(ProteinPositionError, cmap.p2c, 500)
def testBadNegative(self):
"""p2c should fail for negative protein coordinate"""
self.assertRaises(ProteinPositionError, cmap.p2c, -50)
class Testc2g(unittest.TestCase):
"""Test success of good input and failure of bad input to c2g"""
def setUp(self):
self.dialect = None
# what it should do
# integer within length of exon should return correct value
def testGoodExons(self):
"""c2g should work for exon positions"""
for arg, expected in zip(c_exons, g_exons):
self.assertEqual(cmap.c2g(arg), expected)
# CDSPosition object should return correct value
@two_dialects
def testGoodIntrons(self):
"""c2g should work for introns (CDSPosition)"""
dia = self.dialect
for c_args, g in zip(c_intron_tups, g_introns[dia]):
actual = cmap.c2g(CDSPosition.from_anchor(*c_args), dia)
self.assertEqual(actual.to(dia), g)
# FIXME doesn't work with HGVS
#@two_dialects
def testGoodOutside(self):
"""c2g should work for outside (CDSPosition)"""
for args, expect in zip(c_outside_tups, g_outside[self.dialect]):
self.assertEqual(cmap.c2g(CDSPosition.from_anchor(*args)),
expect)
# simple string should return correct value
# \d*[+-]\d+
@two_dialects
def testGoodIntronsStr(self):
"""c2g should work for intron positions (string)"""
dia = self.dialect
# c_str from dialect to *dialect* matches *dialect* g_str
for c_str, g in zip(c_introns[dia], g_introns[dia]):
self.assertEqual(cmap.c2g(c_str, dia).to(dia), g)
# c_str from dialect as *default* matches *default* g_str
for c_str, g in zip(c_introns[dia], g_introns[None]):
self.assertEqual(cmap.c2g(c_str, dia), g)
@two_dialects
def testGoodOutsideStr(self):
"""c2g should work for outside positions (string)"""
dia = self.dialect
for expected, arg in zip(g_outside[dia], c_outside[dia]):
self.assertEqual(cmap.c2g(arg, dia).to(dia), expected)
def testNegativeInt(self):
"""c2g should treat negative integer as pre-CDS"""
# XXX negative integers are treated as pre-CDS
self.assertEqual(cmap.c2g(-2), 5806)
# what it shouldn't do
# bad positions in form 123+4 where 123 is not end of an exon
# or where sign is wrong
@two_dialects
def testBadNotEnd(self):
"""c2g should fail on invalid introns"""
bad_introns = {None: ("160+5", # 160 is exonic
"51-6", # 51 is end
"52+10", # 52 is start
), 'HGVS': ("160+5", "52-6", "53+10")}
for arg in bad_introns[self.dialect]:
self.assertRaises(CDSPositionError, cmap.c2g, arg, self.dialect)
# TODO test both dialects
def testBadString(self):
"""c2g should fail on arbitrary string"""
# for non-HGVS, treat * as meaningless
#assert CMConfig.post_CDS_fmt[0] != "*"
bad_vals = ["xxx", "4-5'", "*10"]
for arg in bad_vals:
self.assertRaises(ValueError, cmap.c2g, arg)
# integers outside length of exon should raise IndexError
def testBadTooLarge(self):
"""c2g should fail on values beyond the exon"""
num = 400
assert num > len(cmap.exons)
self.assertRaises(IndexError, cmap.c2g, num)
self.assertRaises(Exception, cmap.c2g, "%d%d" % (num, -12))
class Testp2g(unittest.TestCase):
def testGoodStr(self):
"""p2g should return expected ranges"""
for p_arg, gpos in zip(p_exons, g_exon_prs):
self.assertEqual(cmap.p2g(p_arg), gpos)
class Testg2p(unittest.TestCase):
def testGoodStr(self):
"""g2p should return correct position for exons"""
for g_arg, ppos in zip(g_exons, p_exons_trip):
self.assertEqual(cmap.g2p(g_arg), ppos)
if __name__ == "__main__":
#unittest.main()
suite0 = unittest.TestLoader().loadTestsFromTestCase(TestCDSPosition)
suite1 = unittest.TestLoader().loadTestsFromTestCase(TestCoordinateMapper)
suite2 = unittest.TestLoader().loadTestsFromTestCase(Testc2g)
suite3 = unittest.TestLoader().loadTestsFromTestCase(Testg2c)
suite4 = unittest.TestLoader().loadTestsFromTestCase(Testc2p)
suite5 = unittest.TestLoader().loadTestsFromTestCase(Testp2c)
suite6 = unittest.TestLoader().loadTestsFromTestCase(Testp2g)
suite7 = unittest.TestLoader().loadTestsFromTestCase(Testg2p)
all_suites = [
suite0,
suite1,
suite2,
suite3,
suite4,
suite5,
suite6,
suite7,
]
all = unittest.TestSuite(all_suites)
unittest.TextTestRunner(verbosity=3).run(all)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment