Last active
August 11, 2021 16:14
-
-
Save lennax/3172753 to your computer and use it in GitHub Desktop.
coordinate mapper by Reece Hart: http://lists.open-bio.org/pipermail/biopython/2010-June/006598.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CoordinateMapper -- map between genomic, cds, and protein coordinates | |
# AUTHOR: Reece Hart <reecehart@gmail.com> | |
# Modifications: Lenna X. Peterson <arklenna@gmail.com> | |
# LICENSE: BioPython | |
# Examples: | |
# AB026906.1:g.7872G>T | |
# AB026906.1:c.274G>T | |
# BA...:p.Asp92Tyr | |
# | |
# All refer to congruent variants. A coordinate mapper is needed to | |
# translate between at least these three coordinate frames. The mapper | |
# should deal with specialized syntax for splicing and UTR (e.g., 88+1, | |
# 89-2, -14, *46). In addition, care should be taken to ensure consistent 0- | |
# or 1-based numbering (0 internally, as with Python/BioPython and | |
# Perl/BioPerl). | |
# | |
# g -----------00000000000-----1111111----------22222222222*222----- | |
# s0 e0 s1 e1 s2 e2 | |
# \ \ | | / / | |
# +--+ +--+ | | +-------+ +-------+ | |
# \ \| |/ / | |
# c 00000000000111111122222222222*222 | |
# c0 c1 c2 | |
# aaabbbcccdddeeefffggghhhiiijj*kkk | |
# p A B C D E F G H I J K | |
# p0 p1 p2 ... pn | |
# | |
# | |
# TODO: | |
# * g2c returns index + extended syntax | |
# * c2g accepts index + extended syntax | |
from functools import wraps | |
from math import floor | |
import warnings | |
import MapPositions # required for pos_factory decorator | |
from MapPositions import CDSPositionError, ProteinPositionError | |
from MapPositions import GenomePosition, CDSPosition, ProteinPosition | |
# FIXME change to absolute import once CompoundLocation is merged to master | |
import SeqFeature | |
from SeqFeature import FeatureLocation | |
class CoordinateMapper(object): | |
def __init__(self, selist=None, seqrecord=None): | |
# Note: seqrecord parameter will take SeqRecord or SeqFeature | |
if seqrecord is not None: | |
self._exons = self._CDS_from_Seq(seqrecord) | |
else: | |
self._exons = self._exons_from_list(selist) | |
@staticmethod | |
def _CDS_from_Seq(seq): | |
if hasattr(seq, 'features'): | |
# generator | |
cdsf = (f for f in seq.features if f.type == 'CDS').next() | |
return cdsf.location | |
if hasattr(seq, 'location'): | |
if seq.type != 'CDS': | |
# XXX should this be a fatal error? | |
warnings.warn("Provided SeqFeature should be CDS", | |
RuntimeWarning) | |
return seq.location | |
@staticmethod | |
def _exons_from_list(selist): | |
return sum([FeatureLocation(*pair) for pair in selist]) | |
@property # read-only | |
def exons(self): | |
return self._exons | |
@property # read-only | |
def exon_list(self): | |
return list(self.exons) | |
def pos_factory(pos_type): | |
""" | |
Convert string or int pos to appropriate Position object | |
Argument to decorator: position type (Genome, CDS, Protein) | |
""" | |
def wrapper(fn): | |
@wraps(fn) | |
def make_pos(self, pos, dialect=None): | |
# retrieve Position object | |
_obj = getattr(MapPositions, pos_type + "Position") | |
# if pos is not already Position object, make it one | |
if not isinstance(pos, _obj): | |
# no dialect: use default constructor | |
if dialect is None: | |
pos = _obj(pos) | |
# use dialect alternate constructor | |
else: | |
_init = getattr(_obj, "from_%s" % dialect.lower()) | |
pos = _init(pos) | |
# call function with new pos | |
return fn(self, pos, dialect) | |
return make_pos | |
return wrapper | |
@pos_factory("Genome") | |
def g2c(self, gpos, dialect=None): | |
"Convert integer from genomic to CDS coordinates" | |
gpos = int(gpos) | |
fmts = CDSPosition.fmt_dict | |
def _simple_g2c(g): | |
return CDSPosition(self.exon_list.index(g)) | |
# within exon | |
if gpos in self.exons: | |
return _simple_g2c(gpos) | |
# before CDS | |
if gpos < self.exons.start: | |
return CDSPosition(fmts['post-CDS'] % (gpos - self.exons.start)) | |
# after CDS | |
if gpos >= self.exons.end: | |
return CDSPosition(fmts['pre-CDS'] % (gpos - self.exons.end + 1)) | |
# intron | |
# set start of first intron | |
prev_end = self.exons.parts[0].end | |
for part in self.exons.parts[1:]: | |
# not in this intron | |
if gpos > part.start: | |
prev_end = part.end | |
continue | |
len_intron = part.start - prev_end | |
# second half (exclusive) of intron | |
# offset > middle of intron | |
if gpos - prev_end > floor(len_intron / 2.0): | |
anchor = _simple_g2c(part.start) | |
offset = gpos - part.start | |
assert offset < 0 | |
# first half (inclusive) of intron | |
else: | |
anchor = _simple_g2c(prev_end - 1) | |
offset = gpos - prev_end + 1 | |
assert offset > 0 | |
assert self.check_intron(anchor, offset) | |
return CDSPosition(fmts['intron'] % (anchor, offset)) | |
assert False # function should return for every integer | |
# TODO verify that values of offset are sane | |
def check_intron(self, anchor, offset): | |
""" | |
Verify that CDS-relative intron position is valid with given exons. | |
""" | |
for exon in self.exons.parts: | |
start = int(self.g2c(exon.start)) | |
if anchor == start: | |
if offset > 0: | |
raise CDSPositionError("Invalid intron: offset from exon start must be negative.") | |
return True | |
end = int(self.g2c(exon.end - 1)) | |
if anchor == end: | |
if offset < 0: | |
raise CDSPositionError("Invalid intron: offset from exon end must be positive.") | |
return True | |
raise CDSPositionError("Invalid intron: anchor must be start or end of an exon.") | |
@pos_factory("CDS") | |
def c2g(self, cpos, dialect=None): | |
"Convert from CDS to genomic coordinates" | |
if cpos.pos_type == "pre-CDS": | |
return GenomePosition(self.exons.start + cpos.offset) | |
elif cpos.pos_type == "post-CDS": | |
return GenomePosition(self.exons.end - 1 + cpos.offset) | |
g_anchor = self.exon_list[cpos.anchor] | |
if cpos.pos_type == "exon": | |
return GenomePosition(g_anchor) | |
elif cpos.pos_type == "intron": | |
offset = cpos.offset | |
if self.check_intron(cpos.anchor, offset): | |
return GenomePosition(g_anchor + offset) | |
assert False # all positions should be one of the 4 types | |
@pos_factory("CDS") | |
def c2p(self, cpos, dialect=None): | |
"""Convert from CDS to protein coordinates""" | |
try: | |
cpos = int(cpos) | |
except TypeError: | |
raise CDSPositionError("'%s' does not correspond to a protein" | |
% repr(cpos)) | |
return ProteinPosition(int(cpos / 3.0)) | |
@pos_factory("Genome") | |
def g2p(self, gpos, dialect=None): | |
"Convert integer from genomic to protein coordinates" | |
return self.c2p(self.g2c(gpos)) | |
@pos_factory("Protein") | |
def p2c(self, ppos, dialect=None): | |
"Convert integer from protein coordinate to CDS closed range" | |
try: | |
ppos = int(ppos) | |
except TypeError: | |
return None | |
if ppos < 0: | |
raise ProteinPositionError("'%s' should not be negative") | |
# FIXME is CDS guaranteed to have len % 3 == 0? | |
first_base = (ppos) * 3 | |
last_base = first_base + 2 | |
if last_base > len(self.exons): | |
raise ProteinPositionError("'%s' is too large") | |
return (CDSPosition(first_base), CDSPosition(last_base)) | |
@pos_factory("Protein") | |
def p2g(self, ppos, dialect=None): | |
"Convert integer from protein to genomic coordinates" | |
#first, last = self.p2c(ppos) | |
#return (self.c2g(first), self.c2g(last)) | |
return tuple(self.c2g(x) for x in self.p2c(ppos)) | |
if __name__ == '__main__': | |
# The following exons are from AB026906.1. | |
# test case: g.7872 -> c.274 -> p.92 | |
# N.B. These are python counting coordinates (0-based) | |
exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)] | |
def test_list(g_range): | |
cm = CoordinateMapper(exons) | |
for g1 in g_range: | |
print g1, | |
c1 = cm.g2c(g1) | |
print c1, | |
p1 = cm.c2p(c1) | |
print p1, | |
if p1: | |
c2 = cm.p2c(p1)[0] | |
else: | |
c2 = c1 | |
print ' | ', c2, | |
g2 = cm.c2g(c2) | |
print g2 | |
print cm.g2p(7872) | |
print cm.p2g(92) | |
def test_simple(): | |
simple_exons = SeqFeature.SeqFeature(sum([FeatureLocation(2, 4), FeatureLocation(8, 11), FeatureLocation(16, 18)]), type="CDS") | |
cm = CoordinateMapper(seqrecord=simple_exons) | |
print cm.exons | |
print list(cm.exons) | |
print range(len(cm.exons)) | |
for i in range(len(cm.exons)): | |
print "%3s" % cm.c2g(i), | |
for i in xrange(20): | |
print "%3d" % i, | |
for i in xrange(20): | |
print "%3s" % cm.g2c(i), | |
r1 = (7870, 7871, 7872, 7873, 7874) | |
r2 = (5807, 5808, 5809, | |
6871, 6872, 6873, 6874, 6875, | |
7766, 7767, 7768, 7769, | |
13784, 13785, 13786) | |
test_list(r1) | |
#test_list(r2) | |
test_simple() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from copy import copy | |
import re | |
class InvalidPositionError(ValueError): | |
"Exception for bad coordinates" | |
class GenomePositionError(InvalidPositionError): | |
"Exception for bad genome coordinates" | |
class CDSPositionError(InvalidPositionError): | |
"Exception for bad CDS coordinates" | |
class ProteinPositionError(InvalidPositionError): | |
"Exception for bad protein coordinates" | |
class MapPosition(object): | |
def __init__(self, pos, index=None, **kwargs): | |
self.index = index | |
if pos and self.index: | |
pos -= index | |
self.pos = pos | |
# TODO classmethod from, like to | |
@classmethod | |
def from_hgvs(cls, hgvs_pos, strand=None): | |
return cls(hgvs_pos, index=1, strand=strand, post_fmt="*%d") | |
@classmethod | |
def from_genbank(cls, gbk_pos, strand=None): | |
return cls(gbk_pos, index=1, strand=strand) | |
def to(self, dialect): | |
if dialect is None: | |
return self.to_str() | |
try: | |
to_func = getattr(self, "to_" + dialect.lower()) | |
except AttributeError: | |
raise ValueError("Dialect '%s' not valid" % dialect) | |
return to_func() | |
def to_hgvs(self): | |
if self.pos or self.pos == 0: | |
return self.pos + 1 | |
return None | |
def to_genbank(self): | |
if self.pos or self.pos == 0: | |
return self.pos + 1 | |
return None | |
def to_str(self): | |
return self.pos | |
def __str__(self): | |
return str(self.to_str()) | |
def __int__(self): | |
return self.pos | |
def __repr__(self): | |
return "%s(%s)" % (self.__class__.__name__, self.to_str()) | |
def __eq__(self, other): | |
if isinstance(other, int): | |
return self.pos == other | |
return self.pos == other.pos | |
class GenomePosition(MapPosition): | |
def __init__(self, gpos, index=None, strand=None, **kwargs): | |
# FIXME if index is string, error may be raised | |
if gpos < (index or 0): | |
raise GenomePositionError("Genome position cannot be negative.") | |
# call superclass constructor | |
MapPosition.__init__(self, gpos, index) | |
self.strand = strand | |
class ProteinPosition(MapPosition): | |
def __init__(self, ppos, index=None, **kwargs): | |
# call superclass constructor | |
MapPosition.__init__(self, ppos, index) | |
class CDSPosition(MapPosition): | |
def __init__(self, cpos, index=None, strand=None, | |
pre_fmt=None, post_fmt=None): | |
# Dispatch types and return anchor, offset | |
if isinstance(cpos, int): | |
anchor, offset = self.parse_int(cpos) | |
elif isinstance(cpos, str): | |
anchor, offset = self.parse_str(cpos, pre_fmt, post_fmt) | |
else: | |
raise CDSPositionError("'%s' is of unknown type" % cpos) | |
# Set instance anchor and offset | |
# call superclass constructor | |
MapPosition.__init__(self, anchor, index) | |
self._offset = offset | |
self.validate() | |
@classmethod | |
def from_anchor(cls, anchor=None, offset=None): | |
if not anchor: | |
pos = cls("%+d" % offset) | |
elif anchor < 0: | |
raise CDSPositionError("Anchor cannot be negative.") | |
else: | |
pos = cls(anchor) | |
pos.offset = offset | |
return pos | |
@property | |
def offset(self): | |
return self._offset | |
@offset.setter | |
def offset(self, val): | |
"Validate new offset, then update" | |
self.validate(offset=val) | |
self._offset = val | |
@property | |
def anchor(self): | |
return self.pos | |
@anchor.setter | |
def anchor(self, val): | |
"Validate new anchor, then update pos" | |
self.validate(anchor=val) | |
self.pos = val | |
def validate(self, anchor=None, offset=None): | |
"Check whether anchor and offset yield a valid position" | |
anchor = anchor or self.anchor | |
offset = offset or self.offset | |
if offset == 0: | |
raise CDSPositionError("Offset may not be 0. For no offset, use None.") | |
if not anchor and anchor != 0 and not offset: | |
raise CDSPositionError("At least one of pos or offset must be defined") | |
if anchor and anchor < 0: | |
raise CDSPositionError("CDS anchor may not be negative.") | |
return True | |
@property | |
def pos_type(self): | |
"Type of CDS position, dynamically determined from values" | |
# inside CDS | |
if self.pos or self.pos == 0: | |
if not self.offset: | |
return "exon" | |
return "intron" | |
# outside CDS | |
elif self.offset > 0: | |
return "post-CDS" | |
return "pre-CDS" | |
assert False # all integers should return | |
@property | |
def sub_dict(self): | |
if self.pos_type == 'intron': | |
return {'pos': self.pos, 'offset': self.offset} | |
if self.pos_type == 'exon': | |
return {'pos': self.pos} | |
if self.pos_type == 'post-CDS' or self.pos_type == 'pre-CDS': | |
return {'offset': self.offset} | |
fmt_dict = { | |
'exon': "%d", | |
'intron': "%d%+d", | |
'post-CDS': "%+d", | |
'pre-CDS': "%+d", | |
} | |
@staticmethod | |
def _shift_index(pos_dict, idx): | |
if 'pos' in pos_dict and idx: | |
pos_dict['pos'] += idx | |
return pos_dict | |
def _make_str(self, val_dict=None, fmt_dict=None): | |
# set default dicts if parameter dicts are false | |
fmt_dict = fmt_dict or self.fmt_dict | |
val_dict = val_dict or self.sub_dict | |
return fmt_dict[self.pos_type] % tuple(val_dict.values()) | |
@staticmethod | |
def parse_int(cpos): | |
"parse int to anchor, offset tuple" | |
# treat negative int as offset | |
if cpos < 0: | |
return (None, cpos) | |
# treat positive int as anchor | |
return (cpos, None) | |
@staticmethod | |
def parse_str(cpos, pre_fmt, post_fmt): | |
"parse string to anchor, offset tuple" | |
delimiters = "\+\-" | |
if post_fmt and "*" in post_fmt: | |
delimiters += "\*" | |
# parenth causes split pattern to be kept | |
delim_rx = re.compile("([%s])" % delimiters) | |
parsed = delim_rx.split(cpos, 1) | |
if len(parsed) == 1: # may be int | |
return CDSPosition.parse_int(int(cpos)) | |
# 1 split is normally length 2 but delimiter is also kept | |
elif len(parsed) != 3: | |
raise CDSPositionError("String '%s' not parseable for this position. Please provide int or CDSPosition object." % cpos) | |
if parsed[0] == "": | |
anchor = None | |
else: | |
anchor = int(parsed[0]) | |
if post_fmt and parsed[1] == "*" == post_fmt[0]: | |
parsed[1] = "+" | |
offset = int("".join(parsed[1:])) | |
return (anchor, offset) | |
def to_hgvs(self): | |
fmt_dict = copy(self.fmt_dict) | |
fmt_dict['post-CDS'] = "*%d" | |
sub_dict = self._shift_index(self.sub_dict, 1) | |
return self._make_str(sub_dict, fmt_dict) | |
def to_genbank(self): | |
sub_dict = self._shift_index(self.sub_dict, 1) | |
return self._make_str(sub_dict) | |
def to_str(self): | |
return self._make_str() | |
def __int__(self): | |
if self.pos_type == "exon": | |
return MapPosition.__int__(self) | |
return NotImplemented | |
def __eq__(self, other): | |
if isinstance(other, int): | |
return MapPosition.__eq__(self, other) | |
return MapPosition.__eq__(self, other) and self.offset == other.offset | |
if __name__ == "__main__": | |
#def make_hgvs(foo): | |
#return str(foo.to_hgvs()) | |
#MapPosition.__str__ = make_hgvs | |
def print_pos(pos_obj): | |
print "object:", pos_obj | |
print "repr:", repr(pos_obj) | |
print "HGVS:", pos_obj.to_hgvs() | |
g = GenomePosition.from_hgvs(6) | |
print_pos(g) | |
test_g = GenomePosition(5) | |
#test_c = CDSPosition("6+1") | |
#test_c = CDSPosition.from_hgvs("6+1") | |
test_c = CDSPosition.from_hgvs("*1") | |
#test_c = CDSPosition(6) | |
#test_c = CDSPosition(-1) | |
print_pos(test_g) | |
print test_c.pos_type | |
print_pos(test_c) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from functools import wraps | |
import unittest | |
from CoordinateMapper import CoordinateMapper | |
from MapPositions import GenomePositionError | |
from MapPositions import ProteinPositionError | |
from MapPositions import CDSPosition, CDSPositionError | |
from SeqFeature import FeatureLocation, SeqFeature | |
exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)] | |
cmap = CoordinateMapper(exons) | |
g_exons = xrange(7868, 7875) # len 7 | |
c_exons = xrange(270, 279) # len 9 | |
p_exons = xrange(90, 93) # len 3 | |
p_exons_trip = [p for p in p_exons for n in range(3)] # len 9 | |
c_exon_prs = ((270, 272), (273, 275), (276, 278)) # len 3 | |
g_exon_prs = ((7868, 7870), (7871, 7873), (7874, 7876)) # len 3 | |
g_introns = {None: (5860, 5861, 6308, 6309, 6755, 6756), | |
'HGVS': (5861, 5862, 6309, 6310, 6756, 6757)} | |
c_introns = {None: ('51+1', '51+2', '51+449', '52-448', '52-2', '52-1'), | |
'HGVS': ('52+1', '52+2', '52+449', '53-448', '53-2', '53-1')} | |
c_intron_tups = ((51, 1), (51, 2), (51, 449), (52, -448), (52, -2), (52, -1)) | |
g_outside = {None: (0, 13785, 14000), 'HGVS': (1, 13786, 14001)} | |
c_outside = {None: ('-5808', '+1', '+216'), 'HGVS': ('-5808', '*1', '*216')} | |
c_outside_tups = ((None, -5808), (None, 1), (None, 216)) | |
def two_dialects(fn): | |
@wraps(fn) | |
def call_tests(self): | |
orig_dialect = self.dialect | |
for dialect in (None, 'HGVS'): | |
self.dialect = dialect | |
fn(self) | |
self.dialect = orig_dialect | |
return call_tests | |
class TestCDSPosition(unittest.TestCase): | |
"""Test that CDSPosition works properly""" | |
def setUp(self): | |
self.dialect = None | |
@two_dialects | |
def testGoodIntron(self): | |
"""CDSPosition should match good intron values""" | |
for c_args, c in zip(c_intron_tups, c_introns[self.dialect]): | |
actual = CDSPosition.from_anchor(*c_args).to(self.dialect) | |
self.assertEqual(actual, c) | |
@two_dialects | |
def testGoodOutside(self): | |
"""CDSPosition should match good outside-CDS values""" | |
for c_args, c in zip(c_outside_tups, c_outside[self.dialect]): | |
actual = CDSPosition.from_anchor(*c_args).to(self.dialect) | |
self.assertEqual(actual, c) | |
def testEqual(self): | |
"""CDSPosition should test equal with same args""" | |
for args in c_intron_tups: | |
CPos = CDSPosition.from_anchor | |
self.assertEqual(CPos(*args), CPos(*args)) | |
self.assertEqual(str(CPos(*args)), str(CPos(*args))) | |
#def testEqualDialects(self): | |
#for c_pos in c_outside: | |
#self.assertEqual(CDSPosition(c_pos), CDSPosition.from_hgvs(c_pos)) | |
#self.assertEqual(c_pos, CDSPosition.from_hgvs(c_pos).to_hgvs()) | |
def testBadAnchor(self): | |
"""CDSPosition should fail with negative CDS anchor""" | |
self.assertRaises(CDSPositionError, CDSPosition.from_anchor, -1, 1) | |
def testBadOffset(self): | |
"""CDSPosition should fail with zero offset""" | |
self.assertRaises(CDSPositionError, CDSPosition.from_anchor, None, 0) | |
class TestCoordinateMapper(unittest.TestCase): | |
"""Test that CoordinateMapper works properly""" | |
def testListInit(self): | |
"""CoordinateMapper should take list of exon pairs""" | |
CoordinateMapper(exons) | |
# TODO | |
def testSeqRecordInit(self): | |
pass | |
def testSeqFeatureInit(self): | |
"""CoordinateMapper should take a CDS SeqFeature""" | |
sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons)), | |
type="CDS") | |
CoordinateMapper(seqrecord=sf) | |
# FIXME Maybe RuntimeWarning printed to stderr isn't captured? | |
#def testNotCDS(self): | |
#sf = SeqFeature(sum((FeatureLocation(*pr) for pr in exons))) | |
#self.assertRaises(RuntimeWarning, CoordinateMapper._CDS_from_Seq, sf) | |
def testEmptyInit(self): | |
"""CoordinateMapper should fail with no arguments""" | |
self.assertRaises(Exception, CoordinateMapper) | |
# TODO HGVS and GenBank coords | |
# TODO 0 and 1 index should both work | |
# \*\d+ # XXX only allowed if * is in Config.post_CDS_fmt | |
# TODO in HGVS, base 0 doesn't exist | |
class Testg2c(unittest.TestCase): | |
"""Test success of good input and failure of bad input to g2c""" | |
def setUp(self): | |
self.dialect = None | |
# what it should do | |
# any integer should return | |
def testGoodExons(self): | |
"""g2c should work for exon positions""" | |
for arg, expected in zip(g_exons, c_exons): | |
self.assertEqual(cmap.g2c(arg), expected) | |
@two_dialects | |
def testGoodIntronsStr(self): | |
"""g2c should work for intron positions (string)""" | |
dia = self.dialect | |
for arg, expect in zip(g_introns[dia], c_introns[dia]): | |
actual = cmap.g2c(arg, dia).to(dia) | |
self.assertEqual(actual, expect) | |
@two_dialects | |
def testGoodIntrons(self): | |
"""g2c should work for intron positions (CDSPosition)""" | |
for arg, tup in zip(g_introns[self.dialect], c_intron_tups): | |
actual = cmap.g2c(arg, self.dialect) | |
expect = CDSPosition.from_anchor(*tup) | |
self.assertEqual(actual, expect) | |
self.assertEqual(str(actual), str(expect)) | |
@two_dialects | |
def testGoodOutsideStr(self): | |
"""g2c should work for outside positions (string)""" | |
dia = self.dialect | |
for arg, expected in zip(g_outside[dia], c_outside[dia]): | |
actual = cmap.g2c(arg, dia).to(dia) | |
self.assertEqual(actual, expected) | |
def testGoodOutside(self): | |
"""g2c should work for outside positions (CDSPosition)""" | |
for arg, tup in zip(g_outside[self.dialect], c_outside_tups): | |
actual = cmap.g2c(arg) | |
expect = CDSPosition.from_anchor(*tup) | |
self.assertEqual(actual, expect) | |
self.assertEqual(str(actual), str(expect)) | |
# what it shouldn't do | |
# should it handle non-exact positions? | |
def testBadArg(self): | |
"""g2c should fail on string, float, None, or negative""" | |
bad = ("string", None, 3.14, ) | |
for arg in bad: | |
self.assertRaises(Exception, cmap.g2c, arg) | |
self.assertRaises(GenomePositionError, cmap.g2c, -5) | |
class Testc2p(unittest.TestCase): | |
"""Test success of good input and failure of bad input to c2p""" | |
# what it should do | |
# integer within length of exon should return correct protein | |
def testGoodExons(self): | |
"""c2p should work for exon positions""" | |
for arg, expect in zip(c_exons, p_exons_trip): | |
self.assertEqual(cmap.c2p(arg), expect) | |
# FIXME should CDSPosition return None or raise error? | |
def testBadNotProtein(self): | |
"""c2p should fail for CDSPosition or str""" | |
bad = ("string", CDSPosition.from_anchor(7, -5)) | |
for arg in bad: | |
self.assertRaises(ValueError, cmap.c2p, arg) | |
class Testp2c(unittest.TestCase): | |
"""Test success of good input and failure of bad input to p2c""" | |
# what it should do | |
# integer within length of protein should return correct range | |
def testGoodExons(self): | |
"""p2c should work for exon positions""" | |
#for arg, expect in p2c_exons.iteritems(): | |
for arg, expect in zip(p_exons, c_exon_prs): | |
self.assertEqual(cmap.p2c(arg), expect) | |
def testBadTooLarge(self): | |
"""p2c should fail for positions longer than the max length""" | |
self.assertRaises(ProteinPositionError, cmap.p2c, 500) | |
def testBadNegative(self): | |
"""p2c should fail for negative protein coordinate""" | |
self.assertRaises(ProteinPositionError, cmap.p2c, -50) | |
class Testc2g(unittest.TestCase): | |
"""Test success of good input and failure of bad input to c2g""" | |
def setUp(self): | |
self.dialect = None | |
# what it should do | |
# integer within length of exon should return correct value | |
def testGoodExons(self): | |
"""c2g should work for exon positions""" | |
for arg, expected in zip(c_exons, g_exons): | |
self.assertEqual(cmap.c2g(arg), expected) | |
# CDSPosition object should return correct value | |
@two_dialects | |
def testGoodIntrons(self): | |
"""c2g should work for introns (CDSPosition)""" | |
dia = self.dialect | |
for c_args, g in zip(c_intron_tups, g_introns[dia]): | |
actual = cmap.c2g(CDSPosition.from_anchor(*c_args), dia) | |
self.assertEqual(actual.to(dia), g) | |
# FIXME doesn't work with HGVS | |
#@two_dialects | |
def testGoodOutside(self): | |
"""c2g should work for outside (CDSPosition)""" | |
for args, expect in zip(c_outside_tups, g_outside[self.dialect]): | |
self.assertEqual(cmap.c2g(CDSPosition.from_anchor(*args)), | |
expect) | |
# simple string should return correct value | |
# \d*[+-]\d+ | |
@two_dialects | |
def testGoodIntronsStr(self): | |
"""c2g should work for intron positions (string)""" | |
dia = self.dialect | |
# c_str from dialect to *dialect* matches *dialect* g_str | |
for c_str, g in zip(c_introns[dia], g_introns[dia]): | |
self.assertEqual(cmap.c2g(c_str, dia).to(dia), g) | |
# c_str from dialect as *default* matches *default* g_str | |
for c_str, g in zip(c_introns[dia], g_introns[None]): | |
self.assertEqual(cmap.c2g(c_str, dia), g) | |
@two_dialects | |
def testGoodOutsideStr(self): | |
"""c2g should work for outside positions (string)""" | |
dia = self.dialect | |
for expected, arg in zip(g_outside[dia], c_outside[dia]): | |
self.assertEqual(cmap.c2g(arg, dia).to(dia), expected) | |
def testNegativeInt(self): | |
"""c2g should treat negative integer as pre-CDS""" | |
# XXX negative integers are treated as pre-CDS | |
self.assertEqual(cmap.c2g(-2), 5806) | |
# what it shouldn't do | |
# bad positions in form 123+4 where 123 is not end of an exon | |
# or where sign is wrong | |
@two_dialects | |
def testBadNotEnd(self): | |
"""c2g should fail on invalid introns""" | |
bad_introns = {None: ("160+5", # 160 is exonic | |
"51-6", # 51 is end | |
"52+10", # 52 is start | |
), 'HGVS': ("160+5", "52-6", "53+10")} | |
for arg in bad_introns[self.dialect]: | |
self.assertRaises(CDSPositionError, cmap.c2g, arg, self.dialect) | |
# TODO test both dialects | |
def testBadString(self): | |
"""c2g should fail on arbitrary string""" | |
# for non-HGVS, treat * as meaningless | |
#assert CMConfig.post_CDS_fmt[0] != "*" | |
bad_vals = ["xxx", "4-5'", "*10"] | |
for arg in bad_vals: | |
self.assertRaises(ValueError, cmap.c2g, arg) | |
# integers outside length of exon should raise IndexError | |
def testBadTooLarge(self): | |
"""c2g should fail on values beyond the exon""" | |
num = 400 | |
assert num > len(cmap.exons) | |
self.assertRaises(IndexError, cmap.c2g, num) | |
self.assertRaises(Exception, cmap.c2g, "%d%d" % (num, -12)) | |
class Testp2g(unittest.TestCase): | |
def testGoodStr(self): | |
"""p2g should return expected ranges""" | |
for p_arg, gpos in zip(p_exons, g_exon_prs): | |
self.assertEqual(cmap.p2g(p_arg), gpos) | |
class Testg2p(unittest.TestCase): | |
def testGoodStr(self): | |
"""g2p should return correct position for exons""" | |
for g_arg, ppos in zip(g_exons, p_exons_trip): | |
self.assertEqual(cmap.g2p(g_arg), ppos) | |
if __name__ == "__main__": | |
#unittest.main() | |
suite0 = unittest.TestLoader().loadTestsFromTestCase(TestCDSPosition) | |
suite1 = unittest.TestLoader().loadTestsFromTestCase(TestCoordinateMapper) | |
suite2 = unittest.TestLoader().loadTestsFromTestCase(Testc2g) | |
suite3 = unittest.TestLoader().loadTestsFromTestCase(Testg2c) | |
suite4 = unittest.TestLoader().loadTestsFromTestCase(Testc2p) | |
suite5 = unittest.TestLoader().loadTestsFromTestCase(Testp2c) | |
suite6 = unittest.TestLoader().loadTestsFromTestCase(Testp2g) | |
suite7 = unittest.TestLoader().loadTestsFromTestCase(Testg2p) | |
all_suites = [ | |
suite0, | |
suite1, | |
suite2, | |
suite3, | |
suite4, | |
suite5, | |
suite6, | |
suite7, | |
] | |
all = unittest.TestSuite(all_suites) | |
unittest.TextTestRunner(verbosity=3).run(all) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment