Skip to content

Instantly share code, notes, and snippets.

@lennax
Last active August 11, 2021 16:14
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lennax/3172753 to your computer and use it in GitHub Desktop.
Save lennax/3172753 to your computer and use it in GitHub Desktop.
# CoordinateMapper -- map between genomic, cds, and protein coordinates
# AUTHOR: Reece Hart <reecehart@gmail.com>
# Modifications: Lenna X. Peterson <arklenna@gmail.com>
# LICENSE: BioPython
# Examples:
# AB026906.1:g.7872G>T
# AB026906.1:c.274G>T
# BA...:p.Asp92Tyr
#
# All refer to congruent variants. A coordinate mapper is needed to
# translate between at least these three coordinate frames. The mapper
# should deal with specialized syntax for splicing and UTR (e.g., 88+1,
# 89-2, -14, *46). In addition, care should be taken to ensure consistent 0-
# or 1-based numbering (0 internally, as with Python/BioPython and
# Perl/BioPerl).
#
# g -----------00000000000-----1111111----------22222222222*222-----
# s0 e0 s1 e1 s2 e2
# \ \ | | / /
# +--+ +--+ | | +-------+ +-------+
# \ \| |/ /
# c 00000000000111111122222222222*222
# c0 c1 c2
# aaabbbcccdddeeefffggghhhiiijj*kkk
# p A B C D E F G H I J K
# p0 p1 p2 ... pn
#
#
# TODO:
# * g2c returns index + extended syntax
# * c2g accepts index + extended syntax
from functools import wraps
from math import floor
import warnings
import MapPositions # required for pos_factory decorator
from MapPositions import CDSPositionError, ProteinPositionError
from MapPositions import GenomePosition, CDSPosition, ProteinPosition
from Bio import BiopythonParserWarning
# FIXME change to absolute import once CompoundLocation is merged to master
from SeqFeature import FeatureLocation
class CoordinateMapper(object):
"""Convert positions between genomic, CDS, and protein coordinates."""
def __init__(self, selist):
"""Set exons to be used for mapping.
Parameters
----------
selist : SeqRecord, SeqFeature, list
Object containing exon information
"""
self._exons = self._get_exons(selist)
@staticmethod
def _get_exons(seq):
"""Extract exons from SeqRecord, SeqFeature, or list of pairs.
Parameters
----------
seq : SeqRecord, SeqFeature, list
Object containing exon information.
Returns
-------
SeqFeature.FeatureLocation
"""
# Try as SeqRecord
if hasattr(seq, 'features'):
# generator
cdsf = (f for f in seq.features if f.type == 'CDS').next()
return cdsf.location
# Try as SeqFeature
elif hasattr(seq, 'location'):
if seq.type != 'CDS':
# FIXME should this be a fatal error?
warnings.warn("Provided SeqFeature should be CDS",
BiopythonParserWarning)
return seq.location
# Try as list of pairs
return sum([FeatureLocation(s, e) for s, e in seq])
@property # read-only
def exons(self):
return self._exons
@property # read-only
def exon_list(self):
return list(self.exons)
def pos_factory(pos_type):
"""
Convert string or int pos to appropriate Position object
Parameters
----------
pos_type : str
Position type (Genome, CDS, Protein)
"""
def wrapper(fn):
@wraps(fn)
def make_pos(self, pos, dialect=None):
# retrieve Position object
_obj = getattr(MapPositions, pos_type + "Position")
# if pos is not already Position object, make it one
if not isinstance(pos, _obj):
# no dialect: use default constructor
if dialect is None:
pos = _obj(pos)
# use dialect alternate constructor
else:
pos = _obj.from_dialect(dialect, pos)
# call function with new pos
return fn(self, pos, dialect)
return make_pos
return wrapper
@pos_factory("Genome")
def g2c(self, gpos, dialect=None):
"""Convert integer from genomic to CDS coordinates
Parameters
----------
gpos : int
Genomic position
dialect : str
Coordinate dialect (GenBank or HGVS, default None)
Returns
-------
CDSPosition
"""
gpos = int(gpos)
fmts = CDSPosition.fmt_dict
def _simple_g2c(g):
return CDSPosition(self.exon_list.index(g))
# within exon
if gpos in self.exons:
return _simple_g2c(gpos)
# before CDS
if gpos < self.exons.start:
return CDSPosition(fmts['post-CDS'] % (gpos - self.exons.start))
# after CDS
if gpos >= self.exons.end:
return CDSPosition(fmts['pre-CDS'] % (gpos - self.exons.end + 1))
# intron
# set start of first intron
prev_end = self.exons.parts[0].end
for part in self.exons.parts[1:]:
# not in this intron
if gpos > part.start:
prev_end = part.end
continue
len_intron = part.start - prev_end
# second half (exclusive) of intron
# offset > middle of intron
if gpos - prev_end > floor(len_intron / 2.0):
anchor = _simple_g2c(part.start)
offset = gpos - part.start
assert offset < 0
# first half (inclusive) of intron
else:
anchor = _simple_g2c(prev_end - 1)
offset = gpos - prev_end + 1
assert offset > 0
assert self.check_intron(anchor, offset)
return CDSPosition(fmts['intron'] % (anchor, offset))
assert False # function should return for every integer
# TODO verify that values of offset are sane
def check_intron(self, anchor, offset):
"""
Verify that CDS-relative intron position is valid with given exons.
Parameters
----------
anchor : int
Intron anchor (closest CDS position)
offset : int
Intron offset (distance to anchor)
Returns
-------
bool
"""
for exon in self.exons.parts:
start = int(self.g2c(exon.start))
if anchor == start:
if offset > 0:
raise CDSPositionError(
"Invalid intron: offset from exon start must be negative.")
return True
end = int(self.g2c(exon.end - 1))
if anchor == end:
if offset < 0:
raise CDSPositionError(
"Invalid intron: offset from exon end must be positive.")
return True
raise CDSPositionError(
"Invalid intron: anchor must be start or end of an exon.")
def get_strand(self, gpos):
for exon in self.exons.parts:
if gpos in exon:
return exon.strand
raise ValueError("Provided gpos must be exon")
@pos_factory("CDS")
def c2g(self, cpos, dialect=None):
"""Convert from CDS to genomic coordinates
Parameters
----------
cpos : int
CDS position
dialect : str
Coordinate dialect (GenBank or HGVS, default None)
Returns
-------
GenomePosition
"""
if cpos.pos_type == "pre-CDS":
return GenomePosition(self.exons.start + cpos.offset)
elif cpos.pos_type == "post-CDS":
return GenomePosition(self.exons.end - 1 + cpos.offset)
g_anchor = self.exon_list[cpos.anchor]
if cpos.pos_type == "exon":
strand = self.get_strand(g_anchor)
return GenomePosition(g_anchor, strand=strand)
elif cpos.pos_type == "intron":
offset = cpos.offset
if self.check_intron(cpos.anchor, offset):
return GenomePosition(g_anchor + offset)
assert False # all positions should be one of the 4 types
@pos_factory("CDS")
def c2p(self, cpos, dialect=None):
"""Convert from CDS to protein coordinates
Parameters
----------
cpos : int
CDS position
dialect : str
Coordinate dialect (GenBank or HGVS, default None)
Returns
-------
ProteinPosition
"""
try:
cpos = int(cpos)
except TypeError:
raise CDSPositionError("'%s' does not correspond to a protein"
% repr(cpos))
return ProteinPosition(int(cpos / 3.0))
@pos_factory("Genome")
def g2p(self, gpos, dialect=None):
"""Convert integer from genomic to protein coordinates
Parameters
----------
gpos : int
Genomic position
dialect : str
Coordinate dialect (GenBank or HGVS, default None)
Returns
-------
ProteinPosition
"""
return self.c2p(self.g2c(gpos))
@pos_factory("Protein")
def p2c(self, ppos, dialect=None):
"""Convert integer from protein coordinate to CDS closed range
Parameters
----------
ppos : int
Protein position
dialect : str
Coordinate dialect (GenBank or HGVS, default None)
Returns
-------
CDSPosition
"""
try:
ppos = int(ppos)
except TypeError:
return None
if ppos < 0:
raise ProteinPositionError("'%s' should not be negative")
# FIXME is CDS guaranteed to have len % 3 == 0?
first_base = (ppos) * 3
last_base = first_base + 2
if last_base > len(self.exons):
raise ProteinPositionError("'%s' is too large")
return (CDSPosition(first_base), CDSPosition(last_base))
@pos_factory("Protein")
def p2g(self, ppos, dialect=None):
"""Convert integer from protein to genomic coordinates
Parameters
----------
ppos : int
Protein position
dialect : str
Coordinate dialect (GenBank or HGVS, default None)
Returns
-------
GenomePosition
"""
return tuple(self.c2g(x) for x in self.p2c(ppos))
if __name__ == '__main__':
# The following exons are from AB026906.1.
# test case: g.7872 -> c.274 -> p.92
# N.B. These are python counting coordinates (0-based)
exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
def test_list(g_range):
cm = CoordinateMapper(exons)
for g1 in g_range:
print g1,
c1 = cm.g2c(g1)
print c1,
p1 = cm.c2p(c1)
print p1,
if p1:
c2 = cm.p2c(p1)[0]
else:
c2 = c1
print ' | ', c2,
g2 = cm.c2g(c2)
print g2
print cm.g2p(7872)
print cm.p2g(92)
def test_simple():
from SeqFeature import SeqFeature
location = sum([FeatureLocation(2, 4, +1),
FeatureLocation(8, 11, +1),
FeatureLocation(16, 18, +1)])
simple_exons = SeqFeature(location, type="CDS")
cm = CoordinateMapper(simple_exons)
print cm.exons
print list(cm.exons)
print range(len(cm.exons))
for i in range(len(cm.exons)):
print "%3s" % cm.c2g(i),
print
for i in xrange(20):
print "%3d" % i,
print
for i in xrange(20):
print "%3s" % cm.g2c(i),
print
r1 = (7870, 7871, 7872, 7873, 7874)
r2 = (5807, 5808, 5809,
6871, 6872, 6873, 6874, 6875,
7766, 7767, 7768, 7769,
13784, 13785, 13786)
test_list(r1)
#test_list(r2)
print
test_simple()
from copy import copy
import re
class InvalidPositionError(ValueError):
"Exception for bad coordinates"
class GenomePositionError(InvalidPositionError):
"Exception for bad genome coordinates"
class CDSPositionError(InvalidPositionError):
"Exception for bad CDS coordinates"
class ProteinPositionError(InvalidPositionError):
"Exception for bad protein coordinates"
class MapPosition(object):
"""Generic position for coordinate mapping."""
def __init__(self, pos, index=None, **kwargs):
"""Init class from HGVS position.
Parameters
----------
pos : int, str
Position to convert
index : int
Start of counting (e.g. 1 for GenBank or HGVS)
"""
self.index = index
if pos and self.index:
pos -= index
self.pos = pos
@classmethod
def from_dialect(cls, dialect, pos, strand=None):
"""Init class from given dialect.
Parameters
----------
dialect : str
input dialect (HGVS or GenBank)
pos : int, str
dialect position to convert
strand : int
Strand of position (-1 or +1, default None)
Returns
-------
cls
"""
if dialect is None:
return cls(pos, strand=strand)
try:
from_func = getattr(cls, "from_" + dialect.lower())
except AttributeError:
raise ValueError("Dialect '%s' not valid" % dialect)
return from_func(pos, strand)
@classmethod
def from_hgvs(cls, hgvs_pos, strand=None):
"""Init class from HGVS position.
Parameters
----------
hgvs_pos : int, str
HGVS position to convert
strand : int
Strand of position (-1 or +1, default None)
Returns
-------
cls
"""
return cls(hgvs_pos, index=1, strand=strand, post_fmt="*%d")
@classmethod
def from_genbank(cls, gbk_pos, strand=None):
"""Init class from GenBank position.
Parameters
----------
gbk_pos : int, str
GenBank position to convert
strand : int
Strand of position (-1 or +1, default None)
Returns
-------
cls
"""
return cls(gbk_pos, index=1, strand=strand)
def to(self, dialect):
"""Convert position to specified dialect.
Parameters
----------
dialect : str
Output dialect (HGVS or GenBank)
"""
if dialect is None:
return self.to_str()
try:
to_func = getattr(self, "to_" + dialect.lower())
except AttributeError:
raise ValueError("Dialect '%s' not valid" % dialect)
return to_func()
def to_hgvs(self):
"""Convert position to HGVS"""
if self.pos or self.pos == 0:
return self.pos + 1
return None
def to_genbank(self):
"""Convert position to GenBank"""
if self.pos or self.pos == 0:
return self.pos + 1
return None
def to_str(self):
"""Make string representation without conversion"""
return self.pos
def __str__(self):
"""String representation"""
return str(self.to_str())
def __int__(self):
"""Integer representation"""
return self.pos
def __repr__(self):
"""Detailed representation for debugging"""
return "%s(%s)" % (self.__class__.__name__, self.to_str())
class GenomePosition(MapPosition):
"""Genome position for coordinate mapping"""
def __init__(self, gpos, index=None, strand=None, **kwargs):
# FIXME if index is string, error may be raised
if gpos < (index or 0):
raise GenomePositionError("Genome position cannot be negative.")
# call superclass constructor
MapPosition.__init__(self, gpos, index)
self.strand = strand
def __eq__(self, other):
"""Compare equal to other GenomePosition with same pos
or integer equal to pos"""
if isinstance(other, int):
return self.pos == other
return isinstance(other, GenomePosition) and self.pos == other.pos
class ProteinPosition(MapPosition):
"""Protein position for coordinate mapping"""
def __init__(self, ppos, index=None, **kwargs):
# call superclass constructor
MapPosition.__init__(self, ppos, index)
def __eq__(self, other):
"""Compare equal to other ProteinPosition with same pos
or integer equal to pos"""
if isinstance(other, int):
return self.pos == other
return isinstance(other, ProteinPosition) and self.pos == other.pos
class CDSPosition(MapPosition):
"""CDS position for coordinate mapping"""
def __init__(self, cpos, index=None,
pre_fmt=None, post_fmt=None, **kwargs):
# Dispatch types and return anchor, offset
if isinstance(cpos, int):
anchor, offset = self.parse_int(cpos)
elif isinstance(cpos, str):
anchor, offset = self.parse_str(cpos, pre_fmt, post_fmt)
else:
raise CDSPositionError("'%s' is of unknown type" % cpos)
# Set instance anchor and offset
# call superclass constructor
MapPosition.__init__(self, anchor, index)
self._offset = offset
self.validate()
@classmethod
def from_anchor(cls, anchor=None, offset=None):
"""Init CDSPosition with anchor, offset pair.
Parameters
----------
anchor : int
CDS anchor (coordinate of nearest exon position)
offset : int
Offset from nearest exon position
Returns
-------
CDSPosition
"""
if not anchor:
pos = cls("%+d" % offset)
elif anchor < 0:
raise CDSPositionError("Anchor cannot be negative.")
else:
pos = cls(anchor)
pos.offset = offset
return pos
@property
def offset(self):
return self._offset
@offset.setter
def offset(self, val):
"Validate new offset, then update"
self.validate(offset=val)
self._offset = val
@property
def anchor(self):
return self.pos
@anchor.setter
def anchor(self, val):
"Validate new anchor, then update pos"
self.validate(anchor=val)
self.pos = val
def validate(self, anchor="sentinel", offset="sentinel"):
"""Check whether anchor and offset yield a valid position.
Parameters
----------
anchor : int
CDS anchor (coordinate of nearest exon position)
offset : int
Offset from nearest exon position
Returns
-------
bool
"""
if anchor == "sentinel":
anchor = self.anchor
if offset == "sentinel":
offset = self.offset
if offset == 0:
raise CDSPositionError(
"Offset may not be 0. For no offset, use None.")
if not anchor and anchor != 0 and not offset:
raise CDSPositionError(
"At least one of pos or offset must be defined")
if anchor and anchor < 0:
raise CDSPositionError("CDS anchor may not be negative.")
return True
@property
def pos_type(self):
"Type of CDS position, dynamically determined from values"
# inside CDS
if self.pos or self.pos == 0:
if not self.offset:
return "exon"
return "intron"
# outside CDS
elif self.offset > 0:
return "post-CDS"
return "pre-CDS"
assert False # all integers should return
@property
def sub_dict(self):
if self.pos_type == 'intron':
return {'pos': self.pos, 'offset': self.offset}
if self.pos_type == 'exon':
return {'pos': self.pos}
if self.pos_type == 'post-CDS' or self.pos_type == 'pre-CDS':
return {'offset': self.offset}
fmt_dict = {
'exon': "%d",
'intron': "%d%+d",
'post-CDS': "%+d",
'pre-CDS': "%+d",
}
@staticmethod
def _shift_index(pos_dict, idx):
"""Increment value of dict key 'pos' by given index.
Parameters
----------
pos_dict : dict
Dictionary to search for 'pos'
idx : int
Index to add to pos_dict['pos']
Returns
-------
dict
"""
if 'pos' in pos_dict and idx:
pos_dict['pos'] += idx
return pos_dict
def _make_str(self, val_dict=None, fmt_dict=None):
"""Retrieve format string and substitute values.
Parameters
----------
val_dict : dict
Dictionary of values to substitute into string
fmt_dict : dict
Dictionary of format strings for each pos type
Returns
-------
str
"""
# set default dicts if parameter dicts are false
fmt_dict = fmt_dict or self.fmt_dict
val_dict = val_dict or self.sub_dict
return fmt_dict[self.pos_type] % tuple(val_dict.values())
@staticmethod
def parse_int(cpos):
"""Parse int to anchor, offset pair
Parameters
----------
cpos : int
Integer position to convert
Returns
-------
tuple
"""
# treat negative int as offset
if cpos < 0:
return (None, cpos)
# treat positive int as anchor
return (cpos, None)
@staticmethod
def parse_str(cpos, pre_fmt, post_fmt):
"""Parse string to anchor, offset pair
Parameters
----------
cpos : str
String position to convert
pre_fmt : str
Format string for pre-CDS positions
post_fmt : str
Format string for post-CDS positions
Returns
-------
tuple
"""
delimiters = "\+\-"
if post_fmt and "*" in post_fmt:
delimiters += "\*"
# parenth causes split pattern to be kept
delim_rx = re.compile("([%s])" % delimiters)
parsed = delim_rx.split(cpos, 1)
if len(parsed) == 1: # may be int
return CDSPosition.parse_int(int(cpos))
# 1 split is normally length 2 but delimiter is also kept
elif len(parsed) != 3:
raise CDSPositionError(
"String '%s' not parseable for this position." % cpos)
if parsed[0] == "":
anchor = None
else:
anchor = int(parsed[0])
if post_fmt and parsed[1] == "*" == post_fmt[0]:
parsed[1] = "+"
offset = int("".join(parsed[1:]))
return (anchor, offset)
def to_hgvs(self):
"""Convert CDS position to HGVS"""
fmt_dict = copy(self.fmt_dict)
fmt_dict['post-CDS'] = "*%d"
sub_dict = self._shift_index(self.sub_dict, 1)
return self._make_str(sub_dict, fmt_dict)
def to_genbank(self):
"""Convert CDS position to GenBank"""
sub_dict = self._shift_index(self.sub_dict, 1)
return self._make_str(sub_dict)
def to_str(self):
"""Make string representation of CDS position"""
return self._make_str()
def __int__(self):
"""Integer representation of CDS exon, otherwise NotImplemented"""
if self.pos_type == "exon":
return MapPosition.__int__(self)
return NotImplemented
def __eq__(self, other):
"""Compare equal to other MapPosition with same pos and offset
or int if exon"""
if isinstance(other, int) and self.pos_type == "exon":
return self.pos == other
return isinstance(other, CDSPosition) and \
self.pos == other.pos and \
self.offset == other.offset
if __name__ == "__main__":
def print_pos(pos_obj):
print "object:", pos_obj
print "repr:", repr(pos_obj)
print "HGVS:", pos_obj.to_hgvs()
print
g = GenomePosition.from_hgvs(6)
print_pos(g)
test_g = GenomePosition(5)
#test_c = CDSPosition("6+1")
#test_c = CDSPosition.from_hgvs("6+1")
test_c = CDSPosition.from_hgvs("*1")
#test_c = CDSPosition(6)
#test_c = CDSPosition(-1)
print_pos(test_g)
print test_c.pos_type
print_pos(test_c)
#!/usr/bin/python
from functools import wraps
import unittest
from CoordinateMapper import CoordinateMapper
from MapPositions import GenomePositionError
from MapPositions import ProteinPositionError
from MapPositions import CDSPosition, CDSPositionError
from SeqFeature import FeatureLocation, SeqFeature
from Bio.SeqRecord import SeqRecord
exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
cmap = CoordinateMapper(exons)
g_exons = xrange(7868, 7875) # len 7
c_exons = xrange(270, 279) # len 9
p_exons = xrange(90, 93) # len 3
p_exons_trip = [p for p in p_exons for n in range(3)] # len 9
c_exon_prs = ((270, 272), (273, 275), (276, 278)) # len 3
g_exon_prs = ((7868, 7870), (7871, 7873), (7874, 7876)) # len 3
g_introns = {None: (5860, 5861, 6308, 6309, 6755, 6756),
'HGVS': (5861, 5862, 6309, 6310, 6756, 6757)}
c_introns = {None: ('51+1', '51+2', '51+449', '52-448', '52-2', '52-1'),
'HGVS': ('52+1', '52+2', '52+449', '53-448', '53-2', '53-1')}
c_intron_tups = ((51, 1), (51, 2), (51, 449), (52, -448), (52, -2), (52, -1))
g_outside = {None: (0, 13785, 14000), 'HGVS': (1, 13786, 14001)}
c_outside = {None: ('-5808', '+1', '+216'), 'HGVS': ('-5808', '*1', '*216')}
c_outside_tups = ((None, -5808), (None, 1), (None, 216))
def two_dialects(fn):
@wraps(fn)
def call_tests(self):
orig_dialect = self.dialect
for dialect in (None, 'HGVS'):
self.dialect = dialect
fn(self)
self.dialect = orig_dialect
return call_tests
class TestCDSPosition(unittest.TestCase):
"""Test that CDSPosition works properly"""
def setUp(self):
self.dialect = None
@two_dialects
def testGoodIntron(self):
"""CDSPosition should match good intron values"""
for c_args, c in zip(c_intron_tups, c_introns[self.dialect]):
actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
self.assertEqual(actual, c)
@two_dialects
def testGoodOutside(self):
"""CDSPosition should match good outside-CDS values"""
for c_args, c in zip(c_outside_tups, c_outside[self.dialect]):
actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
self.assertEqual(actual, c)
def testEqual(self):
"""CDSPosition should test equal with same args"""
for args in c_intron_tups:
CPos = CDSPosition.from_anchor
self.assertEqual(CPos(*args), CPos(*args))
self.assertEqual(str(CPos(*args)), str(CPos(*args)))
#def testEqualDialects(self):
#for c_pos in c_outside:
#self.assertEqual(CDSPosition(c_pos), CDSPosition.from_hgvs(c_pos))
#self.assertEqual(c_pos, CDSPosition.from_hgvs(c_pos).to_hgvs())
def testBadAnchor(self):
"""CDSPosition should fail with negative CDS anchor"""
self.assertRaises(CDSPositionError, CDSPosition.from_anchor, -1, 1)
def testBadOffset(self):
"""CDSPosition should fail with zero offset"""
self.assertRaises(CDSPositionError, CDSPosition.from_anchor, None, 0)
def testValidate(self):
"""Setting CDSPosition anchor offset should fail for invalid cases"""
intron = CDSPosition("22+4")
self.assertRaises(CDSPositionError, setattr, intron, "offset", 0)
self.assertRaises(CDSPositionError, setattr, intron, "anchor", -5)
exon = CDSPosition("60")
self.assertRaises(CDSPositionError, setattr, exon, "anchor", None)
class TestCoordinateMapper(unittest.TestCase):
"""Test that CoordinateMapper works properly"""
def setUp(self):
self.sf = SeqFeature(sum((FeatureLocation(s, e) for s, e in exons)),
type="CDS")
def testListInit(self):
"""CoordinateMapper should take list of exon pairs"""
cm = CoordinateMapper(exons)
# FIXME CompoundLocation does not have __eq__
self.assertEqual(str(cm.exons), str(self.sf.location))
def testSeqRecordInit(self):
"""CoordinateMapper should use first CDS feature of SeqRecord"""
sr = SeqRecord("", features=[self.sf])
cm = CoordinateMapper(sr)
self.assertEqual(str(cm.exons), str(self.sf.location))
def testSeqFeatureInit(self):
"""CoordinateMapper should take a CDS SeqFeature"""
cm = CoordinateMapper(self.sf)
self.assertEqual(str(cm.exons), str(self.sf.location))
def testEmptyInit(self):
"""CoordinateMapper should fail with no arguments"""
self.assertRaises(Exception, CoordinateMapper)
class Testg2c(unittest.TestCase):
"""Test success of good input and failure of bad input to g2c"""
def setUp(self):
self.dialect = None
# what it should do
# any integer should return
def testGoodExons(self):
"""g2c should work for exon positions"""
for arg, expected in zip(g_exons, c_exons):
self.assertEqual(cmap.g2c(arg), expected)
@two_dialects
def testGoodIntronsStr(self):
"""g2c should work for intron positions (string)"""
dia = self.dialect
for arg, expect in zip(g_introns[dia], c_introns[dia]):
actual = cmap.g2c(arg, dia).to(dia)
self.assertEqual(actual, expect)
@two_dialects
def testGoodIntrons(self):
"""g2c should work for intron positions (CDSPosition)"""
for arg, tup in zip(g_introns[self.dialect], c_intron_tups):
actual = cmap.g2c(arg, self.dialect)
expect = CDSPosition.from_anchor(*tup)
self.assertEqual(actual, expect)
self.assertEqual(str(actual), str(expect))
@two_dialects
def testGoodOutsideStr(self):
"""g2c should work for outside positions (string)"""
dia = self.dialect
for arg, expected in zip(g_outside[dia], c_outside[dia]):
actual = cmap.g2c(arg, dia).to(dia)
self.assertEqual(actual, expected)
def testGoodOutside(self):
"""g2c should work for outside positions (CDSPosition)"""
for arg, tup in zip(g_outside[self.dialect], c_outside_tups):
actual = cmap.g2c(arg)
expect = CDSPosition.from_anchor(*tup)
self.assertEqual(actual, expect)
self.assertEqual(str(actual), str(expect))
# what it shouldn't do
# should it handle non-exact positions?
@two_dialects
def testZeroArg(self):
"""g2c should work for 0 in no dialect and fail for 1-index"""
args = (0, self.dialect)
if self.dialect is None:
cmap.g2c(*args)
else:
self.assertRaises(GenomePositionError, cmap.g2c, *args)
@two_dialects
def testBadArg(self):
"""g2c should fail on string, float, None, or negative"""
bad = ("string", None, 3.14, -5)
for arg in bad:
self.assertRaises(Exception, cmap.g2c, arg)
#self.assertRaises(GenomePositionError, cmap.g2c, -5)
class Testc2p(unittest.TestCase):
"""Test success of good input and failure of bad input to c2p"""
# what it should do
# integer within length of exon should return correct protein
def testGoodExons(self):
"""c2p should work for exon positions"""
for arg, expect in zip(c_exons, p_exons_trip):
self.assertEqual(cmap.c2p(arg), expect)
# FIXME should CDSPosition return None or raise error?
def testBadNotProtein(self):
"""c2p should fail for CDSPosition or str"""
bad = ("string", CDSPosition.from_anchor(7, -5))
for arg in bad:
self.assertRaises(ValueError, cmap.c2p, arg)
class Testp2c(unittest.TestCase):
"""Test success of good input and failure of bad input to p2c"""
# what it should do
# integer within length of protein should return correct range
def testGoodExons(self):
"""p2c should work for exon positions"""
#for arg, expect in p2c_exons.iteritems():
for arg, expect in zip(p_exons, c_exon_prs):
self.assertEqual(cmap.p2c(arg), expect)
def testBadTooLarge(self):
"""p2c should fail for positions longer than the max length"""
self.assertRaises(ProteinPositionError, cmap.p2c, 500)
def testBadNegative(self):
"""p2c should fail for negative protein coordinate"""
self.assertRaises(ProteinPositionError, cmap.p2c, -50)
class Testc2g(unittest.TestCase):
"""Test success of good input and failure of bad input to c2g"""
def setUp(self):
self.dialect = None
# what it should do
# integer within length of exon should return correct value
def testGoodExons(self):
"""c2g should work for exon positions"""
for arg, expected in zip(c_exons, g_exons):
self.assertEqual(cmap.c2g(arg), expected)
# CDSPosition object should return correct value
@two_dialects
def testGoodIntrons(self):
"""c2g should work for introns (CDSPosition)"""
dia = self.dialect
for c_args, g in zip(c_intron_tups, g_introns[dia]):
actual = cmap.c2g(CDSPosition.from_anchor(*c_args), dia)
self.assertEqual(actual.to(dia), g)
# FIXME doesn't work with HGVS
#@two_dialects
def testGoodOutside(self):
"""c2g should work for outside (CDSPosition)"""
for args, expect in zip(c_outside_tups, g_outside[self.dialect]):
self.assertEqual(cmap.c2g(CDSPosition.from_anchor(*args)),
expect)
# simple string should return correct value
# \d*[+-]\d+
@two_dialects
def testGoodIntronsStr(self):
"""c2g should work for intron positions (string)"""
dia = self.dialect
# c_str from dialect to *dialect* matches *dialect* g_str
for c_str, g in zip(c_introns[dia], g_introns[dia]):
self.assertEqual(cmap.c2g(c_str, dia).to(dia), g)
# c_str from dialect as *default* matches *default* g_str
for c_str, g in zip(c_introns[dia], g_introns[None]):
self.assertEqual(cmap.c2g(c_str, dia), g)
@two_dialects
def testGoodOutsideStr(self):
"""c2g should work for outside positions (string)"""
dia = self.dialect
for expected, arg in zip(g_outside[dia], c_outside[dia]):
self.assertEqual(cmap.c2g(arg, dia).to(dia), expected)
def testNegativeInt(self):
"""c2g should treat negative integer as pre-CDS"""
# XXX negative integers are treated as pre-CDS
self.assertEqual(cmap.c2g(-2), 5806)
# what it shouldn't do
# bad positions in form 123+4 where 123 is not end of an exon
# or where sign is wrong
@two_dialects
def testBadNotEnd(self):
"""c2g should fail on invalid introns"""
bad_introns = {None: ("160+5", # 160 is exonic
"51-6", # 51 is end
"52+10", # 52 is start
), 'HGVS': ("160+5", "52-6", "53+10")}
for arg in bad_introns[self.dialect]:
self.assertRaises(CDSPositionError, cmap.c2g, arg, self.dialect)
@two_dialects
def testBadString(self):
"""c2g should fail on arbitrary string"""
bad_vals = {None: ["xxx", "4-5'", "*10"], 'HGVS': ["xxx", "4-5"]}
for arg in bad_vals[self.dialect]:
self.assertRaises(ValueError, cmap.c2g, arg, self.dialect)
# integers outside length of exon should raise IndexError
def testBadTooLarge(self):
"""c2g should fail on values beyond the exon"""
num = 400
assert num > len(cmap.exons)
self.assertRaises(IndexError, cmap.c2g, num)
self.assertRaises(Exception, cmap.c2g, "%d%d" % (num, -12))
class Testp2g(unittest.TestCase):
def testGoodStr(self):
"""p2g should return expected ranges"""
for p_arg, gpos in zip(p_exons, g_exon_prs):
self.assertEqual(cmap.p2g(p_arg), gpos)
class Testg2p(unittest.TestCase):
def testGoodStr(self):
"""g2p should return correct position for exons"""
for g_arg, ppos in zip(g_exons, p_exons_trip):
self.assertEqual(cmap.g2p(g_arg), ppos)
class TestStrand(unittest.TestCase):
def setUp(self):
locations = FeatureLocation(24, 30, -1) + FeatureLocation(35, 40, +1)
feature = SeqFeature(locations, type="CDS")
self.cm = CoordinateMapper(feature)
self.g_exons = [29, 28, 27, 26, 25, 24,
35, 36, 37, 38, 39]
assert list(self.cm.exons) == self.g_exons
c_len = 11
assert c_len == len(list(self.cm.exons))
self.c_exons = xrange(c_len)
def testg2c(self):
"c2g and g2c should work for mixed-strand exons"
for c, g in zip(self.c_exons, self.g_exons):
self.assertEqual(c, self.cm.g2c(g))
self.assertEqual(g, self.cm.c2g(c))
if c < 6:
self.assertEqual(self.cm.c2g(c).strand, -1)
else:
self.assertEqual(self.cm.c2g(c).strand, +1)
if __name__ == "__main__":
#unittest.main()
suite0 = unittest.TestLoader().loadTestsFromTestCase(TestCDSPosition)
suite1 = unittest.TestLoader().loadTestsFromTestCase(TestCoordinateMapper)
suite2 = unittest.TestLoader().loadTestsFromTestCase(Testc2g)
suite3 = unittest.TestLoader().loadTestsFromTestCase(Testg2c)
suite4 = unittest.TestLoader().loadTestsFromTestCase(Testc2p)
suite5 = unittest.TestLoader().loadTestsFromTestCase(Testp2c)
suite6 = unittest.TestLoader().loadTestsFromTestCase(Testp2g)
suite7 = unittest.TestLoader().loadTestsFromTestCase(Testg2p)
suite8 = unittest.TestLoader().loadTestsFromTestCase(TestStrand)
all_suites = [
suite0,
suite1,
suite2,
suite3,
suite4,
suite5,
suite6,
suite7,
suite8,
]
all = unittest.TestSuite(all_suites)
unittest.TextTestRunner(verbosity=3).run(all)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment