Skip to content

Instantly share code, notes, and snippets.

@saulshanabrook
Last active August 29, 2015 14:01
Show Gist options
  • Save saulshanabrook/94fc5328853b86562f63 to your computer and use it in GitHub Desktop.
Save saulshanabrook/94fc5328853b86562f63 to your computer and use it in GitHub Desktop.
from collections import namedtuple
Parents = namedtuple('Parents', ['mother', 'father'])
from itertools import zip_longest
from collections import Counter, abc
class FrozenCounter(Counter):
'''
from http://stackoverflow.com/q/10045562/907060
'''
def __hash__(self):
"Implements hash(self) -> int"
if not hasattr(self, '_hash'):
self._hash = hash(frozenset(self.items()))
return self._hash
def chunks(l, n):
"""
from http://stackoverflow.com/a/312464/907060
Yield successive n-sized chunks from l.
"""
return zip(*[iter(l)] * n)
class Gene(abc.Hashable, abc.Set):
def __init__(self, alleles):
self.alleles = tuple(sorted(alleles))
self._verify()
def _verify(self):
assert len(self.alleles) == 2
assert self.alleles[0].lower() == self.alleles[1].lower()
def __contains__(self, allele):
return allele in self.alleles
def __iter__(self):
return iter(self.alleles)
def __len__(self):
return 2
def __hash__(self):
return hash(self.alleles)
def __repr__(self):
return 'Gene(\'' + ''.join(self.alleles) + '\')'
def __str__(self):
return ''.join(self.alleles)
@property
def dominant(self):
return self.alleles[0].isupper()
@property
def recessive(self):
return not self.dominant
assert repr(Gene('aA') == "Gene('Aa')")
assert repr(Gene('rR') == "Gene('rR')")
assert Gene('rr').recessive
assert Gene('Rr').dominant
assert Gene('RR').dominant
from itertools import product, chain
class Genotype(abc.Hashable, abc.Set):
def __init__(self, alleles):
self._traits = tuple(map(Gene, chunks(alleles, 2)))
def __contains__(self, gene):
return gene in self._traits
def __iter__(self):
return iter(self._traits)
def __len__(self):
return len(self._traits)
def __hash__(self):
return hash(self._traits)
def __repr__(self):
return 'Genotype(\'' + ''.join(map(str, self._traits)) + '\')'
def __str__(self):
return ''.join(map(str, self._traits))
@classmethod
def from_nested(cls, nested_genes):
return cls(chain(*nested_genes))
assert repr(Genotype('AabB') == "Genotype('AaBb')")
assert repr(Gene('rR')) == repr(Gene('Rr'))
assert Genotype.from_nested([['A', 'a'], ['B', 'b']]) == Genotype('AaBb')
from functools import partial
def all_genotype_parents_from_structure(structure):
'''
Returns all the possible parents given a phenotypicaly structure
'''
# ['Aa', 'Bb', ...]
all_alleles = [([trait[0][0].upper(), trait[0][0].lower()]) for trait in structure]
# [['Aa', 'aa', ...], ['Bb', ...]]
product_twice = lambda l: tuple(product(l, repeat=2))
all_possible_gene_pairs = map(product_twice, all_alleles)
# [[['A', 'A'], ['B', 'b']], [['A', 'A'], ['B', 'b']], ...]
all_parents = product(*all_possible_gene_pairs)
# [Genotype('AABb'), Genotype('AABb'), ...]
all_parents = list(map(Genotype.from_nested, all_parents))
# [[Genotype('AABb'), Genotype('AABb')], [Genotype('AABb'), Genotype('AABB')], ...]
parent_combinations = product(all_parents, all_parents)
return set(map(FrozenCounter, parent_combinations))
assert FrozenCounter([Genotype('RRSS'), Genotype('RRSS',)]) in all_genotype_parents_from_structure((('red', 'yellow'), ('smooth', 'r')))
assert all_genotype_parents_from_structure((('red', 'yellow'), ),) == {FrozenCounter([Genotype('RR'), Genotype('RR',)]),
FrozenCounter([Genotype('RR'), Genotype('Rr',)]),
FrozenCounter([Genotype('RR'), Genotype('rr',)]),
FrozenCounter([Genotype('rr'), Genotype('rr',)]),
FrozenCounter([Genotype('Rr'), Genotype('Rr',)]),
FrozenCounter([Genotype('Rr'), Genotype('rr',)])}
def phenotype_from_genotype(genotype, structure):
return tuple(expression[0] if gene.dominant else expression[1] for gene, expression in zip(genotype, structure))
assert phenotype_from_genotype(Genotype('bbll'), (('black', 'white'), ('long', 'short'))) == ('white', 'short')
assert phenotype_from_genotype(Genotype('bb'), (('black', 'white'),)) == ('white', )
assert phenotype_from_genotype(Genotype('Bb'), (('black', 'white'),)) == ('black', )
def normalize_values(dictionary):
sum_values = sum(dictionary.values())
return dictionary.__class__({key: value / sum_values for key, value in dictionary.items()})
assert normalize_values({'_': 1.0})['_'] == 1.0
assert normalize_values({'_': 1.0, 'h': 1.0})['_'] == 0.5
product_from_iter = lambda l: product(*l)
def offspring_genotypes(parent_genotypes):
# [(Gene('Aa'), Gene('Aa')), (Gene('Bb'), Gene('Bb'))]
gathered_genes = zip(*parent_genotypes)
# [[('A', 'A'), ('A', 'a'), ('a', 'A'), ('a', 'a')], [('B', 'B'), ('B', 'b'), ('b', 'B'), ('b', 'b')]]
gene_combinations = map(product_from_iter, gathered_genes)
# [[('A', 'A'), ('B', 'B')], [('A', 'A'), ('B', 'b')], ...]
offspring = product_from_iter(gene_combinations)
# [Genotype('AABB'), Genotype('AABb'), ...]
offspring = map(Genotype.from_nested, offspring)
return Counter(offspring)
assert offspring_genotypes((Genotype('Aa'), Genotype('aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 2})
assert offspring_genotypes((Genotype('aa'), Genotype('aa'))) == Counter({Genotype('aa'): 4})
assert offspring_genotypes((Genotype('Aa'), Genotype('Aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 1, Genotype('AA'): 1})
assert offspring_genotypes((Genotype('AaBb'), Genotype('AaBb'))) == Counter({Genotype('AaBb'): 4,
Genotype('Aabb'): 2,
Genotype('AaBB'): 2,
Genotype('aaBb'): 2,
Genotype('AABb'): 2,
Genotype('aabb'): 1,
Genotype('AABB'): 1,
Genotype('aaBB'): 1,
Genotype('AAbb'): 1})
def pprint_parents_genotypes(parents_genotypes):
return ' (x) '.join(map(str, parents_genotypes))
assert pprint_parents_genotypes(Parents(Genotype('AA'), Genotype(('aa')))) == 'AA (x) aa'
def round_values(dictionary, round_p):
return dictionary.__class__({key: round(value, round_p) for key, value in dictionary.items()})
assert(round_values({'_': 1.9898}, 1) == {'_': 2.0})
from itertools import repeat
from collections import Counter
def parent_genotypes(parents_phenotypes, offspring_phenotypes, structure):
'''
Returns the probabiblity of different parent genotypes, based on the phenotypes of the known children
and parents.
'''
parents_genotypes_probability = Counter()
# lets try a brute force technique. get a combination of every possible parents genotypes and
# try them on the parents and offspring phenotypes to see if they match.
for trial_parents_genotypes in all_genotype_parents_from_structure(structure):
# first make sure that the parents work
trial_parents_phenotypes = map(phenotype_from_genotype, trial_parents_genotypes.elements(), repeat(structure))
if Counter(trial_parents_phenotypes) == Counter(parents_phenotypes):
# then we want to see if the real offspring are a subset of the children of those parents
trial_offspring_genotypes = offspring_genotypes(trial_parents_genotypes.elements())
trial_offspring_phenotypes = Counter(map(phenotype_from_genotype, trial_offspring_genotypes.elements(), repeat(structure)))
if set(offspring_phenotypes).issubset(set(trial_offspring_phenotypes)):
# then we know that those parents could actually be the right ones, however they might not be,
# so we also need to compute, how probable it is.
# one way to look at it is, if the `trial_parents` had `len(offspring_phenotypes)` kids, what percentage
# of the time would the phenotypes of their kids be `offspring_phenotypes`?
# that gives us the probability of getting the actual litter we have with these parents.
# then we can compare that probability to the other options, and see which one is more likely.
chance_that_offspring_came_from_trial_parents = 1
trial_offspring_phenotypes_percentiles = normalize_values(trial_offspring_phenotypes)
for offspring_phenotype in offspring_phenotypes:
# calculate the chance of getting each child, and since we are talking about succesion we multiply them
chance_that_offspring_came_from_trial_parents *= trial_offspring_phenotypes_percentiles[offspring_phenotype]
parents_genotypes_probability[pprint_parents_genotypes(trial_parents_genotypes.elements())] = chance_that_offspring_came_from_trial_parents
return normalize_values(parents_genotypes_probability)
assert parent_genotypes(Parents(('black',), ('black',)), (('black',),), (('orange', 'black'),),) == {'oo (x) oo': 1.0}
assert round_values(parent_genotypes(
parents_phenotypes=(
('orange',),
('orange',)
),
offspring_phenotypes=list(Counter({
('orange',): 100,
}).elements()),
structure=(
('orange', 'black'),
),
), 1) == {'Oo (x) Oo': 0.0, 'OO (x) OO': 0.5, 'OO (x) Oo': 0.5}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment